From f26e1de5ec487c040efa845f280d110c29baea32 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 20 Sep 2018 21:51:28 +0300 Subject: video/hdmi: Constify 'buffer' to the unpack functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unpack functions just read from the passed in buffer, so make it const. Cc: Thierry Reding Cc: Hans Verkuil Cc: linux-media@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180920185145.1912-2-ville.syrjala@linux.intel.com Acked-by: Hans Verkuil --- include/linux/hdmi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index d271ff23984f..d3816170c062 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -332,7 +332,8 @@ union hdmi_infoframe { ssize_t hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, size_t size); -int hdmi_infoframe_unpack(union hdmi_infoframe *frame, void *buffer); +int hdmi_infoframe_unpack(union hdmi_infoframe *frame, + const void *buffer); void hdmi_infoframe_log(const char *level, struct device *dev, union hdmi_infoframe *frame); -- cgit v1.2.3 From 480b8b3e42c3d959f8b6346c24c088eb70ef9fc2 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 20 Sep 2018 21:51:29 +0300 Subject: video/hdmi: Pass buffer size to infoframe unpack functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make sure the infoframe unpack functions don't end up examining stack garbage or oopsing, let's pass in the size of the buffer. 
v2: Convert tda1997x.c as well (kbuild test robot) Cc: Thierry Reding Cc: Hans Verkuil Cc: linux-media@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180920185145.1912-3-ville.syrjala@linux.intel.com Acked-by: Hans Verkuil --- drivers/media/i2c/adv7511.c | 2 +- drivers/media/i2c/adv7604.c | 2 +- drivers/media/i2c/adv7842.c | 2 +- drivers/media/i2c/tc358743.c | 2 +- drivers/media/i2c/tda1997x.c | 4 ++-- drivers/video/hdmi.c | 51 ++++++++++++++++++++++++++++++++------------ include/linux/hdmi.h | 2 +- 7 files changed, 44 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/i2c/adv7511.c b/drivers/media/i2c/adv7511.c index 55c2ea0720d9..b85b181bbb6c 100644 --- a/drivers/media/i2c/adv7511.c +++ b/drivers/media/i2c/adv7511.c @@ -550,7 +550,7 @@ static void log_infoframe(struct v4l2_subdev *sd, const struct adv7511_cfg_read_ buffer[3] = 0; buffer[3] = hdmi_infoframe_checksum(buffer, len + 4); - if (hdmi_infoframe_unpack(&frame, buffer) < 0) { + if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, cri->desc); return; } diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index 668be2bca57a..2e7a28dbad4e 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -2418,7 +2418,7 @@ static int adv76xx_read_infoframe(struct v4l2_subdev *sd, int index, buffer[i + 3] = infoframe_read(sd, adv76xx_cri[index].payload_addr + i); - if (hdmi_infoframe_unpack(frame, buffer) < 0) { + if (hdmi_infoframe_unpack(frame, buffer, sizeof(buffer)) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, adv76xx_cri[index].desc); return -ENOENT; diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c index 4f8fbdd00e35..2cfd03f929b2 100644 --- a/drivers/media/i2c/adv7842.c +++ b/drivers/media/i2c/adv7842.c @@ -2563,7 +2563,7 @@ static void log_infoframe(struct v4l2_subdev 
*sd, struct adv7842_cfg_read_infofr for (i = 0; i < len; i++) buffer[i + 3] = infoframe_read(sd, cri->payload_addr + i); - if (hdmi_infoframe_unpack(&frame, buffer) < 0) { + if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, cri->desc); return; } diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c index 44c41933415a..519bf92508d5 100644 --- a/drivers/media/i2c/tc358743.c +++ b/drivers/media/i2c/tc358743.c @@ -444,7 +444,7 @@ static void print_avi_infoframe(struct v4l2_subdev *sd) i2c_rd(sd, PK_AVI_0HEAD, buffer, HDMI_INFOFRAME_SIZE(AVI)); - if (hdmi_infoframe_unpack(&frame, buffer) < 0) { + if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) { v4l2_err(sd, "%s: unpack of AVI infoframe failed\n", __func__); return; } diff --git a/drivers/media/i2c/tda1997x.c b/drivers/media/i2c/tda1997x.c index d114ac5243ec..195a1fc74ee8 100644 --- a/drivers/media/i2c/tda1997x.c +++ b/drivers/media/i2c/tda1997x.c @@ -1253,7 +1253,7 @@ tda1997x_parse_infoframe(struct tda1997x_state *state, u16 addr) /* read data */ len = io_readn(sd, addr, sizeof(buffer), buffer); - err = hdmi_infoframe_unpack(&frame, buffer); + err = hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)); if (err) { v4l_err(state->client, "failed parsing %d byte infoframe: 0x%04x/0x%02x\n", @@ -1928,7 +1928,7 @@ static int tda1997x_log_infoframe(struct v4l2_subdev *sd, int addr) /* read data */ len = io_readn(sd, addr, sizeof(buffer), buffer); v4l2_dbg(1, debug, sd, "infoframe: addr=%d len=%d\n", addr, len); - err = hdmi_infoframe_unpack(&frame, buffer); + err = hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)); if (err) { v4l_err(state->client, "failed parsing %d byte infoframe: 0x%04x/0x%02x\n", diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index 65b915ea4936..b5d491014b0b 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -1005,8 +1005,9 @@ EXPORT_SYMBOL(hdmi_infoframe_log); /** * 
hdmi_avi_infoframe_unpack() - unpack binary buffer to a HDMI AVI infoframe - * @buffer: source buffer * @frame: HDMI AVI infoframe + * @buffer: source buffer + * @size: size of buffer * * Unpacks the information contained in binary @buffer into a structured * @frame of the HDMI Auxiliary Video (AVI) information frame. @@ -1016,11 +1017,14 @@ EXPORT_SYMBOL(hdmi_infoframe_log); * Returns 0 on success or a negative error code on failure. */ static int hdmi_avi_infoframe_unpack(struct hdmi_avi_infoframe *frame, - const void *buffer) + const void *buffer, size_t size) { const u8 *ptr = buffer; int ret; + if (size < HDMI_INFOFRAME_SIZE(AVI)) + return -EINVAL; + if (ptr[0] != HDMI_INFOFRAME_TYPE_AVI || ptr[1] != 2 || ptr[2] != HDMI_AVI_INFOFRAME_SIZE) @@ -1068,8 +1072,9 @@ static int hdmi_avi_infoframe_unpack(struct hdmi_avi_infoframe *frame, /** * hdmi_spd_infoframe_unpack() - unpack binary buffer to a HDMI SPD infoframe - * @buffer: source buffer * @frame: HDMI SPD infoframe + * @buffer: source buffer + * @size: size of buffer * * Unpacks the information contained in binary @buffer into a structured * @frame of the HDMI Source Product Description (SPD) information frame. @@ -1079,11 +1084,14 @@ static int hdmi_avi_infoframe_unpack(struct hdmi_avi_infoframe *frame, * Returns 0 on success or a negative error code on failure. 
*/ static int hdmi_spd_infoframe_unpack(struct hdmi_spd_infoframe *frame, - const void *buffer) + const void *buffer, size_t size) { const u8 *ptr = buffer; int ret; + if (size < HDMI_INFOFRAME_SIZE(SPD)) + return -EINVAL; + if (ptr[0] != HDMI_INFOFRAME_TYPE_SPD || ptr[1] != 1 || ptr[2] != HDMI_SPD_INFOFRAME_SIZE) { @@ -1106,8 +1114,9 @@ static int hdmi_spd_infoframe_unpack(struct hdmi_spd_infoframe *frame, /** * hdmi_audio_infoframe_unpack() - unpack binary buffer to a HDMI AUDIO infoframe - * @buffer: source buffer * @frame: HDMI Audio infoframe + * @buffer: source buffer + * @size: size of buffer * * Unpacks the information contained in binary @buffer into a structured * @frame of the HDMI Audio information frame. @@ -1117,11 +1126,14 @@ static int hdmi_spd_infoframe_unpack(struct hdmi_spd_infoframe *frame, * Returns 0 on success or a negative error code on failure. */ static int hdmi_audio_infoframe_unpack(struct hdmi_audio_infoframe *frame, - const void *buffer) + const void *buffer, size_t size) { const u8 *ptr = buffer; int ret; + if (size < HDMI_INFOFRAME_SIZE(AUDIO)) + return -EINVAL; + if (ptr[0] != HDMI_INFOFRAME_TYPE_AUDIO || ptr[1] != 1 || ptr[2] != HDMI_AUDIO_INFOFRAME_SIZE) { @@ -1151,8 +1163,9 @@ static int hdmi_audio_infoframe_unpack(struct hdmi_audio_infoframe *frame, /** * hdmi_vendor_infoframe_unpack() - unpack binary buffer to a HDMI vendor infoframe - * @buffer: source buffer * @frame: HDMI Vendor infoframe + * @buffer: source buffer + * @size: size of buffer * * Unpacks the information contained in binary @buffer into a structured * @frame of the HDMI Vendor information frame. 
@@ -1163,7 +1176,7 @@ static int hdmi_audio_infoframe_unpack(struct hdmi_audio_infoframe *frame, */ static int hdmi_vendor_any_infoframe_unpack(union hdmi_vendor_any_infoframe *frame, - const void *buffer) + const void *buffer, size_t size) { const u8 *ptr = buffer; size_t length; @@ -1171,6 +1184,9 @@ hdmi_vendor_any_infoframe_unpack(union hdmi_vendor_any_infoframe *frame, u8 hdmi_video_format; struct hdmi_vendor_infoframe *hvf = &frame->hdmi; + if (size < HDMI_INFOFRAME_HEADER_SIZE) + return -EINVAL; + if (ptr[0] != HDMI_INFOFRAME_TYPE_VENDOR || ptr[1] != 1 || (ptr[2] != 4 && ptr[2] != 5 && ptr[2] != 6)) @@ -1178,6 +1194,9 @@ hdmi_vendor_any_infoframe_unpack(union hdmi_vendor_any_infoframe *frame, length = ptr[2]; + if (size < HDMI_INFOFRAME_HEADER_SIZE + length) + return -EINVAL; + if (hdmi_infoframe_checksum(buffer, HDMI_INFOFRAME_HEADER_SIZE + length) != 0) return -EINVAL; @@ -1224,8 +1243,9 @@ hdmi_vendor_any_infoframe_unpack(union hdmi_vendor_any_infoframe *frame, /** * hdmi_infoframe_unpack() - unpack binary buffer to a HDMI infoframe - * @buffer: source buffer * @frame: HDMI infoframe + * @buffer: source buffer + * @size: size of buffer * * Unpacks the information contained in binary buffer @buffer into a structured * @frame of a HDMI infoframe. @@ -1235,23 +1255,26 @@ hdmi_vendor_any_infoframe_unpack(union hdmi_vendor_any_infoframe *frame, * Returns 0 on success or a negative error code on failure. 
*/ int hdmi_infoframe_unpack(union hdmi_infoframe *frame, - const void *buffer) + const void *buffer, size_t size) { int ret; const u8 *ptr = buffer; + if (size < HDMI_INFOFRAME_HEADER_SIZE) + return -EINVAL; + switch (ptr[0]) { case HDMI_INFOFRAME_TYPE_AVI: - ret = hdmi_avi_infoframe_unpack(&frame->avi, buffer); + ret = hdmi_avi_infoframe_unpack(&frame->avi, buffer, size); break; case HDMI_INFOFRAME_TYPE_SPD: - ret = hdmi_spd_infoframe_unpack(&frame->spd, buffer); + ret = hdmi_spd_infoframe_unpack(&frame->spd, buffer, size); break; case HDMI_INFOFRAME_TYPE_AUDIO: - ret = hdmi_audio_infoframe_unpack(&frame->audio, buffer); + ret = hdmi_audio_infoframe_unpack(&frame->audio, buffer, size); break; case HDMI_INFOFRAME_TYPE_VENDOR: - ret = hdmi_vendor_any_infoframe_unpack(&frame->vendor, buffer); + ret = hdmi_vendor_any_infoframe_unpack(&frame->vendor, buffer, size); break; default: ret = -EINVAL; diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index d3816170c062..a577d4ae2570 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -333,7 +333,7 @@ union hdmi_infoframe { ssize_t hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, size_t size); int hdmi_infoframe_unpack(union hdmi_infoframe *frame, - const void *buffer); + const void *buffer, size_t size); void hdmi_infoframe_log(const char *level, struct device *dev, union hdmi_infoframe *frame); -- cgit v1.2.3 From 468d6a4996fb67228e94c9ffd90a715e754a8283 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 20 Sep 2018 21:51:30 +0300 Subject: video/hdmi: Constify infoframe passed to the log functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The log functions don't modify the passed in infoframe so make it const. 
Cc: Thierry Reding Cc: Hans Verkuil Cc: linux-media@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180920185145.1912-4-ville.syrjala@linux.intel.com Acked-by: Hans Verkuil --- drivers/video/hdmi.c | 22 +++++++++++----------- include/linux/hdmi.h | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index b5d491014b0b..53e7ee2c83fc 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -471,7 +471,7 @@ static const char *hdmi_infoframe_type_get_name(enum hdmi_infoframe_type type) static void hdmi_infoframe_log_header(const char *level, struct device *dev, - struct hdmi_any_infoframe *frame) + const struct hdmi_any_infoframe *frame) { hdmi_log("HDMI infoframe: %s, version %u, length %u\n", hdmi_infoframe_type_get_name(frame->type), @@ -673,10 +673,10 @@ hdmi_content_type_get_name(enum hdmi_content_type content_type) */ static void hdmi_avi_infoframe_log(const char *level, struct device *dev, - struct hdmi_avi_infoframe *frame) + const struct hdmi_avi_infoframe *frame) { hdmi_infoframe_log_header(level, dev, - (struct hdmi_any_infoframe *)frame); + (const struct hdmi_any_infoframe *)frame); hdmi_log(" colorspace: %s\n", hdmi_colorspace_get_name(frame->colorspace)); @@ -750,12 +750,12 @@ static const char *hdmi_spd_sdi_get_name(enum hdmi_spd_sdi sdi) */ static void hdmi_spd_infoframe_log(const char *level, struct device *dev, - struct hdmi_spd_infoframe *frame) + const struct hdmi_spd_infoframe *frame) { u8 buf[17]; hdmi_infoframe_log_header(level, dev, - (struct hdmi_any_infoframe *)frame); + (const struct hdmi_any_infoframe *)frame); memset(buf, 0, sizeof(buf)); @@ -886,10 +886,10 @@ hdmi_audio_coding_type_ext_get_name(enum hdmi_audio_coding_type_ext ctx) */ static void hdmi_audio_infoframe_log(const char *level, struct device *dev, - struct hdmi_audio_infoframe *frame) + const struct hdmi_audio_infoframe *frame) { 
hdmi_infoframe_log_header(level, dev, - (struct hdmi_any_infoframe *)frame); + (const struct hdmi_any_infoframe *)frame); if (frame->channels) hdmi_log(" channels: %u\n", frame->channels - 1); @@ -949,12 +949,12 @@ hdmi_3d_structure_get_name(enum hdmi_3d_structure s3d_struct) static void hdmi_vendor_any_infoframe_log(const char *level, struct device *dev, - union hdmi_vendor_any_infoframe *frame) + const union hdmi_vendor_any_infoframe *frame) { - struct hdmi_vendor_infoframe *hvf = &frame->hdmi; + const struct hdmi_vendor_infoframe *hvf = &frame->hdmi; hdmi_infoframe_log_header(level, dev, - (struct hdmi_any_infoframe *)frame); + (const struct hdmi_any_infoframe *)frame); if (frame->any.oui != HDMI_IEEE_OUI) { hdmi_log(" not a HDMI vendor infoframe\n"); @@ -984,7 +984,7 @@ hdmi_vendor_any_infoframe_log(const char *level, */ void hdmi_infoframe_log(const char *level, struct device *dev, - union hdmi_infoframe *frame) + const union hdmi_infoframe *frame) { switch (frame->any.type) { case HDMI_INFOFRAME_TYPE_AVI: diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index a577d4ae2570..bce1abb1fe57 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -335,6 +335,6 @@ hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, size_t size); int hdmi_infoframe_unpack(union hdmi_infoframe *frame, const void *buffer, size_t size); void hdmi_infoframe_log(const char *level, struct device *dev, - union hdmi_infoframe *frame); + const union hdmi_infoframe *frame); #endif /* _DRM_HDMI_H */ -- cgit v1.2.3 From c5e69ab35c0d7069ad860c5cb44a5986e2322160 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 21 Sep 2018 17:33:32 +0300 Subject: video/hdmi: Constify infoframe passed to the pack functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's make the infoframe pack functions usable with a const infoframe structure. 
This allows us to precompute the infoframe earlier, and still pack it later when we're no longer allowed to modify the structure. So now we end up with a _check()+_pack_only() or _pack() functions depending on whether you want to precompute the infoframes or not. The names aren't great but I was lazy and didn't want to change all the drivers. v2: Deal with exynos churn Actually export the new funcs v3: Fix various documentation fails (Hans) Cc: Thierry Reding Cc: Hans Verkuil Cc: linux-media@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180921143332.28970-1-ville.syrjala@linux.intel.com Acked-by: Hans Verkuil --- drivers/video/hdmi.c | 425 +++++++++++++++++++++++++++++++++++++++++++++++---- include/linux/hdmi.h | 19 ++- 2 files changed, 416 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index 53e7ee2c83fc..08d94ab00467 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -68,8 +68,36 @@ int hdmi_avi_infoframe_init(struct hdmi_avi_infoframe *frame) } EXPORT_SYMBOL(hdmi_avi_infoframe_init); +static int hdmi_avi_infoframe_check_only(const struct hdmi_avi_infoframe *frame) +{ + if (frame->type != HDMI_INFOFRAME_TYPE_AVI || + frame->version != 2 || + frame->length != HDMI_AVI_INFOFRAME_SIZE) + return -EINVAL; + + if (frame->picture_aspect > HDMI_PICTURE_ASPECT_16_9) + return -EINVAL; + + return 0; +} + /** - * hdmi_avi_infoframe_pack() - write HDMI AVI infoframe to binary buffer + * hdmi_avi_infoframe_check() - check a HDMI AVI infoframe + * @frame: HDMI AVI infoframe + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields. + * + * Returns 0 on success or a negative error code on failure. 
+ */ +int hdmi_avi_infoframe_check(struct hdmi_avi_infoframe *frame) +{ + return hdmi_avi_infoframe_check_only(frame); +} +EXPORT_SYMBOL(hdmi_avi_infoframe_check); + +/** + * hdmi_avi_infoframe_pack_only() - write HDMI AVI infoframe to binary buffer * @frame: HDMI AVI infoframe * @buffer: destination buffer * @size: size of buffer @@ -82,20 +110,22 @@ EXPORT_SYMBOL(hdmi_avi_infoframe_init); * Returns the number of bytes packed into the binary buffer or a negative * error code on failure. */ -ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer, - size_t size) +ssize_t hdmi_avi_infoframe_pack_only(const struct hdmi_avi_infoframe *frame, + void *buffer, size_t size) { u8 *ptr = buffer; size_t length; + int ret; + + ret = hdmi_avi_infoframe_check_only(frame); + if (ret) + return ret; length = HDMI_INFOFRAME_HEADER_SIZE + frame->length; if (size < length) return -ENOSPC; - if (frame->picture_aspect > HDMI_PICTURE_ASPECT_16_9) - return -EINVAL; - memset(buffer, 0, size); ptr[0] = frame->type; @@ -152,6 +182,36 @@ ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer, return length; } +EXPORT_SYMBOL(hdmi_avi_infoframe_pack_only); + +/** + * hdmi_avi_infoframe_pack() - check a HDMI AVI infoframe, + * and write it to binary buffer + * @frame: HDMI AVI infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields, after which it packs the information + * contained in the @frame structure into a binary representation that + * can be written into the corresponding controller registers. This function + * also computes the checksum as required by section 5.3.5 of the HDMI 1.4 + * specification. + * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. 
+ */ +ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, + void *buffer, size_t size) +{ + int ret; + + ret = hdmi_avi_infoframe_check(frame); + if (ret) + return ret; + + return hdmi_avi_infoframe_pack_only(frame, buffer, size); +} EXPORT_SYMBOL(hdmi_avi_infoframe_pack); /** @@ -178,8 +238,33 @@ int hdmi_spd_infoframe_init(struct hdmi_spd_infoframe *frame, } EXPORT_SYMBOL(hdmi_spd_infoframe_init); +static int hdmi_spd_infoframe_check_only(const struct hdmi_spd_infoframe *frame) +{ + if (frame->type != HDMI_INFOFRAME_TYPE_SPD || + frame->version != 1 || + frame->length != HDMI_SPD_INFOFRAME_SIZE) + return -EINVAL; + + return 0; +} + /** - * hdmi_spd_infoframe_pack() - write HDMI SPD infoframe to binary buffer + * hdmi_spd_infoframe_check() - check a HDMI SPD infoframe + * @frame: HDMI SPD infoframe + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields. + * + * Returns 0 on success or a negative error code on failure. + */ +int hdmi_spd_infoframe_check(struct hdmi_spd_infoframe *frame) +{ + return hdmi_spd_infoframe_check_only(frame); +} +EXPORT_SYMBOL(hdmi_spd_infoframe_check); + +/** + * hdmi_spd_infoframe_pack_only() - write HDMI SPD infoframe to binary buffer * @frame: HDMI SPD infoframe * @buffer: destination buffer * @size: size of buffer @@ -192,11 +277,16 @@ EXPORT_SYMBOL(hdmi_spd_infoframe_init); * Returns the number of bytes packed into the binary buffer or a negative * error code on failure. 
*/ -ssize_t hdmi_spd_infoframe_pack(struct hdmi_spd_infoframe *frame, void *buffer, - size_t size) +ssize_t hdmi_spd_infoframe_pack_only(const struct hdmi_spd_infoframe *frame, + void *buffer, size_t size) { u8 *ptr = buffer; size_t length; + int ret; + + ret = hdmi_spd_infoframe_check_only(frame); + if (ret) + return ret; length = HDMI_INFOFRAME_HEADER_SIZE + frame->length; @@ -222,6 +312,36 @@ ssize_t hdmi_spd_infoframe_pack(struct hdmi_spd_infoframe *frame, void *buffer, return length; } +EXPORT_SYMBOL(hdmi_spd_infoframe_pack_only); + +/** + * hdmi_spd_infoframe_pack() - check a HDMI SPD infoframe, + * and write it to binary buffer + * @frame: HDMI SPD infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields, after which it packs the information + * contained in the @frame structure into a binary representation that + * can be written into the corresponding controller registers. This function + * also computes the checksum as required by section 5.3.5 of the HDMI 1.4 + * specification. + * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. 
+ */ +ssize_t hdmi_spd_infoframe_pack(struct hdmi_spd_infoframe *frame, + void *buffer, size_t size) +{ + int ret; + + ret = hdmi_spd_infoframe_check(frame); + if (ret) + return ret; + + return hdmi_spd_infoframe_pack_only(frame, buffer, size); +} EXPORT_SYMBOL(hdmi_spd_infoframe_pack); /** @@ -242,8 +362,33 @@ int hdmi_audio_infoframe_init(struct hdmi_audio_infoframe *frame) } EXPORT_SYMBOL(hdmi_audio_infoframe_init); +static int hdmi_audio_infoframe_check_only(const struct hdmi_audio_infoframe *frame) +{ + if (frame->type != HDMI_INFOFRAME_TYPE_AUDIO || + frame->version != 1 || + frame->length != HDMI_AUDIO_INFOFRAME_SIZE) + return -EINVAL; + + return 0; +} + +/** + * hdmi_audio_infoframe_check() - check a HDMI audio infoframe + * @frame: HDMI audio infoframe + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields. + * + * Returns 0 on success or a negative error code on failure. + */ +int hdmi_audio_infoframe_check(struct hdmi_audio_infoframe *frame) +{ + return hdmi_audio_infoframe_check_only(frame); +} +EXPORT_SYMBOL(hdmi_audio_infoframe_check); + /** - * hdmi_audio_infoframe_pack() - write HDMI audio infoframe to binary buffer + * hdmi_audio_infoframe_pack_only() - write HDMI audio infoframe to binary buffer * @frame: HDMI audio infoframe * @buffer: destination buffer * @size: size of buffer @@ -256,12 +401,17 @@ EXPORT_SYMBOL(hdmi_audio_infoframe_init); * Returns the number of bytes packed into the binary buffer or a negative * error code on failure. 
*/ -ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame, - void *buffer, size_t size) +ssize_t hdmi_audio_infoframe_pack_only(const struct hdmi_audio_infoframe *frame, + void *buffer, size_t size) { unsigned char channels; u8 *ptr = buffer; size_t length; + int ret; + + ret = hdmi_audio_infoframe_check_only(frame); + if (ret) + return ret; length = HDMI_INFOFRAME_HEADER_SIZE + frame->length; @@ -297,6 +447,36 @@ ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame, return length; } +EXPORT_SYMBOL(hdmi_audio_infoframe_pack_only); + +/** + * hdmi_audio_infoframe_pack() - check a HDMI Audio infoframe, + * and write it to binary buffer + * @frame: HDMI Audio infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields, after which it packs the information + * contained in the @frame structure into a binary representation that + * can be written into the corresponding controller registers. This function + * also computes the checksum as required by section 5.3.5 of the HDMI 1.4 + * specification. + * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. 
+ */ +ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame, + void *buffer, size_t size) +{ + int ret; + + ret = hdmi_audio_infoframe_check(frame); + if (ret) + return ret; + + return hdmi_audio_infoframe_pack_only(frame, buffer, size); +} EXPORT_SYMBOL(hdmi_audio_infoframe_pack); /** @@ -319,6 +499,7 @@ int hdmi_vendor_infoframe_init(struct hdmi_vendor_infoframe *frame) * value */ frame->s3d_struct = HDMI_3D_STRUCTURE_INVALID; + frame->length = 4; return 0; } @@ -335,8 +516,42 @@ static int hdmi_vendor_infoframe_length(const struct hdmi_vendor_infoframe *fram return 4; } +static int hdmi_vendor_infoframe_check_only(const struct hdmi_vendor_infoframe *frame) +{ + if (frame->type != HDMI_INFOFRAME_TYPE_VENDOR || + frame->version != 1 || + frame->oui != HDMI_IEEE_OUI) + return -EINVAL; + + /* only one of those can be supplied */ + if (frame->vic != 0 && frame->s3d_struct != HDMI_3D_STRUCTURE_INVALID) + return -EINVAL; + + if (frame->length != hdmi_vendor_infoframe_length(frame)) + return -EINVAL; + + return 0; +} + /** - * hdmi_vendor_infoframe_pack() - write a HDMI vendor infoframe to binary buffer + * hdmi_vendor_infoframe_check() - check a HDMI vendor infoframe + * @frame: HDMI infoframe + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields. + * + * Returns 0 on success or a negative error code on failure. 
+ */ +int hdmi_vendor_infoframe_check(struct hdmi_vendor_infoframe *frame) +{ + frame->length = hdmi_vendor_infoframe_length(frame); + + return hdmi_vendor_infoframe_check_only(frame); +} +EXPORT_SYMBOL(hdmi_vendor_infoframe_check); + +/** + * hdmi_vendor_infoframe_pack_only() - write a HDMI vendor infoframe to binary buffer * @frame: HDMI infoframe * @buffer: destination buffer * @size: size of buffer @@ -349,17 +564,16 @@ static int hdmi_vendor_infoframe_length(const struct hdmi_vendor_infoframe *fram * Returns the number of bytes packed into the binary buffer or a negative * error code on failure. */ -ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame, - void *buffer, size_t size) +ssize_t hdmi_vendor_infoframe_pack_only(const struct hdmi_vendor_infoframe *frame, + void *buffer, size_t size) { u8 *ptr = buffer; size_t length; + int ret; - /* only one of those can be supplied */ - if (frame->vic != 0 && frame->s3d_struct != HDMI_3D_STRUCTURE_INVALID) - return -EINVAL; - - frame->length = hdmi_vendor_infoframe_length(frame); + ret = hdmi_vendor_infoframe_check_only(frame); + if (ret) + return ret; length = HDMI_INFOFRAME_HEADER_SIZE + frame->length; @@ -394,24 +608,134 @@ ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame, return length; } +EXPORT_SYMBOL(hdmi_vendor_infoframe_pack_only); + +/** + * hdmi_vendor_infoframe_pack() - check a HDMI Vendor infoframe, + * and write it to binary buffer + * @frame: HDMI Vendor infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields, after which it packs the information + * contained in the @frame structure into a binary representation that + * can be written into the corresponding controller registers. This function + * also computes the checksum as required by section 5.3.5 of the HDMI 1.4 + * specification. 
+ * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. + */ +ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame, + void *buffer, size_t size) +{ + int ret; + + ret = hdmi_vendor_infoframe_check(frame); + if (ret) + return ret; + + return hdmi_vendor_infoframe_pack_only(frame, buffer, size); +} EXPORT_SYMBOL(hdmi_vendor_infoframe_pack); +static int +hdmi_vendor_any_infoframe_check_only(const union hdmi_vendor_any_infoframe *frame) +{ + if (frame->any.type != HDMI_INFOFRAME_TYPE_VENDOR || + frame->any.version != 1) + return -EINVAL; + + return 0; +} + /* - * hdmi_vendor_any_infoframe_pack() - write a vendor infoframe to binary buffer + * hdmi_vendor_any_infoframe_check() - check a vendor infoframe + */ +static int +hdmi_vendor_any_infoframe_check(union hdmi_vendor_any_infoframe *frame) +{ + int ret; + + ret = hdmi_vendor_any_infoframe_check_only(frame); + if (ret) + return ret; + + /* we only know about HDMI vendor infoframes */ + if (frame->any.oui != HDMI_IEEE_OUI) + return -EINVAL; + + return hdmi_vendor_infoframe_check(&frame->hdmi); +} + +/* + * hdmi_vendor_any_infoframe_pack_only() - write a vendor infoframe to binary buffer */ static ssize_t -hdmi_vendor_any_infoframe_pack(union hdmi_vendor_any_infoframe *frame, - void *buffer, size_t size) +hdmi_vendor_any_infoframe_pack_only(const union hdmi_vendor_any_infoframe *frame, + void *buffer, size_t size) { + int ret; + + ret = hdmi_vendor_any_infoframe_check_only(frame); + if (ret) + return ret; + /* we only know about HDMI vendor infoframes */ if (frame->any.oui != HDMI_IEEE_OUI) return -EINVAL; - return hdmi_vendor_infoframe_pack(&frame->hdmi, buffer, size); + return hdmi_vendor_infoframe_pack_only(&frame->hdmi, buffer, size); +} + +/* + * hdmi_vendor_any_infoframe_pack() - check a vendor infoframe, + * and write it to binary buffer + */ +static ssize_t +hdmi_vendor_any_infoframe_pack(union hdmi_vendor_any_infoframe *frame, + void *buffer, 
size_t size) +{ + int ret; + + ret = hdmi_vendor_any_infoframe_check(frame); + if (ret) + return ret; + + return hdmi_vendor_any_infoframe_pack_only(frame, buffer, size); +} + +/** + * hdmi_infoframe_check() - check a HDMI infoframe + * @frame: HDMI infoframe + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields. + * + * Returns 0 on success or a negative error code on failure. + */ +int +hdmi_infoframe_check(union hdmi_infoframe *frame) +{ + switch (frame->any.type) { + case HDMI_INFOFRAME_TYPE_AVI: + return hdmi_avi_infoframe_check(&frame->avi); + case HDMI_INFOFRAME_TYPE_SPD: + return hdmi_spd_infoframe_check(&frame->spd); + case HDMI_INFOFRAME_TYPE_AUDIO: + return hdmi_audio_infoframe_check(&frame->audio); + case HDMI_INFOFRAME_TYPE_VENDOR: + return hdmi_vendor_any_infoframe_check(&frame->vendor); + default: + WARN(1, "Bad infoframe type %d\n", frame->any.type); + return -EINVAL; + } } +EXPORT_SYMBOL(hdmi_infoframe_check); /** - * hdmi_infoframe_pack() - write a HDMI infoframe to binary buffer + * hdmi_infoframe_pack_only() - write a HDMI infoframe to binary buffer * @frame: HDMI infoframe * @buffer: destination buffer * @size: size of buffer @@ -425,7 +749,56 @@ hdmi_vendor_any_infoframe_pack(union hdmi_vendor_any_infoframe *frame, * error code on failure. 
*/ ssize_t -hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, size_t size) +hdmi_infoframe_pack_only(const union hdmi_infoframe *frame, void *buffer, size_t size) +{ + ssize_t length; + + switch (frame->any.type) { + case HDMI_INFOFRAME_TYPE_AVI: + length = hdmi_avi_infoframe_pack_only(&frame->avi, + buffer, size); + break; + case HDMI_INFOFRAME_TYPE_SPD: + length = hdmi_spd_infoframe_pack_only(&frame->spd, + buffer, size); + break; + case HDMI_INFOFRAME_TYPE_AUDIO: + length = hdmi_audio_infoframe_pack_only(&frame->audio, + buffer, size); + break; + case HDMI_INFOFRAME_TYPE_VENDOR: + length = hdmi_vendor_any_infoframe_pack_only(&frame->vendor, + buffer, size); + break; + default: + WARN(1, "Bad infoframe type %d\n", frame->any.type); + length = -EINVAL; + } + + return length; +} +EXPORT_SYMBOL(hdmi_infoframe_pack_only); + +/** + * hdmi_infoframe_pack() - check a HDMI infoframe, + * and write it to binary buffer + * @frame: HDMI infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Validates that the infoframe is consistent and updates derived fields + * (eg. length) based on other fields, after which it packs the information + * contained in the @frame structure into a binary representation that + * can be written into the corresponding controller registers. This function + * also computes the checksum as required by section 5.3.5 of the HDMI 1.4 + * specification. + * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. 
+ */ +ssize_t +hdmi_infoframe_pack(union hdmi_infoframe *frame, + void *buffer, size_t size) { ssize_t length; diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index bce1abb1fe57..c76b50a48e48 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -163,6 +163,9 @@ struct hdmi_avi_infoframe { int hdmi_avi_infoframe_init(struct hdmi_avi_infoframe *frame); ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer, size_t size); +ssize_t hdmi_avi_infoframe_pack_only(const struct hdmi_avi_infoframe *frame, + void *buffer, size_t size); +int hdmi_avi_infoframe_check(struct hdmi_avi_infoframe *frame); enum hdmi_spd_sdi { HDMI_SPD_SDI_UNKNOWN, @@ -194,6 +197,9 @@ int hdmi_spd_infoframe_init(struct hdmi_spd_infoframe *frame, const char *vendor, const char *product); ssize_t hdmi_spd_infoframe_pack(struct hdmi_spd_infoframe *frame, void *buffer, size_t size); +ssize_t hdmi_spd_infoframe_pack_only(const struct hdmi_spd_infoframe *frame, + void *buffer, size_t size); +int hdmi_spd_infoframe_check(struct hdmi_spd_infoframe *frame); enum hdmi_audio_coding_type { HDMI_AUDIO_CODING_TYPE_STREAM, @@ -272,6 +278,9 @@ struct hdmi_audio_infoframe { int hdmi_audio_infoframe_init(struct hdmi_audio_infoframe *frame); ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame, void *buffer, size_t size); +ssize_t hdmi_audio_infoframe_pack_only(const struct hdmi_audio_infoframe *frame, + void *buffer, size_t size); +int hdmi_audio_infoframe_check(struct hdmi_audio_infoframe *frame); enum hdmi_3d_structure { HDMI_3D_STRUCTURE_INVALID = -1, @@ -299,6 +308,9 @@ struct hdmi_vendor_infoframe { int hdmi_vendor_infoframe_init(struct hdmi_vendor_infoframe *frame); ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame, void *buffer, size_t size); +ssize_t hdmi_vendor_infoframe_pack_only(const struct hdmi_vendor_infoframe *frame, + void *buffer, size_t size); +int hdmi_vendor_infoframe_check(struct hdmi_vendor_infoframe *frame); union 
hdmi_vendor_any_infoframe { struct { @@ -330,8 +342,11 @@ union hdmi_infoframe { struct hdmi_audio_infoframe audio; }; -ssize_t -hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, size_t size); +ssize_t hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, + size_t size); +ssize_t hdmi_infoframe_pack_only(const union hdmi_infoframe *frame, + void *buffer, size_t size); +int hdmi_infoframe_check(union hdmi_infoframe *frame); int hdmi_infoframe_unpack(union hdmi_infoframe *frame, const void *buffer, size_t size); void hdmi_infoframe_log(const char *level, struct device *dev, -- cgit v1.2.3 From 2d12df47eafe74bf2e22cbbebc0265db7cd47082 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Sat, 6 Oct 2018 18:40:59 +0200 Subject: PM / AVS: SmartReflex: remove unused function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit omap_sr_register_pmic() was introduced in 2010 in commit 984aa6dbf4ca ("OMAP3: PM: Adding smartreflex driver support.") . There was never any caller of this function in mainline resulting in a warning sr_init: No PMIC hook to init smartreflex for each machine where this driver is enabled. So remove the unused function and the pr_warn. Signed-off-by: Uwe Kleine-König Signed-off-by: Rafael J. 
Wysocki --- drivers/power/avs/smartreflex.c | 31 ------------------------------- include/linux/power/smartreflex.h | 5 ----- 2 files changed, 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c index 1360a7fa542c..536d99dc0008 100644 --- a/drivers/power/avs/smartreflex.c +++ b/drivers/power/avs/smartreflex.c @@ -37,7 +37,6 @@ static LIST_HEAD(sr_list); static struct omap_sr_class_data *sr_class; -static struct omap_sr_pmic_data *sr_pmic_data; static struct dentry *sr_dbg_dir; static inline void sr_write_reg(struct omap_sr *sr, unsigned offset, u32 value) @@ -780,25 +779,6 @@ void omap_sr_disable_reset_volt(struct voltagedomain *voltdm) sr_class->disable(sr, 1); } -/** - * omap_sr_register_pmic() - API to register pmic specific info. - * @pmic_data: The structure containing pmic specific data. - * - * This API is to be called from the PMIC specific code to register with - * smartreflex driver pmic specific info. Currently the only info required - * is the smartreflex init on the PMIC side. - */ -void omap_sr_register_pmic(struct omap_sr_pmic_data *pmic_data) -{ - if (!pmic_data) { - pr_warn("%s: Trying to register NULL PMIC data structure with smartreflex\n", - __func__); - return; - } - - sr_pmic_data = pmic_data; -} - /* PM Debug FS entries to enable and disable smartreflex. */ static int omap_sr_autocomp_show(void *data, u64 *val) { @@ -1065,17 +1045,6 @@ static int __init sr_init(void) { int ret = 0; - /* - * sr_init is a late init. If by then a pmic specific API is not - * registered either there is no need for anything to be done on - * the PMIC side or somebody has forgotten to register a PMIC - * handler. Warn for the second condition. 
- */ - if (sr_pmic_data && sr_pmic_data->sr_pmic_init) - sr_pmic_data->sr_pmic_init(); - else - pr_warn("%s: No PMIC hook to init smartreflex\n", __func__); - ret = platform_driver_register(&smartreflex_driver); if (ret) { pr_err("%s: platform driver register failed for SR\n", diff --git a/include/linux/power/smartreflex.h b/include/linux/power/smartreflex.h index 7b81dad712de..a586976f4784 100644 --- a/include/linux/power/smartreflex.h +++ b/include/linux/power/smartreflex.h @@ -303,9 +303,6 @@ void omap_sr_enable(struct voltagedomain *voltdm); void omap_sr_disable(struct voltagedomain *voltdm); void omap_sr_disable_reset_volt(struct voltagedomain *voltdm); -/* API to register the pmic specific data with the smartreflex driver. */ -void omap_sr_register_pmic(struct omap_sr_pmic_data *pmic_data); - /* Smartreflex driver hooks to be called from Smartreflex class driver */ int sr_enable(struct omap_sr *sr, unsigned long volt); void sr_disable(struct omap_sr *sr); @@ -320,7 +317,5 @@ static inline void omap_sr_enable(struct voltagedomain *voltdm) {} static inline void omap_sr_disable(struct voltagedomain *voltdm) {} static inline void omap_sr_disable_reset_volt( struct voltagedomain *voltdm) {} -static inline void omap_sr_register_pmic( - struct omap_sr_pmic_data *pmic_data) {} #endif #endif -- cgit v1.2.3 From 27836b641c1bf693c96c627388497b4e0f57441b Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 8 Aug 2018 16:01:22 +0200 Subject: dma-buf: remove shared fence staging in reservation object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need for that any more. Just replace the list when there isn't enough room any more for the additional fence. 
Signed-off-by: Christian König Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Link: https://patchwork.kernel.org/patch/10626143/ --- drivers/dma-buf/reservation.c | 178 ++++++++++++++---------------------------- include/linux/reservation.h | 4 - 2 files changed, 58 insertions(+), 124 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 6c95f61a32e7..5825fc336a13 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -68,105 +68,23 @@ EXPORT_SYMBOL(reservation_seqcount_string); */ int reservation_object_reserve_shared(struct reservation_object *obj) { - struct reservation_object_list *fobj, *old; - u32 max; + struct reservation_object_list *old, *new; + unsigned int i, j, k, max; old = reservation_object_get_list(obj); if (old && old->shared_max) { - if (old->shared_count < old->shared_max) { - /* perform an in-place update */ - kfree(obj->staged); - obj->staged = NULL; + if (old->shared_count < old->shared_max) return 0; - } else + else max = old->shared_max * 2; - } else - max = 4; - - /* - * resize obj->staged or allocate if it doesn't exist, - * noop if already correct size - */ - fobj = krealloc(obj->staged, offsetof(typeof(*fobj), shared[max]), - GFP_KERNEL); - if (!fobj) - return -ENOMEM; - - obj->staged = fobj; - fobj->shared_max = max; - return 0; -} -EXPORT_SYMBOL(reservation_object_reserve_shared); - -static void -reservation_object_add_shared_inplace(struct reservation_object *obj, - struct reservation_object_list *fobj, - struct dma_fence *fence) -{ - struct dma_fence *signaled = NULL; - u32 i, signaled_idx; - - dma_fence_get(fence); - - preempt_disable(); - write_seqcount_begin(&obj->seq); - - for (i = 0; i < fobj->shared_count; ++i) { - struct dma_fence *old_fence; - - old_fence = rcu_dereference_protected(fobj->shared[i], - reservation_object_held(obj)); - - if (old_fence->context == fence->context) { - /* memory barrier is added by write_seqcount_begin 
*/ - RCU_INIT_POINTER(fobj->shared[i], fence); - write_seqcount_end(&obj->seq); - preempt_enable(); - - dma_fence_put(old_fence); - return; - } - - if (!signaled && dma_fence_is_signaled(old_fence)) { - signaled = old_fence; - signaled_idx = i; - } - } - - /* - * memory barrier is added by write_seqcount_begin, - * fobj->shared_count is protected by this lock too - */ - if (signaled) { - RCU_INIT_POINTER(fobj->shared[signaled_idx], fence); } else { - BUG_ON(fobj->shared_count >= fobj->shared_max); - RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); - fobj->shared_count++; + max = 4; } - write_seqcount_end(&obj->seq); - preempt_enable(); - - dma_fence_put(signaled); -} - -static void -reservation_object_add_shared_replace(struct reservation_object *obj, - struct reservation_object_list *old, - struct reservation_object_list *fobj, - struct dma_fence *fence) -{ - unsigned i, j, k; - - dma_fence_get(fence); - - if (!old) { - RCU_INIT_POINTER(fobj->shared[0], fence); - fobj->shared_count = 1; - goto done; - } + new = kmalloc(offsetof(typeof(*new), shared[max]), GFP_KERNEL); + if (!new) + return -ENOMEM; /* * no need to bump fence refcounts, rcu_read access @@ -174,46 +92,45 @@ reservation_object_add_shared_replace(struct reservation_object *obj, * references from the old struct are carried over to * the new. */ - for (i = 0, j = 0, k = fobj->shared_max; i < old->shared_count; ++i) { - struct dma_fence *check; + for (i = 0, j = 0, k = max; i < (old ? 
old->shared_count : 0); ++i) { + struct dma_fence *fence; - check = rcu_dereference_protected(old->shared[i], - reservation_object_held(obj)); - - if (check->context == fence->context || - dma_fence_is_signaled(check)) - RCU_INIT_POINTER(fobj->shared[--k], check); + fence = rcu_dereference_protected(old->shared[i], + reservation_object_held(obj)); + if (dma_fence_is_signaled(fence)) + RCU_INIT_POINTER(new->shared[--k], fence); else - RCU_INIT_POINTER(fobj->shared[j++], check); + RCU_INIT_POINTER(new->shared[j++], fence); } - fobj->shared_count = j; - RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); - fobj->shared_count++; + new->shared_count = j; + new->shared_max = max; -done: preempt_disable(); write_seqcount_begin(&obj->seq); /* * RCU_INIT_POINTER can be used here, * seqcount provides the necessary barriers */ - RCU_INIT_POINTER(obj->fence, fobj); + RCU_INIT_POINTER(obj->fence, new); write_seqcount_end(&obj->seq); preempt_enable(); if (!old) - return; + return 0; /* Drop the references to the signaled fences */ - for (i = k; i < fobj->shared_max; ++i) { - struct dma_fence *f; + for (i = k; i < new->shared_max; ++i) { + struct dma_fence *fence; - f = rcu_dereference_protected(fobj->shared[i], - reservation_object_held(obj)); - dma_fence_put(f); + fence = rcu_dereference_protected(new->shared[i], + reservation_object_held(obj)); + dma_fence_put(fence); } kfree_rcu(old, rcu); + + return 0; } +EXPORT_SYMBOL(reservation_object_reserve_shared); /** * reservation_object_add_shared_fence - Add a fence to a shared slot @@ -226,15 +143,39 @@ done: void reservation_object_add_shared_fence(struct reservation_object *obj, struct dma_fence *fence) { - struct reservation_object_list *old, *fobj = obj->staged; + struct reservation_object_list *fobj; + unsigned int i; - old = reservation_object_get_list(obj); - obj->staged = NULL; + dma_fence_get(fence); + + fobj = reservation_object_get_list(obj); - if (!fobj) - reservation_object_add_shared_inplace(obj, old, fence); 
- else - reservation_object_add_shared_replace(obj, old, fobj, fence); + preempt_disable(); + write_seqcount_begin(&obj->seq); + + for (i = 0; i < fobj->shared_count; ++i) { + struct dma_fence *old_fence; + + old_fence = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj)); + if (old_fence->context == fence->context || + dma_fence_is_signaled(old_fence)) { + dma_fence_put(old_fence); + goto replace; + } + } + + BUG_ON(fobj->shared_count >= fobj->shared_max); + fobj->shared_count++; + +replace: + /* + * memory barrier is added by write_seqcount_begin, + * fobj->shared_count is protected by this lock too + */ + RCU_INIT_POINTER(fobj->shared[i], fence); + write_seqcount_end(&obj->seq); + preempt_enable(); } EXPORT_SYMBOL(reservation_object_add_shared_fence); @@ -343,9 +284,6 @@ retry: new = dma_fence_get_rcu_safe(&src->fence_excl); rcu_read_unlock(); - kfree(dst->staged); - dst->staged = NULL; - src_list = reservation_object_get_list(dst); old = reservation_object_get_excl(dst); diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 02166e815afb..54cf6773a14c 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -68,7 +68,6 @@ struct reservation_object_list { * @seq: sequence count for managing RCU read-side synchronization * @fence_excl: the exclusive fence, if there is one currently * @fence: list of current shared fences - * @staged: staged copy of shared fences for RCU updates */ struct reservation_object { struct ww_mutex lock; @@ -76,7 +75,6 @@ struct reservation_object { struct dma_fence __rcu *fence_excl; struct reservation_object_list __rcu *fence; - struct reservation_object_list *staged; }; #define reservation_object_held(obj) lockdep_is_held(&(obj)->lock.base) @@ -95,7 +93,6 @@ reservation_object_init(struct reservation_object *obj) __seqcount_init(&obj->seq, reservation_seqcount_string, &reservation_seqcount_class); RCU_INIT_POINTER(obj->fence, NULL); RCU_INIT_POINTER(obj->fence_excl, 
NULL); - obj->staged = NULL; } /** @@ -124,7 +121,6 @@ reservation_object_fini(struct reservation_object *obj) kfree(fobj); } - kfree(obj->staged); ww_mutex_destroy(&obj->lock); } -- cgit v1.2.3 From ca05359f1e64cf8303ee532e50efe4ab7563d4a9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 19 Sep 2018 16:12:25 +0200 Subject: dma-buf: allow reserving more than one shared fence slot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's support simultaneous submissions to multiple engines. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Link: https://patchwork.kernel.org/patch/10626149/ --- drivers/dma-buf/reservation.c | 13 ++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 2 +- drivers/gpu/drm/msm/msm_gem_submit.c | 3 ++- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- drivers/gpu/drm/qxl/qxl_release.c | 2 +- drivers/gpu/drm/radeon/radeon_vm.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 4 ++-- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 4 ++-- drivers/gpu/drm/v3d/v3d_gem.c | 2 +- drivers/gpu/drm/vc4/vc4_gem.c | 2 +- drivers/gpu/drm/vgem/vgem_fence.c | 2 +- include/linux/reservation.h | 3 ++- 16 files changed, 28 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 5825fc336a13..5fb4fd461908 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -56,9 +56,10 @@ const char reservation_seqcount_string[] = "reservation_seqcount"; EXPORT_SYMBOL(reservation_seqcount_string); /** - * reservation_object_reserve_shared - Reserve space to add a shared - * fence to a reservation_object. 
+ * reservation_object_reserve_shared - Reserve space to add shared fences to + * a reservation_object. * @obj: reservation object + * @num_fences: number of fences we want to add * * Should be called before reservation_object_add_shared_fence(). Must * be called with obj->lock held. @@ -66,7 +67,8 @@ EXPORT_SYMBOL(reservation_seqcount_string); * RETURNS * Zero for success, or -errno */ -int reservation_object_reserve_shared(struct reservation_object *obj) +int reservation_object_reserve_shared(struct reservation_object *obj, + unsigned int num_fences) { struct reservation_object_list *old, *new; unsigned int i, j, k, max; @@ -74,10 +76,11 @@ int reservation_object_reserve_shared(struct reservation_object *obj) old = reservation_object_get_list(obj); if (old && old->shared_max) { - if (old->shared_count < old->shared_max) + if ((old->shared_count + num_fences) <= old->shared_max) return 0; else - max = old->shared_max * 2; + max = max(old->shared_count + num_fences, + old->shared_max * 2); } else { max = 4; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8e9a65a15875..35bc8fc3bc70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -955,7 +955,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); + r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 904014dc5915..cf768acb51dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -640,7 +640,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, bo_addr = amdgpu_bo_gpu_offset(bo); shadow_addr = amdgpu_bo_gpu_offset(bo->shadow); - r = reservation_object_reserve_shared(bo->tbo.resv); + r = 
reservation_object_reserve_shared(bo->tbo.resv, 1); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6904d794d60a..bdce05183edb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -772,7 +772,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); - r = reservation_object_reserve_shared(bo->tbo.resv); + r = reservation_object_reserve_shared(bo->tbo.resv, 1); if (r) return r; @@ -1839,7 +1839,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); + r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); if (r) goto error_free; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 983e67f19e45..30875f8f2933 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -179,7 +179,7 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit) struct reservation_object *robj = bo->obj->resv; if (!(bo->flags & ETNA_SUBMIT_BO_WRITE)) { - ret = reservation_object_reserve_shared(robj); + ret = reservation_object_reserve_shared(robj, 1); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 31efc971a3a8..35fce4c88629 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -892,7 +892,7 @@ static void export_fence(struct i915_vma *vma, reservation_object_lock(resv, NULL); if (flags & EXEC_OBJECT_WRITE) reservation_object_add_excl_fence(resv, &rq->fence); - else if (reservation_object_reserve_shared(resv) == 0) + else if (reservation_object_reserve_shared(resv, 1) == 0) reservation_object_add_shared_fence(resv, &rq->fence); reservation_object_unlock(resv); } diff --git 
a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 7a7923e6220d..a90aedd6883a 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -241,7 +241,8 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) * strange place to call it. OTOH this is a * convenient can-fail point to hook it in. */ - ret = reservation_object_reserve_shared(msm_obj->resv); + ret = reservation_object_reserve_shared(msm_obj->resv, + 1); if (ret) return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 99be61ddeb75..d4964f3397a1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -341,7 +341,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e int ret = 0, i; if (!exclusive) { - ret = reservation_object_reserve_shared(resv); + ret = reservation_object_reserve_shared(resv, 1); if (ret) return ret; diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index e37f0097f744..a8d5457a1af9 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -234,7 +234,7 @@ static int qxl_release_validate_bo(struct qxl_bo *bo) return ret; } - ret = reservation_object_reserve_shared(bo->tbo.resv); + ret = reservation_object_reserve_shared(bo->tbo.resv, 1); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 7f1a9c787bd1..fed11ece0de6 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -831,7 +831,7 @@ static int radeon_vm_update_ptes(struct radeon_device *rdev, int r; radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true); - r = reservation_object_reserve_shared(pt->tbo.resv); + r = reservation_object_reserve_shared(pt->tbo.resv, 1); if (r) return r; diff --git a/drivers/gpu/drm/ttm/ttm_bo.c 
b/drivers/gpu/drm/ttm/ttm_bo.c index 26b889f86670..83b4657ffb10 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -872,7 +872,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, if (fence) { reservation_object_add_shared_fence(bo->resv, fence); - ret = reservation_object_reserve_shared(bo->resv); + ret = reservation_object_reserve_shared(bo->resv, 1); if (unlikely(ret)) return ret; @@ -977,7 +977,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, bool has_erestartsys = false; int i, ret; - ret = reservation_object_reserve_shared(bo->resv); + ret = reservation_object_reserve_shared(bo->resv, 1); if (unlikely(ret)) return ret; diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index e73ae0d22897..e493edb0d3e7 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -129,7 +129,7 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, if (!entry->shared) continue; - ret = reservation_object_reserve_shared(bo->resv); + ret = reservation_object_reserve_shared(bo->resv, 1); if (!ret) continue; } @@ -151,7 +151,7 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, } if (!ret && entry->shared) - ret = reservation_object_reserve_shared(bo->resv); + ret = reservation_object_reserve_shared(bo->resv, 1); if (unlikely(ret != 0)) { if (ret == -EINTR) diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index e688369ca82b..b88c96911453 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -305,7 +305,7 @@ retry: for (i = 0; i < exec->bo_count; i++) { bo = to_v3d_bo(&exec->bo[i]->base); - ret = reservation_object_reserve_shared(bo->resv); + ret = reservation_object_reserve_shared(bo->resv, 1); if (ret) { v3d_unlock_bo_reservations(dev, exec, acquire_ctx); return ret; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 251198194c38..41881ce4132d 100644 --- 
a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -635,7 +635,7 @@ retry: for (i = 0; i < exec->bo_count; i++) { bo = to_vc4_bo(&exec->bo[i]->base); - ret = reservation_object_reserve_shared(bo->resv); + ret = reservation_object_reserve_shared(bo->resv, 1); if (ret) { vc4_unlock_bo_reservations(dev, exec, acquire_ctx); return ret; diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index e6ee71323a66..c1c420afe2dd 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -180,7 +180,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, reservation_object_lock(resv, NULL); if (arg->flags & VGEM_FENCE_WRITE) reservation_object_add_excl_fence(resv, fence); - else if ((ret = reservation_object_reserve_shared(resv)) == 0) + else if ((ret = reservation_object_reserve_shared(resv, 1)) == 0) reservation_object_add_shared_fence(resv, fence); reservation_object_unlock(resv); diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 54cf6773a14c..5ddb0e143721 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -261,7 +261,8 @@ reservation_object_get_excl_rcu(struct reservation_object *obj) return fence; } -int reservation_object_reserve_shared(struct reservation_object *obj); +int reservation_object_reserve_shared(struct reservation_object *obj, + unsigned int num_fences); void reservation_object_add_shared_fence(struct reservation_object *obj, struct dma_fence *fence); -- cgit v1.2.3 From 99fe21a76f0f2d36e5f9afc95ce6ed5cc354ebad Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 4 Oct 2018 14:45:17 +0200 Subject: dma-buf: test shared slot allocation when mutex debugging is active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set shared_max to the number of shared fences right before we release the lock. 
This way every attempt to add a shared fence without previously reserving a slot will cause an error. Signed-off-by: Christian König Reviewed-by: Huang Rui Acked-by: Junwei Zhang Link: https://patchwork.kernel.org/patch/10626147/ --- include/linux/reservation.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 5ddb0e143721..2f0ffca35780 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -214,6 +214,11 @@ reservation_object_trylock(struct reservation_object *obj) static inline void reservation_object_unlock(struct reservation_object *obj) { +#ifdef CONFIG_DEBUG_MUTEXES + /* Test shared fence slot reservation */ + if (obj->fence) + obj->fence->shared_max = obj->fence->shared_count; +#endif ww_mutex_unlock(&obj->lock); } -- cgit v1.2.3 From 23c42a403a9cfdbad6004a556c927be7dd61a8ee Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 27 Oct 2018 15:07:40 +0200 Subject: netfilter: ipset: Introduction of new commands and protocol version 7 Two new commands (IPSET_CMD_GET_BYNAME, IPSET_CMD_GET_BYINDEX) are introduced. The new commands make it possible to eliminate the getsockopt operation (in iptables set/SET match/target) and thus use only netlink communication between userspace and kernel for ipset. With the new protocol version, userspace can know exactly which functionality is supported by the running kernel. Both the kernel and userspace are fully backward compatible. 
Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 +- include/uapi/linux/netfilter/ipset/ip_set.h | 19 ++-- net/netfilter/ipset/ip_set_core.c | 164 +++++++++++++++++++++++++--- 3 files changed, 160 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 34fc80f3eb90..c4ce07402c24 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -303,11 +303,11 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) /* Netlink CB args */ enum { IPSET_CB_NET = 0, /* net namespace */ + IPSET_CB_PROTO, /* ipset protocol */ IPSET_CB_DUMP, /* dump single set/all sets */ IPSET_CB_INDEX, /* set index */ IPSET_CB_PRIVATE, /* set private data */ IPSET_CB_ARG0, /* type specific */ - IPSET_CB_ARG1, }; /* register and unregister set references */ diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 60236f694143..ea69ca21ff23 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -13,8 +13,9 @@ #include -/* The protocol version */ -#define IPSET_PROTOCOL 6 +/* The protocol versions */ +#define IPSET_PROTOCOL 7 +#define IPSET_PROTOCOL_MIN 6 /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 @@ -38,17 +39,19 @@ enum ipset_cmd { IPSET_CMD_TEST, /* 11: Test an element in a set */ IPSET_CMD_HEADER, /* 12: Get set header data only */ IPSET_CMD_TYPE, /* 13: Get set type */ + IPSET_CMD_GET_BYNAME, /* 14: Get set index by name */ + IPSET_CMD_GET_BYINDEX, /* 15: Get set name by index */ IPSET_MSG_MAX, /* Netlink message commands */ /* Commands in userspace: */ - IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */ - IPSET_CMD_HELP, /* 15: Get help */ - IPSET_CMD_VERSION, /* 16: Get program version */ - IPSET_CMD_QUIT, /* 17: Quit from interactive mode */ 
+ IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 16: Enter restore mode */ + IPSET_CMD_HELP, /* 17: Get help */ + IPSET_CMD_VERSION, /* 18: Get program version */ + IPSET_CMD_QUIT, /* 19: Quit from interactive mode */ IPSET_CMD_MAX, - IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */ + IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 20: Commit buffered commands */ }; /* Attributes at command level */ @@ -66,6 +69,7 @@ enum { IPSET_ATTR_LINENO, /* 9: Restore lineno */ IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */ IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ + IPSET_ATTR_INDEX, /* 11: Kernel index of set */ __IPSET_ATTR_CMD_MAX, }; #define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) @@ -223,6 +227,7 @@ enum ipset_adt { /* Sets are identified by an index in kernel space. Tweak with ip_set_id_t * and IPSET_INVALID_ID if you want to increase the max number of sets. + * Also, IPSET_ATTR_INDEX must be changed. */ typedef __u16 ip_set_id_t; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bc4bd247bb7d..847f764b2aeb 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -768,11 +768,21 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put); * The commands are serialized by the nfnl mutex. 
*/ +static inline u8 protocol(const struct nlattr * const tb[]) +{ + return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]); +} + static inline bool protocol_failed(const struct nlattr * const tb[]) { - return !tb[IPSET_ATTR_PROTOCOL] || - nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; + return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL; +} + +static inline bool +protocol_min_failed(const struct nlattr * const tb[]) +{ + return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN; } static inline u32 @@ -886,7 +896,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, u32 flags = flag_exist(nlh); int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_TYPENAME] || !attr[IPSET_ATTR_REVISION] || @@ -1024,7 +1034,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, ip_set_id_t i; int ret = 0; - if (unlikely(protocol_failed(attr))) + if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; /* Must wait for flush to be really finished in list:set */ @@ -1102,7 +1112,7 @@ static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, struct ip_set *s; ip_set_id_t i; - if (unlikely(protocol_failed(attr))) + if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; if (!attr[IPSET_ATTR_SETNAME]) { @@ -1144,7 +1154,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, ip_set_id_t i; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; @@ -1193,7 +1203,7 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; 
@@ -1288,6 +1298,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy, NULL); + cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; @@ -1389,7 +1400,8 @@ dump_last: ret = -EMSGSIZE; goto release_refcount; } - if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, + cb->args[IPSET_CB_PROTO]) || nla_put_string(skb, IPSET_ATTR_SETNAME, set->name)) goto nla_put_failure; if (dump_flags & IPSET_FLAG_LIST_SETNAME) @@ -1404,6 +1416,9 @@ dump_last: nla_put_u8(skb, IPSET_ATTR_REVISION, set->revision)) goto nla_put_failure; + if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN && + nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index))) + goto nla_put_failure; ret = set->variant->head(set, skb); if (ret < 0) goto release_refcount; @@ -1463,7 +1478,7 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlattr * const attr[], struct netlink_ext_ack *extack) { - if (unlikely(protocol_failed(attr))) + if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; { @@ -1557,7 +1572,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, bool use_lineno; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || @@ -1612,7 +1627,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, bool use_lineno; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || @@ -1664,7 +1679,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, struct nlattr *tb[IPSET_ATTR_ADT_MAX + 
1] = {}; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_DATA] || !flag_nested(attr[IPSET_ATTR_DATA]))) @@ -1701,7 +1716,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl, struct nlmsghdr *nlh2; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME])) return -IPSET_ERR_PROTOCOL; @@ -1717,7 +1732,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; - if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) || nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || @@ -1758,7 +1773,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, const char *typename; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_TYPENAME] || !attr[IPSET_ATTR_FAMILY])) return -IPSET_ERR_PROTOCOL; @@ -1777,7 +1792,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; - if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) || nla_put_u8(skb2, IPSET_ATTR_REVISION, max) || @@ -1828,6 +1843,111 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl, goto nlmsg_failure; if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL)) goto nla_put_failure; + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN)) + goto nla_put_failure; + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); 
+ if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +/* Get set by name or index, from userspace */ + +static int ip_set_byname(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) +{ + struct ip_set_net *inst = ip_set_pernet(net); + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + ip_set_id_t id = IPSET_INVALID_ID; + const struct ip_set *set; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + !attr[IPSET_ATTR_SETNAME])) + return -IPSET_ERR_PROTOCOL; + + set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id); + if (id == IPSET_INVALID_ID) + return -ENOENT; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + IPSET_CMD_GET_BYNAME); + if (!nlh2) + goto nlmsg_failure; + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || + nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || + nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id))) + goto nla_put_failure; + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_INDEX] = { .type = NLA_U16 }, +}; + +static int ip_set_byindex(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) +{ + struct ip_set_net *inst = ip_set_pernet(net); + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + ip_set_id_t id = IPSET_INVALID_ID; + const struct ip_set *set; + int ret = 0; + + if 
(unlikely(protocol_failed(attr) || + !attr[IPSET_ATTR_INDEX])) + return -IPSET_ERR_PROTOCOL; + + id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]); + if (id >= inst->ip_set_max) + return -ENOENT; + set = ip_set(inst, id); + if (set == NULL) + return -ENOENT; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + IPSET_CMD_GET_BYINDEX); + if (!nlh2) + goto nlmsg_failure; + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || + nla_put_string(skb, IPSET_ATTR_SETNAME, set->name)) + goto nla_put_failure; nlmsg_end(skb2, nlh2); ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); @@ -1913,6 +2033,16 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_protocol_policy, }, + [IPSET_CMD_GET_BYNAME] = { + .call = ip_set_byname, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_GET_BYINDEX] = { + .call = ip_set_byindex, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_index_policy, + }, }; static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { @@ -1958,7 +2088,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) goto done; } - if (req_version->version != IPSET_PROTOCOL) { + if (req_version->version < IPSET_PROTOCOL_MIN) { ret = -EPROTO; goto done; } -- cgit v1.2.3 From 9fa45070a2e59a871e1cd3370173369f3a4f61e2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 4 Sep 2018 11:48:26 +0100 Subject: locking/atomics: Switch to generated fallbacks As a step to ensuring the atomic* APIs are consistent, switch to fallbacks generated by gen-atomic-fallback.sh. These are checked in rather than generated with Kbuild, since: * This allows inspection of the atomics with git grep and ctags on a pristine tree, which Linus strongly prefers being able to do. 
* The fallbacks are not affected by machine details or configuration options, so it is not necessary to regenerate them to take these into account. * These are included by files required *very* early in the build process (e.g. for generating bounds.h), and we'd rather not complicate the top-level Kbuild file with dependencies. The new fallback header should be equivalent to the old fallbacks in <linux/atomic.h>, but: * It is formatted a little differently due to scripting ensuring things are more regular than they used to be. * Fallbacks are now expanded in-place as static inline functions rather than macros. * The prototypes for fallbacks are arranged consistently with the return type on a separate line to try to keep to a sensible line length. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: catalin.marinas@arm.com Cc: linuxdrivers@attotech.com Cc: dvyukov@google.com Cc: Boqun Feng Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: glider@google.com Link: http://lkml.kernel.org/r/20180904104830.2975-3-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic-fallback.h | 2294 +++++++++++++++++++++++++++++++++++++++ include/linux/atomic.h | 1241 +-------------------- 2 files changed, 2295 insertions(+), 1240 deletions(-) create mode 100644 include/linux/atomic-fallback.h (limited to 'include/linux') diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h new file mode 100644 index 000000000000..1c02c0112fbb --- /dev/null +++ b/include/linux/atomic-fallback.h @@ -0,0 +1,2294 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Generated by scripts/atomic/gen-atomic-fallback.sh +// DO NOT MODIFY THIS FILE DIRECTLY + +#ifndef _LINUX_ATOMIC_FALLBACK_H +#define _LINUX_ATOMIC_FALLBACK_H + +#ifndef xchg_relaxed +#define xchg_relaxed xchg +#define xchg_acquire xchg +#define xchg_release xchg +#else /* 
xchg_relaxed */ + +#ifndef xchg_acquire +#define xchg_acquire(...) \ + __atomic_op_acquire(xchg, __VA_ARGS__) +#endif + +#ifndef xchg_release +#define xchg_release(...) \ + __atomic_op_release(xchg, __VA_ARGS__) +#endif + +#ifndef xchg +#define xchg(...) \ + __atomic_op_fence(xchg, __VA_ARGS__) +#endif + +#endif /* xchg_relaxed */ + +#ifndef cmpxchg_relaxed +#define cmpxchg_relaxed cmpxchg +#define cmpxchg_acquire cmpxchg +#define cmpxchg_release cmpxchg +#else /* cmpxchg_relaxed */ + +#ifndef cmpxchg_acquire +#define cmpxchg_acquire(...) \ + __atomic_op_acquire(cmpxchg, __VA_ARGS__) +#endif + +#ifndef cmpxchg_release +#define cmpxchg_release(...) \ + __atomic_op_release(cmpxchg, __VA_ARGS__) +#endif + +#ifndef cmpxchg +#define cmpxchg(...) \ + __atomic_op_fence(cmpxchg, __VA_ARGS__) +#endif + +#endif /* cmpxchg_relaxed */ + +#ifndef cmpxchg64_relaxed +#define cmpxchg64_relaxed cmpxchg64 +#define cmpxchg64_acquire cmpxchg64 +#define cmpxchg64_release cmpxchg64 +#else /* cmpxchg64_relaxed */ + +#ifndef cmpxchg64_acquire +#define cmpxchg64_acquire(...) \ + __atomic_op_acquire(cmpxchg64, __VA_ARGS__) +#endif + +#ifndef cmpxchg64_release +#define cmpxchg64_release(...) \ + __atomic_op_release(cmpxchg64, __VA_ARGS__) +#endif + +#ifndef cmpxchg64 +#define cmpxchg64(...) 
\ + __atomic_op_fence(cmpxchg64, __VA_ARGS__) +#endif + +#endif /* cmpxchg64_relaxed */ + +#ifndef atomic_read_acquire +static inline int +atomic_read_acquire(const atomic_t *v) +{ + return smp_load_acquire(&(v)->counter); +} +#define atomic_read_acquire atomic_read_acquire +#endif + +#ifndef atomic_set_release +static inline void +atomic_set_release(atomic_t *v, int i) +{ + smp_store_release(&(v)->counter, i); +} +#define atomic_set_release atomic_set_release +#endif + +#ifndef atomic_add_return_relaxed +#define atomic_add_return_acquire atomic_add_return +#define atomic_add_return_release atomic_add_return +#define atomic_add_return_relaxed atomic_add_return +#else /* atomic_add_return_relaxed */ + +#ifndef atomic_add_return_acquire +static inline int +atomic_add_return_acquire(int i, atomic_t *v) +{ + int ret = atomic_add_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_add_return_acquire atomic_add_return_acquire +#endif + +#ifndef atomic_add_return_release +static inline int +atomic_add_return_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_add_return_relaxed(i, v); +} +#define atomic_add_return_release atomic_add_return_release +#endif + +#ifndef atomic_add_return +static inline int +atomic_add_return(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_add_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_add_return atomic_add_return +#endif + +#endif /* atomic_add_return_relaxed */ + +#ifndef atomic_fetch_add_relaxed +#define atomic_fetch_add_acquire atomic_fetch_add +#define atomic_fetch_add_release atomic_fetch_add +#define atomic_fetch_add_relaxed atomic_fetch_add +#else /* atomic_fetch_add_relaxed */ + +#ifndef atomic_fetch_add_acquire +static inline int +atomic_fetch_add_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_add_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_add_acquire 
atomic_fetch_add_acquire +#endif + +#ifndef atomic_fetch_add_release +static inline int +atomic_fetch_add_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_add_relaxed(i, v); +} +#define atomic_fetch_add_release atomic_fetch_add_release +#endif + +#ifndef atomic_fetch_add +static inline int +atomic_fetch_add(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_add_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_add atomic_fetch_add +#endif + +#endif /* atomic_fetch_add_relaxed */ + +#ifndef atomic_sub_return_relaxed +#define atomic_sub_return_acquire atomic_sub_return +#define atomic_sub_return_release atomic_sub_return +#define atomic_sub_return_relaxed atomic_sub_return +#else /* atomic_sub_return_relaxed */ + +#ifndef atomic_sub_return_acquire +static inline int +atomic_sub_return_acquire(int i, atomic_t *v) +{ + int ret = atomic_sub_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_sub_return_acquire atomic_sub_return_acquire +#endif + +#ifndef atomic_sub_return_release +static inline int +atomic_sub_return_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_sub_return_relaxed(i, v); +} +#define atomic_sub_return_release atomic_sub_return_release +#endif + +#ifndef atomic_sub_return +static inline int +atomic_sub_return(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_sub_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_sub_return atomic_sub_return +#endif + +#endif /* atomic_sub_return_relaxed */ + +#ifndef atomic_fetch_sub_relaxed +#define atomic_fetch_sub_acquire atomic_fetch_sub +#define atomic_fetch_sub_release atomic_fetch_sub +#define atomic_fetch_sub_relaxed atomic_fetch_sub +#else /* atomic_fetch_sub_relaxed */ + +#ifndef atomic_fetch_sub_acquire +static inline int +atomic_fetch_sub_acquire(int i, atomic_t *v) +{ + int ret = 
atomic_fetch_sub_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#endif + +#ifndef atomic_fetch_sub_release +static inline int +atomic_fetch_sub_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_sub_relaxed(i, v); +} +#define atomic_fetch_sub_release atomic_fetch_sub_release +#endif + +#ifndef atomic_fetch_sub +static inline int +atomic_fetch_sub(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_sub_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_sub atomic_fetch_sub +#endif + +#endif /* atomic_fetch_sub_relaxed */ + +#ifndef atomic_inc +static inline void +atomic_inc(atomic_t *v) +{ + atomic_add(1, v); +} +#define atomic_inc atomic_inc +#endif + +#ifndef atomic_inc_return_relaxed +#ifdef atomic_inc_return +#define atomic_inc_return_acquire atomic_inc_return +#define atomic_inc_return_release atomic_inc_return +#define atomic_inc_return_relaxed atomic_inc_return +#endif /* atomic_inc_return */ + +#ifndef atomic_inc_return +static inline int +atomic_inc_return(atomic_t *v) +{ + return atomic_add_return(1, v); +} +#define atomic_inc_return atomic_inc_return +#endif + +#ifndef atomic_inc_return_acquire +static inline int +atomic_inc_return_acquire(atomic_t *v) +{ + return atomic_add_return_acquire(1, v); +} +#define atomic_inc_return_acquire atomic_inc_return_acquire +#endif + +#ifndef atomic_inc_return_release +static inline int +atomic_inc_return_release(atomic_t *v) +{ + return atomic_add_return_release(1, v); +} +#define atomic_inc_return_release atomic_inc_return_release +#endif + +#ifndef atomic_inc_return_relaxed +static inline int +atomic_inc_return_relaxed(atomic_t *v) +{ + return atomic_add_return_relaxed(1, v); +} +#define atomic_inc_return_relaxed atomic_inc_return_relaxed +#endif + +#else /* atomic_inc_return_relaxed */ + +#ifndef atomic_inc_return_acquire +static inline int 
+atomic_inc_return_acquire(atomic_t *v) +{ + int ret = atomic_inc_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_inc_return_acquire atomic_inc_return_acquire +#endif + +#ifndef atomic_inc_return_release +static inline int +atomic_inc_return_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_inc_return_relaxed(v); +} +#define atomic_inc_return_release atomic_inc_return_release +#endif + +#ifndef atomic_inc_return +static inline int +atomic_inc_return(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_inc_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_inc_return atomic_inc_return +#endif + +#endif /* atomic_inc_return_relaxed */ + +#ifndef atomic_fetch_inc_relaxed +#ifdef atomic_fetch_inc +#define atomic_fetch_inc_acquire atomic_fetch_inc +#define atomic_fetch_inc_release atomic_fetch_inc +#define atomic_fetch_inc_relaxed atomic_fetch_inc +#endif /* atomic_fetch_inc */ + +#ifndef atomic_fetch_inc +static inline int +atomic_fetch_inc(atomic_t *v) +{ + return atomic_fetch_add(1, v); +} +#define atomic_fetch_inc atomic_fetch_inc +#endif + +#ifndef atomic_fetch_inc_acquire +static inline int +atomic_fetch_inc_acquire(atomic_t *v) +{ + return atomic_fetch_add_acquire(1, v); +} +#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire +#endif + +#ifndef atomic_fetch_inc_release +static inline int +atomic_fetch_inc_release(atomic_t *v) +{ + return atomic_fetch_add_release(1, v); +} +#define atomic_fetch_inc_release atomic_fetch_inc_release +#endif + +#ifndef atomic_fetch_inc_relaxed +static inline int +atomic_fetch_inc_relaxed(atomic_t *v) +{ + return atomic_fetch_add_relaxed(1, v); +} +#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed +#endif + +#else /* atomic_fetch_inc_relaxed */ + +#ifndef atomic_fetch_inc_acquire +static inline int +atomic_fetch_inc_acquire(atomic_t *v) +{ + int ret = atomic_fetch_inc_relaxed(v); + __atomic_acquire_fence(); + return ret; +} 
+#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire +#endif + +#ifndef atomic_fetch_inc_release +static inline int +atomic_fetch_inc_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_inc_relaxed(v); +} +#define atomic_fetch_inc_release atomic_fetch_inc_release +#endif + +#ifndef atomic_fetch_inc +static inline int +atomic_fetch_inc(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_inc_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_inc atomic_fetch_inc +#endif + +#endif /* atomic_fetch_inc_relaxed */ + +#ifndef atomic_dec +static inline void +atomic_dec(atomic_t *v) +{ + atomic_sub(1, v); +} +#define atomic_dec atomic_dec +#endif + +#ifndef atomic_dec_return_relaxed +#ifdef atomic_dec_return +#define atomic_dec_return_acquire atomic_dec_return +#define atomic_dec_return_release atomic_dec_return +#define atomic_dec_return_relaxed atomic_dec_return +#endif /* atomic_dec_return */ + +#ifndef atomic_dec_return +static inline int +atomic_dec_return(atomic_t *v) +{ + return atomic_sub_return(1, v); +} +#define atomic_dec_return atomic_dec_return +#endif + +#ifndef atomic_dec_return_acquire +static inline int +atomic_dec_return_acquire(atomic_t *v) +{ + return atomic_sub_return_acquire(1, v); +} +#define atomic_dec_return_acquire atomic_dec_return_acquire +#endif + +#ifndef atomic_dec_return_release +static inline int +atomic_dec_return_release(atomic_t *v) +{ + return atomic_sub_return_release(1, v); +} +#define atomic_dec_return_release atomic_dec_return_release +#endif + +#ifndef atomic_dec_return_relaxed +static inline int +atomic_dec_return_relaxed(atomic_t *v) +{ + return atomic_sub_return_relaxed(1, v); +} +#define atomic_dec_return_relaxed atomic_dec_return_relaxed +#endif + +#else /* atomic_dec_return_relaxed */ + +#ifndef atomic_dec_return_acquire +static inline int +atomic_dec_return_acquire(atomic_t *v) +{ + int ret = atomic_dec_return_relaxed(v); + 
__atomic_acquire_fence(); + return ret; +} +#define atomic_dec_return_acquire atomic_dec_return_acquire +#endif + +#ifndef atomic_dec_return_release +static inline int +atomic_dec_return_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_dec_return_relaxed(v); +} +#define atomic_dec_return_release atomic_dec_return_release +#endif + +#ifndef atomic_dec_return +static inline int +atomic_dec_return(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_dec_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_dec_return atomic_dec_return +#endif + +#endif /* atomic_dec_return_relaxed */ + +#ifndef atomic_fetch_dec_relaxed +#ifdef atomic_fetch_dec +#define atomic_fetch_dec_acquire atomic_fetch_dec +#define atomic_fetch_dec_release atomic_fetch_dec +#define atomic_fetch_dec_relaxed atomic_fetch_dec +#endif /* atomic_fetch_dec */ + +#ifndef atomic_fetch_dec +static inline int +atomic_fetch_dec(atomic_t *v) +{ + return atomic_fetch_sub(1, v); +} +#define atomic_fetch_dec atomic_fetch_dec +#endif + +#ifndef atomic_fetch_dec_acquire +static inline int +atomic_fetch_dec_acquire(atomic_t *v) +{ + return atomic_fetch_sub_acquire(1, v); +} +#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire +#endif + +#ifndef atomic_fetch_dec_release +static inline int +atomic_fetch_dec_release(atomic_t *v) +{ + return atomic_fetch_sub_release(1, v); +} +#define atomic_fetch_dec_release atomic_fetch_dec_release +#endif + +#ifndef atomic_fetch_dec_relaxed +static inline int +atomic_fetch_dec_relaxed(atomic_t *v) +{ + return atomic_fetch_sub_relaxed(1, v); +} +#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed +#endif + +#else /* atomic_fetch_dec_relaxed */ + +#ifndef atomic_fetch_dec_acquire +static inline int +atomic_fetch_dec_acquire(atomic_t *v) +{ + int ret = atomic_fetch_dec_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire +#endif + +#ifndef 
atomic_fetch_dec_release +static inline int +atomic_fetch_dec_release(atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_dec_relaxed(v); +} +#define atomic_fetch_dec_release atomic_fetch_dec_release +#endif + +#ifndef atomic_fetch_dec +static inline int +atomic_fetch_dec(atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_dec_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_dec atomic_fetch_dec +#endif + +#endif /* atomic_fetch_dec_relaxed */ + +#ifndef atomic_fetch_and_relaxed +#define atomic_fetch_and_acquire atomic_fetch_and +#define atomic_fetch_and_release atomic_fetch_and +#define atomic_fetch_and_relaxed atomic_fetch_and +#else /* atomic_fetch_and_relaxed */ + +#ifndef atomic_fetch_and_acquire +static inline int +atomic_fetch_and_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_and_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_and_acquire atomic_fetch_and_acquire +#endif + +#ifndef atomic_fetch_and_release +static inline int +atomic_fetch_and_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_and_relaxed(i, v); +} +#define atomic_fetch_and_release atomic_fetch_and_release +#endif + +#ifndef atomic_fetch_and +static inline int +atomic_fetch_and(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_and_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_and atomic_fetch_and +#endif + +#endif /* atomic_fetch_and_relaxed */ + +#ifndef atomic_andnot +static inline void +atomic_andnot(int i, atomic_t *v) +{ + atomic_and(~i, v); +} +#define atomic_andnot atomic_andnot +#endif + +#ifndef atomic_fetch_andnot_relaxed +#ifdef atomic_fetch_andnot +#define atomic_fetch_andnot_acquire atomic_fetch_andnot +#define atomic_fetch_andnot_release atomic_fetch_andnot +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot +#endif /* atomic_fetch_andnot */ + +#ifndef 
atomic_fetch_andnot +static inline int +atomic_fetch_andnot(int i, atomic_t *v) +{ + return atomic_fetch_and(~i, v); +} +#define atomic_fetch_andnot atomic_fetch_andnot +#endif + +#ifndef atomic_fetch_andnot_acquire +static inline int +atomic_fetch_andnot_acquire(int i, atomic_t *v) +{ + return atomic_fetch_and_acquire(~i, v); +} +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#endif + +#ifndef atomic_fetch_andnot_release +static inline int +atomic_fetch_andnot_release(int i, atomic_t *v) +{ + return atomic_fetch_and_release(~i, v); +} +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#endif + +#ifndef atomic_fetch_andnot_relaxed +static inline int +atomic_fetch_andnot_relaxed(int i, atomic_t *v) +{ + return atomic_fetch_and_relaxed(~i, v); +} +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#endif + +#else /* atomic_fetch_andnot_relaxed */ + +#ifndef atomic_fetch_andnot_acquire +static inline int +atomic_fetch_andnot_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_andnot_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#endif + +#ifndef atomic_fetch_andnot_release +static inline int +atomic_fetch_andnot_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_andnot_relaxed(i, v); +} +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#endif + +#ifndef atomic_fetch_andnot +static inline int +atomic_fetch_andnot(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_andnot_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_andnot atomic_fetch_andnot +#endif + +#endif /* atomic_fetch_andnot_relaxed */ + +#ifndef atomic_fetch_or_relaxed +#define atomic_fetch_or_acquire atomic_fetch_or +#define atomic_fetch_or_release atomic_fetch_or +#define atomic_fetch_or_relaxed atomic_fetch_or +#else /* atomic_fetch_or_relaxed */ + +#ifndef 
atomic_fetch_or_acquire +static inline int +atomic_fetch_or_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_or_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_or_acquire atomic_fetch_or_acquire +#endif + +#ifndef atomic_fetch_or_release +static inline int +atomic_fetch_or_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_or_relaxed(i, v); +} +#define atomic_fetch_or_release atomic_fetch_or_release +#endif + +#ifndef atomic_fetch_or +static inline int +atomic_fetch_or(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_or_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_or atomic_fetch_or +#endif + +#endif /* atomic_fetch_or_relaxed */ + +#ifndef atomic_fetch_xor_relaxed +#define atomic_fetch_xor_acquire atomic_fetch_xor +#define atomic_fetch_xor_release atomic_fetch_xor +#define atomic_fetch_xor_relaxed atomic_fetch_xor +#else /* atomic_fetch_xor_relaxed */ + +#ifndef atomic_fetch_xor_acquire +static inline int +atomic_fetch_xor_acquire(int i, atomic_t *v) +{ + int ret = atomic_fetch_xor_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#endif + +#ifndef atomic_fetch_xor_release +static inline int +atomic_fetch_xor_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return atomic_fetch_xor_relaxed(i, v); +} +#define atomic_fetch_xor_release atomic_fetch_xor_release +#endif + +#ifndef atomic_fetch_xor +static inline int +atomic_fetch_xor(int i, atomic_t *v) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_fetch_xor_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic_fetch_xor atomic_fetch_xor +#endif + +#endif /* atomic_fetch_xor_relaxed */ + +#ifndef atomic_xchg_relaxed +#define atomic_xchg_acquire atomic_xchg +#define atomic_xchg_release atomic_xchg +#define atomic_xchg_relaxed atomic_xchg +#else /* 
atomic_xchg_relaxed */ + +#ifndef atomic_xchg_acquire +static inline int +atomic_xchg_acquire(atomic_t *v, int i) +{ + int ret = atomic_xchg_relaxed(v, i); + __atomic_acquire_fence(); + return ret; +} +#define atomic_xchg_acquire atomic_xchg_acquire +#endif + +#ifndef atomic_xchg_release +static inline int +atomic_xchg_release(atomic_t *v, int i) +{ + __atomic_release_fence(); + return atomic_xchg_relaxed(v, i); +} +#define atomic_xchg_release atomic_xchg_release +#endif + +#ifndef atomic_xchg +static inline int +atomic_xchg(atomic_t *v, int i) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_xchg_relaxed(v, i); + __atomic_post_full_fence(); + return ret; +} +#define atomic_xchg atomic_xchg +#endif + +#endif /* atomic_xchg_relaxed */ + +#ifndef atomic_cmpxchg_relaxed +#define atomic_cmpxchg_acquire atomic_cmpxchg +#define atomic_cmpxchg_release atomic_cmpxchg +#define atomic_cmpxchg_relaxed atomic_cmpxchg +#else /* atomic_cmpxchg_relaxed */ + +#ifndef atomic_cmpxchg_acquire +static inline int +atomic_cmpxchg_acquire(atomic_t *v, int old, int new) +{ + int ret = atomic_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic_cmpxchg_acquire atomic_cmpxchg_acquire +#endif + +#ifndef atomic_cmpxchg_release +static inline int +atomic_cmpxchg_release(atomic_t *v, int old, int new) +{ + __atomic_release_fence(); + return atomic_cmpxchg_relaxed(v, old, new); +} +#define atomic_cmpxchg_release atomic_cmpxchg_release +#endif + +#ifndef atomic_cmpxchg +static inline int +atomic_cmpxchg(atomic_t *v, int old, int new) +{ + int ret; + __atomic_pre_full_fence(); + ret = atomic_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic_cmpxchg atomic_cmpxchg +#endif + +#endif /* atomic_cmpxchg_relaxed */ + +#ifndef atomic_try_cmpxchg_relaxed +#ifdef atomic_try_cmpxchg +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg +#define atomic_try_cmpxchg_release atomic_try_cmpxchg +#define 
atomic_try_cmpxchg_relaxed atomic_try_cmpxchg +#endif /* atomic_try_cmpxchg */ + +#ifndef atomic_try_cmpxchg +static inline bool +atomic_try_cmpxchg(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg atomic_try_cmpxchg +#endif + +#ifndef atomic_try_cmpxchg_acquire +static inline bool +atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg_acquire(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire +#endif + +#ifndef atomic_try_cmpxchg_release +static inline bool +atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg_release(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release +#endif + +#ifndef atomic_try_cmpxchg_relaxed +static inline bool +atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) +{ + int r, o = *old; + r = atomic_cmpxchg_relaxed(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed +#endif + +#else /* atomic_try_cmpxchg_relaxed */ + +#ifndef atomic_try_cmpxchg_acquire +static inline bool +atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) +{ + bool ret = atomic_try_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire +#endif + +#ifndef atomic_try_cmpxchg_release +static inline bool +atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) +{ + __atomic_release_fence(); + return atomic_try_cmpxchg_relaxed(v, old, new); +} +#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release +#endif + +#ifndef atomic_try_cmpxchg +static inline bool +atomic_try_cmpxchg(atomic_t *v, int 
*old, int new) +{ + bool ret; + __atomic_pre_full_fence(); + ret = atomic_try_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic_try_cmpxchg atomic_try_cmpxchg +#endif + +#endif /* atomic_try_cmpxchg_relaxed */ + +#ifndef atomic_sub_and_test +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic_sub_and_test(int i, atomic_t *v) +{ + return atomic_sub_return(i, v) == 0; +} +#define atomic_sub_and_test atomic_sub_and_test +#endif + +#ifndef atomic_dec_and_test +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline bool +atomic_dec_and_test(atomic_t *v) +{ + return atomic_dec_return(v) == 0; +} +#define atomic_dec_and_test atomic_dec_and_test +#endif + +#ifndef atomic_inc_and_test +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic_inc_and_test(atomic_t *v) +{ + return atomic_inc_return(v) == 0; +} +#define atomic_inc_and_test atomic_inc_and_test +#endif + +#ifndef atomic_add_negative +/** + * atomic_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. 
+ */ +static inline bool +atomic_add_negative(int i, atomic_t *v) +{ + return atomic_add_return(i, v) < 0; +} +#define atomic_add_negative atomic_add_negative +#endif + +#ifndef atomic_fetch_add_unless +/** + * atomic_fetch_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns original value of @v + */ +static inline int +atomic_fetch_add_unless(atomic_t *v, int a, int u) +{ + int c = atomic_read(v); + + do { + if (unlikely(c == u)) + break; + } while (!atomic_try_cmpxchg(v, &c, c + a)); + + return c; +} +#define atomic_fetch_add_unless atomic_fetch_add_unless +#endif + +#ifndef atomic_add_unless +/** + * atomic_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. + */ +static inline bool +atomic_add_unless(atomic_t *v, int a, int u) +{ + return atomic_fetch_add_unless(v, a, u) != u; +} +#define atomic_add_unless atomic_add_unless +#endif + +#ifndef atomic_inc_not_zero +/** + * atomic_inc_not_zero - increment unless the number is zero + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. 
+ */ +static inline bool +atomic_inc_not_zero(atomic_t *v) +{ + return atomic_add_unless(v, 1, 0); +} +#define atomic_inc_not_zero atomic_inc_not_zero +#endif + +#ifndef atomic_inc_unless_negative +static inline bool +atomic_inc_unless_negative(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c + 1)); + + return true; +} +#define atomic_inc_unless_negative atomic_inc_unless_negative +#endif + +#ifndef atomic_dec_unless_positive +static inline bool +atomic_dec_unless_positive(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c > 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c - 1)); + + return true; +} +#define atomic_dec_unless_positive atomic_dec_unless_positive +#endif + +#ifndef atomic_dec_if_positive +static inline int +atomic_dec_if_positive(atomic_t *v) +{ + int dec, c = atomic_read(v); + + do { + dec = c - 1; + if (unlikely(dec < 0)) + break; + } while (!atomic_try_cmpxchg(v, &c, dec)); + + return dec; +} +#define atomic_dec_if_positive atomic_dec_if_positive +#endif + +#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) +#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) + +#ifdef CONFIG_GENERIC_ATOMIC64 +#include +#endif + +#ifndef atomic64_read_acquire +static inline s64 +atomic64_read_acquire(const atomic64_t *v) +{ + return smp_load_acquire(&(v)->counter); +} +#define atomic64_read_acquire atomic64_read_acquire +#endif + +#ifndef atomic64_set_release +static inline void +atomic64_set_release(atomic64_t *v, s64 i) +{ + smp_store_release(&(v)->counter, i); +} +#define atomic64_set_release atomic64_set_release +#endif + +#ifndef atomic64_add_return_relaxed +#define atomic64_add_return_acquire atomic64_add_return +#define atomic64_add_return_release atomic64_add_return +#define atomic64_add_return_relaxed atomic64_add_return +#else /* atomic64_add_return_relaxed */ + +#ifndef 
atomic64_add_return_acquire +static inline s64 +atomic64_add_return_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_add_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_add_return_acquire atomic64_add_return_acquire +#endif + +#ifndef atomic64_add_return_release +static inline s64 +atomic64_add_return_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_add_return_relaxed(i, v); +} +#define atomic64_add_return_release atomic64_add_return_release +#endif + +#ifndef atomic64_add_return +static inline s64 +atomic64_add_return(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_add_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_add_return atomic64_add_return +#endif + +#endif /* atomic64_add_return_relaxed */ + +#ifndef atomic64_fetch_add_relaxed +#define atomic64_fetch_add_acquire atomic64_fetch_add +#define atomic64_fetch_add_release atomic64_fetch_add +#define atomic64_fetch_add_relaxed atomic64_fetch_add +#else /* atomic64_fetch_add_relaxed */ + +#ifndef atomic64_fetch_add_acquire +static inline s64 +atomic64_fetch_add_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_add_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#endif + +#ifndef atomic64_fetch_add_release +static inline s64 +atomic64_fetch_add_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_add_relaxed(i, v); +} +#define atomic64_fetch_add_release atomic64_fetch_add_release +#endif + +#ifndef atomic64_fetch_add +static inline s64 +atomic64_fetch_add(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_add_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_add atomic64_fetch_add +#endif + +#endif /* atomic64_fetch_add_relaxed */ + +#ifndef atomic64_sub_return_relaxed +#define 
atomic64_sub_return_acquire atomic64_sub_return +#define atomic64_sub_return_release atomic64_sub_return +#define atomic64_sub_return_relaxed atomic64_sub_return +#else /* atomic64_sub_return_relaxed */ + +#ifndef atomic64_sub_return_acquire +static inline s64 +atomic64_sub_return_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_sub_return_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_sub_return_acquire atomic64_sub_return_acquire +#endif + +#ifndef atomic64_sub_return_release +static inline s64 +atomic64_sub_return_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_sub_return_relaxed(i, v); +} +#define atomic64_sub_return_release atomic64_sub_return_release +#endif + +#ifndef atomic64_sub_return +static inline s64 +atomic64_sub_return(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_sub_return_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_sub_return atomic64_sub_return +#endif + +#endif /* atomic64_sub_return_relaxed */ + +#ifndef atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_acquire atomic64_fetch_sub +#define atomic64_fetch_sub_release atomic64_fetch_sub +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub +#else /* atomic64_fetch_sub_relaxed */ + +#ifndef atomic64_fetch_sub_acquire +static inline s64 +atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_sub_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#endif + +#ifndef atomic64_fetch_sub_release +static inline s64 +atomic64_fetch_sub_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_sub_relaxed(i, v); +} +#define atomic64_fetch_sub_release atomic64_fetch_sub_release +#endif + +#ifndef atomic64_fetch_sub +static inline s64 +atomic64_fetch_sub(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = 
atomic64_fetch_sub_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_sub atomic64_fetch_sub +#endif + +#endif /* atomic64_fetch_sub_relaxed */ + +#ifndef atomic64_inc +static inline void +atomic64_inc(atomic64_t *v) +{ + atomic64_add(1, v); +} +#define atomic64_inc atomic64_inc +#endif + +#ifndef atomic64_inc_return_relaxed +#ifdef atomic64_inc_return +#define atomic64_inc_return_acquire atomic64_inc_return +#define atomic64_inc_return_release atomic64_inc_return +#define atomic64_inc_return_relaxed atomic64_inc_return +#endif /* atomic64_inc_return */ + +#ifndef atomic64_inc_return +static inline s64 +atomic64_inc_return(atomic64_t *v) +{ + return atomic64_add_return(1, v); +} +#define atomic64_inc_return atomic64_inc_return +#endif + +#ifndef atomic64_inc_return_acquire +static inline s64 +atomic64_inc_return_acquire(atomic64_t *v) +{ + return atomic64_add_return_acquire(1, v); +} +#define atomic64_inc_return_acquire atomic64_inc_return_acquire +#endif + +#ifndef atomic64_inc_return_release +static inline s64 +atomic64_inc_return_release(atomic64_t *v) +{ + return atomic64_add_return_release(1, v); +} +#define atomic64_inc_return_release atomic64_inc_return_release +#endif + +#ifndef atomic64_inc_return_relaxed +static inline s64 +atomic64_inc_return_relaxed(atomic64_t *v) +{ + return atomic64_add_return_relaxed(1, v); +} +#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed +#endif + +#else /* atomic64_inc_return_relaxed */ + +#ifndef atomic64_inc_return_acquire +static inline s64 +atomic64_inc_return_acquire(atomic64_t *v) +{ + s64 ret = atomic64_inc_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_inc_return_acquire atomic64_inc_return_acquire +#endif + +#ifndef atomic64_inc_return_release +static inline s64 +atomic64_inc_return_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_inc_return_relaxed(v); +} +#define atomic64_inc_return_release 
atomic64_inc_return_release +#endif + +#ifndef atomic64_inc_return +static inline s64 +atomic64_inc_return(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_inc_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_inc_return atomic64_inc_return +#endif + +#endif /* atomic64_inc_return_relaxed */ + +#ifndef atomic64_fetch_inc_relaxed +#ifdef atomic64_fetch_inc +#define atomic64_fetch_inc_acquire atomic64_fetch_inc +#define atomic64_fetch_inc_release atomic64_fetch_inc +#define atomic64_fetch_inc_relaxed atomic64_fetch_inc +#endif /* atomic64_fetch_inc */ + +#ifndef atomic64_fetch_inc +static inline s64 +atomic64_fetch_inc(atomic64_t *v) +{ + return atomic64_fetch_add(1, v); +} +#define atomic64_fetch_inc atomic64_fetch_inc +#endif + +#ifndef atomic64_fetch_inc_acquire +static inline s64 +atomic64_fetch_inc_acquire(atomic64_t *v) +{ + return atomic64_fetch_add_acquire(1, v); +} +#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire +#endif + +#ifndef atomic64_fetch_inc_release +static inline s64 +atomic64_fetch_inc_release(atomic64_t *v) +{ + return atomic64_fetch_add_release(1, v); +} +#define atomic64_fetch_inc_release atomic64_fetch_inc_release +#endif + +#ifndef atomic64_fetch_inc_relaxed +static inline s64 +atomic64_fetch_inc_relaxed(atomic64_t *v) +{ + return atomic64_fetch_add_relaxed(1, v); +} +#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed +#endif + +#else /* atomic64_fetch_inc_relaxed */ + +#ifndef atomic64_fetch_inc_acquire +static inline s64 +atomic64_fetch_inc_acquire(atomic64_t *v) +{ + s64 ret = atomic64_fetch_inc_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire +#endif + +#ifndef atomic64_fetch_inc_release +static inline s64 +atomic64_fetch_inc_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_inc_relaxed(v); +} +#define atomic64_fetch_inc_release atomic64_fetch_inc_release 
+#endif + +#ifndef atomic64_fetch_inc +static inline s64 +atomic64_fetch_inc(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_inc_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_inc atomic64_fetch_inc +#endif + +#endif /* atomic64_fetch_inc_relaxed */ + +#ifndef atomic64_dec +static inline void +atomic64_dec(atomic64_t *v) +{ + atomic64_sub(1, v); +} +#define atomic64_dec atomic64_dec +#endif + +#ifndef atomic64_dec_return_relaxed +#ifdef atomic64_dec_return +#define atomic64_dec_return_acquire atomic64_dec_return +#define atomic64_dec_return_release atomic64_dec_return +#define atomic64_dec_return_relaxed atomic64_dec_return +#endif /* atomic64_dec_return */ + +#ifndef atomic64_dec_return +static inline s64 +atomic64_dec_return(atomic64_t *v) +{ + return atomic64_sub_return(1, v); +} +#define atomic64_dec_return atomic64_dec_return +#endif + +#ifndef atomic64_dec_return_acquire +static inline s64 +atomic64_dec_return_acquire(atomic64_t *v) +{ + return atomic64_sub_return_acquire(1, v); +} +#define atomic64_dec_return_acquire atomic64_dec_return_acquire +#endif + +#ifndef atomic64_dec_return_release +static inline s64 +atomic64_dec_return_release(atomic64_t *v) +{ + return atomic64_sub_return_release(1, v); +} +#define atomic64_dec_return_release atomic64_dec_return_release +#endif + +#ifndef atomic64_dec_return_relaxed +static inline s64 +atomic64_dec_return_relaxed(atomic64_t *v) +{ + return atomic64_sub_return_relaxed(1, v); +} +#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed +#endif + +#else /* atomic64_dec_return_relaxed */ + +#ifndef atomic64_dec_return_acquire +static inline s64 +atomic64_dec_return_acquire(atomic64_t *v) +{ + s64 ret = atomic64_dec_return_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_dec_return_acquire atomic64_dec_return_acquire +#endif + +#ifndef atomic64_dec_return_release +static inline s64 
+atomic64_dec_return_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_dec_return_relaxed(v); +} +#define atomic64_dec_return_release atomic64_dec_return_release +#endif + +#ifndef atomic64_dec_return +static inline s64 +atomic64_dec_return(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_dec_return_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_dec_return atomic64_dec_return +#endif + +#endif /* atomic64_dec_return_relaxed */ + +#ifndef atomic64_fetch_dec_relaxed +#ifdef atomic64_fetch_dec +#define atomic64_fetch_dec_acquire atomic64_fetch_dec +#define atomic64_fetch_dec_release atomic64_fetch_dec +#define atomic64_fetch_dec_relaxed atomic64_fetch_dec +#endif /* atomic64_fetch_dec */ + +#ifndef atomic64_fetch_dec +static inline s64 +atomic64_fetch_dec(atomic64_t *v) +{ + return atomic64_fetch_sub(1, v); +} +#define atomic64_fetch_dec atomic64_fetch_dec +#endif + +#ifndef atomic64_fetch_dec_acquire +static inline s64 +atomic64_fetch_dec_acquire(atomic64_t *v) +{ + return atomic64_fetch_sub_acquire(1, v); +} +#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire +#endif + +#ifndef atomic64_fetch_dec_release +static inline s64 +atomic64_fetch_dec_release(atomic64_t *v) +{ + return atomic64_fetch_sub_release(1, v); +} +#define atomic64_fetch_dec_release atomic64_fetch_dec_release +#endif + +#ifndef atomic64_fetch_dec_relaxed +static inline s64 +atomic64_fetch_dec_relaxed(atomic64_t *v) +{ + return atomic64_fetch_sub_relaxed(1, v); +} +#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed +#endif + +#else /* atomic64_fetch_dec_relaxed */ + +#ifndef atomic64_fetch_dec_acquire +static inline s64 +atomic64_fetch_dec_acquire(atomic64_t *v) +{ + s64 ret = atomic64_fetch_dec_relaxed(v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire +#endif + +#ifndef atomic64_fetch_dec_release +static inline s64 
+atomic64_fetch_dec_release(atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_dec_relaxed(v); +} +#define atomic64_fetch_dec_release atomic64_fetch_dec_release +#endif + +#ifndef atomic64_fetch_dec +static inline s64 +atomic64_fetch_dec(atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_dec_relaxed(v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_dec atomic64_fetch_dec +#endif + +#endif /* atomic64_fetch_dec_relaxed */ + +#ifndef atomic64_fetch_and_relaxed +#define atomic64_fetch_and_acquire atomic64_fetch_and +#define atomic64_fetch_and_release atomic64_fetch_and +#define atomic64_fetch_and_relaxed atomic64_fetch_and +#else /* atomic64_fetch_and_relaxed */ + +#ifndef atomic64_fetch_and_acquire +static inline s64 +atomic64_fetch_and_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_and_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#endif + +#ifndef atomic64_fetch_and_release +static inline s64 +atomic64_fetch_and_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_and_relaxed(i, v); +} +#define atomic64_fetch_and_release atomic64_fetch_and_release +#endif + +#ifndef atomic64_fetch_and +static inline s64 +atomic64_fetch_and(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_and_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_and atomic64_fetch_and +#endif + +#endif /* atomic64_fetch_and_relaxed */ + +#ifndef atomic64_andnot +static inline void +atomic64_andnot(s64 i, atomic64_t *v) +{ + atomic64_and(~i, v); +} +#define atomic64_andnot atomic64_andnot +#endif + +#ifndef atomic64_fetch_andnot_relaxed +#ifdef atomic64_fetch_andnot +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot +#define atomic64_fetch_andnot_release atomic64_fetch_andnot +#define atomic64_fetch_andnot_relaxed 
atomic64_fetch_andnot +#endif /* atomic64_fetch_andnot */ + +#ifndef atomic64_fetch_andnot +static inline s64 +atomic64_fetch_andnot(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and(~i, v); +} +#define atomic64_fetch_andnot atomic64_fetch_andnot +#endif + +#ifndef atomic64_fetch_andnot_acquire +static inline s64 +atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and_acquire(~i, v); +} +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#endif + +#ifndef atomic64_fetch_andnot_release +static inline s64 +atomic64_fetch_andnot_release(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and_release(~i, v); +} +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#endif + +#ifndef atomic64_fetch_andnot_relaxed +static inline s64 +atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) +{ + return atomic64_fetch_and_relaxed(~i, v); +} +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#endif + +#else /* atomic64_fetch_andnot_relaxed */ + +#ifndef atomic64_fetch_andnot_acquire +static inline s64 +atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_andnot_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#endif + +#ifndef atomic64_fetch_andnot_release +static inline s64 +atomic64_fetch_andnot_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_andnot_relaxed(i, v); +} +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#endif + +#ifndef atomic64_fetch_andnot +static inline s64 +atomic64_fetch_andnot(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_andnot_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_andnot atomic64_fetch_andnot +#endif + +#endif /* atomic64_fetch_andnot_relaxed */ + +#ifndef atomic64_fetch_or_relaxed +#define atomic64_fetch_or_acquire 
atomic64_fetch_or +#define atomic64_fetch_or_release atomic64_fetch_or +#define atomic64_fetch_or_relaxed atomic64_fetch_or +#else /* atomic64_fetch_or_relaxed */ + +#ifndef atomic64_fetch_or_acquire +static inline s64 +atomic64_fetch_or_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_or_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#endif + +#ifndef atomic64_fetch_or_release +static inline s64 +atomic64_fetch_or_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_or_relaxed(i, v); +} +#define atomic64_fetch_or_release atomic64_fetch_or_release +#endif + +#ifndef atomic64_fetch_or +static inline s64 +atomic64_fetch_or(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_or_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_fetch_or atomic64_fetch_or +#endif + +#endif /* atomic64_fetch_or_relaxed */ + +#ifndef atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_acquire atomic64_fetch_xor +#define atomic64_fetch_xor_release atomic64_fetch_xor +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor +#else /* atomic64_fetch_xor_relaxed */ + +#ifndef atomic64_fetch_xor_acquire +static inline s64 +atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) +{ + s64 ret = atomic64_fetch_xor_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#endif + +#ifndef atomic64_fetch_xor_release +static inline s64 +atomic64_fetch_xor_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return atomic64_fetch_xor_relaxed(i, v); +} +#define atomic64_fetch_xor_release atomic64_fetch_xor_release +#endif + +#ifndef atomic64_fetch_xor +static inline s64 +atomic64_fetch_xor(s64 i, atomic64_t *v) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_fetch_xor_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define 
atomic64_fetch_xor atomic64_fetch_xor +#endif + +#endif /* atomic64_fetch_xor_relaxed */ + +#ifndef atomic64_xchg_relaxed +#define atomic64_xchg_acquire atomic64_xchg +#define atomic64_xchg_release atomic64_xchg +#define atomic64_xchg_relaxed atomic64_xchg +#else /* atomic64_xchg_relaxed */ + +#ifndef atomic64_xchg_acquire +static inline s64 +atomic64_xchg_acquire(atomic64_t *v, s64 i) +{ + s64 ret = atomic64_xchg_relaxed(v, i); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_xchg_acquire atomic64_xchg_acquire +#endif + +#ifndef atomic64_xchg_release +static inline s64 +atomic64_xchg_release(atomic64_t *v, s64 i) +{ + __atomic_release_fence(); + return atomic64_xchg_relaxed(v, i); +} +#define atomic64_xchg_release atomic64_xchg_release +#endif + +#ifndef atomic64_xchg +static inline s64 +atomic64_xchg(atomic64_t *v, s64 i) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = atomic64_xchg_relaxed(v, i); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_xchg atomic64_xchg +#endif + +#endif /* atomic64_xchg_relaxed */ + +#ifndef atomic64_cmpxchg_relaxed +#define atomic64_cmpxchg_acquire atomic64_cmpxchg +#define atomic64_cmpxchg_release atomic64_cmpxchg +#define atomic64_cmpxchg_relaxed atomic64_cmpxchg +#else /* atomic64_cmpxchg_relaxed */ + +#ifndef atomic64_cmpxchg_acquire +static inline s64 +atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) +{ + s64 ret = atomic64_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; +} +#define atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire +#endif + +#ifndef atomic64_cmpxchg_release +static inline s64 +atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) +{ + __atomic_release_fence(); + return atomic64_cmpxchg_relaxed(v, old, new); +} +#define atomic64_cmpxchg_release atomic64_cmpxchg_release +#endif + +#ifndef atomic64_cmpxchg +static inline s64 +atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +{ + s64 ret; + __atomic_pre_full_fence(); + ret = 
atomic64_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_cmpxchg atomic64_cmpxchg +#endif + +#endif /* atomic64_cmpxchg_relaxed */ + +#ifndef atomic64_try_cmpxchg_relaxed +#ifdef atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg +#endif /* atomic64_try_cmpxchg */ + +#ifndef atomic64_try_cmpxchg +static inline bool +atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg atomic64_try_cmpxchg +#endif + +#ifndef atomic64_try_cmpxchg_acquire +static inline bool +atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg_acquire(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire +#endif + +#ifndef atomic64_try_cmpxchg_release +static inline bool +atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg_release(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release +#endif + +#ifndef atomic64_try_cmpxchg_relaxed +static inline bool +atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) +{ + s64 r, o = *old; + r = atomic64_cmpxchg_relaxed(v, o, new); + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} +#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed +#endif + +#else /* atomic64_try_cmpxchg_relaxed */ + +#ifndef atomic64_try_cmpxchg_acquire +static inline bool +atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) +{ + bool ret = atomic64_try_cmpxchg_relaxed(v, old, new); + __atomic_acquire_fence(); + return ret; 
+} +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire +#endif + +#ifndef atomic64_try_cmpxchg_release +static inline bool +atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) +{ + __atomic_release_fence(); + return atomic64_try_cmpxchg_relaxed(v, old, new); +} +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release +#endif + +#ifndef atomic64_try_cmpxchg +static inline bool +atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) +{ + bool ret; + __atomic_pre_full_fence(); + ret = atomic64_try_cmpxchg_relaxed(v, old, new); + __atomic_post_full_fence(); + return ret; +} +#define atomic64_try_cmpxchg atomic64_try_cmpxchg +#endif + +#endif /* atomic64_try_cmpxchg_relaxed */ + +#ifndef atomic64_sub_and_test +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline bool +atomic64_sub_and_test(s64 i, atomic64_t *v) +{ + return atomic64_sub_return(i, v) == 0; +} +#define atomic64_sub_and_test atomic64_sub_and_test +#endif + +#ifndef atomic64_dec_and_test +/** + * atomic64_dec_and_test - decrement and test + * @v: pointer of type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline bool +atomic64_dec_and_test(atomic64_t *v) +{ + return atomic64_dec_return(v) == 0; +} +#define atomic64_dec_and_test atomic64_dec_and_test +#endif + +#ifndef atomic64_inc_and_test +/** + * atomic64_inc_and_test - increment and test + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. 
+ */ +static inline bool +atomic64_inc_and_test(atomic64_t *v) +{ + return atomic64_inc_return(v) == 0; +} +#define atomic64_inc_and_test atomic64_inc_and_test +#endif + +#ifndef atomic64_add_negative +/** + * atomic64_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline bool +atomic64_add_negative(s64 i, atomic64_t *v) +{ + return atomic64_add_return(i, v) < 0; +} +#define atomic64_add_negative atomic64_add_negative +#endif + +#ifndef atomic64_fetch_add_unless +/** + * atomic64_fetch_add_unless - add unless the number is already a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns original value of @v + */ +static inline s64 +atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) +{ + s64 c = atomic64_read(v); + + do { + if (unlikely(c == u)) + break; + } while (!atomic64_try_cmpxchg(v, &c, c + a)); + + return c; +} +#define atomic64_fetch_add_unless atomic64_fetch_add_unless +#endif + +#ifndef atomic64_add_unless +/** + * atomic64_add_unless - add unless the number is already a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. + */ +static inline bool +atomic64_add_unless(atomic64_t *v, s64 a, s64 u) +{ + return atomic64_fetch_add_unless(v, a, u) != u; +} +#define atomic64_add_unless atomic64_add_unless +#endif + +#ifndef atomic64_inc_not_zero +/** + * atomic64_inc_not_zero - increment unless the number is zero + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. 
+ */ +static inline bool +atomic64_inc_not_zero(atomic64_t *v) +{ + return atomic64_add_unless(v, 1, 0); +} +#define atomic64_inc_not_zero atomic64_inc_not_zero +#endif + +#ifndef atomic64_inc_unless_negative +static inline bool +atomic64_inc_unless_negative(atomic64_t *v) +{ + s64 c = atomic64_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic64_try_cmpxchg(v, &c, c + 1)); + + return true; +} +#define atomic64_inc_unless_negative atomic64_inc_unless_negative +#endif + +#ifndef atomic64_dec_unless_positive +static inline bool +atomic64_dec_unless_positive(atomic64_t *v) +{ + s64 c = atomic64_read(v); + + do { + if (unlikely(c > 0)) + return false; + } while (!atomic64_try_cmpxchg(v, &c, c - 1)); + + return true; +} +#define atomic64_dec_unless_positive atomic64_dec_unless_positive +#endif + +#ifndef atomic64_dec_if_positive +static inline s64 +atomic64_dec_if_positive(atomic64_t *v) +{ + s64 dec, c = atomic64_read(v); + + do { + dec = c - 1; + if (unlikely(dec < 0)) + break; + } while (!atomic64_try_cmpxchg(v, &c, dec)); + + return dec; +} +#define atomic64_dec_if_positive atomic64_dec_if_positive +#endif + +#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) +#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) + +#endif /* _LINUX_ATOMIC_FALLBACK_H */ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 1e8e88bdaf09..4c0d009a46f0 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -25,14 +25,6 @@ * See Documentation/memory-barriers.txt for ACQUIRE/RELEASE definitions. */ -#ifndef atomic_read_acquire -#define atomic_read_acquire(v) smp_load_acquire(&(v)->counter) -#endif - -#ifndef atomic_set_release -#define atomic_set_release(v, i) smp_store_release(&(v)->counter, (i)) -#endif - /* * The idea here is to build acquire/release variants by adding explicit * barriers on top of the relaxed variant. 
In the case where the relaxed @@ -79,1238 +71,7 @@ __ret; \ }) -/* atomic_add_return_relaxed */ -#ifndef atomic_add_return_relaxed -#define atomic_add_return_relaxed atomic_add_return -#define atomic_add_return_acquire atomic_add_return -#define atomic_add_return_release atomic_add_return - -#else /* atomic_add_return_relaxed */ - -#ifndef atomic_add_return_acquire -#define atomic_add_return_acquire(...) \ - __atomic_op_acquire(atomic_add_return, __VA_ARGS__) -#endif - -#ifndef atomic_add_return_release -#define atomic_add_return_release(...) \ - __atomic_op_release(atomic_add_return, __VA_ARGS__) -#endif - -#ifndef atomic_add_return -#define atomic_add_return(...) \ - __atomic_op_fence(atomic_add_return, __VA_ARGS__) -#endif -#endif /* atomic_add_return_relaxed */ - -#ifndef atomic_inc -#define atomic_inc(v) atomic_add(1, (v)) -#endif - -/* atomic_inc_return_relaxed */ -#ifndef atomic_inc_return_relaxed - -#ifndef atomic_inc_return -#define atomic_inc_return(v) atomic_add_return(1, (v)) -#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v)) -#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v)) -#define atomic_inc_return_release(v) atomic_add_return_release(1, (v)) -#else /* atomic_inc_return */ -#define atomic_inc_return_relaxed atomic_inc_return -#define atomic_inc_return_acquire atomic_inc_return -#define atomic_inc_return_release atomic_inc_return -#endif /* atomic_inc_return */ - -#else /* atomic_inc_return_relaxed */ - -#ifndef atomic_inc_return_acquire -#define atomic_inc_return_acquire(...) \ - __atomic_op_acquire(atomic_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic_inc_return_release -#define atomic_inc_return_release(...) \ - __atomic_op_release(atomic_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic_inc_return -#define atomic_inc_return(...) 
\ - __atomic_op_fence(atomic_inc_return, __VA_ARGS__) -#endif -#endif /* atomic_inc_return_relaxed */ - -/* atomic_sub_return_relaxed */ -#ifndef atomic_sub_return_relaxed -#define atomic_sub_return_relaxed atomic_sub_return -#define atomic_sub_return_acquire atomic_sub_return -#define atomic_sub_return_release atomic_sub_return - -#else /* atomic_sub_return_relaxed */ - -#ifndef atomic_sub_return_acquire -#define atomic_sub_return_acquire(...) \ - __atomic_op_acquire(atomic_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic_sub_return_release -#define atomic_sub_return_release(...) \ - __atomic_op_release(atomic_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic_sub_return -#define atomic_sub_return(...) \ - __atomic_op_fence(atomic_sub_return, __VA_ARGS__) -#endif -#endif /* atomic_sub_return_relaxed */ - -#ifndef atomic_dec -#define atomic_dec(v) atomic_sub(1, (v)) -#endif - -/* atomic_dec_return_relaxed */ -#ifndef atomic_dec_return_relaxed - -#ifndef atomic_dec_return -#define atomic_dec_return(v) atomic_sub_return(1, (v)) -#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v)) -#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v)) -#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) -#else /* atomic_dec_return */ -#define atomic_dec_return_relaxed atomic_dec_return -#define atomic_dec_return_acquire atomic_dec_return -#define atomic_dec_return_release atomic_dec_return -#endif /* atomic_dec_return */ - -#else /* atomic_dec_return_relaxed */ - -#ifndef atomic_dec_return_acquire -#define atomic_dec_return_acquire(...) \ - __atomic_op_acquire(atomic_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic_dec_return_release -#define atomic_dec_return_release(...) \ - __atomic_op_release(atomic_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic_dec_return -#define atomic_dec_return(...) 
\ - __atomic_op_fence(atomic_dec_return, __VA_ARGS__) -#endif -#endif /* atomic_dec_return_relaxed */ - - -/* atomic_fetch_add_relaxed */ -#ifndef atomic_fetch_add_relaxed -#define atomic_fetch_add_relaxed atomic_fetch_add -#define atomic_fetch_add_acquire atomic_fetch_add -#define atomic_fetch_add_release atomic_fetch_add - -#else /* atomic_fetch_add_relaxed */ - -#ifndef atomic_fetch_add_acquire -#define atomic_fetch_add_acquire(...) \ - __atomic_op_acquire(atomic_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_add_release -#define atomic_fetch_add_release(...) \ - __atomic_op_release(atomic_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_add -#define atomic_fetch_add(...) \ - __atomic_op_fence(atomic_fetch_add, __VA_ARGS__) -#endif -#endif /* atomic_fetch_add_relaxed */ - -/* atomic_fetch_inc_relaxed */ -#ifndef atomic_fetch_inc_relaxed - -#ifndef atomic_fetch_inc -#define atomic_fetch_inc(v) atomic_fetch_add(1, (v)) -#define atomic_fetch_inc_relaxed(v) atomic_fetch_add_relaxed(1, (v)) -#define atomic_fetch_inc_acquire(v) atomic_fetch_add_acquire(1, (v)) -#define atomic_fetch_inc_release(v) atomic_fetch_add_release(1, (v)) -#else /* atomic_fetch_inc */ -#define atomic_fetch_inc_relaxed atomic_fetch_inc -#define atomic_fetch_inc_acquire atomic_fetch_inc -#define atomic_fetch_inc_release atomic_fetch_inc -#endif /* atomic_fetch_inc */ - -#else /* atomic_fetch_inc_relaxed */ - -#ifndef atomic_fetch_inc_acquire -#define atomic_fetch_inc_acquire(...) \ - __atomic_op_acquire(atomic_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_inc_release -#define atomic_fetch_inc_release(...) \ - __atomic_op_release(atomic_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_inc -#define atomic_fetch_inc(...) 
\ - __atomic_op_fence(atomic_fetch_inc, __VA_ARGS__) -#endif -#endif /* atomic_fetch_inc_relaxed */ - -/* atomic_fetch_sub_relaxed */ -#ifndef atomic_fetch_sub_relaxed -#define atomic_fetch_sub_relaxed atomic_fetch_sub -#define atomic_fetch_sub_acquire atomic_fetch_sub -#define atomic_fetch_sub_release atomic_fetch_sub - -#else /* atomic_fetch_sub_relaxed */ - -#ifndef atomic_fetch_sub_acquire -#define atomic_fetch_sub_acquire(...) \ - __atomic_op_acquire(atomic_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_sub_release -#define atomic_fetch_sub_release(...) \ - __atomic_op_release(atomic_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_sub -#define atomic_fetch_sub(...) \ - __atomic_op_fence(atomic_fetch_sub, __VA_ARGS__) -#endif -#endif /* atomic_fetch_sub_relaxed */ - -/* atomic_fetch_dec_relaxed */ -#ifndef atomic_fetch_dec_relaxed - -#ifndef atomic_fetch_dec -#define atomic_fetch_dec(v) atomic_fetch_sub(1, (v)) -#define atomic_fetch_dec_relaxed(v) atomic_fetch_sub_relaxed(1, (v)) -#define atomic_fetch_dec_acquire(v) atomic_fetch_sub_acquire(1, (v)) -#define atomic_fetch_dec_release(v) atomic_fetch_sub_release(1, (v)) -#else /* atomic_fetch_dec */ -#define atomic_fetch_dec_relaxed atomic_fetch_dec -#define atomic_fetch_dec_acquire atomic_fetch_dec -#define atomic_fetch_dec_release atomic_fetch_dec -#endif /* atomic_fetch_dec */ - -#else /* atomic_fetch_dec_relaxed */ - -#ifndef atomic_fetch_dec_acquire -#define atomic_fetch_dec_acquire(...) \ - __atomic_op_acquire(atomic_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_dec_release -#define atomic_fetch_dec_release(...) \ - __atomic_op_release(atomic_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_dec -#define atomic_fetch_dec(...) 
\ - __atomic_op_fence(atomic_fetch_dec, __VA_ARGS__) -#endif -#endif /* atomic_fetch_dec_relaxed */ - -/* atomic_fetch_or_relaxed */ -#ifndef atomic_fetch_or_relaxed -#define atomic_fetch_or_relaxed atomic_fetch_or -#define atomic_fetch_or_acquire atomic_fetch_or -#define atomic_fetch_or_release atomic_fetch_or - -#else /* atomic_fetch_or_relaxed */ - -#ifndef atomic_fetch_or_acquire -#define atomic_fetch_or_acquire(...) \ - __atomic_op_acquire(atomic_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_or_release -#define atomic_fetch_or_release(...) \ - __atomic_op_release(atomic_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_or -#define atomic_fetch_or(...) \ - __atomic_op_fence(atomic_fetch_or, __VA_ARGS__) -#endif -#endif /* atomic_fetch_or_relaxed */ - -/* atomic_fetch_and_relaxed */ -#ifndef atomic_fetch_and_relaxed -#define atomic_fetch_and_relaxed atomic_fetch_and -#define atomic_fetch_and_acquire atomic_fetch_and -#define atomic_fetch_and_release atomic_fetch_and - -#else /* atomic_fetch_and_relaxed */ - -#ifndef atomic_fetch_and_acquire -#define atomic_fetch_and_acquire(...) \ - __atomic_op_acquire(atomic_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_and_release -#define atomic_fetch_and_release(...) \ - __atomic_op_release(atomic_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_and -#define atomic_fetch_and(...) 
\ - __atomic_op_fence(atomic_fetch_and, __VA_ARGS__) -#endif -#endif /* atomic_fetch_and_relaxed */ - -#ifndef atomic_andnot -#define atomic_andnot(i, v) atomic_and(~(int)(i), (v)) -#endif - -#ifndef atomic_fetch_andnot_relaxed - -#ifndef atomic_fetch_andnot -#define atomic_fetch_andnot(i, v) atomic_fetch_and(~(int)(i), (v)) -#define atomic_fetch_andnot_relaxed(i, v) atomic_fetch_and_relaxed(~(int)(i), (v)) -#define atomic_fetch_andnot_acquire(i, v) atomic_fetch_and_acquire(~(int)(i), (v)) -#define atomic_fetch_andnot_release(i, v) atomic_fetch_and_release(~(int)(i), (v)) -#else /* atomic_fetch_andnot */ -#define atomic_fetch_andnot_relaxed atomic_fetch_andnot -#define atomic_fetch_andnot_acquire atomic_fetch_andnot -#define atomic_fetch_andnot_release atomic_fetch_andnot -#endif /* atomic_fetch_andnot */ - -#else /* atomic_fetch_andnot_relaxed */ - -#ifndef atomic_fetch_andnot_acquire -#define atomic_fetch_andnot_acquire(...) \ - __atomic_op_acquire(atomic_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_andnot_release -#define atomic_fetch_andnot_release(...) \ - __atomic_op_release(atomic_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_andnot -#define atomic_fetch_andnot(...) \ - __atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__) -#endif -#endif /* atomic_fetch_andnot_relaxed */ - -/* atomic_fetch_xor_relaxed */ -#ifndef atomic_fetch_xor_relaxed -#define atomic_fetch_xor_relaxed atomic_fetch_xor -#define atomic_fetch_xor_acquire atomic_fetch_xor -#define atomic_fetch_xor_release atomic_fetch_xor - -#else /* atomic_fetch_xor_relaxed */ - -#ifndef atomic_fetch_xor_acquire -#define atomic_fetch_xor_acquire(...) \ - __atomic_op_acquire(atomic_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_xor_release -#define atomic_fetch_xor_release(...) \ - __atomic_op_release(atomic_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic_fetch_xor -#define atomic_fetch_xor(...) 
\ - __atomic_op_fence(atomic_fetch_xor, __VA_ARGS__) -#endif -#endif /* atomic_fetch_xor_relaxed */ - - -/* atomic_xchg_relaxed */ -#ifndef atomic_xchg_relaxed -#define atomic_xchg_relaxed atomic_xchg -#define atomic_xchg_acquire atomic_xchg -#define atomic_xchg_release atomic_xchg - -#else /* atomic_xchg_relaxed */ - -#ifndef atomic_xchg_acquire -#define atomic_xchg_acquire(...) \ - __atomic_op_acquire(atomic_xchg, __VA_ARGS__) -#endif - -#ifndef atomic_xchg_release -#define atomic_xchg_release(...) \ - __atomic_op_release(atomic_xchg, __VA_ARGS__) -#endif - -#ifndef atomic_xchg -#define atomic_xchg(...) \ - __atomic_op_fence(atomic_xchg, __VA_ARGS__) -#endif -#endif /* atomic_xchg_relaxed */ - -/* atomic_cmpxchg_relaxed */ -#ifndef atomic_cmpxchg_relaxed -#define atomic_cmpxchg_relaxed atomic_cmpxchg -#define atomic_cmpxchg_acquire atomic_cmpxchg -#define atomic_cmpxchg_release atomic_cmpxchg - -#else /* atomic_cmpxchg_relaxed */ - -#ifndef atomic_cmpxchg_acquire -#define atomic_cmpxchg_acquire(...) \ - __atomic_op_acquire(atomic_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic_cmpxchg_release -#define atomic_cmpxchg_release(...) \ - __atomic_op_release(atomic_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic_cmpxchg -#define atomic_cmpxchg(...) 
\ - __atomic_op_fence(atomic_cmpxchg, __VA_ARGS__) -#endif -#endif /* atomic_cmpxchg_relaxed */ - -#ifndef atomic_try_cmpxchg - -#define __atomic_try_cmpxchg(type, _p, _po, _n) \ -({ \ - typeof(_po) __po = (_po); \ - typeof(*(_po)) __r, __o = *__po; \ - __r = atomic_cmpxchg##type((_p), __o, (_n)); \ - if (unlikely(__r != __o)) \ - *__po = __r; \ - likely(__r == __o); \ -}) - -#define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n) -#define atomic_try_cmpxchg_relaxed(_p, _po, _n) __atomic_try_cmpxchg(_relaxed, _p, _po, _n) -#define atomic_try_cmpxchg_acquire(_p, _po, _n) __atomic_try_cmpxchg(_acquire, _p, _po, _n) -#define atomic_try_cmpxchg_release(_p, _po, _n) __atomic_try_cmpxchg(_release, _p, _po, _n) - -#else /* atomic_try_cmpxchg */ -#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg -#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg -#define atomic_try_cmpxchg_release atomic_try_cmpxchg -#endif /* atomic_try_cmpxchg */ - -/* cmpxchg_relaxed */ -#ifndef cmpxchg_relaxed -#define cmpxchg_relaxed cmpxchg -#define cmpxchg_acquire cmpxchg -#define cmpxchg_release cmpxchg - -#else /* cmpxchg_relaxed */ - -#ifndef cmpxchg_acquire -#define cmpxchg_acquire(...) \ - __atomic_op_acquire(cmpxchg, __VA_ARGS__) -#endif - -#ifndef cmpxchg_release -#define cmpxchg_release(...) \ - __atomic_op_release(cmpxchg, __VA_ARGS__) -#endif - -#ifndef cmpxchg -#define cmpxchg(...) \ - __atomic_op_fence(cmpxchg, __VA_ARGS__) -#endif -#endif /* cmpxchg_relaxed */ - -/* cmpxchg64_relaxed */ -#ifndef cmpxchg64_relaxed -#define cmpxchg64_relaxed cmpxchg64 -#define cmpxchg64_acquire cmpxchg64 -#define cmpxchg64_release cmpxchg64 - -#else /* cmpxchg64_relaxed */ - -#ifndef cmpxchg64_acquire -#define cmpxchg64_acquire(...) \ - __atomic_op_acquire(cmpxchg64, __VA_ARGS__) -#endif - -#ifndef cmpxchg64_release -#define cmpxchg64_release(...) \ - __atomic_op_release(cmpxchg64, __VA_ARGS__) -#endif - -#ifndef cmpxchg64 -#define cmpxchg64(...) 
\ - __atomic_op_fence(cmpxchg64, __VA_ARGS__) -#endif -#endif /* cmpxchg64_relaxed */ - -/* xchg_relaxed */ -#ifndef xchg_relaxed -#define xchg_relaxed xchg -#define xchg_acquire xchg -#define xchg_release xchg - -#else /* xchg_relaxed */ - -#ifndef xchg_acquire -#define xchg_acquire(...) __atomic_op_acquire(xchg, __VA_ARGS__) -#endif - -#ifndef xchg_release -#define xchg_release(...) __atomic_op_release(xchg, __VA_ARGS__) -#endif - -#ifndef xchg -#define xchg(...) __atomic_op_fence(xchg, __VA_ARGS__) -#endif -#endif /* xchg_relaxed */ - -/** - * atomic_fetch_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns the original value of @v. - */ -#ifndef atomic_fetch_add_unless -static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) -{ - int c = atomic_read(v); - - do { - if (unlikely(c == u)) - break; - } while (!atomic_try_cmpxchg(v, &c, c + a)); - - return c; -} -#endif - -/** - * atomic_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns true if the addition was done. - */ -static inline bool atomic_add_unless(atomic_t *v, int a, int u) -{ - return atomic_fetch_add_unless(v, a, u) != u; -} - -/** - * atomic_inc_not_zero - increment unless the number is zero - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1, if @v is non-zero. - * Returns true if the increment was done. - */ -#ifndef atomic_inc_not_zero -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) -#endif - -/** - * atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. 
- */ -#ifndef atomic_inc_and_test -static inline bool atomic_inc_and_test(atomic_t *v) -{ - return atomic_inc_return(v) == 0; -} -#endif - -/** - * atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -#ifndef atomic_dec_and_test -static inline bool atomic_dec_and_test(atomic_t *v) -{ - return atomic_dec_return(v) == 0; -} -#endif - -/** - * atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic_sub_and_test -static inline bool atomic_sub_and_test(int i, atomic_t *v) -{ - return atomic_sub_return(i, v) == 0; -} -#endif - -/** - * atomic_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. 
- */ -#ifndef atomic_add_negative -static inline bool atomic_add_negative(int i, atomic_t *v) -{ - return atomic_add_return(i, v) < 0; -} -#endif - -#ifndef atomic_inc_unless_negative -static inline bool atomic_inc_unless_negative(atomic_t *v) -{ - int c = atomic_read(v); - - do { - if (unlikely(c < 0)) - return false; - } while (!atomic_try_cmpxchg(v, &c, c + 1)); - - return true; -} -#endif - -#ifndef atomic_dec_unless_positive -static inline bool atomic_dec_unless_positive(atomic_t *v) -{ - int c = atomic_read(v); - - do { - if (unlikely(c > 0)) - return false; - } while (!atomic_try_cmpxchg(v, &c, c - 1)); - - return true; -} -#endif - -/* - * atomic_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - * - * The function returns the old value of *v minus 1, even if - * the atomic variable, v, was not decremented. - */ -#ifndef atomic_dec_if_positive -static inline int atomic_dec_if_positive(atomic_t *v) -{ - int dec, c = atomic_read(v); - - do { - dec = c - 1; - if (unlikely(dec < 0)) - break; - } while (!atomic_try_cmpxchg(v, &c, dec)); - - return dec; -} -#endif - -#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) -#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) - -#ifdef CONFIG_GENERIC_ATOMIC64 -#include -#endif - -#ifndef atomic64_read_acquire -#define atomic64_read_acquire(v) smp_load_acquire(&(v)->counter) -#endif - -#ifndef atomic64_set_release -#define atomic64_set_release(v, i) smp_store_release(&(v)->counter, (i)) -#endif - -/* atomic64_add_return_relaxed */ -#ifndef atomic64_add_return_relaxed -#define atomic64_add_return_relaxed atomic64_add_return -#define atomic64_add_return_acquire atomic64_add_return -#define atomic64_add_return_release atomic64_add_return - -#else /* atomic64_add_return_relaxed */ - -#ifndef atomic64_add_return_acquire -#define atomic64_add_return_acquire(...) 
\ - __atomic_op_acquire(atomic64_add_return, __VA_ARGS__) -#endif - -#ifndef atomic64_add_return_release -#define atomic64_add_return_release(...) \ - __atomic_op_release(atomic64_add_return, __VA_ARGS__) -#endif - -#ifndef atomic64_add_return -#define atomic64_add_return(...) \ - __atomic_op_fence(atomic64_add_return, __VA_ARGS__) -#endif -#endif /* atomic64_add_return_relaxed */ - -#ifndef atomic64_inc -#define atomic64_inc(v) atomic64_add(1, (v)) -#endif - -/* atomic64_inc_return_relaxed */ -#ifndef atomic64_inc_return_relaxed - -#ifndef atomic64_inc_return -#define atomic64_inc_return(v) atomic64_add_return(1, (v)) -#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v)) -#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v)) -#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v)) -#else /* atomic64_inc_return */ -#define atomic64_inc_return_relaxed atomic64_inc_return -#define atomic64_inc_return_acquire atomic64_inc_return -#define atomic64_inc_return_release atomic64_inc_return -#endif /* atomic64_inc_return */ - -#else /* atomic64_inc_return_relaxed */ - -#ifndef atomic64_inc_return_acquire -#define atomic64_inc_return_acquire(...) \ - __atomic_op_acquire(atomic64_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic64_inc_return_release -#define atomic64_inc_return_release(...) \ - __atomic_op_release(atomic64_inc_return, __VA_ARGS__) -#endif - -#ifndef atomic64_inc_return -#define atomic64_inc_return(...) \ - __atomic_op_fence(atomic64_inc_return, __VA_ARGS__) -#endif -#endif /* atomic64_inc_return_relaxed */ - - -/* atomic64_sub_return_relaxed */ -#ifndef atomic64_sub_return_relaxed -#define atomic64_sub_return_relaxed atomic64_sub_return -#define atomic64_sub_return_acquire atomic64_sub_return -#define atomic64_sub_return_release atomic64_sub_return - -#else /* atomic64_sub_return_relaxed */ - -#ifndef atomic64_sub_return_acquire -#define atomic64_sub_return_acquire(...) 
\ - __atomic_op_acquire(atomic64_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic64_sub_return_release -#define atomic64_sub_return_release(...) \ - __atomic_op_release(atomic64_sub_return, __VA_ARGS__) -#endif - -#ifndef atomic64_sub_return -#define atomic64_sub_return(...) \ - __atomic_op_fence(atomic64_sub_return, __VA_ARGS__) -#endif -#endif /* atomic64_sub_return_relaxed */ - -#ifndef atomic64_dec -#define atomic64_dec(v) atomic64_sub(1, (v)) -#endif - -/* atomic64_dec_return_relaxed */ -#ifndef atomic64_dec_return_relaxed - -#ifndef atomic64_dec_return -#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) -#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v)) -#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v)) -#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) -#else /* atomic64_dec_return */ -#define atomic64_dec_return_relaxed atomic64_dec_return -#define atomic64_dec_return_acquire atomic64_dec_return -#define atomic64_dec_return_release atomic64_dec_return -#endif /* atomic64_dec_return */ - -#else /* atomic64_dec_return_relaxed */ - -#ifndef atomic64_dec_return_acquire -#define atomic64_dec_return_acquire(...) \ - __atomic_op_acquire(atomic64_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic64_dec_return_release -#define atomic64_dec_return_release(...) \ - __atomic_op_release(atomic64_dec_return, __VA_ARGS__) -#endif - -#ifndef atomic64_dec_return -#define atomic64_dec_return(...) \ - __atomic_op_fence(atomic64_dec_return, __VA_ARGS__) -#endif -#endif /* atomic64_dec_return_relaxed */ - - -/* atomic64_fetch_add_relaxed */ -#ifndef atomic64_fetch_add_relaxed -#define atomic64_fetch_add_relaxed atomic64_fetch_add -#define atomic64_fetch_add_acquire atomic64_fetch_add -#define atomic64_fetch_add_release atomic64_fetch_add - -#else /* atomic64_fetch_add_relaxed */ - -#ifndef atomic64_fetch_add_acquire -#define atomic64_fetch_add_acquire(...) 
\ - __atomic_op_acquire(atomic64_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_add_release -#define atomic64_fetch_add_release(...) \ - __atomic_op_release(atomic64_fetch_add, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_add -#define atomic64_fetch_add(...) \ - __atomic_op_fence(atomic64_fetch_add, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_add_relaxed */ - -/* atomic64_fetch_inc_relaxed */ -#ifndef atomic64_fetch_inc_relaxed - -#ifndef atomic64_fetch_inc -#define atomic64_fetch_inc(v) atomic64_fetch_add(1, (v)) -#define atomic64_fetch_inc_relaxed(v) atomic64_fetch_add_relaxed(1, (v)) -#define atomic64_fetch_inc_acquire(v) atomic64_fetch_add_acquire(1, (v)) -#define atomic64_fetch_inc_release(v) atomic64_fetch_add_release(1, (v)) -#else /* atomic64_fetch_inc */ -#define atomic64_fetch_inc_relaxed atomic64_fetch_inc -#define atomic64_fetch_inc_acquire atomic64_fetch_inc -#define atomic64_fetch_inc_release atomic64_fetch_inc -#endif /* atomic64_fetch_inc */ - -#else /* atomic64_fetch_inc_relaxed */ - -#ifndef atomic64_fetch_inc_acquire -#define atomic64_fetch_inc_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_inc_release -#define atomic64_fetch_inc_release(...) \ - __atomic_op_release(atomic64_fetch_inc, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_inc -#define atomic64_fetch_inc(...) \ - __atomic_op_fence(atomic64_fetch_inc, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_inc_relaxed */ - -/* atomic64_fetch_sub_relaxed */ -#ifndef atomic64_fetch_sub_relaxed -#define atomic64_fetch_sub_relaxed atomic64_fetch_sub -#define atomic64_fetch_sub_acquire atomic64_fetch_sub -#define atomic64_fetch_sub_release atomic64_fetch_sub - -#else /* atomic64_fetch_sub_relaxed */ - -#ifndef atomic64_fetch_sub_acquire -#define atomic64_fetch_sub_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_sub_release -#define atomic64_fetch_sub_release(...) 
\ - __atomic_op_release(atomic64_fetch_sub, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_sub -#define atomic64_fetch_sub(...) \ - __atomic_op_fence(atomic64_fetch_sub, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_sub_relaxed */ - -/* atomic64_fetch_dec_relaxed */ -#ifndef atomic64_fetch_dec_relaxed - -#ifndef atomic64_fetch_dec -#define atomic64_fetch_dec(v) atomic64_fetch_sub(1, (v)) -#define atomic64_fetch_dec_relaxed(v) atomic64_fetch_sub_relaxed(1, (v)) -#define atomic64_fetch_dec_acquire(v) atomic64_fetch_sub_acquire(1, (v)) -#define atomic64_fetch_dec_release(v) atomic64_fetch_sub_release(1, (v)) -#else /* atomic64_fetch_dec */ -#define atomic64_fetch_dec_relaxed atomic64_fetch_dec -#define atomic64_fetch_dec_acquire atomic64_fetch_dec -#define atomic64_fetch_dec_release atomic64_fetch_dec -#endif /* atomic64_fetch_dec */ - -#else /* atomic64_fetch_dec_relaxed */ - -#ifndef atomic64_fetch_dec_acquire -#define atomic64_fetch_dec_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_dec_release -#define atomic64_fetch_dec_release(...) \ - __atomic_op_release(atomic64_fetch_dec, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_dec -#define atomic64_fetch_dec(...) \ - __atomic_op_fence(atomic64_fetch_dec, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_dec_relaxed */ - -/* atomic64_fetch_or_relaxed */ -#ifndef atomic64_fetch_or_relaxed -#define atomic64_fetch_or_relaxed atomic64_fetch_or -#define atomic64_fetch_or_acquire atomic64_fetch_or -#define atomic64_fetch_or_release atomic64_fetch_or - -#else /* atomic64_fetch_or_relaxed */ - -#ifndef atomic64_fetch_or_acquire -#define atomic64_fetch_or_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_or_release -#define atomic64_fetch_or_release(...) \ - __atomic_op_release(atomic64_fetch_or, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_or -#define atomic64_fetch_or(...) 
\ - __atomic_op_fence(atomic64_fetch_or, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_or_relaxed */ - -/* atomic64_fetch_and_relaxed */ -#ifndef atomic64_fetch_and_relaxed -#define atomic64_fetch_and_relaxed atomic64_fetch_and -#define atomic64_fetch_and_acquire atomic64_fetch_and -#define atomic64_fetch_and_release atomic64_fetch_and - -#else /* atomic64_fetch_and_relaxed */ - -#ifndef atomic64_fetch_and_acquire -#define atomic64_fetch_and_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_and_release -#define atomic64_fetch_and_release(...) \ - __atomic_op_release(atomic64_fetch_and, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_and -#define atomic64_fetch_and(...) \ - __atomic_op_fence(atomic64_fetch_and, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_and_relaxed */ - -#ifndef atomic64_andnot -#define atomic64_andnot(i, v) atomic64_and(~(long long)(i), (v)) -#endif - -#ifndef atomic64_fetch_andnot_relaxed - -#ifndef atomic64_fetch_andnot -#define atomic64_fetch_andnot(i, v) atomic64_fetch_and(~(long long)(i), (v)) -#define atomic64_fetch_andnot_relaxed(i, v) atomic64_fetch_and_relaxed(~(long long)(i), (v)) -#define atomic64_fetch_andnot_acquire(i, v) atomic64_fetch_and_acquire(~(long long)(i), (v)) -#define atomic64_fetch_andnot_release(i, v) atomic64_fetch_and_release(~(long long)(i), (v)) -#else /* atomic64_fetch_andnot */ -#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot -#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot -#define atomic64_fetch_andnot_release atomic64_fetch_andnot -#endif /* atomic64_fetch_andnot */ - -#else /* atomic64_fetch_andnot_relaxed */ - -#ifndef atomic64_fetch_andnot_acquire -#define atomic64_fetch_andnot_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_andnot_release -#define atomic64_fetch_andnot_release(...) 
\ - __atomic_op_release(atomic64_fetch_andnot, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_andnot -#define atomic64_fetch_andnot(...) \ - __atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_andnot_relaxed */ - -/* atomic64_fetch_xor_relaxed */ -#ifndef atomic64_fetch_xor_relaxed -#define atomic64_fetch_xor_relaxed atomic64_fetch_xor -#define atomic64_fetch_xor_acquire atomic64_fetch_xor -#define atomic64_fetch_xor_release atomic64_fetch_xor - -#else /* atomic64_fetch_xor_relaxed */ - -#ifndef atomic64_fetch_xor_acquire -#define atomic64_fetch_xor_acquire(...) \ - __atomic_op_acquire(atomic64_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_xor_release -#define atomic64_fetch_xor_release(...) \ - __atomic_op_release(atomic64_fetch_xor, __VA_ARGS__) -#endif - -#ifndef atomic64_fetch_xor -#define atomic64_fetch_xor(...) \ - __atomic_op_fence(atomic64_fetch_xor, __VA_ARGS__) -#endif -#endif /* atomic64_fetch_xor_relaxed */ - - -/* atomic64_xchg_relaxed */ -#ifndef atomic64_xchg_relaxed -#define atomic64_xchg_relaxed atomic64_xchg -#define atomic64_xchg_acquire atomic64_xchg -#define atomic64_xchg_release atomic64_xchg - -#else /* atomic64_xchg_relaxed */ - -#ifndef atomic64_xchg_acquire -#define atomic64_xchg_acquire(...) \ - __atomic_op_acquire(atomic64_xchg, __VA_ARGS__) -#endif - -#ifndef atomic64_xchg_release -#define atomic64_xchg_release(...) \ - __atomic_op_release(atomic64_xchg, __VA_ARGS__) -#endif - -#ifndef atomic64_xchg -#define atomic64_xchg(...) \ - __atomic_op_fence(atomic64_xchg, __VA_ARGS__) -#endif -#endif /* atomic64_xchg_relaxed */ - -/* atomic64_cmpxchg_relaxed */ -#ifndef atomic64_cmpxchg_relaxed -#define atomic64_cmpxchg_relaxed atomic64_cmpxchg -#define atomic64_cmpxchg_acquire atomic64_cmpxchg -#define atomic64_cmpxchg_release atomic64_cmpxchg - -#else /* atomic64_cmpxchg_relaxed */ - -#ifndef atomic64_cmpxchg_acquire -#define atomic64_cmpxchg_acquire(...) 
\ - __atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic64_cmpxchg_release -#define atomic64_cmpxchg_release(...) \ - __atomic_op_release(atomic64_cmpxchg, __VA_ARGS__) -#endif - -#ifndef atomic64_cmpxchg -#define atomic64_cmpxchg(...) \ - __atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__) -#endif -#endif /* atomic64_cmpxchg_relaxed */ - -#ifndef atomic64_try_cmpxchg - -#define __atomic64_try_cmpxchg(type, _p, _po, _n) \ -({ \ - typeof(_po) __po = (_po); \ - typeof(*(_po)) __r, __o = *__po; \ - __r = atomic64_cmpxchg##type((_p), __o, (_n)); \ - if (unlikely(__r != __o)) \ - *__po = __r; \ - likely(__r == __o); \ -}) - -#define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n) -#define atomic64_try_cmpxchg_relaxed(_p, _po, _n) __atomic64_try_cmpxchg(_relaxed, _p, _po, _n) -#define atomic64_try_cmpxchg_acquire(_p, _po, _n) __atomic64_try_cmpxchg(_acquire, _p, _po, _n) -#define atomic64_try_cmpxchg_release(_p, _po, _n) __atomic64_try_cmpxchg(_release, _p, _po, _n) - -#else /* atomic64_try_cmpxchg */ -#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg -#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg -#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg -#endif /* atomic64_try_cmpxchg */ - -/** - * atomic64_fetch_add_unless - add unless the number is already a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns the original value of @v. - */ -#ifndef atomic64_fetch_add_unless -static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, - long long u) -{ - long long c = atomic64_read(v); - - do { - if (unlikely(c == u)) - break; - } while (!atomic64_try_cmpxchg(v, &c, c + a)); - - return c; -} -#endif - -/** - * atomic64_add_unless - add unless the number is already a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... 
- * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if @v was not already @u. - * Returns true if the addition was done. - */ -static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u) -{ - return atomic64_fetch_add_unless(v, a, u) != u; -} - -/** - * atomic64_inc_not_zero - increment unless the number is zero - * @v: pointer of type atomic64_t - * - * Atomically increments @v by 1, if @v is non-zero. - * Returns true if the increment was done. - */ -#ifndef atomic64_inc_not_zero -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) -#endif - -/** - * atomic64_inc_and_test - increment and test - * @v: pointer of type atomic64_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic64_inc_and_test -static inline bool atomic64_inc_and_test(atomic64_t *v) -{ - return atomic64_inc_return(v) == 0; -} -#endif - -/** - * atomic64_dec_and_test - decrement and test - * @v: pointer of type atomic64_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -#ifndef atomic64_dec_and_test -static inline bool atomic64_dec_and_test(atomic64_t *v) -{ - return atomic64_dec_return(v) == 0; -} -#endif - -/** - * atomic64_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic64_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ -#ifndef atomic64_sub_and_test -static inline bool atomic64_sub_and_test(long long i, atomic64_t *v) -{ - return atomic64_sub_return(i, v) == 0; -} -#endif - -/** - * atomic64_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic64_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. 
- */ -#ifndef atomic64_add_negative -static inline bool atomic64_add_negative(long long i, atomic64_t *v) -{ - return atomic64_add_return(i, v) < 0; -} -#endif - -#ifndef atomic64_inc_unless_negative -static inline bool atomic64_inc_unless_negative(atomic64_t *v) -{ - long long c = atomic64_read(v); - - do { - if (unlikely(c < 0)) - return false; - } while (!atomic64_try_cmpxchg(v, &c, c + 1)); - - return true; -} -#endif - -#ifndef atomic64_dec_unless_positive -static inline bool atomic64_dec_unless_positive(atomic64_t *v) -{ - long long c = atomic64_read(v); - - do { - if (unlikely(c > 0)) - return false; - } while (!atomic64_try_cmpxchg(v, &c, c - 1)); - - return true; -} -#endif - -/* - * atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic64_t - * - * The function returns the old value of *v minus 1, even if - * the atomic64 variable, v, was not decremented. - */ -#ifndef atomic64_dec_if_positive -static inline long long atomic64_dec_if_positive(atomic64_t *v) -{ - long long dec, c = atomic64_read(v); - - do { - dec = c - 1; - if (unlikely(dec < 0)) - break; - } while (!atomic64_try_cmpxchg(v, &c, dec)); - - return dec; -} -#endif - -#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) -#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) +#include #include -- cgit v1.2.3 From e38f89d310fcc543b0b94594a92db1d6cfbd9376 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 13 Jun 2018 20:22:04 +0530 Subject: PM / Domains: Add genpd_opp_to_performance_state() The OPP core currently stores the performance state in the consumer device's OPP table, but that is going to change going forward and performance state will rather be set directly in the genpd's OPP table. For that we need to get the performance state for genpd's device structure (genpd->dev) instead of the consumer device's structure. Add a new helper to do that. 
Reviewed-by: Ulf Hansson Signed-off-by: Viresh Kumar --- drivers/base/power/domain.c | 32 ++++++++++++++++++++++++++++++++ include/linux/pm_domain.h | 9 +++++++++ 2 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index fe9b0527b161..7be8c94c6b7f 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2520,6 +2520,38 @@ int of_genpd_parse_idle_states(struct device_node *dn, } EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states); +/** + * pm_genpd_opp_to_performance_state - Gets performance state of the genpd from its OPP node. + * + * @genpd_dev: Genpd's device for which the performance-state needs to be found. + * @opp: struct dev_pm_opp of the OPP for which we need to find performance + * state. + * + * Returns performance state encoded in the OPP of the genpd. This calls + * platform specific genpd->opp_to_performance_state() callback to translate + * power domain OPP to performance state. + * + * Returns performance state on success and 0 on failure. + */ +unsigned int pm_genpd_opp_to_performance_state(struct device *genpd_dev, + struct dev_pm_opp *opp) +{ + struct generic_pm_domain *genpd = NULL; + int state; + + genpd = container_of(genpd_dev, struct generic_pm_domain, dev); + + if (unlikely(!genpd->opp_to_performance_state)) + return 0; + + genpd_lock(genpd); + state = genpd->opp_to_performance_state(genpd, opp); + genpd_unlock(genpd); + + return state; +} +EXPORT_SYMBOL_GPL(pm_genpd_opp_to_performance_state); + /** * of_genpd_opp_to_performance_state- Gets performance state of device's * power domain corresponding to a DT node's "required-opps" property. 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 3b5d7280e52e..4f803f934308 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -258,6 +258,8 @@ int of_genpd_add_subdomain(struct of_phandle_args *parent, struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n); +unsigned int pm_genpd_opp_to_performance_state(struct device *genpd_dev, + struct dev_pm_opp *opp); unsigned int of_genpd_opp_to_performance_state(struct device *dev, struct device_node *np); @@ -299,6 +301,13 @@ static inline int of_genpd_parse_idle_states(struct device_node *dn, return -ENODEV; } +static inline unsigned int +pm_genpd_opp_to_performance_state(struct device *genpd_dev, + struct dev_pm_opp *opp) +{ + return 0; +} + static inline unsigned int of_genpd_opp_to_performance_state(struct device *dev, struct device_node *np) -- cgit v1.2.3 From 4f018bc0e1cfdec2e25072db9fecc1f363ba79ea Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 26 Jun 2018 16:29:34 +0530 Subject: OPP: Add dev_pm_opp_{set|put}_genpd_virt_dev() helper Multiple generic power domains for a consumer device are supported with the help of virtual devices, which are created for each consumer device - genpd pair. These are the device structures which are attached to the power domain and are required by the OPP core to set the performance state of the genpd. The helpers added by this commit are required to be called once for each of these virtual devices. These are required only if multiple domains are available for a device, otherwise the actual device structure will be used instead by the OPP core. The new helpers also support the complex cases where the consumer device wouldn't always require all the domains. For example, a camera may require only one power domain during normal operations but two during high resolution operations. 
The consumer driver can call dev_pm_opp_put_genpd_virt_dev(high_resolution_genpd_virt_dev) if it is currently operating in the normal mode and doesn't have any performance requirements from the genpd which manages high resolution power requirements. The consumer driver can later call dev_pm_opp_set_genpd_virt_dev(high_resolution_genpd_virt_dev) once it switches back to the high resolution mode. The new helpers differ from other OPP set/put helpers as the new ones can be called with OPPs initialized for the table as we may need to call them on the fly because of the complex case explained above. For this reason it is possible that the genpd virt_dev structure may be used in parallel while the new helpers are running and a new mutex is added to protect against that. We didn't use the existing opp_table->lock mutex as that is widely used in the OPP core and we will need this lock in the dev_pm_opp_set_rate() helper while changing OPP and we need to make sure there is not much contention while doing that as that's the hotpath. 
Reviewed-by: Ulf Hansson Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/opp/of.c | 16 ++++++++- drivers/opp/opp.h | 4 +++ include/linux/pm_opp.h | 8 +++++ 4 files changed, 115 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 02a69a62dac8..cef2ccda355d 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -823,6 +823,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) return NULL; mutex_init(&opp_table->lock); + mutex_init(&opp_table->genpd_virt_dev_lock); INIT_LIST_HEAD(&opp_table->dev_list); opp_dev = _add_opp_dev(dev, opp_table); @@ -920,6 +921,7 @@ static void _opp_table_kref_release(struct kref *kref) _remove_opp_dev(opp_dev, opp_table); } + mutex_destroy(&opp_table->genpd_virt_dev_lock); mutex_destroy(&opp_table->lock); list_del(&opp_table->node); kfree(opp_table); @@ -1602,6 +1604,92 @@ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) } EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_set_opp_helper); +/** + * dev_pm_opp_set_genpd_virt_dev - Set virtual genpd device for an index + * @dev: Consumer device for which the genpd device is getting set. + * @virt_dev: virtual genpd device. + * @index: index. + * + * Multiple generic power domains for a device are supported with the help of + * virtual genpd devices, which are created for each consumer device - genpd + * pair. These are the device structures which are attached to the power domain + * and are required by the OPP core to set the performance state of the genpd. + * + * This helper will normally be called by the consumer driver of the device + * "dev", as only that has details of the genpd devices. + * + * This helper needs to be called once for each of those virtual devices, but + * only if multiple domains are available for a device. Otherwise the original + * device structure will be used instead by the OPP core. 
+ */ +struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, + struct device *virt_dev, + int index) +{ + struct opp_table *opp_table; + + opp_table = dev_pm_opp_get_opp_table(dev); + if (!opp_table) + return ERR_PTR(-ENOMEM); + + mutex_lock(&opp_table->genpd_virt_dev_lock); + + if (unlikely(!opp_table->genpd_virt_devs || + index >= opp_table->required_opp_count || + opp_table->genpd_virt_devs[index])) { + + dev_err(dev, "Invalid request to set required device\n"); + dev_pm_opp_put_opp_table(opp_table); + mutex_unlock(&opp_table->genpd_virt_dev_lock); + + return ERR_PTR(-EINVAL); + } + + opp_table->genpd_virt_devs[index] = virt_dev; + mutex_unlock(&opp_table->genpd_virt_dev_lock); + + return opp_table; +} + +/** + * dev_pm_opp_put_genpd_virt_dev() - Releases resources blocked for genpd device. + * @opp_table: OPP table returned by dev_pm_opp_set_genpd_virt_dev(). + * @virt_dev: virtual genpd device. + * + * This releases the resource previously acquired with a call to + * dev_pm_opp_set_genpd_virt_dev(). The consumer driver shall call this helper + * if it doesn't want OPP core to update performance state of a power domain + * anymore. + */ +void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, + struct device *virt_dev) +{ + int i; + + /* + * Acquire genpd_virt_dev_lock to make sure virt_dev isn't getting + * used in parallel. 
+ */ + mutex_lock(&opp_table->genpd_virt_dev_lock); + + for (i = 0; i < opp_table->required_opp_count; i++) { + if (opp_table->genpd_virt_devs[i] != virt_dev) + continue; + + opp_table->genpd_virt_devs[i] = NULL; + dev_pm_opp_put_opp_table(opp_table); + + /* Drop the vote */ + dev_pm_genpd_set_performance_state(virt_dev, 0); + break; + } + + mutex_unlock(&opp_table->genpd_virt_dev_lock); + + if (unlikely(i == opp_table->required_opp_count)) + dev_err(virt_dev, "Failed to find required device entry\n"); +} + /** * dev_pm_opp_add() - Add an OPP table from a table definitions * @dev: device for which we do this operation diff --git a/drivers/opp/of.c b/drivers/opp/of.c index ffaeefef98ce..71aef28953c2 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -134,6 +134,7 @@ static struct opp_table *_find_table_of_opp_np(struct device_node *opp_np) static void _opp_table_free_required_tables(struct opp_table *opp_table) { struct opp_table **required_opp_tables = opp_table->required_opp_tables; + struct device **genpd_virt_devs = opp_table->genpd_virt_devs; int i; if (!required_opp_tables) @@ -147,8 +148,10 @@ static void _opp_table_free_required_tables(struct opp_table *opp_table) } kfree(required_opp_tables); + kfree(genpd_virt_devs); opp_table->required_opp_count = 0; + opp_table->genpd_virt_devs = NULL; opp_table->required_opp_tables = NULL; } @@ -161,6 +164,7 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, struct device_node *opp_np) { struct opp_table **required_opp_tables; + struct device **genpd_virt_devs = NULL; struct device_node *required_np, *np; int count, i; @@ -175,11 +179,21 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, if (!count) goto put_np; + if (count > 1) { + genpd_virt_devs = kcalloc(count, sizeof(*genpd_virt_devs), + GFP_KERNEL); + if (!genpd_virt_devs) + goto put_np; + } + required_opp_tables = kcalloc(count, sizeof(*required_opp_tables), GFP_KERNEL); - if (!required_opp_tables) + if 
(!required_opp_tables) { + kfree(genpd_virt_devs); goto put_np; + } + opp_table->genpd_virt_devs = genpd_virt_devs; opp_table->required_opp_tables = required_opp_tables; opp_table->required_opp_count = count; diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 24b340ad18d1..8aec38792cae 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -135,6 +135,8 @@ enum opp_table_access { * @parsed_static_opps: True if OPPs are initialized from DT. * @shared_opp: OPP is shared between multiple devices. * @suspend_opp: Pointer to OPP to be used during device suspend. + * @genpd_virt_dev_lock: Mutex protecting the genpd virtual device pointers. + * @genpd_virt_devs: List of virtual devices for multiple genpd support. * @required_opp_tables: List of device OPP tables that are required by OPPs in * this table. * @required_opp_count: Number of required devices. @@ -177,6 +179,8 @@ struct opp_table { enum opp_table_access shared_opp; struct dev_pm_opp *suspend_opp; + struct mutex genpd_virt_dev_lock; + struct device **genpd_virt_devs; struct opp_table **required_opp_tables; unsigned int required_opp_count; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 5d399eeef172..8fed222c089b 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -126,6 +126,8 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); +struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, struct device *virt_dev, int index); +void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, struct device *virt_dev); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int 
dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); @@ -272,6 +274,12 @@ static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {} +static inline struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, struct device *virt_dev, int index) +{ + return ERR_PTR(-ENOTSUPP); +} + +static inline void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, struct device *virt_dev) {} static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { return -ENOTSUPP; -- cgit v1.2.3 From 4c6a343e57fe241fa30ab31ac4969561272cc6b2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Jun 2018 16:29:50 +0530 Subject: OPP: Rename and relocate of_genpd_opp_to_performance_state() The OPP core already has the performance state values for each of the genpd's OPPs and there is no need to call the genpd callback again to get the performance state for the case where the end device doesn't have an OPP table and has the "required-opps" property directly in its node. This commit renames of_genpd_opp_to_performance_state() as of_get_required_opp_performance_state() and moves it to the OPP core, as it is all about OPP stuff now. 
Reviewed-by: Ulf Hansson Signed-off-by: Viresh Kumar --- drivers/base/power/domain.c | 48 --------------------------------------------- drivers/opp/of.c | 44 +++++++++++++++++++++++++++++++++++++++++ include/linux/pm_domain.h | 9 --------- include/linux/pm_opp.h | 5 +++++ 4 files changed, 49 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 7be8c94c6b7f..8e554e6a82a2 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2552,54 +2552,6 @@ unsigned int pm_genpd_opp_to_performance_state(struct device *genpd_dev, } EXPORT_SYMBOL_GPL(pm_genpd_opp_to_performance_state); -/** - * of_genpd_opp_to_performance_state- Gets performance state of device's - * power domain corresponding to a DT node's "required-opps" property. - * - * @dev: Device for which the performance-state needs to be found. - * @np: DT node where the "required-opps" property is present. This can be - * the device node itself (if it doesn't have an OPP table) or a node - * within the OPP table of a device (if device has an OPP table). - * - * Returns performance state corresponding to the "required-opps" property of - * a DT node. This calls platform specific genpd->opp_to_performance_state() - * callback to translate power domain OPP to performance state. - * - * Returns performance state on success and 0 on failure. 
- */ -unsigned int of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *np) -{ - struct generic_pm_domain *genpd; - struct dev_pm_opp *opp; - int state = 0; - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return 0; - - if (unlikely(!genpd->set_performance_state)) - return 0; - - genpd_lock(genpd); - - opp = of_dev_pm_opp_find_required_opp(&genpd->dev, np); - if (IS_ERR(opp)) { - dev_err(dev, "Failed to find required OPP: %ld\n", - PTR_ERR(opp)); - goto unlock; - } - - state = genpd->opp_to_performance_state(genpd, opp); - dev_pm_opp_put(opp); - -unlock: - genpd_unlock(genpd); - - return state; -} -EXPORT_SYMBOL_GPL(of_genpd_opp_to_performance_state); - static int __init genpd_bus_init(void) { return bus_register(&genpd_bus_type); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 4e494720ac25..369d63a58ac4 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -969,6 +969,50 @@ put_cpu_node: } EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus); +/** + * of_get_required_opp_performance_state() - Search for required OPP and return its performance state. + * @np: Node that contains the "required-opps" property. + * @index: Index of the phandle to parse. + * + * Returns the performance state of the OPP pointed out by the "required-opps" + * property at @index in @np. + * + * Return: Positive performance state on success, otherwise 0 on errors. 
+ */ +unsigned int of_get_required_opp_performance_state(struct device_node *np, + int index) +{ + struct dev_pm_opp *opp; + struct device_node *required_np; + struct opp_table *opp_table; + unsigned int pstate = 0; + + required_np = of_parse_required_opp(np, index); + if (!required_np) + return 0; + + opp_table = _find_table_of_opp_np(required_np); + if (IS_ERR(opp_table)) { + pr_err("%s: Failed to find required OPP table %pOF: %ld\n", + __func__, np, PTR_ERR(opp_table)); + goto put_required_np; + } + + opp = _find_opp_of_np(opp_table, required_np); + if (opp) { + pstate = opp->pstate; + dev_pm_opp_put(opp); + } + + dev_pm_opp_put_opp_table(opp_table); + +put_required_np: + of_node_put(required_np); + + return pstate; +} +EXPORT_SYMBOL_GPL(of_get_required_opp_performance_state); + /** * of_dev_pm_opp_find_required_opp() - Search for required OPP. * @dev: The device whose OPP node is referenced by the 'np' DT node. diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 4f803f934308..642036952553 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -260,8 +260,6 @@ int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n); unsigned int pm_genpd_opp_to_performance_state(struct device *genpd_dev, struct dev_pm_opp *opp); -unsigned int of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *np); int genpd_dev_pm_attach(struct device *dev); struct device *genpd_dev_pm_attach_by_id(struct device *dev, @@ -308,13 +306,6 @@ pm_genpd_opp_to_performance_state(struct device *genpd_dev, return 0; } -static inline unsigned int -of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *np) -{ - return 0; -} - static inline int genpd_dev_pm_attach(struct device *dev) { return 0; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 8fed222c089b..889bb347fbd9 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -315,6 +315,7 @@ int 
dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpuma struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, struct device_node *np); struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); +unsigned int of_get_required_opp_performance_state(struct device_node *np, int index); #else static inline int dev_pm_opp_of_add_table(struct device *dev) { @@ -357,6 +358,10 @@ static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp) { return NULL; } +static inline unsigned int of_get_required_opp_performance_state(struct device_node *np, int index) +{ + return 0; +} #endif #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From 534245cc69c2a3597d8ed0e7782ae3f563e92c68 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Jun 2018 16:33:25 +0530 Subject: OPP: Remove of_dev_pm_opp_find_required_opp() This isn't used anymore, remove it. Reviewed-by: Ulf Hansson Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 54 -------------------------------------------------- include/linux/pm_opp.h | 5 ----- 2 files changed, 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 369d63a58ac4..3740822b4197 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1013,60 +1013,6 @@ put_required_np: } EXPORT_SYMBOL_GPL(of_get_required_opp_performance_state); -/** - * of_dev_pm_opp_find_required_opp() - Search for required OPP. - * @dev: The device whose OPP node is referenced by the 'np' DT node. - * @np: Node that contains the "required-opps" property. - * - * Returns the OPP of the device 'dev', whose phandle is present in the "np" - * node. Although the "required-opps" property supports having multiple - * phandles, this helper routine only parses the very first phandle in the list. - * - * Return: Matching opp, else returns ERR_PTR in case of error and should be - * handled using IS_ERR. 
- * - * The callers are required to call dev_pm_opp_put() for the returned OPP after - * use. - */ -struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, - struct device_node *np) -{ - struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ENODEV); - struct device_node *required_np; - struct opp_table *opp_table; - - opp_table = _find_opp_table(dev); - if (IS_ERR(opp_table)) - return ERR_CAST(opp_table); - - required_np = of_parse_phandle(np, "required-opps", 0); - if (unlikely(!required_np)) { - dev_err(dev, "Unable to parse required-opps\n"); - goto put_opp_table; - } - - mutex_lock(&opp_table->lock); - - list_for_each_entry(temp_opp, &opp_table->opp_list, node) { - if (temp_opp->available && temp_opp->np == required_np) { - opp = temp_opp; - - /* Increment the reference count of OPP */ - dev_pm_opp_get(opp); - break; - } - } - - mutex_unlock(&opp_table->lock); - - of_node_put(required_np); -put_opp_table: - dev_pm_opp_put_opp_table(opp_table); - - return opp; -} -EXPORT_SYMBOL_GPL(of_dev_pm_opp_find_required_opp); - /** * dev_pm_opp_get_of_node() - Gets the DT node corresponding to an opp * @opp: opp for which DT node has to be returned for diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 889bb347fbd9..2b2c3fd985ab 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -313,7 +313,6 @@ int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask); void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask); int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); -struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, struct device_node *np); struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); unsigned int of_get_required_opp_performance_state(struct device_node *np, int index); #else @@ -350,10 +349,6 @@ static inline struct device_node *dev_pm_opp_of_get_opp_desc_node(struct 
device return NULL; } -static inline struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, struct device_node *np) -{ - return NULL; -} static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp) { return NULL; -- cgit v1.2.3 From 48207d7595d2be604e21228e5a93aaff17e4b808 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 5 Oct 2018 21:42:06 +0200 Subject: gpio: drop devm_gpiochip_remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is hardly any reason to call devm_gpiochip_remove() because the driver core handles calling gpiochip_remove() automatically. To make it harder to introduce new (and probably unneeded) callers, drop the function. Signed-off-by: Uwe Kleine-König Signed-off-by: Linus Walleij --- Documentation/driver-model/devres.txt | 1 - drivers/gpio/gpiolib.c | 18 +----------------- include/linux/gpio/driver.h | 1 - 3 files changed, 1 insertion(+), 19 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 43681ca0837f..48aa1ef80d75 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -255,7 +255,6 @@ GPIO devm_gpiod_get_optional() devm_gpiod_put() devm_gpiochip_add_data() - devm_gpiochip_remove() devm_gpio_request() devm_gpio_request_one() devm_gpio_free() diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 230e41562462..9ccc096a0df7 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1524,6 +1524,7 @@ static int devm_gpio_chip_match(struct device *dev, void *res, void *data) return *r == data; } + /** * devm_gpiochip_add_data() - Resource manager gpiochip_add_data() * @dev: pointer to the device that gpio_chip belongs to. 
@@ -1563,23 +1564,6 @@ int devm_gpiochip_add_data(struct device *dev, struct gpio_chip *chip, } EXPORT_SYMBOL_GPL(devm_gpiochip_add_data); -/** - * devm_gpiochip_remove() - Resource manager of gpiochip_remove() - * @dev: device for which which resource was allocated - * @chip: the chip to remove - * - * A gpio_chip with any GPIOs still requested may not be removed. - */ -void devm_gpiochip_remove(struct device *dev, struct gpio_chip *chip) -{ - int ret; - - ret = devres_release(dev, devm_gpio_chip_release, - devm_gpio_chip_match, chip); - WARN_ON(ret); -} -EXPORT_SYMBOL_GPL(devm_gpiochip_remove); - /** * gpiochip_find() - iterator for locating a specific gpio_chip * @data: data to pass to match function diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 2db62b550b95..f70d976e1395 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -422,7 +422,6 @@ static inline int gpiochip_add(struct gpio_chip *chip) extern void gpiochip_remove(struct gpio_chip *chip); extern int devm_gpiochip_add_data(struct device *dev, struct gpio_chip *chip, void *data); -extern void devm_gpiochip_remove(struct device *dev, struct gpio_chip *chip); extern struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *chip, void *data)); -- cgit v1.2.3 From 18534df419041e6c1f4b41af56ee7d41f757815c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 1 Nov 2018 21:12:50 +0800 Subject: gpiolib: Fix possible use after free on label gpiod_request_commit() copies the pointer to the label passed as an argument only to be used later. But there's a chance the caller could immediately free the passed string(e.g., local variable). This could trigger a use after free when we use gpio label(e.g., gpiochip_unlock_as_irq(), gpiochip_is_requested()). To be on the safe side: duplicate the string with kstrdup_const() so that if an unaware user passes an address to a stack-allocated buffer, we won't get the arbitrary label. 
Also fix gpiod_set_consumer_name(). Signed-off-by: Muchun Song Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 25 +++++++++++++++++++++---- include/linux/gpio/consumer.h | 6 ++++-- 2 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 9ccc096a0df7..2a9d50678aa1 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2282,6 +2282,12 @@ static int gpiod_request_commit(struct gpio_desc *desc, const char *label) unsigned long flags; unsigned offset; + if (label) { + label = kstrdup_const(label, GFP_KERNEL); + if (!label) + return -ENOMEM; + } + spin_lock_irqsave(&gpio_lock, flags); /* NOTE: gpio_request() can be called in early boot, @@ -2292,6 +2298,7 @@ static int gpiod_request_commit(struct gpio_desc *desc, const char *label) desc_set_label(desc, label ? : "?"); status = 0; } else { + kfree_const(label); status = -EBUSY; goto done; } @@ -2308,6 +2315,7 @@ static int gpiod_request_commit(struct gpio_desc *desc, const char *label) if (status < 0) { desc_set_label(desc, NULL); + kfree_const(label); clear_bit(FLAG_REQUESTED, &desc->flags); goto done; } @@ -2403,6 +2411,7 @@ static bool gpiod_free_commit(struct gpio_desc *desc) chip->free(chip, gpio_chip_hwgpio(desc)); spin_lock_irqsave(&gpio_lock, flags); } + kfree_const(desc->label); desc_set_label(desc, NULL); clear_bit(FLAG_ACTIVE_LOW, &desc->flags); clear_bit(FLAG_REQUESTED, &desc->flags); @@ -3358,11 +3367,19 @@ EXPORT_SYMBOL_GPL(gpiod_cansleep); * @desc: gpio to set the consumer name on * @name: the new consumer name */ -void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name) +int gpiod_set_consumer_name(struct gpio_desc *desc, const char *name) { - VALIDATE_DESC_VOID(desc); - /* Just overwrite whatever the previous name was */ - desc->label = name; + VALIDATE_DESC(desc); + if (name) { + name = kstrdup_const(name, GFP_KERNEL); + if (!name) + return -ENOMEM; + } + + 
kfree_const(desc->label); + desc_set_label(desc, name); + + return 0; } EXPORT_SYMBOL_GPL(gpiod_set_consumer_name); diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index f2f887795d43..ed070512b40e 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -162,7 +162,7 @@ int gpiod_is_active_low(const struct gpio_desc *desc); int gpiod_cansleep(const struct gpio_desc *desc); int gpiod_to_irq(const struct gpio_desc *desc); -void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name); +int gpiod_set_consumer_name(struct gpio_desc *desc, const char *name); /* Convert between the old gpio_ and new gpiod_ interfaces */ struct gpio_desc *gpio_to_desc(unsigned gpio); @@ -495,10 +495,12 @@ static inline int gpiod_to_irq(const struct gpio_desc *desc) return -EINVAL; } -static inline void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name) +static inline int gpiod_set_consumer_name(struct gpio_desc *desc, + const char *name) { /* GPIO can never have been requested */ WARN_ON(1); + return -EINVAL; } static inline struct gpio_desc *gpio_to_desc(unsigned gpio) -- cgit v1.2.3 From b0e137ad24b6cc36a4ab09558a401e124163eefb Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 15 Oct 2018 21:41:28 +0200 Subject: mtd: rawnand: Provide helper for polling GPIO R/B pin Each controller driver having access to NAND R/B pin over GPIO would have to reimplement the polling loop otherwise. 
Suggested-by: Boris Brezillon Signed-off-by: Janusz Krzysztofik Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 31 +++++++++++++++++++++++++++++++ include/linux/mtd/rawnand.h | 4 ++++ 2 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 05bd0779fe9b..0d5a2dc59b8d 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "internals.h" @@ -531,6 +532,36 @@ int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms) }; EXPORT_SYMBOL_GPL(nand_soft_waitrdy); +/** + * nand_gpio_waitrdy - Poll R/B GPIO pin until ready + * @chip: NAND chip structure + * @gpiod: GPIO descriptor of R/B pin + * @timeout_ms: Timeout in ms + * + * Poll the R/B GPIO pin until it becomes ready. If that does not happen + * within the specified timeout, -ETIMEDOUT is returned. + * + * This helper is intended to be used when the controller has access to the + * NAND R/B pin over GPIO. + * + * Return 0 if the R/B pin indicates chip is ready, a negative error otherwise. + */ +int nand_gpio_waitrdy(struct nand_chip *chip, struct gpio_desc *gpiod, + unsigned long timeout_ms) +{ + /* Wait until R/B pin indicates chip is ready or timeout occurs */ + timeout_ms = jiffies + msecs_to_jiffies(timeout_ms); + do { + if (gpiod_get_value_cansleep(gpiod)) + return 0; + + cond_resched(); + } while (time_before(jiffies, timeout_ms)); + + return gpiod_get_value_cansleep(gpiod) ? 
0 : -ETIMEDOUT; +}; +EXPORT_SYMBOL_GPL(nand_gpio_waitrdy); + /** * panic_nand_get_device - [GENERIC] Get chip for selected access * @chip: the nand chip descriptor diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index e10b126e148f..4e91a70ede10 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1346,4 +1346,8 @@ void nand_release(struct nand_chip *chip); */ int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms); +struct gpio_desc; +int nand_gpio_waitrdy(struct nand_chip *chip, struct gpio_desc *gpiod, + unsigned long timeout_ms); + #endif /* __LINUX_MTD_RAWNAND_H */ -- cgit v1.2.3 From 6da4b3ab9a6e9b1b5f90322ab3fa3a7dd18edb19 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 2 Nov 2018 22:59:51 +0800 Subject: genirq/affinity: Add support for allocating interrupt sets A driver may have a need to allocate multiple sets of MSI/MSI-X interrupts, and have them appropriately affinitized. Add support for defining a number of sets in the irq_affinity structure, of varying sizes, and get each set affinitized correctly across the machine. 
[ tglx: Minor changelog tweaks ] Signed-off-by: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Thomas Gleixner Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Cc: linux-block@vger.kernel.org Link: https://lkml.kernel.org/r/20181102145951.31979-5-ming.lei@redhat.com --- drivers/pci/msi.c | 14 +++++++++ include/linux/interrupt.h | 4 +++ kernel/irq/affinity.c | 77 +++++++++++++++++++++++++++++++++-------------- 3 files changed, 72 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index af24ed50a245..265ed3e4c920 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1036,6 +1036,13 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, if (maxvec < minvec) return -ERANGE; + /* + * If the caller is passing in sets, we can't support a range of + * vectors. The caller needs to handle that. + */ + if (affd && affd->nr_sets && minvec != maxvec) + return -EINVAL; + if (WARN_ON_ONCE(dev->msi_enabled)) return -EINVAL; @@ -1087,6 +1094,13 @@ static int __pci_enable_msix_range(struct pci_dev *dev, if (maxvec < minvec) return -ERANGE; + /* + * If the caller is passing in sets, we can't support a range of + * supported vectors. The caller needs to handle that. 
+ */ + if (affd && affd->nr_sets && minvec != maxvec) + return -EINVAL; + if (WARN_ON_ONCE(dev->msix_enabled)) return -EINVAL; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 1d6711c28271..ca397ff40836 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -247,10 +247,14 @@ struct irq_affinity_notify { * the MSI(-X) vector space * @post_vectors: Don't apply affinity to @post_vectors at end of * the MSI(-X) vector space + * @nr_sets: Length of passed in *sets array + * @sets: Number of affinitized sets */ struct irq_affinity { int pre_vectors; int post_vectors; + int nr_sets; + int *sets; }; #if defined(CONFIG_SMP) diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index e028b773e38a..08c904eb7279 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -171,28 +171,29 @@ out: * 2) spread other possible CPUs on these vectors */ static int irq_build_affinity_masks(const struct irq_affinity *affd, - int startvec, int numvecs, + int startvec, int numvecs, int firstvec, cpumask_var_t *node_to_cpumask, struct cpumask *masks) { - int curvec = startvec, usedvecs = -1; + int curvec = startvec, nr_present, nr_others; + int ret = -ENOMEM; cpumask_var_t nmsk, npresmsk; if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) - return usedvecs; + return ret; if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) goto fail; + ret = 0; /* Stabilize the cpumasks */ get_online_cpus(); build_node_to_cpumask(node_to_cpumask); /* Spread on present CPUs starting from affd->pre_vectors */ - usedvecs = __irq_build_affinity_masks(affd, curvec, numvecs, - affd->pre_vectors, - node_to_cpumask, - cpu_present_mask, nmsk, masks); + nr_present = __irq_build_affinity_masks(affd, curvec, numvecs, + firstvec, node_to_cpumask, + cpu_present_mask, nmsk, masks); /* * Spread on non present CPUs starting from the next vector to be @@ -200,23 +201,24 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, * vector space, assign the non present 
CPUs to the already spread * out vectors. */ - if (usedvecs >= numvecs) - curvec = affd->pre_vectors; + if (nr_present >= numvecs) + curvec = firstvec; else - curvec = affd->pre_vectors + usedvecs; + curvec = firstvec + nr_present; cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); - usedvecs += __irq_build_affinity_masks(affd, curvec, numvecs, - affd->pre_vectors, - node_to_cpumask, npresmsk, - nmsk, masks); + nr_others = __irq_build_affinity_masks(affd, curvec, numvecs, + firstvec, node_to_cpumask, + npresmsk, nmsk, masks); put_online_cpus(); + if (nr_present < numvecs) + WARN_ON(nr_present + nr_others < numvecs); + free_cpumask_var(npresmsk); fail: free_cpumask_var(nmsk); - - return usedvecs; + return ret; } /** @@ -233,6 +235,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) int curvec, usedvecs; cpumask_var_t *node_to_cpumask; struct cpumask *masks = NULL; + int i, nr_sets; /* * If there aren't any vectors left after applying the pre/post @@ -253,8 +256,28 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) for (curvec = 0; curvec < affd->pre_vectors; curvec++) cpumask_copy(masks + curvec, irq_default_affinity); - usedvecs = irq_build_affinity_masks(affd, curvec, affvecs, - node_to_cpumask, masks); + /* + * Spread on present CPUs starting from affd->pre_vectors. If we + * have multiple sets, build each sets affinity mask separately. + */ + nr_sets = affd->nr_sets; + if (!nr_sets) + nr_sets = 1; + + for (i = 0, usedvecs = 0; i < nr_sets; i++) { + int this_vecs = affd->sets ? 
affd->sets[i] : affvecs; + int ret; + + ret = irq_build_affinity_masks(affd, curvec, this_vecs, + curvec, node_to_cpumask, masks); + if (ret) { + kfree(masks); + masks = NULL; + goto outnodemsk; + } + curvec += this_vecs; + usedvecs += this_vecs; + } /* Fill out vectors at the end that don't need affinity */ if (usedvecs >= affvecs) @@ -279,13 +302,21 @@ int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity { int resv = affd->pre_vectors + affd->post_vectors; int vecs = maxvec - resv; - int ret; + int set_vecs; if (resv > minvec) return 0; - get_online_cpus(); - ret = min_t(int, cpumask_weight(cpu_possible_mask), vecs) + resv; - put_online_cpus(); - return ret; + if (affd->nr_sets) { + int i; + + for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) + set_vecs += affd->sets[i]; + } else { + get_online_cpus(); + set_vecs = cpumask_weight(cpu_possible_mask); + put_online_cpus(); + } + + return resv + min(set_vecs, vecs); } -- cgit v1.2.3 From 61d0de0543a6e982918c6054a6a12cfbdd73018a Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 30 Oct 2018 09:55:07 -0500 Subject: regulator: pfuze100-regulator: add coin support to PF0100 The driver currently supports coin cell / super cap charging, so this patch extends it to support PF0100. 
Signed-off-by: Adam Ford Reviewed-by: Fabio Estevam Signed-off-by: Mark Brown --- drivers/regulator/pfuze100-regulator.c | 2 ++ include/linux/regulator/pfuze100.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index dd41a9bb3f5c..df5df1c495ad 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -370,6 +370,7 @@ static struct pfuze_regulator pfuze100_regulators[] = { PFUZE100_VGEN_REG(PFUZE100, VGEN4, PFUZE100_VGEN4VOL, 1800000, 3300000, 100000), PFUZE100_VGEN_REG(PFUZE100, VGEN5, PFUZE100_VGEN5VOL, 1800000, 3300000, 100000), PFUZE100_VGEN_REG(PFUZE100, VGEN6, PFUZE100_VGEN6VOL, 1800000, 3300000, 100000), + PFUZE100_COIN_REG(PFUZE100, COIN, PFUZE100_COINVOL, 0x7, pfuze100_coin), }; static struct pfuze_regulator pfuze200_regulators[] = { @@ -436,6 +437,7 @@ static struct of_regulator_match pfuze100_matches[] = { { .name = "vgen4", }, { .name = "vgen5", }, { .name = "vgen6", }, + { .name = "coin", }, }; /* PFUZE200 */ diff --git a/include/linux/regulator/pfuze100.h b/include/linux/regulator/pfuze100.h index cb5aecd40f07..331d7d940c7a 100644 --- a/include/linux/regulator/pfuze100.h +++ b/include/linux/regulator/pfuze100.h @@ -33,7 +33,8 @@ #define PFUZE100_VGEN4 12 #define PFUZE100_VGEN5 13 #define PFUZE100_VGEN6 14 -#define PFUZE100_MAX_REGULATOR 15 +#define PFUZE100_COIN 15 +#define PFUZE100_MAX_REGULATOR 16 #define PFUZE200_SW1AB 0 #define PFUZE200_SW2 1 -- cgit v1.2.3 From 5e1abdc3fe56939d9ac34209706b1a527b77b61b Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 6 Nov 2018 10:45:36 -0500 Subject: net: skbuff.h: remove unnecessary unlikely() WARN_ON() already contains an unlikely(), so it's not necessary to use unlikely. Signed-off-by: Yangtao Li Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0ba687454267..7dcfb5591dc3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2508,10 +2508,8 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_set_length(struct sk_buff *skb, unsigned int len) { - if (unlikely(skb_is_nonlinear(skb))) { - WARN_ON(1); + if (WARN_ON(skb_is_nonlinear(skb))) return; - } skb->len = len; skb_set_tail_pointer(skb, len); } -- cgit v1.2.3 From 23b5f73266e59a598c1e5dd435d87651b5a7626b Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 1 Oct 2018 12:45:00 -0700 Subject: usb: typec: tcpm: Do not disconnect link for self powered devices During HARD_RESET the data link is disconnected. For self powered device, the spec is advising against doing that. From USB_PD_R3_0 7.1.5 Response to Hard Resets Device operation during and after a Hard Reset is defined as follows: Self-powered devices Should Not disconnect from USB during a Hard Reset (see Section 9.1.2). Bus powered devices will disconnect from USB during a Hard Reset due to the loss of their power source. Tackle this by letting TCPM know whether the device is self or bus powered. This overcomes unnecessary port disconnections from hard reset. Also, speeds up the enumeration time when connected to Type-A ports. Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus --------- Version history: V3: Rebase on top of usb-next V2: Based on feedback from heikki.krogerus@linux.intel.com - self_powered added to the struct tcpm_port which is populated from a. "connector" node of the device tree in tcpm_fw_get_caps() b. 
"self_powered" node of the tcpc_config in tcpm_copy_caps Based on feedback from linux@roeck-us.net - Code was refactored - SRC_HARD_RESET_VBUS_OFF sets the link state to false based on self_powered flag V1 located here: https://lkml.org/lkml/2018/9/13/94 Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 12 ++++++++++-- include/linux/usb/tcpm.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index dbbd71f754d0..ba6e5cdaed2c 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -317,6 +317,9 @@ struct tcpm_port { /* Deadline in jiffies to exit src_try_wait state */ unsigned long max_wait; + /* port belongs to a self powered device */ + bool self_powered; + #ifdef CONFIG_DEBUG_FS struct dentry *dentry; struct mutex logbuffer_lock; /* log buffer access lock */ @@ -3254,7 +3257,8 @@ static void run_state_machine(struct tcpm_port *port) case SRC_HARD_RESET_VBUS_OFF: tcpm_set_vconn(port, true); tcpm_set_vbus(port, false); - tcpm_set_roles(port, false, TYPEC_SOURCE, TYPEC_HOST); + tcpm_set_roles(port, port->self_powered, TYPEC_SOURCE, + TYPEC_HOST); tcpm_set_state(port, SRC_HARD_RESET_VBUS_ON, PD_T_SRC_RECOVER); break; case SRC_HARD_RESET_VBUS_ON: @@ -3267,7 +3271,8 @@ static void run_state_machine(struct tcpm_port *port) memset(&port->pps_data, 0, sizeof(port->pps_data)); tcpm_set_vconn(port, false); tcpm_set_charge(port, false); - tcpm_set_roles(port, false, TYPEC_SINK, TYPEC_DEVICE); + tcpm_set_roles(port, port->self_powered, TYPEC_SINK, + TYPEC_DEVICE); /* * VBUS may or may not toggle, depending on the adapter. 
* If it doesn't toggle, transition to SNK_HARD_RESET_SINK_ON @@ -4412,6 +4417,8 @@ sink: return -EINVAL; port->operating_snk_mw = mw / 1000; + port->self_powered = fwnode_property_read_bool(fwnode, "self-powered"); + return 0; } @@ -4720,6 +4727,7 @@ static int tcpm_copy_caps(struct tcpm_port *port, port->typec_caps.prefer_role = tcfg->default_role; port->typec_caps.type = tcfg->type; port->typec_caps.data = tcfg->data; + port->self_powered = port->tcpc->config->self_powered; return 0; } diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index 7e7fbfb84e8e..50c74a77db55 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -89,6 +89,7 @@ struct tcpc_config { enum typec_port_data data; enum typec_role default_role; bool try_role_hw; /* try.{src,snk} implemented in hardware */ + bool self_powered; /* port belongs to a self powered device */ const struct typec_altmode_desc *alt_modes; }; -- cgit v1.2.3 From 64e3d12f769d60eaee6d2e53a9b7f0b3814f32ed Mon Sep 17 00:00:00 2001 From: Kuo-Hsin Yang Date: Tue, 6 Nov 2018 13:23:24 +0000 Subject: mm, drm/i915: mark pinned shmemfs pages as unevictable The i915 driver uses shmemfs to allocate backing storage for gem objects. These shmemfs pages can be pinned (increased ref count) by shmem_read_mapping_page_gfp(). When a lot of pages are pinned, vmscan wastes a lot of time scanning these pinned pages. In some extreme case, all pages in the inactive anon lru are pinned, and only the inactive anon lru is scanned due to inactive_ratio, the system cannot swap and invokes the oom-killer. Mark these pinned pages as unevictable to speed up vmscan. Export pagevec API check_move_unevictable_pages(). This patch was inspired by Chris Wilson's change [1]. 
[1]: https://patchwork.kernel.org/patch/9768741/ Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Peter Zijlstra Cc: Andrew Morton Cc: Dave Hansen Signed-off-by: Kuo-Hsin Yang Acked-by: Michal Hocko # mm part Reviewed-by: Chris Wilson Acked-by: Dave Hansen Acked-by: Andrew Morton Link: https://patchwork.freedesktop.org/patch/msgid/20181106132324.17390-1-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson --- Documentation/vm/unevictable-lru.rst | 6 +++++- drivers/gpu/drm/i915/i915_gem.c | 33 +++++++++++++++++++++++++++++---- include/linux/swap.h | 4 +++- mm/shmem.c | 2 +- mm/vmscan.c | 22 +++++++++++----------- 5 files changed, 49 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/unevictable-lru.rst b/Documentation/vm/unevictable-lru.rst index fdd84cb8d511..b8e29f977f2d 100644 --- a/Documentation/vm/unevictable-lru.rst +++ b/Documentation/vm/unevictable-lru.rst @@ -143,7 +143,7 @@ using a number of wrapper functions: Query the address space, and return true if it is completely unevictable. -These are currently used in two places in the kernel: +These are currently used in three places in the kernel: (1) By ramfs to mark the address spaces of its inodes when they are created, and this mark remains for the life of the inode. @@ -154,6 +154,10 @@ These are currently used in two places in the kernel: swapped out; the application must touch the pages manually if it wants to ensure they're in memory. + (3) By the i915 driver to mark pinned address space until it's unpinned. The + amount of unevictable memory marked by i915 driver is roughly the bounded + object size in debugfs/dri/0/i915_gem_objects. 
+ Detecting Unevictable Pages --------------------------- diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 347b3836c809..5b80b0c14aed 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2382,11 +2382,23 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) invalidate_mapping_pages(mapping, 0, (loff_t)-1); } +/* + * Move pages to appropriate lru and release the pagevec, decrementing the + * ref count of those pages. + */ +static void check_release_pagevec(struct pagevec *pvec) +{ + check_move_unevictable_pages(pvec); + __pagevec_release(pvec); + cond_resched(); +} + static void i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, struct sg_table *pages) { struct sgt_iter sgt_iter; + struct pagevec pvec; struct page *page; __i915_gem_object_release_shmem(obj, pages, true); @@ -2396,6 +2408,9 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_save_bit_17_swizzle(obj, pages); + mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); + + pagevec_init(&pvec); for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) set_page_dirty(page); @@ -2403,9 +2418,11 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, if (obj->mm.madv == I915_MADV_WILLNEED) mark_page_accessed(page); - put_page(page); - cond_resched(); + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); } + if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); obj->mm.dirty = false; sg_free_table(pages); @@ -2526,6 +2543,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); unsigned int sg_page_sizes; + struct pagevec pvec; gfp_t noreclaim; int ret; @@ -2561,6 +2579,7 @@ rebuild_st: * Fail silently without starting the shrinker */ mapping = obj->base.filp->f_mapping; + 
mapping_set_unevictable(mapping); noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); noreclaim |= __GFP_NORETRY | __GFP_NOWARN; @@ -2675,8 +2694,14 @@ rebuild_st: err_sg: sg_mark_end(sg); err_pages: - for_each_sgt_page(page, sgt_iter, st) - put_page(page); + mapping_clear_unevictable(mapping); + pagevec_init(&pvec); + for_each_sgt_page(page, sgt_iter, st) { + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); + } + if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); sg_free_table(st); kfree(st); diff --git a/include/linux/swap.h b/include/linux/swap.h index 8e2c11e692ba..6c95df96c9aa 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -18,6 +18,8 @@ struct notifier_block; struct bio; +struct pagevec; + #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 @@ -373,7 +375,7 @@ static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, #endif extern int page_evictable(struct page *page); -extern void check_move_unevictable_pages(struct page **, int nr_pages); +extern void check_move_unevictable_pages(struct pagevec *pvec); extern int kswapd_run(int nid); extern void kswapd_stop(int nid); diff --git a/mm/shmem.c b/mm/shmem.c index 446942677cd4..0c3b005a59eb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -781,7 +781,7 @@ void shmem_unlock_mapping(struct address_space *mapping) break; index = indices[pvec.nr - 1] + 1; pagevec_remove_exceptionals(&pvec); - check_move_unevictable_pages(pvec.pages, pvec.nr); + check_move_unevictable_pages(&pvec); pagevec_release(&pvec); cond_resched(); } diff --git a/mm/vmscan.c b/mm/vmscan.c index c7ce2c161225..0dbc493026a2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -4162,17 +4163,16 @@ int page_evictable(struct page *page) return ret; } -#ifdef CONFIG_SHMEM /** - * check_move_unevictable_pages - check pages for evictability and 
move to appropriate zone lru list - * @pages: array of pages to check - * @nr_pages: number of pages to check + * check_move_unevictable_pages - check pages for evictability and move to + * appropriate zone lru list + * @pvec: pagevec with lru pages to check * - * Checks pages for evictability and moves them to the appropriate lru list. - * - * This function is only used for SysV IPC SHM_UNLOCK. + * Checks pages for evictability, if an evictable page is in the unevictable + * lru list, moves it to the appropriate evictable lru list. This function + * should be only used for lru pages. */ -void check_move_unevictable_pages(struct page **pages, int nr_pages) +void check_move_unevictable_pages(struct pagevec *pvec) { struct lruvec *lruvec; struct pglist_data *pgdat = NULL; @@ -4180,8 +4180,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) int pgrescued = 0; int i; - for (i = 0; i < nr_pages; i++) { - struct page *page = pages[i]; + for (i = 0; i < pvec->nr; i++) { + struct page *page = pvec->pages[i]; struct pglist_data *pagepgdat = page_pgdat(page); pgscanned++; @@ -4213,4 +4213,4 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) spin_unlock_irq(&pgdat->lru_lock); } } -#endif /* CONFIG_SHMEM */ +EXPORT_SYMBOL_GPL(check_move_unevictable_pages); -- cgit v1.2.3 From 5132b3d283710d196cd8af99b5585507e8b30709 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 1 Nov 2018 22:25:04 +0100 Subject: spi: gpio: Support 3WIRE high-impedance turn-around Some devices such as the TPO TPG110 display panel require a "high-impedance turn-around", in effect a clock cycle after switching the line from output to input mode. Support this in the GPIO driver to begin with. Other driver may implement it if they can, it is unclear if this can be achieved with anything else than GPIO bit-banging. 
Cc: Andrzej Hajda Acked-by: Lorenzo Bianconi Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- drivers/spi/spi-gpio.c | 24 +++++++++++++++++++++--- include/linux/spi/spi.h | 1 + 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c index 45973ee3ae11..a4aee26028cd 100644 --- a/drivers/spi/spi-gpio.c +++ b/drivers/spi/spi-gpio.c @@ -256,11 +256,29 @@ static int spi_gpio_setup(struct spi_device *spi) static int spi_gpio_set_direction(struct spi_device *spi, bool output) { struct spi_gpio *spi_gpio = spi_to_spi_gpio(spi); + int ret; if (output) return gpiod_direction_output(spi_gpio->mosi, 1); - else - return gpiod_direction_input(spi_gpio->mosi); + + ret = gpiod_direction_input(spi_gpio->mosi); + if (ret) + return ret; + /* + * Send a turnaround high impedance cycle when switching + * from output to input. Theoretically there should be + * a clock delay here, but as has been noted above, the + * nsec delay function for bit-banged GPIO is simply + * {} because bit-banging just doesn't get fast enough + * anyway. 
+ */ + if (spi->mode & SPI_3WIRE_HIZ) { + gpiod_set_value_cansleep(spi_gpio->sck, + !(spi->mode & SPI_CPOL)); + gpiod_set_value_cansleep(spi_gpio->sck, + !!(spi->mode & SPI_CPOL)); + } + return 0; } static void spi_gpio_cleanup(struct spi_device *spi) @@ -410,7 +428,7 @@ static int spi_gpio_probe(struct platform_device *pdev) return status; master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32); - master->mode_bits = SPI_3WIRE | SPI_CPHA | SPI_CPOL; + master->mode_bits = SPI_3WIRE | SPI_3WIRE_HIZ | SPI_CPHA | SPI_CPOL; master->flags = master_flags; master->bus_num = pdev->id; /* The master needs to think there is a chipselect even if not connected */ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 6be77fa5ab90..3ced58eebe1b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -155,6 +155,7 @@ struct spi_device { #define SPI_RX_DUAL 0x400 /* receive with 2 wires */ #define SPI_RX_QUAD 0x800 /* receive with 4 wires */ #define SPI_CS_WORD 0x1000 /* toggle cs after each word */ +#define SPI_3WIRE_HIZ 0x2000 /* high impedance turnaround */ int irq; void *controller_state; void *controller_data; -- cgit v1.2.3 From dedf7dce4cec5c0abe69f4fa6938d5100398220b Mon Sep 17 00:00:00 2001 From: "Woods, Brian" Date: Tue, 6 Nov 2018 20:08:14 +0000 Subject: hwmon/k10temp, x86/amd_nb: Consolidate shared device IDs Consolidate shared PCI_DEVICE_IDs that were scattered through k10temp and amd_nb, and move them into pci_ids. Signed-off-by: Brian Woods Signed-off-by: Borislav Petkov Acked-by: Guenter Roeck CC: Bjorn Helgaas CC: Clemens Ladisch CC: "H. 
Peter Anvin" CC: Ingo Molnar CC: Jean Delvare CC: Jia Zhang CC: CC: CC: Pu Wen CC: Thomas Gleixner CC: x86-ml Link: http://lkml.kernel.org/r/20181106200754.60722-2-brian.woods@amd.com --- arch/x86/kernel/amd_nb.c | 3 +-- drivers/hwmon/k10temp.c | 9 +-------- include/linux/pci_ids.h | 2 ++ 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index a6eca647bc76..19d489ee2b1e 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -11,13 +11,12 @@ #include #include #include +#include #include #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 #define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 -#define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 -#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec /* Protect the PCI config register pairs used for SMN and DF indirect access. */ diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 2cef0c37ff6f..bc6871c8dd4e 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -41,14 +42,6 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); #define PCI_DEVICE_ID_AMD_15H_M70H_NB_F3 0x15b3 #endif -#ifndef PCI_DEVICE_ID_AMD_17H_DF_F3 -#define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 -#endif - -#ifndef PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 -#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb -#endif - /* CPUID function 0x80000001, ebx */ #define CPUID_PKGTYPE_MASK 0xf0000000 #define CPUID_PKGTYPE_F 0x00000000 diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 69f0abe1ba1a..78d5cd29778a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -545,6 +545,8 @@ #define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534 #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F3 0x1583 #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F4 0x1584 +#define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 +#define 
PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- cgit v1.2.3 From be3518a16ef270e3b030a6ae96055f83f51bd3dd Mon Sep 17 00:00:00 2001 From: "Woods, Brian" Date: Tue, 6 Nov 2018 20:08:18 +0000 Subject: x86/amd_nb: Add PCI device IDs for family 17h, model 30h Add the PCI device IDs for family 17h model 30h, since they are needed for accessing various registers via the data fabric/SMN interface. Signed-off-by: Brian Woods Signed-off-by: Borislav Petkov CC: Bjorn Helgaas CC: Clemens Ladisch CC: Guenter Roeck CC: "H. Peter Anvin" CC: Ingo Molnar CC: Jean Delvare CC: Jia Zhang CC: CC: CC: Pu Wen CC: Thomas Gleixner CC: x86-ml Link: http://lkml.kernel.org/r/20181106200754.60722-4-brian.woods@amd.com --- arch/x86/kernel/amd_nb.c | 6 ++++++ include/linux/pci_ids.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index cc34266e3c62..cc51275c8759 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -16,8 +16,10 @@ #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 #define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 +#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec +#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 /* Protect the PCI config register pairs used for SMN and DF indirect access. 
*/ static DEFINE_MUTEX(smn_mutex); @@ -27,9 +29,11 @@ static u32 *flush_words; static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) }, {} }; + #define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704 const struct pci_device_id amd_nb_misc_ids[] = { @@ -43,6 +47,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, {} }; @@ -56,6 +61,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 78d5cd29778a..349276fbd269 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -547,6 +547,7 @@ #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F4 0x1584 #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb +#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- cgit v1.2.3 From 600335205b8d162891b5ef2e32343f5b8020efd8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Oct 2018 09:53:52 -0600 Subject: ide: convert to blk-mq ide-disk and ide-cd tested as working just fine, ide-tape and 
ide-floppy haven't. But the latter don't require changes, so they should work without issue. Add helper function to insert a request from a work queue, since we cannot invoke the blk-mq request insertion from IRQ context. Cc: David Miller Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/ide/ide-atapi.c | 25 ++++--- drivers/ide/ide-cd.c | 175 ++++++++++++++++++++++++++---------------------- drivers/ide/ide-disk.c | 5 +- drivers/ide/ide-io.c | 100 +++++++++++++++------------ drivers/ide/ide-park.c | 4 +- drivers/ide/ide-pm.c | 28 ++------ drivers/ide/ide-probe.c | 68 ++++++++++++++----- include/linux/ide.h | 13 +++- 8 files changed, 239 insertions(+), 179 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 8b2b72b93885..33210bc67618 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -172,8 +172,8 @@ EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); void ide_prep_sense(ide_drive_t *drive, struct request *rq) { struct request_sense *sense = &drive->sense_data; - struct request *sense_rq = drive->sense_rq; - struct scsi_request *req = scsi_req(sense_rq); + struct request *sense_rq; + struct scsi_request *req; unsigned int cmd_len, sense_len; int err; @@ -196,9 +196,16 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) if (ata_sense_request(rq) || drive->sense_rq_armed) return; + sense_rq = drive->sense_rq; + if (!sense_rq) { + sense_rq = blk_mq_alloc_request(drive->queue, REQ_OP_DRV_IN, + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + drive->sense_rq = sense_rq; + } + req = scsi_req(sense_rq); + memset(sense, 0, sizeof(*sense)); - blk_rq_init(rq->q, sense_rq); scsi_req_init(req); err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, @@ -207,6 +214,8 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) if (printk_ratelimit()) printk(KERN_WARNING PFX "%s: failed to map sense " "buffer\n", drive->name); + 
blk_mq_free_request(sense_rq); + drive->sense_rq = NULL; return; } @@ -226,6 +235,8 @@ EXPORT_SYMBOL_GPL(ide_prep_sense); int ide_queue_sense_rq(ide_drive_t *drive, void *special) { + struct request *sense_rq = drive->sense_rq; + /* deferred failure from ide_prep_sense() */ if (!drive->sense_rq_armed) { printk(KERN_WARNING PFX "%s: error queuing a sense request\n", @@ -233,12 +244,12 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special) return -ENOMEM; } - drive->sense_rq->special = special; + sense_rq->special = special; drive->sense_rq_armed = false; drive->hwif->rq = NULL; - elv_add_request(drive->queue, drive->sense_rq, ELEVATOR_INSERT_FRONT); + ide_insert_request_head(drive, sense_rq); return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); @@ -270,10 +281,8 @@ void ide_retry_pc(ide_drive_t *drive) */ drive->hwif->rq = NULL; ide_requeue_and_plug(drive, failed_rq); - if (ide_queue_sense_rq(drive, pc)) { - blk_start_request(failed_rq); + if (ide_queue_sense_rq(drive, pc)) ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq)); - } } EXPORT_SYMBOL_GPL(ide_retry_pc); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index f9b59d41813f..4ecaf2ace4cb 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -258,11 +258,22 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq) /* * take a breather */ - blk_delay_queue(drive->queue, 1); + blk_mq_requeue_request(rq, false); + blk_mq_delay_kick_requeue_list(drive->queue, 1); return 1; } } +static void ide_cd_free_sense(ide_drive_t *drive) +{ + if (!drive->sense_rq) + return; + + blk_mq_free_request(drive->sense_rq); + drive->sense_rq = NULL; + drive->sense_rq_armed = false; +} + /** * Returns: * 0: if the request should be continued. 
@@ -516,6 +527,82 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) return false; } +/* standard prep_rq_fn that builds 10 byte cmds */ +static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) +{ + int hard_sect = queue_logical_block_size(q); + long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); + unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); + struct scsi_request *req = scsi_req(rq); + + if (rq_data_dir(rq) == READ) + req->cmd[0] = GPCMD_READ_10; + else + req->cmd[0] = GPCMD_WRITE_10; + + /* + * fill in lba + */ + req->cmd[2] = (block >> 24) & 0xff; + req->cmd[3] = (block >> 16) & 0xff; + req->cmd[4] = (block >> 8) & 0xff; + req->cmd[5] = block & 0xff; + + /* + * and transfer length + */ + req->cmd[7] = (blocks >> 8) & 0xff; + req->cmd[8] = blocks & 0xff; + req->cmd_len = 10; + return BLKPREP_OK; +} + +/* + * Most of the SCSI commands are supported directly by ATAPI devices. + * This transform handles the few exceptions. + */ +static int ide_cdrom_prep_pc(struct request *rq) +{ + u8 *c = scsi_req(rq)->cmd; + + /* transform 6-byte read/write commands to the 10-byte version */ + if (c[0] == READ_6 || c[0] == WRITE_6) { + c[8] = c[4]; + c[5] = c[3]; + c[4] = c[2]; + c[3] = c[1] & 0x1f; + c[2] = 0; + c[1] &= 0xe0; + c[0] += (READ_10 - READ_6); + scsi_req(rq)->cmd_len = 10; + return BLKPREP_OK; + } + + /* + * it's silly to pretend we understand 6-byte sense commands, just + * reject with ILLEGAL_REQUEST and the caller should take the + * appropriate action + */ + if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) { + scsi_req(rq)->result = ILLEGAL_REQUEST; + return BLKPREP_KILL; + } + + return BLKPREP_OK; +} + +static int ide_cdrom_prep_fn(ide_drive_t *drive, struct request *rq) +{ + if (!blk_rq_is_passthrough(rq)) { + scsi_req_init(scsi_req(rq)); + + return ide_cdrom_prep_fs(drive->queue, rq); + } else if (blk_rq_is_scsi(rq)) + return ide_cdrom_prep_pc(rq); + + return 0; +} + static ide_startstop_t 
cdrom_newpc_intr(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; @@ -675,7 +762,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) out_end: if (blk_rq_is_scsi(rq) && rc == 0) { scsi_req(rq)->resid_len = 0; - blk_end_request_all(rq, BLK_STS_OK); + blk_mq_end_request(rq, BLK_STS_OK); hwif->rq = NULL; } else { if (sense && uptodate) @@ -705,6 +792,8 @@ out_end: if (sense && rc == 2) ide_error(drive, "request sense failure", stat); } + + ide_cd_free_sense(drive); return ide_stopped; } @@ -729,7 +818,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) * We may be retrying this request after an error. Fix up any * weirdness which might be present in the request packet. */ - q->prep_rq_fn(q, rq); + ide_cdrom_prep_fn(drive, rq); } /* fs requests *must* be hardware frame aligned */ @@ -1323,82 +1412,6 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive) return nslots; } -/* standard prep_rq_fn that builds 10 byte cmds */ -static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) -{ - int hard_sect = queue_logical_block_size(q); - long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); - unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); - struct scsi_request *req = scsi_req(rq); - - q->initialize_rq_fn(rq); - - if (rq_data_dir(rq) == READ) - req->cmd[0] = GPCMD_READ_10; - else - req->cmd[0] = GPCMD_WRITE_10; - - /* - * fill in lba - */ - req->cmd[2] = (block >> 24) & 0xff; - req->cmd[3] = (block >> 16) & 0xff; - req->cmd[4] = (block >> 8) & 0xff; - req->cmd[5] = block & 0xff; - - /* - * and transfer length - */ - req->cmd[7] = (blocks >> 8) & 0xff; - req->cmd[8] = blocks & 0xff; - req->cmd_len = 10; - return BLKPREP_OK; -} - -/* - * Most of the SCSI commands are supported directly by ATAPI devices. - * This transform handles the few exceptions. 
- */ -static int ide_cdrom_prep_pc(struct request *rq) -{ - u8 *c = scsi_req(rq)->cmd; - - /* transform 6-byte read/write commands to the 10-byte version */ - if (c[0] == READ_6 || c[0] == WRITE_6) { - c[8] = c[4]; - c[5] = c[3]; - c[4] = c[2]; - c[3] = c[1] & 0x1f; - c[2] = 0; - c[1] &= 0xe0; - c[0] += (READ_10 - READ_6); - scsi_req(rq)->cmd_len = 10; - return BLKPREP_OK; - } - - /* - * it's silly to pretend we understand 6-byte sense commands, just - * reject with ILLEGAL_REQUEST and the caller should take the - * appropriate action - */ - if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) { - scsi_req(rq)->result = ILLEGAL_REQUEST; - return BLKPREP_KILL; - } - - return BLKPREP_OK; -} - -static int ide_cdrom_prep_fn(struct request_queue *q, struct request *rq) -{ - if (!blk_rq_is_passthrough(rq)) - return ide_cdrom_prep_fs(q, rq); - else if (blk_rq_is_scsi(rq)) - return ide_cdrom_prep_pc(rq); - - return 0; -} - struct cd_list_entry { const char *id_model; const char *id_firmware; @@ -1508,7 +1521,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) ide_debug_log(IDE_DBG_PROBE, "enter"); - blk_queue_prep_rq(q, ide_cdrom_prep_fn); + drive->prep_rq = ide_cdrom_prep_fn; blk_queue_dma_alignment(q, 31); blk_queue_update_dma_pad(q, 15); @@ -1569,7 +1582,7 @@ static void ide_cd_release(struct device *dev) if (devinfo->handle == drive) unregister_cdrom(devinfo); drive->driver_data = NULL; - blk_queue_prep_rq(drive->queue, NULL); + drive->prep_rq = NULL; g->private_data = NULL; put_disk(g); kfree(info); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index e3b4e659082d..f8567c8c9dd1 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -427,9 +427,8 @@ static void ide_disk_unlock_native_capacity(ide_drive_t *drive) drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */ } -static int idedisk_prep_fn(struct request_queue *q, struct request *rq) +static int idedisk_prep_fn(ide_drive_t *drive, struct request *rq) { - ide_drive_t *drive = 
q->queuedata; struct ide_cmd *cmd; if (req_op(rq) != REQ_OP_FLUSH) @@ -548,7 +547,7 @@ static void update_flush(ide_drive_t *drive) if (barrier) { wc = true; - blk_queue_prep_rq(drive->queue, idedisk_prep_fn); + drive->prep_rq = idedisk_prep_fn; } } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 0d93e0cfbeaf..5093c605c91c 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -67,7 +67,15 @@ int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error, ide_dma_on(drive); } - return blk_end_request(rq, error, nr_bytes); + if (!blk_update_request(rq, error, nr_bytes)) { + if (rq == drive->sense_rq) + drive->sense_rq = NULL; + + __blk_mq_end_request(rq, error); + return 0; + } + + return 1; } EXPORT_SYMBOL_GPL(ide_end_rq); @@ -307,8 +315,6 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; - BUG_ON(!(rq->rq_flags & RQF_STARTED)); - #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", drive->hwif->name, (unsigned long) rq); @@ -320,6 +326,9 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) goto kill_rq; } + if (drive->prep_rq && drive->prep_rq(drive, rq)) + return ide_stopped; + if (ata_pm_request(rq)) ide_check_pm_state(drive, rq); @@ -430,44 +439,38 @@ static inline void ide_unlock_host(struct ide_host *host) } } -static void __ide_requeue_and_plug(struct request_queue *q, struct request *rq) -{ - if (rq) - blk_requeue_request(q, rq); - if (rq || blk_peek_request(q)) { - /* Use 3ms as that was the old plug delay */ - blk_delay_queue(q, 3); - } -} - void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - unsigned long flags; - spin_lock_irqsave(q->queue_lock, flags); - __ide_requeue_and_plug(q, rq); - spin_unlock_irqrestore(q->queue_lock, flags); + /* Use 3ms as that was the old plug delay */ + if (rq) { + blk_mq_requeue_request(rq, false); + blk_mq_delay_kick_requeue_list(q, 3); + 
} else + blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3); } /* * Issue a new request to a device. */ -void do_ide_request(struct request_queue *q) +blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - ide_drive_t *drive = q->queuedata; + ide_drive_t *drive = hctx->queue->queuedata; ide_hwif_t *hwif = drive->hwif; struct ide_host *host = hwif->host; struct request *rq = NULL; ide_startstop_t startstop; - spin_unlock_irq(q->queue_lock); - /* HLD do_request() callback might sleep, make sure it's okay */ might_sleep(); if (ide_lock_host(host, hwif)) - goto plug_device_2; + return BLK_STS_DEV_RESOURCE; + + rq = bd->rq; + blk_mq_start_request(rq); spin_lock_irq(&hwif->lock); @@ -503,21 +506,16 @@ repeat: hwif->cur_dev = drive; drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); - spin_unlock_irq(&hwif->lock); - spin_lock_irq(q->queue_lock); /* * we know that the queue isn't empty, but this can happen * if the q->prep_rq_fn() decides to kill a request */ - if (!rq) - rq = blk_fetch_request(drive->queue); - - spin_unlock_irq(q->queue_lock); - spin_lock_irq(&hwif->lock); - if (!rq) { - ide_unlock_port(hwif); - goto out; + rq = bd->rq; + if (!rq) { + ide_unlock_port(hwif); + goto out; + } } /* @@ -551,23 +549,24 @@ repeat: if (startstop == ide_stopped) { rq = hwif->rq; hwif->rq = NULL; - goto repeat; + if (rq) + goto repeat; + ide_unlock_port(hwif); + goto out; } - } else - goto plug_device; + } else { +plug_device: + spin_unlock_irq(&hwif->lock); + ide_unlock_host(host); + ide_requeue_and_plug(drive, rq); + return BLK_STS_OK; + } + out: spin_unlock_irq(&hwif->lock); if (rq == NULL) ide_unlock_host(host); - spin_lock_irq(q->queue_lock); - return; - -plug_device: - spin_unlock_irq(&hwif->lock); - ide_unlock_host(host); -plug_device_2: - spin_lock_irq(q->queue_lock); - __ide_requeue_and_plug(q, rq); + return BLK_STS_OK; } static int drive_is_ready(ide_drive_t *drive) @@ -887,3 +886,16 @@ void ide_pad_transfer(ide_drive_t 
*drive, int write, int len) } } EXPORT_SYMBOL_GPL(ide_pad_transfer); + +void ide_insert_request_head(ide_drive_t *drive, struct request *rq) +{ + ide_hwif_t *hwif = drive->hwif; + unsigned long flags; + + spin_lock_irqsave(&hwif->lock, flags); + list_add_tail(&rq->queuelist, &drive->rq_list); + spin_unlock_irqrestore(&hwif->lock, flags); + + kblockd_schedule_work(&drive->rq_work); +} +EXPORT_SYMBOL_GPL(ide_insert_request_head); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 622f0edb3945..de9e85cf74d1 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -27,7 +27,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) spin_unlock_irq(&hwif->lock); if (start_queue) - blk_run_queue(q); + blk_mq_run_hw_queues(q, true); return; } spin_unlock_irq(&hwif->lock); @@ -54,7 +54,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; - elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); + ide_insert_request_head(drive, rq); out: return; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 59217aa1d1fb..ea10507e5190 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -40,32 +40,20 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) return ret; } -static void ide_end_sync_rq(struct request *rq, blk_status_t error) -{ - complete(rq->end_io_data); -} - static int ide_pm_execute_rq(struct request *rq) { struct request_queue *q = rq->q; - DECLARE_COMPLETION_ONSTACK(wait); - - rq->end_io_data = &wait; - rq->end_io = ide_end_sync_rq; spin_lock_irq(q->queue_lock); if (unlikely(blk_queue_dying(q))) { rq->rq_flags |= RQF_QUIET; scsi_req(rq)->result = -ENXIO; - __blk_end_request_all(rq, BLK_STS_OK); spin_unlock_irq(q->queue_lock); + blk_mq_end_request(rq, BLK_STS_OK); return -ENXIO; } - __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); - __blk_run_queue_uncond(q); 
spin_unlock_irq(q->queue_lock); - - wait_for_completion_io(&wait); + blk_execute_rq(q, NULL, rq, true); return scsi_req(rq)->result ? -EIO : 0; } @@ -79,6 +67,8 @@ int generic_ide_resume(struct device *dev) struct ide_pm_state rqpm; int err; + blk_mq_start_stopped_hw_queues(drive->queue, true); + if (ide_port_acpi(hwif)) { /* call ACPI _PS0 / _STM only once */ if ((drive->dn & 1) == 0 || pair == NULL) { @@ -226,15 +216,14 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) #endif spin_lock_irqsave(q->queue_lock, flags); if (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) - blk_stop_queue(q); + blk_mq_stop_hw_queues(q); else drive->dev_flags &= ~IDE_DFLAG_BLOCKED; spin_unlock_irqrestore(q->queue_lock, flags); drive->hwif->rq = NULL; - if (blk_end_request(rq, BLK_STS_OK, 0)) - BUG(); + blk_mq_end_request(rq, BLK_STS_OK); } void ide_check_pm_state(ide_drive_t *drive, struct request *rq) @@ -260,7 +249,6 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; struct request_queue *q = drive->queue; - unsigned long flags; int rc; #ifdef DEBUG_PM printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name); @@ -274,8 +262,6 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) if (rc) printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name); - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); + blk_mq_start_hw_queues(q); } } diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 3b75a7b7a284..40384838e439 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -750,6 +750,11 @@ static void ide_initialize_rq(struct request *rq) req->sreq.sense = req->sense; } +static const struct blk_mq_ops ide_mq_ops = { + .queue_rq = ide_queue_rq, + .initialize_rq_fn = ide_initialize_rq, +}; + /* * init request queue */ @@ -759,6 +764,7 @@ static int 
ide_init_queue(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; int max_sectors = 256; int max_sg_entries = PRD_ENTRIES; + struct blk_mq_tag_set *set; /* * Our default set up assumes the normal IDE case, @@ -767,19 +773,26 @@ static int ide_init_queue(ide_drive_t *drive) * limits and LBA48 we could raise it but as yet * do not. */ - q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL); - if (!q) + + set = &drive->tag_set; + set->ops = &ide_mq_ops; + set->nr_hw_queues = 1; + set->queue_depth = 32; + set->reserved_tags = 1; + set->cmd_size = sizeof(struct ide_request); + set->numa_node = hwif_to_node(hwif); + set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + if (blk_mq_alloc_tag_set(set)) return 1; - q->request_fn = do_ide_request; - q->initialize_rq_fn = ide_initialize_rq; - q->cmd_size = sizeof(struct ide_request); - blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); - if (blk_init_allocated_queue(q) < 0) { - blk_cleanup_queue(q); + q = blk_mq_init_queue(set); + if (IS_ERR(q)) { + blk_mq_free_tag_set(set); return 1; } + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + q->queuedata = drive; blk_queue_segment_boundary(q, 0xffff); @@ -965,8 +978,12 @@ static void drive_release_dev (struct device *dev) ide_proc_unregister_device(drive); + if (drive->sense_rq) + blk_mq_free_request(drive->sense_rq); + blk_cleanup_queue(drive->queue); drive->queue = NULL; + blk_mq_free_tag_set(&drive->tag_set); drive->dev_flags &= ~IDE_DFLAG_PRESENT; @@ -1133,6 +1150,28 @@ static void ide_port_cable_detect(ide_hwif_t *hwif) } } +/* + * Deferred request list insertion handler + */ +static void drive_rq_insert_work(struct work_struct *work) +{ + ide_drive_t *drive = container_of(work, ide_drive_t, rq_work); + ide_hwif_t *hwif = drive->hwif; + struct request *rq; + LIST_HEAD(list); + + spin_lock_irq(&hwif->lock); + if (!list_empty(&drive->rq_list)) + list_splice_init(&drive->rq_list, &list); + spin_unlock_irq(&hwif->lock); + + while (!list_empty(&list)) { + rq = 
list_first_entry(&list, struct request, queuelist); + list_del_init(&rq->queuelist); + blk_execute_rq_nowait(drive->queue, rq->rq_disk, rq, true, NULL); + } +} + static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR, IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR }; @@ -1145,12 +1184,10 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) ide_port_for_each_dev(i, drive, hwif) { u8 j = (hwif->index * MAX_DRIVES) + i; u16 *saved_id = drive->id; - struct request *saved_sense_rq = drive->sense_rq; memset(drive, 0, sizeof(*drive)); memset(saved_id, 0, SECTOR_SIZE); drive->id = saved_id; - drive->sense_rq = saved_sense_rq; drive->media = ide_disk; drive->select = (i << 4) | ATA_DEVICE_OBS; @@ -1166,6 +1203,9 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) INIT_LIST_HEAD(&drive->list); init_completion(&drive->gendev_rel_comp); + + INIT_WORK(&drive->rq_work, drive_rq_insert_work); + INIT_LIST_HEAD(&drive->rq_list); } } @@ -1255,7 +1295,6 @@ static void ide_port_free_devices(ide_hwif_t *hwif) int i; ide_port_for_each_dev(i, drive, hwif) { - kfree(drive->sense_rq); kfree(drive->id); kfree(drive); } @@ -1283,17 +1322,10 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node) if (drive->id == NULL) goto out_free_drive; - drive->sense_rq = kmalloc(sizeof(struct request) + - sizeof(struct ide_request), GFP_KERNEL); - if (!drive->sense_rq) - goto out_free_id; - hwif->devices[i] = drive; } return 0; -out_free_id: - kfree(drive->id); out_free_drive: kfree(drive); out_nomem: diff --git a/include/linux/ide.h b/include/linux/ide.h index c74b0321922a..079f8bc0b0f4 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -529,6 +529,10 @@ struct ide_drive_s { struct request_queue *queue; /* request queue */ + int (*prep_rq)(struct ide_drive_s *, struct request *); + + struct blk_mq_tag_set tag_set; + struct 
request *rq; /* current request */ void *driver_data; /* extra driver data */ u16 *id; /* identification info */ @@ -612,6 +616,10 @@ struct ide_drive_s { bool sense_rq_armed; struct request *sense_rq; struct request_sense sense_data; + + /* async sense insertion */ + struct work_struct rq_work; + struct list_head rq_list; }; typedef struct ide_drive_s ide_drive_t; @@ -1089,6 +1097,7 @@ extern int ide_pci_clk; int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int); void ide_kill_rq(ide_drive_t *, struct request *); +void ide_insert_request_head(ide_drive_t *, struct request *); void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); void ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); @@ -1208,7 +1217,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(struct timer_list *t); extern irqreturn_t ide_intr(int irq, void *dev_id); -extern void do_ide_request(struct request_queue *); +extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq); void ide_init_disk(struct gendisk *, ide_drive_t *); -- cgit v1.2.3 From 9ba20527f4d1430b5f3e5f566be5af3e156a3284 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 10:15:10 -0600 Subject: blk-mq: provide mq_ops->busy() hook We'll hook into this from blk_lld_busy(), allowing blk-mq to also return whether or not a given queue currently has requests in progress. 
Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 2 ++ include/linux/blk-mq.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index ce12515f9b9b..ca1a3af49f87 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3431,6 +3431,8 @@ int blk_lld_busy(struct request_queue *q) { if (q->lld_busy_fn) return q->lld_busy_fn(q); + if (q->mq_ops && q->mq_ops->busy) + return q->mq_ops->busy(q); return 0; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2286dc12c6bc..5c8418ebbfd6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -114,6 +114,7 @@ typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, typedef void (busy_tag_iter_fn)(struct request *, void *, bool); typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (map_queues_fn)(struct blk_mq_tag_set *set); +typedef bool (busy_fn)(struct request_queue *); struct blk_mq_ops { @@ -165,6 +166,11 @@ struct blk_mq_ops { /* Called from inside blk_get_request() */ void (*initialize_rq_fn)(struct request *rq); + /* + * If set, returns whether or not this queue currently is busy + */ + busy_fn *busy; + map_queues_fn *map_queues; #ifdef CONFIG_BLK_DEBUG_FS -- cgit v1.2.3 From c6f2882691e8fd128083abdcc3c5aa5b410c2367 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 10:22:19 -0600 Subject: block: remove q->lld_busy_fn() Nobody is using the legacy path for blk_lld_busy() anymore, remove it. 
Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- block/blk-settings.c | 6 ------ include/linux/blkdev.h | 3 --- 3 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index ca1a3af49f87..03ef8f0e7dc5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3429,8 +3429,6 @@ EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); */ int blk_lld_busy(struct request_queue *q) { - if (q->lld_busy_fn) - return q->lld_busy_fn(q); if (q->mq_ops && q->mq_ops->busy) return q->mq_ops->busy(q); diff --git a/block/blk-settings.c b/block/blk-settings.c index 696c04c1ab6c..ac8b8ba4b126 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -73,12 +73,6 @@ void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) } EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); -void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) -{ - q->lld_busy_fn = fn; -} -EXPORT_SYMBOL_GPL(blk_queue_lld_busy); - /** * blk_set_default_limits - reset limits to default values * @lim: the queue_limits structure to reset diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4293dc1cd160..e867733b761d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -320,7 +320,6 @@ typedef void (unprep_rq_fn) (struct request_queue *, struct request *); struct bio_vec; typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -typedef int (lld_busy_fn) (struct request_queue *q); typedef int (bsg_job_fn) (struct bsg_job *); typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); typedef void (exit_rq_fn)(struct request_queue *, struct request *); @@ -466,7 +465,6 @@ struct request_queue { softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; - lld_busy_fn *lld_busy_fn; /* Called just after a request is allocated */ init_rq_fn 
*init_rq_fn; /* Called just before a request is freed */ @@ -1255,7 +1253,6 @@ extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); -extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); -- cgit v1.2.3 From aae3b069d5ce865ca5ef2902c2a22cef7ab4f3a2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Oct 2018 11:26:25 -0600 Subject: bsg: pass in desired timeout handler This will ease in the conversion to blk-mq, where we can't set a timeout handler after queue init. Cc: Johannes Thumshirn Cc: linux-scsi@vger.kernel.org Reviewed-by: Hannes Reinecke Tested-by: Benjamin Block Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/bsg-lib.c | 3 ++- drivers/scsi/scsi_transport_fc.c | 7 +++---- drivers/scsi/scsi_transport_iscsi.c | 2 +- drivers/scsi/scsi_transport_sas.c | 4 ++-- drivers/scsi/ufs/ufs_bsg.c | 2 +- include/linux/bsg-lib.h | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index f3501cdaf1a6..1da011ec04e6 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -304,7 +304,7 @@ static void bsg_exit_rq(struct request_queue *q, struct request *req) * @dd_job_size: size of LLD data needed for each job */ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, int dd_job_size) + bsg_job_fn *job_fn, rq_timed_out_fn *timeout, int dd_job_size) { struct request_queue *q; int ret; @@ -327,6 +327,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, blk_queue_flag_set(QUEUE_FLAG_BIDI, q); 
blk_queue_softirq_done(q, bsg_softirq_done); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); + blk_queue_rq_timed_out(q, timeout); ret = bsg_register_queue(q, dev, name, &bsg_transport_ops); if (ret) { diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 381668fa135d..98aaffb4c715 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3780,7 +3780,8 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) snprintf(bsg_name, sizeof(bsg_name), "fc_host%d", shost->host_no); - q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size); + q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, fc_bsg_job_timeout, + i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, "fc_host%d: bsg interface failed to initialize - setup queue\n", @@ -3788,7 +3789,6 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) return PTR_ERR(q); } __scsi_init_queue(shost, q); - blk_queue_rq_timed_out(q, fc_bsg_job_timeout); blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT); fc_host->rqst_q = q; return 0; @@ -3826,14 +3826,13 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) return -ENOTSUPP; q = bsg_setup_queue(dev, dev_name(dev), fc_bsg_dispatch, - i->f->dd_bsg_size); + fc_bsg_job_timeout, i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, "failed to setup bsg queue\n"); return PTR_ERR(q); } __scsi_init_queue(shost, q); blk_queue_prep_rq(q, fc_bsg_rport_prep); - blk_queue_rq_timed_out(q, fc_bsg_job_timeout); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); rport->rqst_q = q; return 0; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 6fd2fe210fc3..26b11a775be9 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1542,7 +1542,7 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) return -ENOTSUPP; snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", 
shost->host_no); - q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0); + q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, NULL, 0); if (IS_ERR(q)) { shost_printk(KERN_ERR, shost, "bsg interface failed to " "initialize - no request queue\n"); diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 0a165b2b3e81..cf6d47891d77 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -198,7 +198,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) if (rphy) { q = bsg_setup_queue(&rphy->dev, dev_name(&rphy->dev), - sas_smp_dispatch, 0); + sas_smp_dispatch, NULL, 0); if (IS_ERR(q)) return PTR_ERR(q); rphy->q = q; @@ -207,7 +207,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) snprintf(name, sizeof(name), "sas_host%d", shost->host_no); q = bsg_setup_queue(&shost->shost_gendev, name, - sas_smp_dispatch, 0); + sas_smp_dispatch, NULL, 0); if (IS_ERR(q)) return PTR_ERR(q); to_sas_host_attrs(shost)->q = q; diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c index e5f8e54bf644..dd0e9700a74c 100644 --- a/drivers/scsi/ufs/ufs_bsg.c +++ b/drivers/scsi/ufs/ufs_bsg.c @@ -193,7 +193,7 @@ int ufs_bsg_probe(struct ufs_hba *hba) if (ret) goto out; - q = bsg_setup_queue(bsg_dev, dev_name(bsg_dev), ufs_bsg_request, 0); + q = bsg_setup_queue(bsg_dev, dev_name(bsg_dev), ufs_bsg_request, NULL, 0); if (IS_ERR(q)) { ret = PTR_ERR(q); goto out; diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 6aeaf6472665..b13ae143e7ef 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -72,7 +72,7 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, int dd_job_size); + bsg_job_fn *job_fn, rq_timed_out_fn *timeout, int dd_job_size); void bsg_job_put(struct 
bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); -- cgit v1.2.3 From 5e28b8d8a1b03ce86f33d38a64a4983d2b5c7679 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Oct 2018 11:27:02 -0600 Subject: bsg: provide bsg_remove_queue() helper All drivers do unregister + cleanup, provide a helper for that. Cc: linux-scsi@vger.kernel.org Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Tested-by: Benjamin Block Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/bsg-lib.c | 9 +++++++++ drivers/scsi/scsi_transport_fc.c | 5 +---- drivers/scsi/scsi_transport_iscsi.c | 5 +---- drivers/scsi/scsi_transport_sas.c | 6 +----- drivers/scsi/ufs/ufs_bsg.c | 2 +- include/linux/bsg-lib.h | 1 + 6 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 1da011ec04e6..3f2e9a1bae44 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -296,6 +296,15 @@ static void bsg_exit_rq(struct request_queue *q, struct request *req) kfree(job->reply); } +void bsg_remove_queue(struct request_queue *q) +{ + if (q) { + bsg_unregister_queue(q); + blk_cleanup_queue(q); + } +} +EXPORT_SYMBOL_GPL(bsg_remove_queue); + /** * bsg_setup_queue - Create and add the bsg hooks so we can receive requests * @dev: device to attach bsg device to diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 98aaffb4c715..638f83ab04b2 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3851,10 +3851,7 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) static void fc_bsg_remove(struct request_queue *q) { - if (q) { - bsg_unregister_queue(q); - blk_cleanup_queue(q); - } + bsg_remove_queue(q); } diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 26b11a775be9..ff123023e5a5 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ 
b/drivers/scsi/scsi_transport_iscsi.c @@ -1576,10 +1576,7 @@ static int iscsi_remove_host(struct transport_container *tc, struct Scsi_Host *shost = dev_to_shost(dev); struct iscsi_cls_host *ihost = shost->shost_data; - if (ihost->bsg_q) { - bsg_unregister_queue(ihost->bsg_q); - blk_cleanup_queue(ihost->bsg_q); - } + bsg_remove_queue(ihost->bsg_q); return 0; } diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index cf6d47891d77..692b46937e52 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -246,11 +246,7 @@ static int sas_host_remove(struct transport_container *tc, struct device *dev, struct Scsi_Host *shost = dev_to_shost(dev); struct request_queue *q = to_sas_host_attrs(shost)->q; - if (q) { - bsg_unregister_queue(q); - blk_cleanup_queue(q); - } - + bsg_remove_queue(q); return 0; } diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c index dd0e9700a74c..775bb4e5e36e 100644 --- a/drivers/scsi/ufs/ufs_bsg.c +++ b/drivers/scsi/ufs/ufs_bsg.c @@ -157,7 +157,7 @@ void ufs_bsg_remove(struct ufs_hba *hba) if (!hba->bsg_queue) return; - bsg_unregister_queue(hba->bsg_queue); + bsg_remove_queue(hba->bsg_queue); device_del(bsg_dev); put_device(bsg_dev); diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index b13ae143e7ef..9c9b134b1fa5 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -73,6 +73,7 @@ void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); struct request_queue *bsg_setup_queue(struct device *dev, const char *name, bsg_job_fn *job_fn, rq_timed_out_fn *timeout, int dd_job_size); +void bsg_remove_queue(struct request_queue *q); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); -- cgit v1.2.3 From 771a93c489bf486b957c7399f89ee06d43ba2d93 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 22 Oct 2018 05:12:32 -0600 Subject: block: remove blk_complete_request() It's now 
unused. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-softirq.c | 20 -------------------- include/linux/blkdev.h | 1 - 2 files changed, 21 deletions(-) (limited to 'include/linux') diff --git a/block/blk-softirq.c b/block/blk-softirq.c index e47a2f751884..8ca0f6caf174 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -145,26 +145,6 @@ do_local: } EXPORT_SYMBOL(__blk_complete_request); -/** - * blk_complete_request - end I/O on a request - * @req: the request being processed - * - * Description: - * Ends all I/O on a request. It does not handle partial completions, - * unless the driver actually implements this in its completion callback - * through requeueing. The actual completion happens out-of-order, - * through a softirq handler. The user must have registered a completion - * callback through blk_queue_softirq_done(). - **/ -void blk_complete_request(struct request *req) -{ - if (unlikely(blk_should_fake_timeout(req->q))) - return; - if (!blk_mark_rq_complete(req)) - __blk_complete_request(req); -} -EXPORT_SYMBOL(blk_complete_request); - static __init int blk_softirq_init(void) { int i; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e867733b761d..6baea6563364 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1203,7 +1203,6 @@ extern bool __blk_end_request(struct request *rq, blk_status_t error, extern void __blk_end_request_all(struct request *rq, blk_status_t error); extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); -extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); extern void blk_unprep_request(struct request *); -- cgit v1.2.3 From 7ca01926463a15f5d2681458643b2453930b873a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Oct 2018 03:39:36 -0600 Subject: block: remove legacy rq tagging It's now unused, 
kill it. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 88 ---------- block/Makefile | 2 +- block/blk-core.c | 6 - block/blk-mq-debugfs.c | 2 - block/blk-mq-tag.c | 6 +- block/blk-sysfs.c | 3 - block/blk-tag.c | 378 ----------------------------------------- include/linux/blkdev.h | 35 ---- 8 files changed, 3 insertions(+), 517 deletions(-) delete mode 100644 block/blk-tag.c (limited to 'include/linux') diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 207eca58efaa..ac18b488cb5e 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -65,7 +65,6 @@ Description of Contents: 3.2.3 I/O completion 3.2.4 Implications for drivers that do not interpret bios (don't handle multiple segments) - 3.2.5 Request command tagging 3.3 I/O submission 4. The I/O scheduler 5. Scalability related changes @@ -708,93 +707,6 @@ is crossed on completion of a transfer. (The end*request* functions should be used if only if the request has come down from block/bio path, not for direct access requests which only specify rq->buffer without a valid rq->bio) -3.2.5 Generic request command tagging - -3.2.5.1 Tag helpers - -Block now offers some simple generic functionality to help support command -queueing (typically known as tagged command queueing), ie manage more than -one outstanding command on a queue at any given time. - - blk_queue_init_tags(struct request_queue *q, int depth) - - Initialize internal command tagging structures for a maximum - depth of 'depth'. - - blk_queue_free_tags((struct request_queue *q) - - Teardown tag info associated with the queue. This will be done - automatically by block if blk_queue_cleanup() is called on a queue - that is using tagging. 
- -The above are initialization and exit management, the main helpers during -normal operations are: - - blk_queue_start_tag(struct request_queue *q, struct request *rq) - - Start tagged operation for this request. A free tag number between - 0 and 'depth' is assigned to the request (rq->tag holds this number), - and 'rq' is added to the internal tag management. If the maximum depth - for this queue is already achieved (or if the tag wasn't started for - some other reason), 1 is returned. Otherwise 0 is returned. - - blk_queue_end_tag(struct request_queue *q, struct request *rq) - - End tagged operation on this request. 'rq' is removed from the internal - book keeping structures. - -To minimize struct request and queue overhead, the tag helpers utilize some -of the same request members that are used for normal request queue management. -This means that a request cannot both be an active tag and be on the queue -list at the same time. blk_queue_start_tag() will remove the request, but -the driver must remember to call blk_queue_end_tag() before signalling -completion of the request to the block layer. This means ending tag -operations before calling end_that_request_last()! For an example of a user -of these helpers, see the IDE tagged command queueing support. - -3.2.5.2 Tag info - -Some block functions exist to query current tag status or to go from a -tag number to the associated request. These are, in no particular order: - - blk_queue_tagged(q) - - Returns 1 if the queue 'q' is using tagging, 0 if not. - - blk_queue_tag_request(q, tag) - - Returns a pointer to the request associated with tag 'tag'. - - blk_queue_tag_depth(q) - - Return current queue depth. - - blk_queue_tag_queue(q) - - Returns 1 if the queue can accept a new queued command, 0 if we are - at the maximum depth already. - - blk_queue_rq_tagged(rq) - - Returns 1 if the request 'rq' is tagged. 
- -3.2.5.2 Internal structure - -Internally, block manages tags in the blk_queue_tag structure: - - struct blk_queue_tag { - struct request **tag_index; /* array or pointers to rq */ - unsigned long *tag_map; /* bitmap of free tags */ - struct list_head busy_list; /* fifo list of busy tags */ - int busy; /* queue depth */ - int max_depth; /* max queue depth */ - }; - -Most of the above is simple and straight forward, however busy_list may need -a bit of explaining. Normally we don't care too much about request ordering, -but in the event of any barrier requests in the tag queue we need to ensure -that requests are restarted in the order they were queue. - 3.3 I/O Submission The routine submit_bio() is used to submit a single io. Higher level i/o diff --git a/block/Makefile b/block/Makefile index 27eac600474f..213674c8faaa 100644 --- a/block/Makefile +++ b/block/Makefile @@ -3,7 +3,7 @@ # Makefile for the kernel block layer # -obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ +obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ diff --git a/block/blk-core.c b/block/blk-core.c index 03ef8f0e7dc5..daaed4dfa719 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1658,9 +1658,6 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) trace_block_rq_requeue(q, rq); rq_qos_requeue(q, rq); - if (rq->rq_flags & RQF_QUEUED) - blk_queue_end_tag(q, rq); - BUG_ON(blk_queued_rq(rq)); elv_requeue_request(q, rq); @@ -3174,9 +3171,6 @@ void blk_finish_request(struct request *req, blk_status_t error) if (req->rq_flags & RQF_STATS) blk_stat_add(req, now); - if (req->rq_flags & RQF_QUEUED) - blk_queue_end_tag(q, req); - BUG_ON(blk_queued_rq(req)); if (unlikely(laptop_mode) && !blk_rq_is_passthrough(req)) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 
10b284a1f18d..9ed43a7c70b5 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -112,7 +112,6 @@ static int queue_pm_only_show(void *data, struct seq_file *m) #define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name static const char *const blk_queue_flag_name[] = { - QUEUE_FLAG_NAME(QUEUED), QUEUE_FLAG_NAME(STOPPED), QUEUE_FLAG_NAME(DYING), QUEUE_FLAG_NAME(BYPASS), @@ -318,7 +317,6 @@ static const char *const cmd_flag_name[] = { static const char *const rqf_name[] = { RQF_NAME(SORTED), RQF_NAME(STARTED), - RQF_NAME(QUEUED), RQF_NAME(SOFTBARRIER), RQF_NAME(FLUSH_SEQ), RQF_NAME(MIXED_MERGE), diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index cfda95b85d34..4254e74c1446 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -530,10 +530,8 @@ u32 blk_mq_unique_tag(struct request *rq) struct blk_mq_hw_ctx *hctx; int hwq = 0; - if (q->mq_ops) { - hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu); - hwq = hctx->queue_num; - } + hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu); + hwq = hctx->queue_num; return (hwq << BLK_MQ_UNIQUE_TAG_BITS) | (rq->tag & BLK_MQ_UNIQUE_TAG_MASK); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 844a454a7b3a..1b82ccfde3fe 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -849,9 +849,6 @@ static void __blk_release_queue(struct work_struct *work) blk_exit_rl(q, &q->root_rl); - if (q->queue_tags) - __blk_queue_free_tags(q); - blk_queue_free_zone_bitmaps(q); if (!q->mq_ops) { diff --git a/block/blk-tag.c b/block/blk-tag.c deleted file mode 100644 index fbc153aef166..000000000000 --- a/block/blk-tag.c +++ /dev/null @@ -1,378 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Functions related to tagged command queuing - */ -#include -#include -#include -#include -#include - -#include "blk.h" - -/** - * blk_queue_find_tag - find a request by its tag and queue - * @q: The request queue for the device - * @tag: The tag of the request - * - * Notes: - * Should be used when a device returns a tag and you want to 
match - * it with a request. - * - * no locks need be held. - **/ -struct request *blk_queue_find_tag(struct request_queue *q, int tag) -{ - return blk_map_queue_find_tag(q->queue_tags, tag); -} -EXPORT_SYMBOL(blk_queue_find_tag); - -/** - * blk_free_tags - release a given set of tag maintenance info - * @bqt: the tag map to free - * - * Drop the reference count on @bqt and frees it when the last reference - * is dropped. - */ -void blk_free_tags(struct blk_queue_tag *bqt) -{ - if (atomic_dec_and_test(&bqt->refcnt)) { - BUG_ON(find_first_bit(bqt->tag_map, bqt->max_depth) < - bqt->max_depth); - - kfree(bqt->tag_index); - bqt->tag_index = NULL; - - kfree(bqt->tag_map); - bqt->tag_map = NULL; - - kfree(bqt); - } -} -EXPORT_SYMBOL(blk_free_tags); - -/** - * __blk_queue_free_tags - release tag maintenance info - * @q: the request queue for the device - * - * Notes: - * blk_cleanup_queue() will take care of calling this function, if tagging - * has been used. So there's no need to call this directly. - **/ -void __blk_queue_free_tags(struct request_queue *q) -{ - struct blk_queue_tag *bqt = q->queue_tags; - - if (!bqt) - return; - - blk_free_tags(bqt); - - q->queue_tags = NULL; - queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q); -} - -/** - * blk_queue_free_tags - release tag maintenance info - * @q: the request queue for the device - * - * Notes: - * This is used to disable tagged queuing to a device, yet leave - * queue in function. 
- **/ -void blk_queue_free_tags(struct request_queue *q) -{ - queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q); -} -EXPORT_SYMBOL(blk_queue_free_tags); - -static int -init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth) -{ - struct request **tag_index; - unsigned long *tag_map; - int nr_ulongs; - - if (q && depth > q->nr_requests * 2) { - depth = q->nr_requests * 2; - printk(KERN_ERR "%s: adjusted depth to %d\n", - __func__, depth); - } - - tag_index = kcalloc(depth, sizeof(struct request *), GFP_ATOMIC); - if (!tag_index) - goto fail; - - nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; - tag_map = kcalloc(nr_ulongs, sizeof(unsigned long), GFP_ATOMIC); - if (!tag_map) - goto fail; - - tags->real_max_depth = depth; - tags->max_depth = depth; - tags->tag_index = tag_index; - tags->tag_map = tag_map; - - return 0; -fail: - kfree(tag_index); - return -ENOMEM; -} - -static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, - int depth, int alloc_policy) -{ - struct blk_queue_tag *tags; - - tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); - if (!tags) - goto fail; - - if (init_tag_map(q, tags, depth)) - goto fail; - - atomic_set(&tags->refcnt, 1); - tags->alloc_policy = alloc_policy; - tags->next_tag = 0; - return tags; -fail: - kfree(tags); - return NULL; -} - -/** - * blk_init_tags - initialize the tag info for an external tag map - * @depth: the maximum queue depth supported - * @alloc_policy: tag allocation policy - **/ -struct blk_queue_tag *blk_init_tags(int depth, int alloc_policy) -{ - return __blk_queue_init_tags(NULL, depth, alloc_policy); -} -EXPORT_SYMBOL(blk_init_tags); - -/** - * blk_queue_init_tags - initialize the queue tag info - * @q: the request queue for the device - * @depth: the maximum queue depth supported - * @tags: the tag to use - * @alloc_policy: tag allocation policy - * - * Queue lock must be held here if the function is called to resize an - * existing map. 
- **/ -int blk_queue_init_tags(struct request_queue *q, int depth, - struct blk_queue_tag *tags, int alloc_policy) -{ - int rc; - - BUG_ON(tags && q->queue_tags && tags != q->queue_tags); - - if (!tags && !q->queue_tags) { - tags = __blk_queue_init_tags(q, depth, alloc_policy); - - if (!tags) - return -ENOMEM; - - } else if (q->queue_tags) { - rc = blk_queue_resize_tags(q, depth); - if (rc) - return rc; - queue_flag_set(QUEUE_FLAG_QUEUED, q); - return 0; - } else - atomic_inc(&tags->refcnt); - - /* - * assign it, all done - */ - q->queue_tags = tags; - queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); - return 0; -} -EXPORT_SYMBOL(blk_queue_init_tags); - -/** - * blk_queue_resize_tags - change the queueing depth - * @q: the request queue for the device - * @new_depth: the new max command queueing depth - * - * Notes: - * Must be called with the queue lock held. - **/ -int blk_queue_resize_tags(struct request_queue *q, int new_depth) -{ - struct blk_queue_tag *bqt = q->queue_tags; - struct request **tag_index; - unsigned long *tag_map; - int max_depth, nr_ulongs; - - if (!bqt) - return -ENXIO; - - /* - * if we already have large enough real_max_depth. just - * adjust max_depth. *NOTE* as requests with tag value - * between new_depth and real_max_depth can be in-flight, tag - * map can not be shrunk blindly here. 
- */ - if (new_depth <= bqt->real_max_depth) { - bqt->max_depth = new_depth; - return 0; - } - - /* - * Currently cannot replace a shared tag map with a new - * one, so error out if this is the case - */ - if (atomic_read(&bqt->refcnt) != 1) - return -EBUSY; - - /* - * save the old state info, so we can copy it back - */ - tag_index = bqt->tag_index; - tag_map = bqt->tag_map; - max_depth = bqt->real_max_depth; - - if (init_tag_map(q, bqt, new_depth)) - return -ENOMEM; - - memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); - nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; - memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); - - kfree(tag_index); - kfree(tag_map); - return 0; -} -EXPORT_SYMBOL(blk_queue_resize_tags); - -/** - * blk_queue_end_tag - end tag operations for a request - * @q: the request queue for the device - * @rq: the request that has completed - * - * Description: - * Typically called when end_that_request_first() returns %0, meaning - * all transfers have been done for a request. It's important to call - * this function before end_that_request_last(), as that will put the - * request back on the free list thus corrupting the internal tag list. - **/ -void blk_queue_end_tag(struct request_queue *q, struct request *rq) -{ - struct blk_queue_tag *bqt = q->queue_tags; - unsigned tag = rq->tag; /* negative tags invalid */ - - lockdep_assert_held(q->queue_lock); - - BUG_ON(tag >= bqt->real_max_depth); - - list_del_init(&rq->queuelist); - rq->rq_flags &= ~RQF_QUEUED; - rq->tag = -1; - rq->internal_tag = -1; - - if (unlikely(bqt->tag_index[tag] == NULL)) - printk(KERN_ERR "%s: tag %d is missing\n", - __func__, tag); - - bqt->tag_index[tag] = NULL; - - if (unlikely(!test_bit(tag, bqt->tag_map))) { - printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", - __func__, tag); - return; - } - /* - * The tag_map bit acts as a lock for tag_index[bit], so we need - * unlock memory barrier semantics. 
- */ - clear_bit_unlock(tag, bqt->tag_map); -} - -/** - * blk_queue_start_tag - find a free tag and assign it - * @q: the request queue for the device - * @rq: the block request that needs tagging - * - * Description: - * This can either be used as a stand-alone helper, or possibly be - * assigned as the queue &prep_rq_fn (in which case &struct request - * automagically gets a tag assigned). Note that this function - * assumes that any type of request can be queued! if this is not - * true for your device, you must check the request type before - * calling this function. The request will also be removed from - * the request queue, so it's the drivers responsibility to readd - * it if it should need to be restarted for some reason. - **/ -int blk_queue_start_tag(struct request_queue *q, struct request *rq) -{ - struct blk_queue_tag *bqt = q->queue_tags; - unsigned max_depth; - int tag; - - lockdep_assert_held(q->queue_lock); - - if (unlikely((rq->rq_flags & RQF_QUEUED))) { - printk(KERN_ERR - "%s: request %p for device [%s] already tagged %d", - __func__, rq, - rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); - BUG(); - } - - /* - * Protect against shared tag maps, as we may not have exclusive - * access to the tag map. - * - * We reserve a few tags just for sync IO, since we don't want - * to starve sync IO on behalf of flooding async IO. 
- */ - max_depth = bqt->max_depth; - if (!rq_is_sync(rq) && max_depth > 1) { - switch (max_depth) { - case 2: - max_depth = 1; - break; - case 3: - max_depth = 2; - break; - default: - max_depth -= 2; - } - if (q->in_flight[BLK_RW_ASYNC] > max_depth) - return 1; - } - - do { - if (bqt->alloc_policy == BLK_TAG_ALLOC_FIFO) { - tag = find_first_zero_bit(bqt->tag_map, max_depth); - if (tag >= max_depth) - return 1; - } else { - int start = bqt->next_tag; - int size = min_t(int, bqt->max_depth, max_depth + start); - tag = find_next_zero_bit(bqt->tag_map, size, start); - if (tag >= size && start + size > bqt->max_depth) { - size = start + size - bqt->max_depth; - tag = find_first_zero_bit(bqt->tag_map, size); - } - if (tag >= size) - return 1; - } - - } while (test_and_set_bit_lock(tag, bqt->tag_map)); - /* - * We need lock ordering semantics given by test_and_set_bit_lock. - * See blk_queue_end_tag for details. - */ - - bqt->next_tag = (tag + 1) % bqt->max_depth; - rq->rq_flags |= RQF_QUEUED; - rq->tag = tag; - bqt->tag_index[tag] = rq; - blk_start_request(rq); - return 0; -} -EXPORT_SYMBOL(blk_queue_start_tag); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6baea6563364..8afe3331777e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -85,8 +85,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_SORTED ((__force req_flags_t)(1 << 0)) /* drive already may have started this one */ #define RQF_STARTED ((__force req_flags_t)(1 << 1)) -/* uses tagged queueing */ -#define RQF_QUEUED ((__force req_flags_t)(1 << 2)) /* may not be passed by ioscheduler */ #define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) /* request for flush sequence */ @@ -336,15 +334,6 @@ enum blk_queue_state { Queue_up, }; -struct blk_queue_tag { - struct request **tag_index; /* map of busy tags */ - unsigned long *tag_map; /* bit map of free/busy tags */ - int max_depth; /* what we will send to device */ - int real_max_depth; /* what the array can hold */ - 
atomic_t refcnt; /* map can be shared */ - int alloc_policy; /* tag allocation policy */ - int next_tag; /* next tag */ -}; #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ #define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ @@ -568,8 +557,6 @@ struct request_queue { unsigned int dma_pad_mask; unsigned int dma_alignment; - struct blk_queue_tag *queue_tags; - unsigned int nr_sorted; unsigned int in_flight[2]; @@ -680,7 +667,6 @@ struct request_queue { u64 write_hints[BLK_MAX_WRITE_HINTS]; }; -#define QUEUE_FLAG_QUEUED 0 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ #define QUEUE_FLAG_DYING 2 /* queue being torn down */ #define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */ @@ -724,7 +710,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); -#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) @@ -1359,26 +1344,6 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) !list_empty(&plug->cb_list)); } -/* - * tag stuff - */ -extern int blk_queue_start_tag(struct request_queue *, struct request *); -extern struct request *blk_queue_find_tag(struct request_queue *, int); -extern void blk_queue_end_tag(struct request_queue *, struct request *); -extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int); -extern void blk_queue_free_tags(struct request_queue *); -extern int blk_queue_resize_tags(struct request_queue *, int); -extern struct blk_queue_tag *blk_init_tags(int, int); -extern void blk_free_tags(struct blk_queue_tag *); - -static 
inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, - int tag) -{ - if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) - return NULL; - return bqt->tag_index[tag]; -} - extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); -- cgit v1.2.3 From a1ce35fa49852db60fc6e268038530be533c5b15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 10:23:51 -0600 Subject: block: remove dead elevator code This removes a bunch of core and elevator related code. On the core front, we remove anything related to queue running, draining, initialization, plugging, and congestions. We also kill anything related to request allocation, merging, retrieval, and completion. Remove any checking for single queue IO schedulers, as they no longer exist. This means we can also delete a bunch of code related to request issue, adding, completion, etc - and all the SQ related ops and helpers. Also kill the load_default_modules(), as all that did was provide for a way to load the default single queue elevator. 
Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 1 - block/blk-core.c | 1749 +--------------------------------------------- block/blk-exec.c | 20 +- block/blk-ioc.c | 33 +- block/blk-merge.c | 5 - block/blk-settings.c | 36 - block/blk-sysfs.c | 36 +- block/blk.h | 51 -- block/elevator.c | 377 +--------- block/kyber-iosched.c | 1 - block/mq-deadline.c | 1 - include/linux/blkdev.h | 93 +-- include/linux/elevator.h | 90 +-- include/linux/init.h | 1 - init/do_mounts_initrd.c | 3 - init/initramfs.c | 6 - init/main.c | 12 - 17 files changed, 75 insertions(+), 2440 deletions(-) (limited to 'include/linux') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 3a27d31fcda6..44c7e567aa25 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5745,7 +5745,6 @@ static struct elevator_type iosched_bfq_mq = { .exit_sched = bfq_exit_queue, }, - .uses_mq = true, .icq_size = sizeof(struct bfq_io_cq), .icq_align = __alignof__(struct bfq_io_cq), .elevator_attrs = bfq_attrs, diff --git a/block/blk-core.c b/block/blk-core.c index daaed4dfa719..18538a41a532 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -144,46 +144,6 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear); -static void blk_clear_congested(struct request_list *rl, int sync) -{ -#ifdef CONFIG_CGROUP_WRITEBACK - clear_wb_congested(rl->blkg->wb_congested, sync); -#else - /* - * If !CGROUP_WRITEBACK, all blkg's map to bdi->wb and we shouldn't - * flip its congestion state for events on other blkcgs. 
- */ - if (rl == &rl->q->root_rl) - clear_wb_congested(rl->q->backing_dev_info->wb.congested, sync); -#endif -} - -static void blk_set_congested(struct request_list *rl, int sync) -{ -#ifdef CONFIG_CGROUP_WRITEBACK - set_wb_congested(rl->blkg->wb_congested, sync); -#else - /* see blk_clear_congested() */ - if (rl == &rl->q->root_rl) - set_wb_congested(rl->q->backing_dev_info->wb.congested, sync); -#endif -} - -void blk_queue_congestion_threshold(struct request_queue *q) -{ - int nr; - - nr = q->nr_requests - (q->nr_requests / 8) + 1; - if (nr > q->nr_requests) - nr = q->nr_requests; - q->nr_congestion_on = nr; - - nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1; - if (nr < 1) - nr = 1; - q->nr_congestion_off = nr; -} - void blk_rq_init(struct request_queue *q, struct request *rq) { memset(rq, 0, sizeof(*rq)); @@ -292,99 +252,6 @@ void blk_dump_rq_flags(struct request *rq, char *msg) } EXPORT_SYMBOL(blk_dump_rq_flags); -static void blk_delay_work(struct work_struct *work) -{ - struct request_queue *q; - - q = container_of(work, struct request_queue, delay_work.work); - spin_lock_irq(q->queue_lock); - __blk_run_queue(q); - spin_unlock_irq(q->queue_lock); -} - -/** - * blk_delay_queue - restart queueing after defined interval - * @q: The &struct request_queue in question - * @msecs: Delay in msecs - * - * Description: - * Sometimes queueing needs to be postponed for a little while, to allow - * resources to come back. This function will make sure that queueing is - * restarted around the specified time. 
- */ -void blk_delay_queue(struct request_queue *q, unsigned long msecs) -{ - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - if (likely(!blk_queue_dead(q))) - queue_delayed_work(kblockd_workqueue, &q->delay_work, - msecs_to_jiffies(msecs)); -} -EXPORT_SYMBOL(blk_delay_queue); - -/** - * blk_start_queue_async - asynchronously restart a previously stopped queue - * @q: The &struct request_queue in question - * - * Description: - * blk_start_queue_async() will clear the stop flag on the queue, and - * ensure that the request_fn for the queue is run from an async - * context. - **/ -void blk_start_queue_async(struct request_queue *q) -{ - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - queue_flag_clear(QUEUE_FLAG_STOPPED, q); - blk_run_queue_async(q); -} -EXPORT_SYMBOL(blk_start_queue_async); - -/** - * blk_start_queue - restart a previously stopped queue - * @q: The &struct request_queue in question - * - * Description: - * blk_start_queue() will clear the stop flag on the queue, and call - * the request_fn for the queue if it was in a stopped state when - * entered. Also see blk_stop_queue(). - **/ -void blk_start_queue(struct request_queue *q) -{ - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - queue_flag_clear(QUEUE_FLAG_STOPPED, q); - __blk_run_queue(q); -} -EXPORT_SYMBOL(blk_start_queue); - -/** - * blk_stop_queue - stop a queue - * @q: The &struct request_queue in question - * - * Description: - * The Linux block layer assumes that a block driver will consume all - * entries on the request queue when the request_fn strategy is called. - * Often this will not happen, because of hardware limitations (queue - * depth settings). If a device driver gets a 'queue full' response, - * or if it simply chooses not to queue more I/O at one point, it can - * call this function to prevent the request_fn from being called until - * the driver has signalled it's ready to go again. 
This happens by calling - * blk_start_queue() to restart queue operations. - **/ -void blk_stop_queue(struct request_queue *q) -{ - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - cancel_delayed_work(&q->delay_work); - queue_flag_set(QUEUE_FLAG_STOPPED, q); -} -EXPORT_SYMBOL(blk_stop_queue); - /** * blk_sync_queue - cancel any pending callbacks on a queue * @q: the queue @@ -415,8 +282,6 @@ void blk_sync_queue(struct request_queue *q) cancel_delayed_work_sync(&q->requeue_work); queue_for_each_hw_ctx(q, hctx, i) cancel_delayed_work_sync(&hctx->run_work); - } else { - cancel_delayed_work_sync(&q->delay_work); } } EXPORT_SYMBOL(blk_sync_queue); @@ -442,250 +307,12 @@ void blk_clear_pm_only(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_clear_pm_only); -/** - * __blk_run_queue_uncond - run a queue whether or not it has been stopped - * @q: The queue to run - * - * Description: - * Invoke request handling on a queue if there are any pending requests. - * May be used to restart request handling after a request has completed. - * This variant runs the queue whether or not the queue has been - * stopped. Must be called with the queue lock held and interrupts - * disabled. See also @blk_run_queue. - */ -inline void __blk_run_queue_uncond(struct request_queue *q) -{ - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - if (unlikely(blk_queue_dead(q))) - return; - - /* - * Some request_fn implementations, e.g. scsi_request_fn(), unlock - * the queue lock internally. As a result multiple threads may be - * running such a request function concurrently. Keep track of the - * number of active request_fn invocations such that blk_drain_queue() - * can wait until all these request_fn calls have finished. 
- */ - q->request_fn_active++; - q->request_fn(q); - q->request_fn_active--; -} -EXPORT_SYMBOL_GPL(__blk_run_queue_uncond); - -/** - * __blk_run_queue - run a single device queue - * @q: The queue to run - * - * Description: - * See @blk_run_queue. - */ -void __blk_run_queue(struct request_queue *q) -{ - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - if (unlikely(blk_queue_stopped(q))) - return; - - __blk_run_queue_uncond(q); -} -EXPORT_SYMBOL(__blk_run_queue); - -/** - * blk_run_queue_async - run a single device queue in workqueue context - * @q: The queue to run - * - * Description: - * Tells kblockd to perform the equivalent of @blk_run_queue on behalf - * of us. - * - * Note: - * Since it is not allowed to run q->delay_work after blk_cleanup_queue() - * has canceled q->delay_work, callers must hold the queue lock to avoid - * race conditions between blk_cleanup_queue() and blk_run_queue_async(). - */ -void blk_run_queue_async(struct request_queue *q) -{ - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q))) - mod_delayed_work(kblockd_workqueue, &q->delay_work, 0); -} -EXPORT_SYMBOL(blk_run_queue_async); - -/** - * blk_run_queue - run a single device queue - * @q: The queue to run - * - * Description: - * Invoke request handling on this queue, if it has pending work to do. - * May be used to restart queueing when a request has completed. 
- */ -void blk_run_queue(struct request_queue *q) -{ - unsigned long flags; - - WARN_ON_ONCE(q->mq_ops); - - spin_lock_irqsave(q->queue_lock, flags); - __blk_run_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} -EXPORT_SYMBOL(blk_run_queue); - void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); } EXPORT_SYMBOL(blk_put_queue); -/** - * __blk_drain_queue - drain requests from request_queue - * @q: queue to drain - * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV - * - * Drain requests from @q. If @drain_all is set, all requests are drained. - * If not, only ELVPRIV requests are drained. The caller is responsible - * for ensuring that no new requests which need to be drained are queued. - */ -static void __blk_drain_queue(struct request_queue *q, bool drain_all) - __releases(q->queue_lock) - __acquires(q->queue_lock) -{ - int i; - - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - while (true) { - bool drain = false; - - /* - * The caller might be trying to drain @q before its - * elevator is initialized. - */ - if (q->elevator) - elv_drain_elevator(q); - - blkcg_drain_queue(q); - - /* - * This function might be called on a queue which failed - * driver init after queue creation or is not yet fully - * active yet. Some drivers (e.g. fd and loop) get unhappy - * in such cases. Kick queue iff dispatch queue has - * something on it and @q has request_fn set. - */ - if (!list_empty(&q->queue_head) && q->request_fn) - __blk_run_queue(q); - - drain |= q->nr_rqs_elvpriv; - drain |= q->request_fn_active; - - /* - * Unfortunately, requests are queued at and tracked from - * multiple places and there's no single counter which can - * be drained. Check all the queues and counters. 
- */ - if (drain_all) { - struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); - drain |= !list_empty(&q->queue_head); - for (i = 0; i < 2; i++) { - drain |= q->nr_rqs[i]; - drain |= q->in_flight[i]; - if (fq) - drain |= !list_empty(&fq->flush_queue[i]); - } - } - - if (!drain) - break; - - spin_unlock_irq(q->queue_lock); - - msleep(10); - - spin_lock_irq(q->queue_lock); - } - - /* - * With queue marked dead, any woken up waiter will fail the - * allocation path, so the wakeup chaining is lost and we're - * left with hung waiters. We need to wake up those waiters. - */ - if (q->request_fn) { - struct request_list *rl; - - blk_queue_for_each_rl(rl, q) - for (i = 0; i < ARRAY_SIZE(rl->wait); i++) - wake_up_all(&rl->wait[i]); - } -} - -void blk_drain_queue(struct request_queue *q) -{ - spin_lock_irq(q->queue_lock); - __blk_drain_queue(q, true); - spin_unlock_irq(q->queue_lock); -} - -/** - * blk_queue_bypass_start - enter queue bypass mode - * @q: queue of interest - * - * In bypass mode, only the dispatch FIFO queue of @q is used. This - * function makes @q enter bypass mode and drains all requests which were - * throttled or issued before. On return, it's guaranteed that no request - * is being throttled or has ELVPRIV set and blk_queue_bypass() %true - * inside queue or RCU read lock. - */ -void blk_queue_bypass_start(struct request_queue *q) -{ - WARN_ON_ONCE(q->mq_ops); - - spin_lock_irq(q->queue_lock); - q->bypass_depth++; - queue_flag_set(QUEUE_FLAG_BYPASS, q); - spin_unlock_irq(q->queue_lock); - - /* - * Queues start drained. Skip actual draining till init is - * complete. This avoids lenghty delays during queue init which - * can happen many times during boot. 
- */ - if (blk_queue_init_done(q)) { - spin_lock_irq(q->queue_lock); - __blk_drain_queue(q, false); - spin_unlock_irq(q->queue_lock); - - /* ensure blk_queue_bypass() is %true inside RCU read lock */ - synchronize_rcu(); - } -} -EXPORT_SYMBOL_GPL(blk_queue_bypass_start); - -/** - * blk_queue_bypass_end - leave queue bypass mode - * @q: queue of interest - * - * Leave bypass mode and restore the normal queueing behavior. - * - * Note: although blk_queue_bypass_start() is only called for blk-sq queues, - * this function is called for both blk-sq and blk-mq queues. - */ -void blk_queue_bypass_end(struct request_queue *q) -{ - spin_lock_irq(q->queue_lock); - if (!--q->bypass_depth) - queue_flag_clear(QUEUE_FLAG_BYPASS, q); - WARN_ON_ONCE(q->bypass_depth < 0); - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL_GPL(blk_queue_bypass_end); - void blk_set_queue_dying(struct request_queue *q) { blk_queue_flag_set(QUEUE_FLAG_DYING, q); @@ -699,18 +326,6 @@ void blk_set_queue_dying(struct request_queue *q) if (q->mq_ops) blk_mq_wake_waiters(q); - else { - struct request_list *rl; - - spin_lock_irq(q->queue_lock); - blk_queue_for_each_rl(rl, q) { - if (rl->rq_pool) { - wake_up_all(&rl->wait[BLK_RW_SYNC]); - wake_up_all(&rl->wait[BLK_RW_ASYNC]); - } - } - spin_unlock_irq(q->queue_lock); - } /* Make blk_queue_enter() reexamine the DYING flag. 
*/ wake_up_all(&q->mq_freeze_wq); @@ -822,6 +437,7 @@ void blk_cleanup_queue(struct request_queue *q) if (q->mq_ops) blk_mq_free_queue(q); + percpu_ref_exit(&q->q_usage_counter); spin_lock_irq(lock); @@ -1013,8 +629,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, INIT_LIST_HEAD(&q->queue_head); q->last_merge = NULL; - q->end_sector = 0; - q->boundary_rq = NULL; q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); if (q->id < 0) @@ -1047,7 +661,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, #ifdef CONFIG_BLK_CGROUP INIT_LIST_HEAD(&q->blkg_list); #endif - INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1100,105 +713,6 @@ fail_q: } EXPORT_SYMBOL(blk_alloc_queue_node); -/** - * blk_init_queue - prepare a request queue for use with a block device - * @rfn: The function to be called to process requests that have been - * placed on the queue. - * @lock: Request queue spin lock - * - * Description: - * If a block device wishes to use the standard request handling procedures, - * which sorts requests and coalesces adjacent requests, then it must - * call blk_init_queue(). The function @rfn will be called when there - * are requests on the queue that need to be processed. If the device - * supports plugging, then @rfn may not be called immediately when requests - * are available on the queue, but may be called at some time later instead. - * Plugged queues are generally unplugged when a buffer belonging to one - * of the requests on the queue is needed, or due to memory pressure. - * - * @rfn is not required, or even expected, to remove all requests off the - * queue, but only as many as it can handle at a time. If it does leave - * requests on the queue, it is responsible for arranging that the requests - * get dealt with eventually. 
- * - * The queue spin lock must be held while manipulating the requests on the - * request queue; this lock will be taken also from interrupt context, so irq - * disabling is needed for it. - * - * Function returns a pointer to the initialized request queue, or %NULL if - * it didn't succeed. - * - * Note: - * blk_init_queue() must be paired with a blk_cleanup_queue() call - * when the block device is deactivated (such as at module unload). - **/ - -struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) -{ - return blk_init_queue_node(rfn, lock, NUMA_NO_NODE); -} -EXPORT_SYMBOL(blk_init_queue); - -struct request_queue * -blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) -{ - struct request_queue *q; - - q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock); - if (!q) - return NULL; - - q->request_fn = rfn; - if (blk_init_allocated_queue(q) < 0) { - blk_cleanup_queue(q); - return NULL; - } - - return q; -} -EXPORT_SYMBOL(blk_init_queue_node); - -static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio); - - -int blk_init_allocated_queue(struct request_queue *q) -{ - WARN_ON_ONCE(q->mq_ops); - - q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size, GFP_KERNEL); - if (!q->fq) - return -ENOMEM; - - if (q->init_rq_fn && q->init_rq_fn(q, q->fq->flush_rq, GFP_KERNEL)) - goto out_free_flush_queue; - - if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) - goto out_exit_flush_rq; - - INIT_WORK(&q->timeout_work, blk_timeout_work); - q->queue_flags |= QUEUE_FLAG_DEFAULT; - - /* - * This also sets hw/phys segments, boundary and size - */ - blk_queue_make_request(q, blk_queue_bio); - - q->sg_reserved_size = INT_MAX; - - if (elevator_init(q)) - goto out_exit_flush_rq; - return 0; - -out_exit_flush_rq: - if (q->exit_rq_fn) - q->exit_rq_fn(q, q->fq->flush_rq); -out_free_flush_queue: - blk_free_flush_queue(q->fq); - q->fq = NULL; - return -ENOMEM; -} -EXPORT_SYMBOL(blk_init_allocated_queue); - bool blk_get_queue(struct 
request_queue *q) { if (likely(!blk_queue_dying(q))) { @@ -1210,477 +724,38 @@ bool blk_get_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_get_queue); -static inline void blk_free_request(struct request_list *rl, struct request *rq) -{ - if (rq->rq_flags & RQF_ELVPRIV) { - elv_put_request(rl->q, rq); - if (rq->elv.icq) - put_io_context(rq->elv.icq->ioc); - } - - mempool_free(rq, rl->rq_pool); -} - -/* - * ioc_batching returns true if the ioc is a valid batching request and - * should be given priority access to a request. +/** + * blk_get_request - allocate a request + * @q: request queue to allocate a request for + * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC. + * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT. */ -static inline int ioc_batching(struct request_queue *q, struct io_context *ioc) +struct request *blk_get_request(struct request_queue *q, unsigned int op, + blk_mq_req_flags_t flags) { - if (!ioc) - return 0; + struct request *req; - /* - * Make sure the process is able to allocate at least 1 request - * even if the batch times out, otherwise we could theoretically - * lose wakeups. - */ - return ioc->nr_batch_requests == q->nr_batching || - (ioc->nr_batch_requests > 0 - && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); -} + WARN_ON_ONCE(op & REQ_NOWAIT); + WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT)); -/* - * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This - * will cause the process to be a "batcher" on all queues in the system. This - * is the behaviour we want though - once it gets a wakeup it should be given - * a nice run. 
- */ -static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) -{ - if (!ioc || ioc_batching(q, ioc)) - return; + req = blk_mq_alloc_request(q, op, flags); + if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) + q->mq_ops->initialize_rq_fn(req); - ioc->nr_batch_requests = q->nr_batching; - ioc->last_waited = jiffies; + return req; } +EXPORT_SYMBOL(blk_get_request); -static void __freed_request(struct request_list *rl, int sync) +static void part_round_stats_single(struct request_queue *q, int cpu, + struct hd_struct *part, unsigned long now, + unsigned int inflight) { - struct request_queue *q = rl->q; - - if (rl->count[sync] < queue_congestion_off_threshold(q)) - blk_clear_congested(rl, sync); - - if (rl->count[sync] + 1 <= q->nr_requests) { - if (waitqueue_active(&rl->wait[sync])) - wake_up(&rl->wait[sync]); - - blk_clear_rl_full(rl, sync); + if (inflight) { + __part_stat_add(cpu, part, time_in_queue, + inflight * (now - part->stamp)); + __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); } -} - -/* - * A request has just been released. Account for it, update the full and - * congestion status, wake up any waiters. Called under q->queue_lock. 
- */ -static void freed_request(struct request_list *rl, bool sync, - req_flags_t rq_flags) -{ - struct request_queue *q = rl->q; - - q->nr_rqs[sync]--; - rl->count[sync]--; - if (rq_flags & RQF_ELVPRIV) - q->nr_rqs_elvpriv--; - - __freed_request(rl, sync); - - if (unlikely(rl->starved[sync ^ 1])) - __freed_request(rl, sync ^ 1); -} - -int blk_update_nr_requests(struct request_queue *q, unsigned int nr) -{ - struct request_list *rl; - int on_thresh, off_thresh; - - WARN_ON_ONCE(q->mq_ops); - - spin_lock_irq(q->queue_lock); - q->nr_requests = nr; - blk_queue_congestion_threshold(q); - on_thresh = queue_congestion_on_threshold(q); - off_thresh = queue_congestion_off_threshold(q); - - blk_queue_for_each_rl(rl, q) { - if (rl->count[BLK_RW_SYNC] >= on_thresh) - blk_set_congested(rl, BLK_RW_SYNC); - else if (rl->count[BLK_RW_SYNC] < off_thresh) - blk_clear_congested(rl, BLK_RW_SYNC); - - if (rl->count[BLK_RW_ASYNC] >= on_thresh) - blk_set_congested(rl, BLK_RW_ASYNC); - else if (rl->count[BLK_RW_ASYNC] < off_thresh) - blk_clear_congested(rl, BLK_RW_ASYNC); - - if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { - blk_set_rl_full(rl, BLK_RW_SYNC); - } else { - blk_clear_rl_full(rl, BLK_RW_SYNC); - wake_up(&rl->wait[BLK_RW_SYNC]); - } - - if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { - blk_set_rl_full(rl, BLK_RW_ASYNC); - } else { - blk_clear_rl_full(rl, BLK_RW_ASYNC); - wake_up(&rl->wait[BLK_RW_ASYNC]); - } - } - - spin_unlock_irq(q->queue_lock); - return 0; -} - -/** - * __get_request - get a free request - * @rl: request list to allocate from - * @op: operation and flags - * @bio: bio to allocate request for (can be %NULL) - * @flags: BLQ_MQ_REQ_* flags - * @gfp_mask: allocator flags - * - * Get a free request from @q. This function may fail under memory - * pressure or if @q is dead. - * - * Must be called with @q->queue_lock held and, - * Returns ERR_PTR on failure, with @q->queue_lock held. - * Returns request pointer on success, with @q->queue_lock *not held*. 
- */ -static struct request *__get_request(struct request_list *rl, unsigned int op, - struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask) -{ - struct request_queue *q = rl->q; - struct request *rq; - struct elevator_type *et = q->elevator->type; - struct io_context *ioc = rq_ioc(bio); - struct io_cq *icq = NULL; - const bool is_sync = op_is_sync(op); - int may_queue; - req_flags_t rq_flags = RQF_ALLOCED; - - lockdep_assert_held(q->queue_lock); - - if (unlikely(blk_queue_dying(q))) - return ERR_PTR(-ENODEV); - - may_queue = elv_may_queue(q, op); - if (may_queue == ELV_MQUEUE_NO) - goto rq_starved; - - if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { - if (rl->count[is_sync]+1 >= q->nr_requests) { - /* - * The queue will fill after this allocation, so set - * it as full, and mark this process as "batching". - * This process will be allowed to complete a batch of - * requests, others will be blocked. - */ - if (!blk_rl_full(rl, is_sync)) { - ioc_set_batching(q, ioc); - blk_set_rl_full(rl, is_sync); - } else { - if (may_queue != ELV_MQUEUE_MUST - && !ioc_batching(q, ioc)) { - /* - * The queue is full and the allocating - * process is not a "batcher", and not - * exempted by the IO scheduler - */ - return ERR_PTR(-ENOMEM); - } - } - } - blk_set_congested(rl, is_sync); - } - - /* - * Only allow batching queuers to allocate up to 50% over the defined - * limit of requests, otherwise we could have thousands of requests - * allocated with any setting of ->nr_requests - */ - if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) - return ERR_PTR(-ENOMEM); - - q->nr_rqs[is_sync]++; - rl->count[is_sync]++; - rl->starved[is_sync] = 0; - - /* - * Decide whether the new request will be managed by elevator. If - * so, mark @rq_flags and increment elvpriv. Non-zero elvpriv will - * prevent the current elevator from being destroyed until the new - * request is freed. This guarantees icq's won't be destroyed and - * makes creating new ones safe. 
- * - * Flush requests do not use the elevator so skip initialization. - * This allows a request to share the flush and elevator data. - * - * Also, lookup icq while holding queue_lock. If it doesn't exist, - * it will be created after releasing queue_lock. - */ - if (!op_is_flush(op) && !blk_queue_bypass(q)) { - rq_flags |= RQF_ELVPRIV; - q->nr_rqs_elvpriv++; - if (et->icq_cache && ioc) - icq = ioc_lookup_icq(ioc, q); - } - - if (blk_queue_io_stat(q)) - rq_flags |= RQF_IO_STAT; - spin_unlock_irq(q->queue_lock); - - /* allocate and init request */ - rq = mempool_alloc(rl->rq_pool, gfp_mask); - if (!rq) - goto fail_alloc; - - blk_rq_init(q, rq); - blk_rq_set_rl(rq, rl); - rq->cmd_flags = op; - rq->rq_flags = rq_flags; - if (flags & BLK_MQ_REQ_PREEMPT) - rq->rq_flags |= RQF_PREEMPT; - - /* init elvpriv */ - if (rq_flags & RQF_ELVPRIV) { - if (unlikely(et->icq_cache && !icq)) { - if (ioc) - icq = ioc_create_icq(ioc, q, gfp_mask); - if (!icq) - goto fail_elvpriv; - } - - rq->elv.icq = icq; - if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) - goto fail_elvpriv; - - /* @rq->elv.icq holds io_context until @rq is freed */ - if (icq) - get_io_context(icq->ioc); - } -out: - /* - * ioc may be NULL here, and ioc_batching will be false. That's - * OK, if the queue is under the request limit then requests need - * not count toward the nr_batch_requests limit. There will always - * be some limit enforced by BLK_BATCH_TIME. - */ - if (ioc_batching(q, ioc)) - ioc->nr_batch_requests--; - - trace_block_getrq(q, bio, op); - return rq; - -fail_elvpriv: - /* - * elvpriv init failed. ioc, icq and elvpriv aren't mempool backed - * and may fail indefinitely under memory pressure and thus - * shouldn't stall IO. Treat this request as !elvpriv. This will - * disturb iosched and blkcg but weird is bettern than dead. 
- */ - printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n", - __func__, dev_name(q->backing_dev_info->dev)); - - rq->rq_flags &= ~RQF_ELVPRIV; - rq->elv.icq = NULL; - - spin_lock_irq(q->queue_lock); - q->nr_rqs_elvpriv--; - spin_unlock_irq(q->queue_lock); - goto out; - -fail_alloc: - /* - * Allocation failed presumably due to memory. Undo anything we - * might have messed up. - * - * Allocating task should really be put onto the front of the wait - * queue, but this is pretty rare. - */ - spin_lock_irq(q->queue_lock); - freed_request(rl, is_sync, rq_flags); - - /* - * in the very unlikely event that allocation failed and no - * requests for this direction was pending, mark us starved so that - * freeing of a request in the other direction will notice - * us. another possible fix would be to split the rq mempool into - * READ and WRITE - */ -rq_starved: - if (unlikely(rl->count[is_sync] == 0)) - rl->starved[is_sync] = 1; - return ERR_PTR(-ENOMEM); -} - -/** - * get_request - get a free request - * @q: request_queue to allocate request from - * @op: operation and flags - * @bio: bio to allocate request for (can be %NULL) - * @flags: BLK_MQ_REQ_* flags. - * @gfp: allocator flags - * - * Get a free request from @q. If %BLK_MQ_REQ_NOWAIT is set in @flags, - * this function keeps retrying under memory pressure and fails iff @q is dead. - * - * Must be called with @q->queue_lock held and, - * Returns ERR_PTR on failure, with @q->queue_lock held. - * Returns request pointer on success, with @q->queue_lock *not held*. 
- */ -static struct request *get_request(struct request_queue *q, unsigned int op, - struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp) -{ - const bool is_sync = op_is_sync(op); - DEFINE_WAIT(wait); - struct request_list *rl; - struct request *rq; - - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - rl = blk_get_rl(q, bio); /* transferred to @rq on success */ -retry: - rq = __get_request(rl, op, bio, flags, gfp); - if (!IS_ERR(rq)) - return rq; - - if (op & REQ_NOWAIT) { - blk_put_rl(rl); - return ERR_PTR(-EAGAIN); - } - - if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) { - blk_put_rl(rl); - return rq; - } - - /* wait on @rl and retry */ - prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, - TASK_UNINTERRUPTIBLE); - - trace_block_sleeprq(q, bio, op); - - spin_unlock_irq(q->queue_lock); - io_schedule(); - - /* - * After sleeping, we become a "batching" process and will be able - * to allocate at least one request, and up to a big batch of them - * for a small period time. See ioc_batching, ioc_set_batching - */ - ioc_set_batching(q, current->io_context); - - spin_lock_irq(q->queue_lock); - finish_wait(&rl->wait[is_sync], &wait); - - goto retry; -} - -/* flags: BLK_MQ_REQ_PREEMPT and/or BLK_MQ_REQ_NOWAIT. */ -static struct request *blk_old_get_request(struct request_queue *q, - unsigned int op, blk_mq_req_flags_t flags) -{ - struct request *rq; - gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? 
GFP_ATOMIC : GFP_NOIO; - int ret = 0; - - WARN_ON_ONCE(q->mq_ops); - - /* create ioc upfront */ - create_io_context(gfp_mask, q->node); - - ret = blk_queue_enter(q, flags); - if (ret) - return ERR_PTR(ret); - spin_lock_irq(q->queue_lock); - rq = get_request(q, op, NULL, flags, gfp_mask); - if (IS_ERR(rq)) { - spin_unlock_irq(q->queue_lock); - blk_queue_exit(q); - return rq; - } - - /* q->queue_lock is unlocked at this point */ - rq->__data_len = 0; - rq->__sector = (sector_t) -1; - rq->bio = rq->biotail = NULL; - return rq; -} - -/** - * blk_get_request - allocate a request - * @q: request queue to allocate a request for - * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC. - * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT. - */ -struct request *blk_get_request(struct request_queue *q, unsigned int op, - blk_mq_req_flags_t flags) -{ - struct request *req; - - WARN_ON_ONCE(op & REQ_NOWAIT); - WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT)); - - if (q->mq_ops) { - req = blk_mq_alloc_request(q, op, flags); - if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) - q->mq_ops->initialize_rq_fn(req); - } else { - req = blk_old_get_request(q, op, flags); - if (!IS_ERR(req) && q->initialize_rq_fn) - q->initialize_rq_fn(req); - } - - return req; -} -EXPORT_SYMBOL(blk_get_request); - -/** - * blk_requeue_request - put a request back on queue - * @q: request queue where request should be inserted - * @rq: request to be inserted - * - * Description: - * Drivers often keep queueing requests until the hardware cannot accept - * more, when that condition happens we need to put the request back - * on the queue. Must be called with queue lock held. 
- */ -void blk_requeue_request(struct request_queue *q, struct request *rq) -{ - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - blk_delete_timer(rq); - blk_clear_rq_complete(rq); - trace_block_rq_requeue(q, rq); - rq_qos_requeue(q, rq); - - BUG_ON(blk_queued_rq(rq)); - - elv_requeue_request(q, rq); -} -EXPORT_SYMBOL(blk_requeue_request); - -static void add_acct_request(struct request_queue *q, struct request *rq, - int where) -{ - blk_account_io_start(rq, true); - __elv_add_request(q, rq, where); -} - -static void part_round_stats_single(struct request_queue *q, int cpu, - struct hd_struct *part, unsigned long now, - unsigned int inflight) -{ - if (inflight) { - __part_stat_add(cpu, part, time_in_queue, - inflight * (now - part->stamp)); - __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); - } - part->stamp = now; + part->stamp = now; } /** @@ -1730,61 +805,16 @@ EXPORT_SYMBOL_GPL(part_round_stats); void __blk_put_request(struct request_queue *q, struct request *req) { - req_flags_t rq_flags = req->rq_flags; - if (unlikely(!q)) return; - if (q->mq_ops) { - blk_mq_free_request(req); - return; - } - - lockdep_assert_held(q->queue_lock); - - blk_req_zone_write_unlock(req); - blk_pm_put_request(req); - blk_pm_mark_last_busy(req); - - elv_completed_request(q, req); - - /* this is a bio leak */ - WARN_ON(req->bio != NULL); - - rq_qos_done(q, req); - - /* - * Request may not have originated from ll_rw_blk. 
if not, - * it didn't come out of our reserved rq pools - */ - if (rq_flags & RQF_ALLOCED) { - struct request_list *rl = blk_rq_rl(req); - bool sync = op_is_sync(req->cmd_flags); - - BUG_ON(!list_empty(&req->queuelist)); - BUG_ON(ELV_ON_HASH(req)); - - blk_free_request(rl, req); - freed_request(rl, sync, rq_flags); - blk_put_rl(rl); - blk_queue_exit(q); - } + blk_mq_free_request(req); } EXPORT_SYMBOL_GPL(__blk_put_request); void blk_put_request(struct request *req) { - struct request_queue *q = req->q; - - if (q->mq_ops) - blk_mq_free_request(req); - else { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - __blk_put_request(q, req); - spin_unlock_irqrestore(q->queue_lock, flags); - } + blk_mq_free_request(req); } EXPORT_SYMBOL(blk_put_request); @@ -1893,10 +923,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, return false; *request_count = 0; - if (q->mq_ops) - plug_list = &plug->mq_list; - else - plug_list = &plug->list; + plug_list = &plug->mq_list; list_for_each_entry_reverse(rq, plug_list, queuelist) { bool merged = false; @@ -1947,11 +974,7 @@ unsigned int blk_plug_queued_count(struct request_queue *q) if (!plug) goto out; - if (q->mq_ops) - plug_list = &plug->mq_list; - else - plug_list = &plug->list; - + plug_list = &plug->mq_list; list_for_each_entry(rq, plug_list, queuelist) { if (rq->q == q) ret++; @@ -1979,133 +1002,6 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio) } EXPORT_SYMBOL_GPL(blk_init_request_from_bio); -static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) -{ - struct blk_plug *plug; - int where = ELEVATOR_INSERT_SORT; - struct request *req, *free; - unsigned int request_count = 0; - - /* - * low level driver can indicate that it wants pages above a - * certain limit bounced to low memory (ie for highmem, or even - * ISA dma in theory) - */ - blk_queue_bounce(q, &bio); - - blk_queue_split(q, &bio); - - if (!bio_integrity_prep(bio)) - return BLK_QC_T_NONE; - 
- if (op_is_flush(bio->bi_opf)) { - spin_lock_irq(q->queue_lock); - where = ELEVATOR_INSERT_FLUSH; - goto get_rq; - } - - /* - * Check if we can merge with the plugged list before grabbing - * any locks. - */ - if (!blk_queue_nomerges(q)) { - if (blk_attempt_plug_merge(q, bio, &request_count, NULL)) - return BLK_QC_T_NONE; - } else - request_count = blk_plug_queued_count(q); - - spin_lock_irq(q->queue_lock); - - switch (elv_merge(q, &req, bio)) { - case ELEVATOR_BACK_MERGE: - if (!bio_attempt_back_merge(q, req, bio)) - break; - elv_bio_merged(q, req, bio); - free = attempt_back_merge(q, req); - if (free) - __blk_put_request(q, free); - else - elv_merged_request(q, req, ELEVATOR_BACK_MERGE); - goto out_unlock; - case ELEVATOR_FRONT_MERGE: - if (!bio_attempt_front_merge(q, req, bio)) - break; - elv_bio_merged(q, req, bio); - free = attempt_front_merge(q, req); - if (free) - __blk_put_request(q, free); - else - elv_merged_request(q, req, ELEVATOR_FRONT_MERGE); - goto out_unlock; - default: - break; - } - -get_rq: - rq_qos_throttle(q, bio, q->queue_lock); - - /* - * Grab a free request. This is might sleep but can not fail. - * Returns with the queue unlocked. - */ - blk_queue_enter_live(q); - req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO); - if (IS_ERR(req)) { - blk_queue_exit(q); - rq_qos_cleanup(q, bio); - if (PTR_ERR(req) == -ENOMEM) - bio->bi_status = BLK_STS_RESOURCE; - else - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); - goto out_unlock; - } - - rq_qos_track(q, req, bio); - - /* - * After dropping the lock and possibly sleeping here, our request - * may now be mergeable after it had proven unmergeable (above). - * We don't worry about that case for efficiency. It won't happen - * often, and the elevators are able to handle it. 
- */ - blk_init_request_from_bio(req, bio); - - if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) - req->cpu = raw_smp_processor_id(); - - plug = current->plug; - if (plug) { - /* - * If this is the first request added after a plug, fire - * of a plug trace. - * - * @request_count may become stale because of schedule - * out, so check plug list again. - */ - if (!request_count || list_empty(&plug->list)) - trace_block_plug(q); - else { - struct request *last = list_entry_rq(plug->list.prev); - if (request_count >= BLK_MAX_REQUEST_COUNT || - blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE) { - blk_flush_plug_list(plug, false); - trace_block_plug(q); - } - } - list_add_tail(&req->queuelist, &plug->list); - blk_account_io_start(req, true); - } else { - spin_lock_irq(q->queue_lock); - add_acct_request(q, req, where); - __blk_run_queue(q); -out_unlock: - spin_unlock_irq(q->queue_lock); - } - - return BLK_QC_T_NONE; -} - static void handle_bad_sector(struct bio *bio, sector_t maxsector) { char b[BDEVNAME_SIZE]; @@ -2617,9 +1513,6 @@ static int blk_cloned_rq_check_limits(struct request_queue *q, */ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) { - unsigned long flags; - int where = ELEVATOR_INSERT_BACK; - if (blk_cloned_rq_check_limits(q, rq)) return BLK_STS_IOERR; @@ -2627,38 +1520,15 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; - if (q->mq_ops) { - if (blk_queue_io_stat(q)) - blk_account_io_start(rq, true); - /* - * Since we have a scheduler attached on the top device, - * bypass a potential scheduler on the bottom device for - * insert. 
- */ - return blk_mq_request_issue_directly(rq); - } - - spin_lock_irqsave(q->queue_lock, flags); - if (unlikely(blk_queue_dying(q))) { - spin_unlock_irqrestore(q->queue_lock, flags); - return BLK_STS_IOERR; - } + if (blk_queue_io_stat(q)) + blk_account_io_start(rq, true); /* - * Submitting request must be dequeued before calling this function - * because it will be linked to another request_queue + * Since we have a scheduler attached on the top device, + * bypass a potential scheduler on the bottom device for + * insert. */ - BUG_ON(blk_queued_rq(rq)); - - if (op_is_flush(rq->cmd_flags)) - where = ELEVATOR_INSERT_FLUSH; - - add_acct_request(q, rq, where); - if (where == ELEVATOR_INSERT_FLUSH) - __blk_run_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); - - return BLK_STS_OK; + return blk_mq_request_issue_directly(rq); } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); @@ -2778,225 +1648,6 @@ void blk_account_io_start(struct request *rq, bool new_io) part_stat_unlock(); } -static struct request *elv_next_request(struct request_queue *q) -{ - struct request *rq; - struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); - - WARN_ON_ONCE(q->mq_ops); - - while (1) { - list_for_each_entry(rq, &q->queue_head, queuelist) { -#ifdef CONFIG_PM - /* - * If a request gets queued in state RPM_SUSPENDED - * then that's a kernel bug. - */ - WARN_ON_ONCE(q->rpm_status == RPM_SUSPENDED); -#endif - return rq; - } - - /* - * Flush request is running and flush request isn't queueable - * in the drive, we can hold the queue till flush request is - * finished. Even we don't do this, driver can't dispatch next - * requests and will requeue them. And this can improve - * throughput too. For example, we have request flush1, write1, - * flush 2. flush1 is dispatched, then queue is hold, write1 - * isn't inserted to queue. After flush1 is finished, flush2 - * will be dispatched. 
Since disk cache is already clean, - * flush2 will be finished very soon, so looks like flush2 is - * folded to flush1. - * Since the queue is hold, a flag is set to indicate the queue - * should be restarted later. Please see flush_end_io() for - * details. - */ - if (fq->flush_pending_idx != fq->flush_running_idx && - !queue_flush_queueable(q)) { - fq->flush_queue_delayed = 1; - return NULL; - } - if (unlikely(blk_queue_bypass(q)) || - !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0)) - return NULL; - } -} - -/** - * blk_peek_request - peek at the top of a request queue - * @q: request queue to peek at - * - * Description: - * Return the request at the top of @q. The returned request - * should be started using blk_start_request() before LLD starts - * processing it. - * - * Return: - * Pointer to the request at the top of @q if available. Null - * otherwise. - */ -struct request *blk_peek_request(struct request_queue *q) -{ - struct request *rq; - int ret; - - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - while ((rq = elv_next_request(q)) != NULL) { - if (!(rq->rq_flags & RQF_STARTED)) { - /* - * This is the first time the device driver - * sees this request (possibly after - * requeueing). Notify IO scheduler. 
- */ - if (rq->rq_flags & RQF_SORTED) - elv_activate_rq(q, rq); - - /* - * just mark as started even if we don't start - * it, a request that has been delayed should - * not be passed by new incoming requests - */ - rq->rq_flags |= RQF_STARTED; - trace_block_rq_issue(q, rq); - } - - if (!q->boundary_rq || q->boundary_rq == rq) { - q->end_sector = rq_end_sector(rq); - q->boundary_rq = NULL; - } - - if (rq->rq_flags & RQF_DONTPREP) - break; - - if (q->dma_drain_size && blk_rq_bytes(rq)) { - /* - * make sure space for the drain appears we - * know we can do this because max_hw_segments - * has been adjusted to be one fewer than the - * device can handle - */ - rq->nr_phys_segments++; - } - - if (!q->prep_rq_fn) - break; - - ret = q->prep_rq_fn(q, rq); - if (ret == BLKPREP_OK) { - break; - } else if (ret == BLKPREP_DEFER) { - /* - * the request may have been (partially) prepped. - * we need to keep this request in the front to - * avoid resource deadlock. RQF_STARTED will - * prevent other fs requests from passing this one. - */ - if (q->dma_drain_size && blk_rq_bytes(rq) && - !(rq->rq_flags & RQF_DONTPREP)) { - /* - * remove the space for the drain we added - * so that we don't add it again - */ - --rq->nr_phys_segments; - } - - rq = NULL; - break; - } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) { - rq->rq_flags |= RQF_QUIET; - /* - * Mark this request as started so we don't trigger - * any debug logic in the end I/O path. - */ - blk_start_request(rq); - __blk_end_request_all(rq, ret == BLKPREP_INVALID ? 
- BLK_STS_TARGET : BLK_STS_IOERR); - } else { - printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); - break; - } - } - - return rq; -} -EXPORT_SYMBOL(blk_peek_request); - -static void blk_dequeue_request(struct request *rq) -{ - struct request_queue *q = rq->q; - - BUG_ON(list_empty(&rq->queuelist)); - BUG_ON(ELV_ON_HASH(rq)); - - list_del_init(&rq->queuelist); - - /* - * the time frame between a request being removed from the lists - * and to it is freed is accounted as io that is in progress at - * the driver side. - */ - if (blk_account_rq(rq)) - q->in_flight[rq_is_sync(rq)]++; -} - -/** - * blk_start_request - start request processing on the driver - * @req: request to dequeue - * - * Description: - * Dequeue @req and start timeout timer on it. This hands off the - * request to the driver. - */ -void blk_start_request(struct request *req) -{ - lockdep_assert_held(req->q->queue_lock); - WARN_ON_ONCE(req->q->mq_ops); - - blk_dequeue_request(req); - - if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) { - req->io_start_time_ns = ktime_get_ns(); -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW - req->throtl_size = blk_rq_sectors(req); -#endif - req->rq_flags |= RQF_STATS; - rq_qos_issue(req->q, req); - } - - BUG_ON(blk_rq_is_complete(req)); - blk_add_timer(req); -} -EXPORT_SYMBOL(blk_start_request); - -/** - * blk_fetch_request - fetch a request from a request queue - * @q: request queue to fetch a request from - * - * Description: - * Return the request at the top of @q. The request is started on - * return and LLD can start processing it immediately. - * - * Return: - * Pointer to the request at the top of @q if available. Null - * otherwise. 
- */ -struct request *blk_fetch_request(struct request_queue *q) -{ - struct request *rq; - - lockdep_assert_held(q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - rq = blk_peek_request(q); - if (rq) - blk_start_request(rq); - return rq; -} -EXPORT_SYMBOL(blk_fetch_request); - /* * Steal bios from a request and add them to a bio list. * The request must not have been partially completed before. @@ -3122,252 +1773,6 @@ bool blk_update_request(struct request *req, blk_status_t error, } EXPORT_SYMBOL_GPL(blk_update_request); -static bool blk_update_bidi_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes, - unsigned int bidi_bytes) -{ - if (blk_update_request(rq, error, nr_bytes)) - return true; - - /* Bidi request must be completed as a whole */ - if (unlikely(blk_bidi_rq(rq)) && - blk_update_request(rq->next_rq, error, bidi_bytes)) - return true; - - if (blk_queue_add_random(rq->q)) - add_disk_randomness(rq->rq_disk); - - return false; -} - -/** - * blk_unprep_request - unprepare a request - * @req: the request - * - * This function makes a request ready for complete resubmission (or - * completion). It happens only after all error handling is complete, - * so represents the appropriate moment to deallocate any resources - * that were allocated to the request in the prep_rq_fn. The queue - * lock is held when calling this. 
- */ -void blk_unprep_request(struct request *req) -{ - struct request_queue *q = req->q; - - req->rq_flags &= ~RQF_DONTPREP; - if (q->unprep_rq_fn) - q->unprep_rq_fn(q, req); -} -EXPORT_SYMBOL_GPL(blk_unprep_request); - -void blk_finish_request(struct request *req, blk_status_t error) -{ - struct request_queue *q = req->q; - u64 now = ktime_get_ns(); - - lockdep_assert_held(req->q->queue_lock); - WARN_ON_ONCE(q->mq_ops); - - if (req->rq_flags & RQF_STATS) - blk_stat_add(req, now); - - BUG_ON(blk_queued_rq(req)); - - if (unlikely(laptop_mode) && !blk_rq_is_passthrough(req)) - laptop_io_completion(req->q->backing_dev_info); - - blk_delete_timer(req); - - if (req->rq_flags & RQF_DONTPREP) - blk_unprep_request(req); - - blk_account_io_done(req, now); - - if (req->end_io) { - rq_qos_done(q, req); - req->end_io(req, error); - } else { - if (blk_bidi_rq(req)) - __blk_put_request(req->next_rq->q, req->next_rq); - - __blk_put_request(q, req); - } -} -EXPORT_SYMBOL(blk_finish_request); - -/** - * blk_end_bidi_request - Complete a bidi request - * @rq: the request to complete - * @error: block status code - * @nr_bytes: number of bytes to complete @rq - * @bidi_bytes: number of bytes to complete @rq->next_rq - * - * Description: - * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. - * Drivers that supports bidi can safely call this member for any - * type of request, bidi or uni. In the later case @bidi_bytes is - * just ignored. 
- * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - **/ -static bool blk_end_bidi_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes, unsigned int bidi_bytes) -{ - struct request_queue *q = rq->q; - unsigned long flags; - - WARN_ON_ONCE(q->mq_ops); - - if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) - return true; - - spin_lock_irqsave(q->queue_lock, flags); - blk_finish_request(rq, error); - spin_unlock_irqrestore(q->queue_lock, flags); - - return false; -} - -/** - * __blk_end_bidi_request - Complete a bidi request with queue lock held - * @rq: the request to complete - * @error: block status code - * @nr_bytes: number of bytes to complete @rq - * @bidi_bytes: number of bytes to complete @rq->next_rq - * - * Description: - * Identical to blk_end_bidi_request() except that queue lock is - * assumed to be locked on entry and remains so on return. - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - **/ -static bool __blk_end_bidi_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes, unsigned int bidi_bytes) -{ - lockdep_assert_held(rq->q->queue_lock); - WARN_ON_ONCE(rq->q->mq_ops); - - if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) - return true; - - blk_finish_request(rq, error); - - return false; -} - -/** - * blk_end_request - Helper function for drivers to complete the request. - * @rq: the request being processed - * @error: block status code - * @nr_bytes: number of bytes to complete - * - * Description: - * Ends I/O on a number of bytes attached to @rq. - * If @rq has leftover, sets it up for the next range of segments. 
- * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - **/ -bool blk_end_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes) -{ - WARN_ON_ONCE(rq->q->mq_ops); - return blk_end_bidi_request(rq, error, nr_bytes, 0); -} -EXPORT_SYMBOL(blk_end_request); - -/** - * blk_end_request_all - Helper function for drives to finish the request. - * @rq: the request to finish - * @error: block status code - * - * Description: - * Completely finish @rq. - */ -void blk_end_request_all(struct request *rq, blk_status_t error) -{ - bool pending; - unsigned int bidi_bytes = 0; - - if (unlikely(blk_bidi_rq(rq))) - bidi_bytes = blk_rq_bytes(rq->next_rq); - - pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); - BUG_ON(pending); -} -EXPORT_SYMBOL(blk_end_request_all); - -/** - * __blk_end_request - Helper function for drivers to complete the request. - * @rq: the request being processed - * @error: block status code - * @nr_bytes: number of bytes to complete - * - * Description: - * Must be called with queue lock held unlike blk_end_request(). - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - **/ -bool __blk_end_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes) -{ - lockdep_assert_held(rq->q->queue_lock); - WARN_ON_ONCE(rq->q->mq_ops); - - return __blk_end_bidi_request(rq, error, nr_bytes, 0); -} -EXPORT_SYMBOL(__blk_end_request); - -/** - * __blk_end_request_all - Helper function for drives to finish the request. - * @rq: the request to finish - * @error: block status code - * - * Description: - * Completely finish @rq. Must be called with queue lock held. 
- */ -void __blk_end_request_all(struct request *rq, blk_status_t error) -{ - bool pending; - unsigned int bidi_bytes = 0; - - lockdep_assert_held(rq->q->queue_lock); - WARN_ON_ONCE(rq->q->mq_ops); - - if (unlikely(blk_bidi_rq(rq))) - bidi_bytes = blk_rq_bytes(rq->next_rq); - - pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes); - BUG_ON(pending); -} -EXPORT_SYMBOL(__blk_end_request_all); - -/** - * __blk_end_request_cur - Helper function to finish the current request chunk. - * @rq: the request to finish the current chunk for - * @error: block status code - * - * Description: - * Complete the current consecutively mapped chunk from @rq. Must - * be called with queue lock held. - * - * Return: - * %false - we are done with this request - * %true - still buffers pending for this request - */ -bool __blk_end_request_cur(struct request *rq, blk_status_t error) -{ - return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); -} -EXPORT_SYMBOL(__blk_end_request_cur); - void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { @@ -3567,7 +1972,6 @@ void blk_start_plug(struct blk_plug *plug) if (tsk->plug) return; - INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->mq_list); INIT_LIST_HEAD(&plug->cb_list); /* @@ -3578,36 +1982,6 @@ void blk_start_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_start_plug); -static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) -{ - struct request *rqa = container_of(a, struct request, queuelist); - struct request *rqb = container_of(b, struct request, queuelist); - - return !(rqa->q < rqb->q || - (rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb))); -} - -/* - * If 'from_schedule' is true, then postpone the dispatch of requests - * until a safe kblockd context. We due this to avoid accidental big - * additional stack usage in driver dispatch, in places where the originally - * plugger did not intend it. 
- */ -static void queue_unplugged(struct request_queue *q, unsigned int depth, - bool from_schedule) - __releases(q->queue_lock) -{ - lockdep_assert_held(q->queue_lock); - - trace_block_unplug(q, depth, !from_schedule); - - if (from_schedule) - blk_run_queue_async(q); - else - __blk_run_queue(q); - spin_unlock_irq(q->queue_lock); -} - static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) { LIST_HEAD(callbacks); @@ -3652,65 +2026,10 @@ EXPORT_SYMBOL(blk_check_plugged); void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) { - struct request_queue *q; - struct request *rq; - LIST_HEAD(list); - unsigned int depth; - flush_plug_callbacks(plug, from_schedule); if (!list_empty(&plug->mq_list)) blk_mq_flush_plug_list(plug, from_schedule); - - if (list_empty(&plug->list)) - return; - - list_splice_init(&plug->list, &list); - - list_sort(NULL, &list, plug_rq_cmp); - - q = NULL; - depth = 0; - - while (!list_empty(&list)) { - rq = list_entry_rq(list.next); - list_del_init(&rq->queuelist); - BUG_ON(!rq->q); - if (rq->q != q) { - /* - * This drops the queue lock - */ - if (q) - queue_unplugged(q, depth, from_schedule); - q = rq->q; - depth = 0; - spin_lock_irq(q->queue_lock); - } - - /* - * Short-circuit if @q is dead - */ - if (unlikely(blk_queue_dying(q))) { - __blk_end_request_all(rq, BLK_STS_IOERR); - continue; - } - - /* - * rq is already accounted, so use raw insert - */ - if (op_is_flush(rq->cmd_flags)) - __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); - else - __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); - - depth++; - } - - /* - * This drops the queue lock - */ - if (q) - queue_unplugged(q, depth, from_schedule); } void blk_finish_plug(struct blk_plug *plug) diff --git a/block/blk-exec.c b/block/blk-exec.c index f7b292f12449..a34b7d918742 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -48,8 +48,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, struct request *rq, int at_head, 
rq_end_io_fn *done) { - int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; - WARN_ON(irqs_disabled()); WARN_ON(!blk_rq_is_passthrough(rq)); @@ -60,23 +58,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, * don't check dying flag for MQ because the request won't * be reused after dying flag is set */ - if (q->mq_ops) { - blk_mq_sched_insert_request(rq, at_head, true, false); - return; - } - - spin_lock_irq(q->queue_lock); - - if (unlikely(blk_queue_dying(q))) { - rq->rq_flags |= RQF_QUIET; - __blk_end_request_all(rq, BLK_STS_IOERR); - spin_unlock_irq(q->queue_lock); - return; - } - - __elv_add_request(q, rq, where); - __blk_run_queue(q); - spin_unlock_irq(q->queue_lock); + blk_mq_sched_insert_request(rq, at_head, true, false); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 01580f88fcb3..391128456aec 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -48,10 +48,8 @@ static void ioc_exit_icq(struct io_cq *icq) if (icq->flags & ICQ_EXITED) return; - if (et->uses_mq && et->ops.mq.exit_icq) + if (et->ops.mq.exit_icq) et->ops.mq.exit_icq(icq); - else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn) - et->ops.sq.elevator_exit_icq_fn(icq); icq->flags |= ICQ_EXITED; } @@ -187,25 +185,13 @@ void put_io_context_active(struct io_context *ioc) * reverse double locking. Read comment in ioc_release_fn() for * explanation on the nested locking annotation. 
*/ -retry: spin_lock_irqsave_nested(&ioc->lock, flags, 1); hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) { if (icq->flags & ICQ_EXITED) continue; et = icq->q->elevator->type; - if (et->uses_mq) { - ioc_exit_icq(icq); - } else { - if (spin_trylock(icq->q->queue_lock)) { - ioc_exit_icq(icq); - spin_unlock(icq->q->queue_lock); - } else { - spin_unlock_irqrestore(&ioc->lock, flags); - cpu_relax(); - goto retry; - } - } + ioc_exit_icq(icq); } spin_unlock_irqrestore(&ioc->lock, flags); @@ -232,7 +218,7 @@ static void __ioc_clear_queue(struct list_head *icq_list) while (!list_empty(icq_list)) { struct io_cq *icq = list_entry(icq_list->next, - struct io_cq, q_node); + struct io_cq, q_node); struct io_context *ioc = icq->ioc; spin_lock_irqsave(&ioc->lock, flags); @@ -253,14 +239,9 @@ void ioc_clear_queue(struct request_queue *q) spin_lock_irq(q->queue_lock); list_splice_init(&q->icq_list, &icq_list); + spin_unlock_irq(q->queue_lock); - if (q->mq_ops) { - spin_unlock_irq(q->queue_lock); - __ioc_clear_queue(&icq_list); - } else { - __ioc_clear_queue(&icq_list); - spin_unlock_irq(q->queue_lock); - } + __ioc_clear_queue(&icq_list); } int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node) @@ -415,10 +396,8 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { hlist_add_head(&icq->ioc_node, &ioc->icq_list); list_add(&icq->q_node, &q->icq_list); - if (et->uses_mq && et->ops.mq.init_icq) + if (et->ops.mq.init_icq) et->ops.mq.init_icq(icq); - else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn) - et->ops.sq.elevator_init_icq_fn(icq); } else { kmem_cache_free(et->icq_cache, icq); icq = ioc_lookup_icq(ioc, q); diff --git a/block/blk-merge.c b/block/blk-merge.c index 6b5ad275ed56..c068c30b0c35 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -862,13 +862,8 @@ struct request *attempt_front_merge(struct request_queue *q, struct request *rq) int 
blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next) { - struct elevator_queue *e = q->elevator; struct request *free; - if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn) - if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next)) - return 0; - free = attempt_merge(q, rq, next); if (free) { __blk_put_request(q, free); diff --git a/block/blk-settings.c b/block/blk-settings.c index ac8b8ba4b126..39c3c301a687 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -20,40 +20,6 @@ EXPORT_SYMBOL(blk_max_low_pfn); unsigned long blk_max_pfn; -/** - * blk_queue_prep_rq - set a prepare_request function for queue - * @q: queue - * @pfn: prepare_request function - * - * It's possible for a queue to register a prepare_request callback which - * is invoked before the request is handed to the request_fn. The goal of - * the function is to prepare a request for I/O, it can be used to build a - * cdb from the request data for instance. - * - */ -void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) -{ - q->prep_rq_fn = pfn; -} -EXPORT_SYMBOL(blk_queue_prep_rq); - -/** - * blk_queue_unprep_rq - set an unprepare_request function for queue - * @q: queue - * @ufn: unprepare_request function - * - * It's possible for a queue to register an unprepare_request callback - * which is invoked before the request is finally completed. The goal - * of the function is to deallocate any data that was allocated in the - * prepare_request callback. 
- * - */ -void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn) -{ - q->unprep_rq_fn = ufn; -} -EXPORT_SYMBOL(blk_queue_unprep_rq); - void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) { q->softirq_done_fn = fn; @@ -163,8 +129,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) q->make_request_fn = mfn; blk_queue_dma_alignment(q, 511); - blk_queue_congestion_threshold(q); - q->nr_batching = BLK_BATCH_REQ; blk_set_default_limits(&q->limits); } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 1b82ccfde3fe..d4b1b84ba8ca 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -68,7 +68,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) unsigned long nr; int ret, err; - if (!q->request_fn && !q->mq_ops) + if (!q->mq_ops) return -EINVAL; ret = queue_var_store(&nr, page, count); @@ -78,11 +78,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) if (nr < BLKDEV_MIN_RQ) nr = BLKDEV_MIN_RQ; - if (q->request_fn) - err = blk_update_nr_requests(q, nr); - else - err = blk_mq_update_nr_requests(q, nr); - + err = blk_mq_update_nr_requests(q, nr); if (err) return err; @@ -463,20 +459,14 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, * ends up either enabling or disabling wbt completely. We can't * have IO inflight if that happens. 
*/ - if (q->mq_ops) { - blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); - } else - blk_queue_bypass_start(q); + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); wbt_set_min_lat(q, val); wbt_update_limits(q); - if (q->mq_ops) { - blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); - } else - blk_queue_bypass_end(q); + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); return count; } @@ -847,17 +837,10 @@ static void __blk_release_queue(struct work_struct *work) blk_free_queue_stats(q->stats); - blk_exit_rl(q, &q->root_rl); - blk_queue_free_zone_bitmaps(q); - if (!q->mq_ops) { - if (q->exit_rq_fn) - q->exit_rq_fn(q, q->fq->flush_rq); - blk_free_flush_queue(q->fq); - } else { + if (q->mq_ops) blk_mq_release(q); - } blk_trace_shutdown(q); @@ -920,7 +903,6 @@ int blk_register_queue(struct gendisk *disk) if (!blk_queue_init_done(q)) { queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); percpu_ref_switch_to_percpu(&q->q_usage_counter); - blk_queue_bypass_end(q); } ret = blk_trace_init_sysfs(dev); @@ -947,7 +929,7 @@ int blk_register_queue(struct gendisk *disk) blk_throtl_register_queue(q); - if (q->request_fn || (q->mq_ops && q->elevator)) { + if ((q->mq_ops && q->elevator)) { ret = elv_register_queue(q); if (ret) { mutex_unlock(&q->sysfs_lock); @@ -1005,7 +987,7 @@ void blk_unregister_queue(struct gendisk *disk) blk_trace_remove_sysfs(disk_to_dev(disk)); mutex_lock(&q->sysfs_lock); - if (q->request_fn || (q->mq_ops && q->elevator)) + if (q->mq_ops && q->elevator) elv_unregister_queue(q); mutex_unlock(&q->sysfs_lock); diff --git a/block/blk.h b/block/blk.h index 57a302bf5a70..e2604ae7ddfa 100644 --- a/block/blk.h +++ b/block/blk.h @@ -7,12 +7,6 @@ #include #include "blk-mq.h" -/* Amount of time in which a process may batch requests */ -#define BLK_BATCH_TIME (HZ/50UL) - -/* Number of requests a "batching" process may submit */ -#define BLK_BATCH_REQ 32 - /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) @@ -132,9 +126,6 @@ void 
blk_exit_rl(struct request_queue *q, struct request_list *rl); void blk_exit_queue(struct request_queue *q); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); -void blk_queue_bypass_start(struct request_queue *q); -void blk_queue_bypass_end(struct request_queue *q); -void __blk_queue_free_tags(struct request_queue *q); void blk_freeze_queue(struct request_queue *q); static inline void blk_queue_enter_live(struct request_queue *q) @@ -281,23 +272,6 @@ static inline bool blk_rq_is_complete(struct request *rq) void blk_insert_flush(struct request *rq); -static inline void elv_activate_rq(struct request_queue *q, struct request *rq) -{ - struct elevator_queue *e = q->elevator; - - if (e->type->ops.sq.elevator_activate_req_fn) - e->type->ops.sq.elevator_activate_req_fn(q, rq); -} - -static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) -{ - struct elevator_queue *e = q->elevator; - - if (e->type->ops.sq.elevator_deactivate_req_fn) - e->type->ops.sq.elevator_deactivate_req_fn(q, rq); -} - -int elevator_init(struct request_queue *); int elevator_init_mq(struct request_queue *q); int elevator_switch_mq(struct request_queue *q, struct elevator_type *new_e); @@ -332,31 +306,8 @@ void blk_rq_set_mixed_merge(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); -void blk_queue_congestion_threshold(struct request_queue *q); - int blk_dev_init(void); - -/* - * Return the threshold (number of used requests) at which the queue is - * considered to be congested. It include a little hysteresis to keep the - * context switch rate down. 
- */ -static inline int queue_congestion_on_threshold(struct request_queue *q) -{ - return q->nr_congestion_on; -} - -/* - * The threshold at which a queue is considered to be uncongested - */ -static inline int queue_congestion_off_threshold(struct request_queue *q) -{ - return q->nr_congestion_off; -} - -extern int blk_update_nr_requests(struct request_queue *, unsigned int); - /* * Contribute to IO statistics IFF: * @@ -478,8 +429,6 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) } #endif /* CONFIG_BOUNCE */ -extern void blk_drain_queue(struct request_queue *q); - #ifdef CONFIG_BLK_CGROUP_IOLATENCY extern int blk_iolatency_init(struct request_queue *q); #else diff --git a/block/elevator.c b/block/elevator.c index 54e1adac26c5..334097c54b08 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -61,10 +61,8 @@ static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (e->uses_mq && e->type->ops.mq.allow_merge) + if (e->type->ops.mq.allow_merge) return e->type->ops.mq.allow_merge(q, rq, bio); - else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn) - return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio); return 1; } @@ -95,14 +93,14 @@ static bool elevator_match(const struct elevator_type *e, const char *name) } /* - * Return scheduler with name 'name' and with matching 'mq capability + * Return scheduler with name 'name' */ -static struct elevator_type *elevator_find(const char *name, bool mq) +static struct elevator_type *elevator_find(const char *name) { struct elevator_type *e; list_for_each_entry(e, &elv_list, list) { - if (elevator_match(e, name) && (mq == e->uses_mq)) + if (elevator_match(e, name)) return e; } @@ -121,12 +119,12 @@ static struct elevator_type *elevator_get(struct request_queue *q, spin_lock(&elv_list_lock); - e = elevator_find(name, q->mq_ops != NULL); + e = elevator_find(name); if (!e && 
try_loading) { spin_unlock(&elv_list_lock); request_module("%s-iosched", name); spin_lock(&elv_list_lock); - e = elevator_find(name, q->mq_ops != NULL); + e = elevator_find(name); } if (e && !try_module_get(e->elevator_owner)) @@ -150,26 +148,6 @@ static int __init elevator_setup(char *str) __setup("elevator=", elevator_setup); -/* called during boot to load the elevator chosen by the elevator param */ -void __init load_default_elevator_module(void) -{ - struct elevator_type *e; - - if (!chosen_elevator[0]) - return; - - /* - * Boot parameter is deprecated, we haven't supported that for MQ. - * Only look for non-mq schedulers from here. - */ - spin_lock(&elv_list_lock); - e = elevator_find(chosen_elevator, false); - spin_unlock(&elv_list_lock); - - if (!e) - request_module("%s-iosched", chosen_elevator); -} - static struct kobj_type elv_ktype; struct elevator_queue *elevator_alloc(struct request_queue *q, @@ -185,7 +163,6 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, kobject_init(&eq->kobj, &elv_ktype); mutex_init(&eq->sysfs_lock); hash_init(eq->hash); - eq->uses_mq = e->uses_mq; return eq; } @@ -200,52 +177,11 @@ static void elevator_release(struct kobject *kobj) kfree(e); } -/* - * Use the default elevator specified by config boot param for non-mq devices, - * or by config option. Don't try to load modules as we could be running off - * async and request_module() isn't allowed from async. - */ -int elevator_init(struct request_queue *q) -{ - struct elevator_type *e = NULL; - int err = 0; - - /* - * q->sysfs_lock must be held to provide mutual exclusion between - * elevator_switch() and here. - */ - mutex_lock(&q->sysfs_lock); - if (unlikely(q->elevator)) - goto out_unlock; - - if (*chosen_elevator) { - e = elevator_get(q, chosen_elevator, false); - if (!e) - printk(KERN_ERR "I/O scheduler %s not found\n", - chosen_elevator); - } - - if (!e) { - printk(KERN_ERR - "Default I/O scheduler not found. 
Using noop.\n"); - e = elevator_get(q, "noop", false); - } - - err = e->ops.sq.elevator_init_fn(q, e); - if (err) - elevator_put(e); -out_unlock: - mutex_unlock(&q->sysfs_lock); - return err; -} - void elevator_exit(struct request_queue *q, struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); - if (e->uses_mq && e->type->ops.mq.exit_sched) + if (e->type->ops.mq.exit_sched) blk_mq_exit_sched(q, e); - else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn) - e->type->ops.sq.elevator_exit_fn(e); mutex_unlock(&e->sysfs_lock); kobject_put(&e->kobj); @@ -393,10 +329,8 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req, return ELEVATOR_BACK_MERGE; } - if (e->uses_mq && e->type->ops.mq.request_merge) + if (e->type->ops.mq.request_merge) return e->type->ops.mq.request_merge(q, req, bio); - else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn) - return e->type->ops.sq.elevator_merge_fn(q, req, bio); return ELEVATOR_NO_MERGE; } @@ -447,10 +381,8 @@ void elv_merged_request(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e->uses_mq && e->type->ops.mq.request_merged) + if (e->type->ops.mq.request_merged) e->type->ops.mq.request_merged(q, rq, type); - else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn) - e->type->ops.sq.elevator_merged_fn(q, rq, type); if (type == ELEVATOR_BACK_MERGE) elv_rqhash_reposition(q, rq); @@ -464,13 +396,8 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, struct elevator_queue *e = q->elevator; bool next_sorted = false; - if (e->uses_mq && e->type->ops.mq.requests_merged) + if (e->type->ops.mq.requests_merged) e->type->ops.mq.requests_merged(q, rq, next); - else if (e->type->ops.sq.elevator_merge_req_fn) { - next_sorted = (__force bool)(next->rq_flags & RQF_SORTED); - if (next_sorted) - e->type->ops.sq.elevator_merge_req_fn(q, rq, next); - } elv_rqhash_reposition(q, rq); @@ -482,156 +409,12 @@ void elv_merge_requests(struct request_queue *q, struct 
request *rq, q->last_merge = rq; } -void elv_bio_merged(struct request_queue *q, struct request *rq, - struct bio *bio) -{ - struct elevator_queue *e = q->elevator; - - if (WARN_ON_ONCE(e->uses_mq)) - return; - - if (e->type->ops.sq.elevator_bio_merged_fn) - e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio); -} - -void elv_requeue_request(struct request_queue *q, struct request *rq) -{ - /* - * it already went through dequeue, we need to decrement the - * in_flight count again - */ - if (blk_account_rq(rq)) { - q->in_flight[rq_is_sync(rq)]--; - if (rq->rq_flags & RQF_SORTED) - elv_deactivate_rq(q, rq); - } - - rq->rq_flags &= ~RQF_STARTED; - - blk_pm_requeue_request(rq); - - __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE); -} - -void elv_drain_elevator(struct request_queue *q) -{ - struct elevator_queue *e = q->elevator; - static int printed; - - if (WARN_ON_ONCE(e->uses_mq)) - return; - - lockdep_assert_held(q->queue_lock); - - while (e->type->ops.sq.elevator_dispatch_fn(q, 1)) - ; - if (q->nr_sorted && !blk_queue_is_zoned(q) && printed++ < 10 ) { - printk(KERN_ERR "%s: forced dispatching is broken " - "(nr_sorted=%u), please report this\n", - q->elevator->type->elevator_name, q->nr_sorted); - } -} - -void __elv_add_request(struct request_queue *q, struct request *rq, int where) -{ - trace_block_rq_insert(q, rq); - - blk_pm_add_request(q, rq); - - rq->q = q; - - if (rq->rq_flags & RQF_SOFTBARRIER) { - /* barriers are scheduling boundary, update end_sector */ - if (!blk_rq_is_passthrough(rq)) { - q->end_sector = rq_end_sector(rq); - q->boundary_rq = rq; - } - } else if (!(rq->rq_flags & RQF_ELVPRIV) && - (where == ELEVATOR_INSERT_SORT || - where == ELEVATOR_INSERT_SORT_MERGE)) - where = ELEVATOR_INSERT_BACK; - - switch (where) { - case ELEVATOR_INSERT_REQUEUE: - case ELEVATOR_INSERT_FRONT: - rq->rq_flags |= RQF_SOFTBARRIER; - list_add(&rq->queuelist, &q->queue_head); - break; - - case ELEVATOR_INSERT_BACK: - rq->rq_flags |= RQF_SOFTBARRIER; - 
elv_drain_elevator(q); - list_add_tail(&rq->queuelist, &q->queue_head); - /* - * We kick the queue here for the following reasons. - * - The elevator might have returned NULL previously - * to delay requests and returned them now. As the - * queue wasn't empty before this request, ll_rw_blk - * won't run the queue on return, resulting in hang. - * - Usually, back inserted requests won't be merged - * with anything. There's no point in delaying queue - * processing. - */ - __blk_run_queue(q); - break; - - case ELEVATOR_INSERT_SORT_MERGE: - /* - * If we succeed in merging this request with one in the - * queue already, we are done - rq has now been freed, - * so no need to do anything further. - */ - if (elv_attempt_insert_merge(q, rq)) - break; - /* fall through */ - case ELEVATOR_INSERT_SORT: - BUG_ON(blk_rq_is_passthrough(rq)); - rq->rq_flags |= RQF_SORTED; - q->nr_sorted++; - if (rq_mergeable(rq)) { - elv_rqhash_add(q, rq); - if (!q->last_merge) - q->last_merge = rq; - } - - /* - * Some ioscheds (cfq) run q->request_fn directly, so - * rq cannot be accessed after calling - * elevator_add_req_fn. 
- */ - q->elevator->type->ops.sq.elevator_add_req_fn(q, rq); - break; - - case ELEVATOR_INSERT_FLUSH: - rq->rq_flags |= RQF_SOFTBARRIER; - blk_insert_flush(rq); - break; - default: - printk(KERN_ERR "%s: bad insertion point %d\n", - __func__, where); - BUG(); - } -} -EXPORT_SYMBOL(__elv_add_request); - -void elv_add_request(struct request_queue *q, struct request *rq, int where) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - __elv_add_request(q, rq, where); - spin_unlock_irqrestore(q->queue_lock, flags); -} -EXPORT_SYMBOL(elv_add_request); - struct request *elv_latter_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->uses_mq && e->type->ops.mq.next_request) + if (e->type->ops.mq.next_request) return e->type->ops.mq.next_request(q, rq); - else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn) - return e->type->ops.sq.elevator_latter_req_fn(q, rq); return NULL; } @@ -640,66 +423,10 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->uses_mq && e->type->ops.mq.former_request) + if (e->type->ops.mq.former_request) return e->type->ops.mq.former_request(q, rq); - if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn) - return e->type->ops.sq.elevator_former_req_fn(q, rq); - return NULL; -} - -int elv_set_request(struct request_queue *q, struct request *rq, - struct bio *bio, gfp_t gfp_mask) -{ - struct elevator_queue *e = q->elevator; - - if (WARN_ON_ONCE(e->uses_mq)) - return 0; - if (e->type->ops.sq.elevator_set_req_fn) - return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask); - return 0; -} - -void elv_put_request(struct request_queue *q, struct request *rq) -{ - struct elevator_queue *e = q->elevator; - - if (WARN_ON_ONCE(e->uses_mq)) - return; - - if (e->type->ops.sq.elevator_put_req_fn) - e->type->ops.sq.elevator_put_req_fn(rq); -} - -int elv_may_queue(struct request_queue *q, 
unsigned int op) -{ - struct elevator_queue *e = q->elevator; - - if (WARN_ON_ONCE(e->uses_mq)) - return 0; - - if (e->type->ops.sq.elevator_may_queue_fn) - return e->type->ops.sq.elevator_may_queue_fn(q, op); - - return ELV_MQUEUE_MAY; -} - -void elv_completed_request(struct request_queue *q, struct request *rq) -{ - struct elevator_queue *e = q->elevator; - - if (WARN_ON_ONCE(e->uses_mq)) - return; - - /* - * request is released from the driver, io must be done - */ - if (blk_account_rq(rq)) { - q->in_flight[rq_is_sync(rq)]--; - if ((rq->rq_flags & RQF_SORTED) && - e->type->ops.sq.elevator_completed_req_fn) - e->type->ops.sq.elevator_completed_req_fn(q, rq); - } + return NULL; } #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) @@ -768,8 +495,6 @@ int elv_register_queue(struct request_queue *q) } kobject_uevent(&e->kobj, KOBJ_ADD); e->registered = 1; - if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn) - e->type->ops.sq.elevator_registered_fn(q); } return error; } @@ -809,7 +534,7 @@ int elv_register(struct elevator_type *e) /* register, don't allow duplicate names */ spin_lock(&elv_list_lock); - if (elevator_find(e->elevator_name, e->uses_mq)) { + if (elevator_find(e->elevator_name)) { spin_unlock(&elv_list_lock); kmem_cache_destroy(e->icq_cache); return -EBUSY; @@ -919,71 +644,17 @@ out_unlock: */ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { - struct elevator_queue *old = q->elevator; - bool old_registered = false; int err; lockdep_assert_held(&q->sysfs_lock); - if (q->mq_ops) { - blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); - - err = elevator_switch_mq(q, new_e); - - blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); - - return err; - } - - /* - * Turn on BYPASS and drain all requests w/ elevator private data. - * Block layer doesn't call into a quiesced elevator - all requests - * are directly put on the dispatch list without elevator data - * using INSERT_BACK. 
All requests have SOFTBARRIER set and no - * merge happens either. - */ - if (old) { - old_registered = old->registered; - - blk_queue_bypass_start(q); - - /* unregister and clear all auxiliary data of the old elevator */ - if (old_registered) - elv_unregister_queue(q); - - ioc_clear_queue(q); - } - - /* allocate, init and register new elevator */ - err = new_e->ops.sq.elevator_init_fn(q, new_e); - if (err) - goto fail_init; - - err = elv_register_queue(q); - if (err) - goto fail_register; - - /* done, kill the old one and finish */ - if (old) { - elevator_exit(q, old); - blk_queue_bypass_end(q); - } - - blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); - return 0; + err = elevator_switch_mq(q, new_e); -fail_register: - elevator_exit(q, q->elevator); -fail_init: - /* switch failed, restore and re-register old elevator */ - if (old) { - q->elevator = old; - elv_register_queue(q); - blk_queue_bypass_end(q); - } + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); return err; } @@ -1032,7 +703,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, { int ret; - if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q)) + if (!q->mq_ops || !elv_support_iosched(q)) return count; ret = __elevator_change(q, name); @@ -1047,7 +718,6 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) struct elevator_queue *e = q->elevator; struct elevator_type *elv = NULL; struct elevator_type *__e; - bool uses_mq = q->mq_ops != NULL; int len = 0; if (!queue_is_rq_based(q)) @@ -1060,14 +730,11 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) spin_lock(&elv_list_lock); list_for_each_entry(__e, &elv_list, list) { - if (elv && elevator_match(elv, __e->elevator_name) && - (__e->uses_mq == uses_mq)) { + if (elv && elevator_match(elv, __e->elevator_name)) { len += sprintf(name+len, "[%s] ", elv->elevator_name); continue; } - if (__e->uses_mq && q->mq_ops && 
elv_support_iosched(q)) - len += sprintf(name+len, "%s ", __e->elevator_name); - else if (!__e->uses_mq && !q->mq_ops) + if (elv_support_iosched(q)) len += sprintf(name+len, "%s ", __e->elevator_name); } spin_unlock(&elv_list_lock); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index eccac01a10b6..728757a34fa0 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -1032,7 +1032,6 @@ static struct elevator_type kyber_sched = { .dispatch_request = kyber_dispatch_request, .has_work = kyber_has_work, }, - .uses_mq = true, #ifdef CONFIG_BLK_DEBUG_FS .queue_debugfs_attrs = kyber_queue_debugfs_attrs, .hctx_debugfs_attrs = kyber_hctx_debugfs_attrs, diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 099a9e05854c..513edefd10fd 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -777,7 +777,6 @@ static struct elevator_type mq_deadline = { .exit_sched = dd_exit_queue, }, - .uses_mq = true, #ifdef CONFIG_BLK_DEBUG_FS .queue_debugfs_attrs = deadline_queue_debugfs_attrs, #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8afe3331777e..a9f6db8abcda 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -58,9 +58,6 @@ struct blk_stat_callback; typedef void (rq_end_io_fn)(struct request *, blk_status_t); -#define BLK_RL_SYNCFULL (1U << 0) -#define BLK_RL_ASYNCFULL (1U << 1) - struct request_list { struct request_queue *q; /* the queue this rl belongs to */ #ifdef CONFIG_BLK_CGROUP @@ -309,11 +306,8 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; -typedef void (request_fn_proc) (struct request_queue *q); typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); -typedef int (prep_rq_fn) (struct request_queue *, struct request *); -typedef void (unprep_rq_fn) (struct request_queue *, struct request *); struct bio_vec; typedef void (softirq_done_fn)(struct request *); @@ -432,8 +426,6 
@@ struct request_queue { struct list_head queue_head; struct request *last_merge; struct elevator_queue *elevator; - int nr_rqs[2]; /* # allocated [a]sync rqs */ - int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ struct blk_queue_stats *stats; struct rq_qos *rq_qos; @@ -446,11 +438,8 @@ struct request_queue { */ struct request_list root_rl; - request_fn_proc *request_fn; make_request_fn *make_request_fn; poll_q_fn *poll_fn; - prep_rq_fn *prep_rq_fn; - unprep_rq_fn *unprep_rq_fn; softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; @@ -458,8 +447,6 @@ struct request_queue { init_rq_fn *init_rq_fn; /* Called just before a request is freed */ exit_rq_fn *exit_rq_fn; - /* Called from inside blk_get_request() */ - void (*initialize_rq_fn)(struct request *rq); const struct blk_mq_ops *mq_ops; @@ -475,17 +462,6 @@ struct request_queue { struct blk_mq_hw_ctx **queue_hw_ctx; unsigned int nr_hw_queues; - /* - * Dispatch queue sorting - */ - sector_t end_sector; - struct request *boundary_rq; - - /* - * Delayed queue handling - */ - struct delayed_work delay_work; - struct backing_dev_info *backing_dev_info; /* @@ -548,9 +524,6 @@ struct request_queue { * queue settings */ unsigned long nr_requests; /* Max # of requests */ - unsigned int nr_congestion_on; - unsigned int nr_congestion_off; - unsigned int nr_batching; unsigned int dma_drain_size; void *dma_drain_buffer; @@ -560,13 +533,6 @@ struct request_queue { unsigned int nr_sorted; unsigned int in_flight[2]; - /* - * Number of active block driver functions for which blk_drain_queue() - * must wait. Must be incremented around functions that unlock the - * queue_lock internally, e.g. scsi_request_fn(). 
- */ - unsigned int request_fn_active; - unsigned int rq_timeout; int poll_nsec; @@ -740,11 +706,6 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -static inline int queue_in_flight(struct request_queue *q) -{ - return q->in_flight[0] + q->in_flight[1]; -} - static inline bool blk_account_rq(struct request *rq) { return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); @@ -765,7 +726,7 @@ static inline bool blk_account_rq(struct request *rq) */ static inline bool queue_is_rq_based(struct request_queue *q) { - return q->request_fn || q->mq_ops; + return q->mq_ops; } static inline unsigned int blk_queue_cluster(struct request_queue *q) @@ -828,27 +789,6 @@ static inline bool rq_is_sync(struct request *rq) return op_is_sync(rq->cmd_flags); } -static inline bool blk_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - return rl->flags & flag; -} - -static inline void blk_set_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - rl->flags |= flag; -} - -static inline void blk_clear_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? 
BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - rl->flags &= ~flag; -} - static inline bool rq_mergeable(struct request *rq) { if (blk_rq_is_passthrough(rq)) @@ -969,7 +909,6 @@ extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, blk_mq_req_flags_t flags); -extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, @@ -979,7 +918,6 @@ extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); -extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_queue_split(struct request_queue *, struct bio **); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); @@ -992,15 +930,7 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); -extern void blk_start_queue(struct request_queue *q); -extern void blk_start_queue_async(struct request_queue *q); -extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -extern void __blk_stop_queue(struct request_queue *q); -extern void __blk_run_queue(struct request_queue *q); -extern void __blk_run_queue_uncond(struct request_queue *q); -extern void blk_run_queue(struct request_queue *); -extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned 
long, gfp_t); @@ -1155,13 +1085,6 @@ static inline unsigned int blk_rq_count_bios(struct request *rq) return nr_bios; } -/* - * Request issue related functions. - */ -extern struct request *blk_peek_request(struct request_queue *q); -extern void blk_start_request(struct request *rq); -extern struct request *blk_fetch_request(struct request_queue *q); - void blk_steal_bios(struct bio_list *list, struct request *rq); /* @@ -1179,9 +1102,6 @@ void blk_steal_bios(struct bio_list *list, struct request *rq); */ extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_finish_request(struct request *rq, blk_status_t error); -extern bool blk_end_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, blk_status_t error); extern bool __blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); @@ -1190,15 +1110,10 @@ extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -extern void blk_unprep_request(struct request *); /* * Access functions for manipulating queue properties */ -extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, - spinlock_t *lock, int node_id); -extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); -extern int blk_init_allocated_queue(struct request_queue *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); @@ -1239,8 +1154,6 @@ extern int blk_queue_dma_drain(struct request_queue *q, void *buf, unsigned int size); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); -extern void 
blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); -extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); @@ -1298,7 +1211,6 @@ extern void blk_set_queue_dying(struct request_queue *); * schedule() where blk_schedule_flush_plug() is called. */ struct blk_plug { - struct list_head list; /* requests */ struct list_head mq_list; /* blk-mq requests */ struct list_head cb_list; /* md requires an unplug callback */ }; @@ -1339,8 +1251,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) struct blk_plug *plug = tsk->plug; return plug && - (!list_empty(&plug->list) || - !list_empty(&plug->mq_list) || + (!list_empty(&plug->mq_list) || !list_empty(&plug->cb_list)); } diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 015bb59c0331..158004f1754d 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -23,74 +23,6 @@ enum elv_merge { ELEVATOR_DISCARD_MERGE = 3, }; -typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **, - struct bio *); - -typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *); - -typedef void (elevator_merged_fn) (struct request_queue *, struct request *, enum elv_merge); - -typedef int (elevator_allow_bio_merge_fn) (struct request_queue *, - struct request *, struct bio *); - -typedef int (elevator_allow_rq_merge_fn) (struct request_queue *, - struct request *, struct request *); - -typedef void (elevator_bio_merged_fn) (struct request_queue *, - struct request *, struct bio *); - -typedef int (elevator_dispatch_fn) (struct request_queue *, int); - -typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); -typedef struct request *(elevator_request_list_fn) (struct 
request_queue *, struct request *); -typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); -typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int); - -typedef void (elevator_init_icq_fn) (struct io_cq *); -typedef void (elevator_exit_icq_fn) (struct io_cq *); -typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, - struct bio *, gfp_t); -typedef void (elevator_put_req_fn) (struct request *); -typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); -typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); - -typedef int (elevator_init_fn) (struct request_queue *, - struct elevator_type *e); -typedef void (elevator_exit_fn) (struct elevator_queue *); -typedef void (elevator_registered_fn) (struct request_queue *); - -struct elevator_ops -{ - elevator_merge_fn *elevator_merge_fn; - elevator_merged_fn *elevator_merged_fn; - elevator_merge_req_fn *elevator_merge_req_fn; - elevator_allow_bio_merge_fn *elevator_allow_bio_merge_fn; - elevator_allow_rq_merge_fn *elevator_allow_rq_merge_fn; - elevator_bio_merged_fn *elevator_bio_merged_fn; - - elevator_dispatch_fn *elevator_dispatch_fn; - elevator_add_req_fn *elevator_add_req_fn; - elevator_activate_req_fn *elevator_activate_req_fn; - elevator_deactivate_req_fn *elevator_deactivate_req_fn; - - elevator_completed_req_fn *elevator_completed_req_fn; - - elevator_request_list_fn *elevator_former_req_fn; - elevator_request_list_fn *elevator_latter_req_fn; - - elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */ - elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */ - - elevator_set_req_fn *elevator_set_req_fn; - elevator_put_req_fn *elevator_put_req_fn; - - elevator_may_queue_fn *elevator_may_queue_fn; - - elevator_init_fn *elevator_init_fn; - elevator_exit_fn *elevator_exit_fn; - elevator_registered_fn *elevator_registered_fn; -}; - struct blk_mq_alloc_data; struct blk_mq_hw_ctx; @@ 
-138,16 +70,15 @@ struct elevator_type /* fields provided by elevator implementation */ union { - struct elevator_ops sq; struct elevator_mq_ops mq; } ops; + size_t icq_size; /* see iocontext.h */ size_t icq_align; /* ditto */ struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; const char *elevator_alias; struct module *elevator_owner; - bool uses_mq; #ifdef CONFIG_BLK_DEBUG_FS const struct blk_mq_debugfs_attr *queue_debugfs_attrs; const struct blk_mq_debugfs_attr *hctx_debugfs_attrs; @@ -175,40 +106,25 @@ struct elevator_queue struct kobject kobj; struct mutex sysfs_lock; unsigned int registered:1; - unsigned int uses_mq:1; DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; /* * block elevator interface */ -extern void elv_dispatch_sort(struct request_queue *, struct request *); -extern void elv_dispatch_add_tail(struct request_queue *, struct request *); -extern void elv_add_request(struct request_queue *, struct request *, int); -extern void __elv_add_request(struct request_queue *, struct request *, int); extern enum elv_merge elv_merge(struct request_queue *, struct request **, struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, enum elv_merge); -extern void elv_bio_merged(struct request_queue *q, struct request *, - struct bio *); extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); -extern void elv_requeue_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); -extern int elv_may_queue(struct request_queue *, unsigned int); -extern void elv_completed_request(struct request_queue *, struct request *); -extern int elv_set_request(struct request_queue *q, struct request *rq, - struct bio *bio, gfp_t gfp_mask); -extern void 
elv_put_request(struct request_queue *, struct request *); -extern void elv_drain_elevator(struct request_queue *); /* * io scheduler registration */ -extern void __init load_default_elevator_module(void); extern int elv_register(struct elevator_type *); extern void elv_unregister(struct elevator_type *); @@ -260,9 +176,5 @@ enum { #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist) -#else /* CONFIG_BLOCK */ - -static inline void load_default_elevator_module(void) { } - #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/init.h b/include/linux/init.h index 9c2aba1dbabf..5255069f5a9f 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -146,7 +146,6 @@ extern unsigned int reset_devices; /* used by init/main.c */ void setup_arch(char **); void prepare_namespace(void); -void __init load_default_modules(void); int __init init_rootfs(void); #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index d1a5d885ce13..73e02ea5d5d1 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -53,9 +53,6 @@ static void __init handle_initrd(void) ksys_mkdir("/old", 0700); ksys_chdir("/old"); - /* try loading default modules from initrd */ - load_default_modules(); - /* * In case that a resume from disk is carried out by linuxrc or one of * its children, we need to tell the freezer not to wait for us. diff --git a/init/initramfs.c b/init/initramfs.c index 640557788026..96af18fec4d0 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -644,12 +644,6 @@ static int __init populate_rootfs(void) #endif } flush_delayed_fput(); - /* - * Try loading default modules from initramfs. This gives - * us a chance to load before device_initcalls. 
- */ - load_default_modules(); - return 0; } rootfs_initcall(populate_rootfs); diff --git a/init/main.c b/init/main.c index ee147103ba1b..ca0cdb0c388b 100644 --- a/init/main.c +++ b/init/main.c @@ -996,17 +996,6 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(initcall_from_entry(fn)); } -/* - * This function requests modules which should be loaded by default and is - * called twice right after initrd is mounted and right before init is - * exec'd. If such modules are on either initrd or rootfs, they will be - * loaded before control is passed to userland. - */ -void __init load_default_modules(void) -{ - load_default_elevator_module(); -} - static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; @@ -1180,5 +1169,4 @@ static noinline void __init kernel_init_freeable(void) */ integrity_load_keys(); - load_default_modules(); } -- cgit v1.2.3 From f9cd4bfe96955e7a1d3ec54b393dee87b815ba3b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Nov 2018 16:41:41 -0600 Subject: block: get rid of MQ scheduler ops union This is a remnant of when we had ops for both SQ and MQ schedulers. Now it's just MQ, so get rid of the union. 
Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 +- block/blk-ioc.c | 8 ++++---- block/blk-mq-sched.c | 33 ++++++++++++++++----------------- block/blk-mq-sched.h | 20 ++++++++++---------- block/blk-mq.c | 12 ++++++------ block/elevator.c | 26 +++++++++++++------------- block/kyber-iosched.c | 2 +- block/mq-deadline.c | 2 +- include/linux/elevator.h | 4 +--- 9 files changed, 53 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 44c7e567aa25..c7636cbefc85 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5724,7 +5724,7 @@ static struct elv_fs_entry bfq_attrs[] = { }; static struct elevator_type iosched_bfq_mq = { - .ops.mq = { + .ops = { .limit_depth = bfq_limit_depth, .prepare_request = bfq_prepare_request, .requeue_request = bfq_finish_requeue_request, diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 391128456aec..007aac6e6a4b 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -48,8 +48,8 @@ static void ioc_exit_icq(struct io_cq *icq) if (icq->flags & ICQ_EXITED) return; - if (et->ops.mq.exit_icq) - et->ops.mq.exit_icq(icq); + if (et->ops.exit_icq) + et->ops.exit_icq(icq); icq->flags |= ICQ_EXITED; } @@ -396,8 +396,8 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { hlist_add_head(&icq->ioc_node, &ioc->icq_list); list_add(&icq->q_node, &q->icq_list); - if (et->ops.mq.init_icq) - et->ops.mq.init_icq(icq); + if (et->ops.init_icq) + et->ops.init_icq(icq); } else { kmem_cache_free(et->icq_cache, icq); icq = ioc_lookup_icq(ioc, q); diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 29bfe8017a2d..0feefd6c6aaa 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -85,14 +85,13 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) do { struct request *rq; - if (e->type->ops.mq.has_work && - !e->type->ops.mq.has_work(hctx)) + 
if (e->type->ops.has_work && !e->type->ops.has_work(hctx)) break; if (!blk_mq_get_dispatch_budget(hctx)) break; - rq = e->type->ops.mq.dispatch_request(hctx); + rq = e->type->ops.dispatch_request(hctx); if (!rq) { blk_mq_put_dispatch_budget(hctx); break; @@ -163,7 +162,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct elevator_queue *e = q->elevator; - const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request; + const bool has_sched_dispatch = e && e->type->ops.dispatch_request; LIST_HEAD(rq_list); /* RCU or SRCU read lock is needed before checking quiesced flag */ @@ -314,9 +313,9 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); bool ret = false; - if (e && e->type->ops.mq.bio_merge) { + if (e && e->type->ops.bio_merge) { blk_mq_put_ctx(ctx); - return e->type->ops.mq.bio_merge(hctx, bio); + return e->type->ops.bio_merge(hctx, bio); } if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) && @@ -380,11 +379,11 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) goto run; - if (e && e->type->ops.mq.insert_requests) { + if (e && e->type->ops.insert_requests) { LIST_HEAD(list); list_add(&rq->queuelist, &list); - e->type->ops.mq.insert_requests(hctx, &list, at_head); + e->type->ops.insert_requests(hctx, &list, at_head); } else { spin_lock(&ctx->lock); __blk_mq_insert_request(hctx, rq, at_head); @@ -403,8 +402,8 @@ void blk_mq_sched_insert_requests(struct request_queue *q, struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); struct elevator_queue *e = hctx->queue->elevator; - if (e && e->type->ops.mq.insert_requests) - e->type->ops.mq.insert_requests(hctx, list, false); + if (e && e->type->ops.insert_requests) + e->type->ops.insert_requests(hctx, list, false); else { /* * try to issue requests directly if the hw queue isn't @@ -489,15 +488,15 @@ int 
blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) goto err; } - ret = e->ops.mq.init_sched(q, e); + ret = e->ops.init_sched(q, e); if (ret) goto err; blk_mq_debugfs_register_sched(q); queue_for_each_hw_ctx(q, hctx, i) { - if (e->ops.mq.init_hctx) { - ret = e->ops.mq.init_hctx(hctx, i); + if (e->ops.init_hctx) { + ret = e->ops.init_hctx(hctx, i); if (ret) { eq = q->elevator; blk_mq_exit_sched(q, eq); @@ -523,14 +522,14 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) queue_for_each_hw_ctx(q, hctx, i) { blk_mq_debugfs_unregister_sched_hctx(hctx); - if (e->type->ops.mq.exit_hctx && hctx->sched_data) { - e->type->ops.mq.exit_hctx(hctx, i); + if (e->type->ops.exit_hctx && hctx->sched_data) { + e->type->ops.exit_hctx(hctx, i); hctx->sched_data = NULL; } } blk_mq_debugfs_unregister_sched(q); - if (e->type->ops.mq.exit_sched) - e->type->ops.mq.exit_sched(e); + if (e->type->ops.exit_sched) + e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q); q->elevator = NULL; } diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 8a9544203173..947f236b273d 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -43,8 +43,8 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e && e->type->ops.mq.allow_merge) - return e->type->ops.mq.allow_merge(q, rq, bio); + if (e && e->type->ops.allow_merge) + return e->type->ops.allow_merge(q, rq, bio); return true; } @@ -53,8 +53,8 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) { struct elevator_queue *e = rq->q->elevator; - if (e && e->type->ops.mq.completed_request) - e->type->ops.mq.completed_request(rq, now); + if (e && e->type->ops.completed_request) + e->type->ops.completed_request(rq, now); } static inline void blk_mq_sched_started_request(struct request *rq) @@ -62,8 +62,8 @@ static inline void blk_mq_sched_started_request(struct request *rq) struct request_queue *q = 
rq->q; struct elevator_queue *e = q->elevator; - if (e && e->type->ops.mq.started_request) - e->type->ops.mq.started_request(rq); + if (e && e->type->ops.started_request) + e->type->ops.started_request(rq); } static inline void blk_mq_sched_requeue_request(struct request *rq) @@ -71,16 +71,16 @@ static inline void blk_mq_sched_requeue_request(struct request *rq) struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (e && e->type->ops.mq.requeue_request) - e->type->ops.mq.requeue_request(rq); + if (e && e->type->ops.requeue_request) + e->type->ops.requeue_request(rq); } static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) { struct elevator_queue *e = hctx->queue->elevator; - if (e && e->type->ops.mq.has_work) - return e->type->ops.mq.has_work(hctx); + if (e && e->type->ops.has_work) + return e->type->ops.has_work(hctx); return false; } diff --git a/block/blk-mq.c b/block/blk-mq.c index a58d2d953876..d106d7a970cc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -363,9 +363,9 @@ static struct request *blk_mq_get_request(struct request_queue *q, * dispatch list. Don't include reserved tags in the * limiting, as it isn't useful. 
*/ - if (!op_is_flush(op) && e->type->ops.mq.limit_depth && + if (!op_is_flush(op) && e->type->ops.limit_depth && !(data->flags & BLK_MQ_REQ_RESERVED)) - e->type->ops.mq.limit_depth(op, data); + e->type->ops.limit_depth(op, data); } else { blk_mq_tag_busy(data->hctx); } @@ -383,11 +383,11 @@ static struct request *blk_mq_get_request(struct request_queue *q, rq = blk_mq_rq_ctx_init(data, tag, op); if (!op_is_flush(op)) { rq->elv.icq = NULL; - if (e && e->type->ops.mq.prepare_request) { + if (e && e->type->ops.prepare_request) { if (e->type->icq_cache && rq_ioc(bio)) blk_mq_sched_assign_ioc(rq, bio); - e->type->ops.mq.prepare_request(rq, bio); + e->type->ops.prepare_request(rq, bio); rq->rq_flags |= RQF_ELVPRIV; } } @@ -491,8 +491,8 @@ void blk_mq_free_request(struct request *rq) struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); if (rq->rq_flags & RQF_ELVPRIV) { - if (e && e->type->ops.mq.finish_request) - e->type->ops.mq.finish_request(rq); + if (e && e->type->ops.finish_request) + e->type->ops.finish_request(rq); if (rq->elv.icq) { put_io_context(rq->elv.icq->ioc); rq->elv.icq = NULL; diff --git a/block/elevator.c b/block/elevator.c index 334097c54b08..19351ffa56b1 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -61,8 +61,8 @@ static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (e->type->ops.mq.allow_merge) - return e->type->ops.mq.allow_merge(q, rq, bio); + if (e->type->ops.allow_merge) + return e->type->ops.allow_merge(q, rq, bio); return 1; } @@ -180,7 +180,7 @@ static void elevator_release(struct kobject *kobj) void elevator_exit(struct request_queue *q, struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); - if (e->type->ops.mq.exit_sched) + if (e->type->ops.exit_sched) blk_mq_exit_sched(q, e); mutex_unlock(&e->sysfs_lock); @@ -329,8 +329,8 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req, return 
ELEVATOR_BACK_MERGE; } - if (e->type->ops.mq.request_merge) - return e->type->ops.mq.request_merge(q, req, bio); + if (e->type->ops.request_merge) + return e->type->ops.request_merge(q, req, bio); return ELEVATOR_NO_MERGE; } @@ -381,8 +381,8 @@ void elv_merged_request(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e->type->ops.mq.request_merged) - e->type->ops.mq.request_merged(q, rq, type); + if (e->type->ops.request_merged) + e->type->ops.request_merged(q, rq, type); if (type == ELEVATOR_BACK_MERGE) elv_rqhash_reposition(q, rq); @@ -396,8 +396,8 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, struct elevator_queue *e = q->elevator; bool next_sorted = false; - if (e->type->ops.mq.requests_merged) - e->type->ops.mq.requests_merged(q, rq, next); + if (e->type->ops.requests_merged) + e->type->ops.requests_merged(q, rq, next); elv_rqhash_reposition(q, rq); @@ -413,8 +413,8 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.mq.next_request) - return e->type->ops.mq.next_request(q, rq); + if (e->type->ops.next_request) + return e->type->ops.next_request(q, rq); return NULL; } @@ -423,8 +423,8 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.mq.former_request) - return e->type->ops.mq.former_request(q, rq); + if (e->type->ops.former_request) + return e->type->ops.former_request(q, rq); return NULL; } diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 728757a34fa0..1fd83a91e749 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -1017,7 +1017,7 @@ static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = { #endif static struct elevator_type kyber_sched = { - .ops.mq = { + .ops = { .init_sched = kyber_init_sched, .exit_sched = kyber_exit_sched, .init_hctx = kyber_init_hctx, diff 
--git a/block/mq-deadline.c b/block/mq-deadline.c index 513edefd10fd..1bd06cefce57 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -761,7 +761,7 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = { #endif static struct elevator_type mq_deadline = { - .ops.mq = { + .ops = { .insert_requests = dd_insert_requests, .dispatch_request = dd_dispatch_request, .prepare_request = dd_prepare_request, diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 158004f1754d..2e9e2763bf47 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -69,9 +69,7 @@ struct elevator_type struct kmem_cache *icq_cache; /* fields provided by elevator implementation */ - union { - struct elevator_mq_ops mq; - } ops; + struct elevator_mq_ops ops; size_t icq_size; /* see iocontext.h */ size_t icq_align; /* ditto */ -- cgit v1.2.3 From 92bc5a24844ada9b010f03c49a493e3edeadaa54 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Oct 2018 13:52:28 -0600 Subject: block: remove __blk_put_request() Now there's no difference between blk_put_request() and __blk_put_request() anymore, get rid of the underscore version and convert the few callers. 
Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 9 --------- block/blk-merge.c | 2 +- drivers/scsi/osd/osd_initiator.c | 4 ++-- drivers/scsi/osst.c | 2 +- drivers/scsi/scsi_error.c | 2 +- drivers/scsi/sg.c | 2 +- drivers/scsi/st.c | 2 +- drivers/target/target_core_pscsi.c | 2 +- include/linux/blkdev.h | 1 - 9 files changed, 8 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 18538a41a532..700dd4587282 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -803,15 +803,6 @@ void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part) } EXPORT_SYMBOL_GPL(part_round_stats); -void __blk_put_request(struct request_queue *q, struct request *req) -{ - if (unlikely(!q)) - return; - - blk_mq_free_request(req); -} -EXPORT_SYMBOL_GPL(__blk_put_request); - void blk_put_request(struct request *req) { blk_mq_free_request(req); diff --git a/block/blk-merge.c b/block/blk-merge.c index c068c30b0c35..3d073305da33 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -866,7 +866,7 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, free = attempt_merge(q, rq, next); if (free) { - __blk_put_request(q, free); + blk_put_request(free); return 1; } diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index e19fa883376f..60cf7c5eb880 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -506,11 +506,11 @@ static void osd_request_async_done(struct request *req, blk_status_t error) _set_error_resid(or, req, error); if (req->next_rq) { - __blk_put_request(req->q, req->next_rq); + blk_put_request(req->next_rq); req->next_rq = NULL; } - __blk_put_request(req->q, req); + blk_put_request(req); or->request = NULL; or->in.req = NULL; or->out.req = NULL; diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 7a1a1edde35d..664c1238a87f 100644 --- 
a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -341,7 +341,7 @@ static void osst_end_async(struct request *req, blk_status_t status) blk_rq_unmap_user(SRpnt->bio); } - __blk_put_request(req->q, req); + blk_put_request(req); } /* osst_request memory management */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index fff128aa9ec2..dd338a8cd275 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1932,7 +1932,7 @@ maybe_retry: static void eh_lock_door_done(struct request *req, blk_status_t status) { - __blk_put_request(req->q, req); + blk_put_request(req); } /** diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index c6ad00703c5b..4e27460ec926 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1390,7 +1390,7 @@ sg_rq_end_io(struct request *rq, blk_status_t status) */ srp->rq = NULL; scsi_req_free_cmd(scsi_req(rq)); - __blk_put_request(rq->q, rq); + blk_put_request(rq); write_lock_irqsave(&sfp->rq_list_lock, iflags); if (unlikely(srp->orphan)) { diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 307df2fa39a3..7ff22d3f03e3 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -530,7 +530,7 @@ static void st_scsi_execute_end(struct request *req, blk_status_t status) complete(SRpnt->waiting); blk_rq_unmap_user(tmp); - __blk_put_request(req->q, req); + blk_put_request(req); } static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 47d76c862014..c062d363dce3 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1094,7 +1094,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status) break; } - __blk_put_request(req->q, req); + blk_put_request(req); kfree(pt); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a9f6db8abcda..c502a7f40e84 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -906,7 +906,6 @@ extern 
blk_qc_t direct_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); -extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, blk_mq_req_flags_t flags); extern int blk_lld_busy(struct request_queue *q); -- cgit v1.2.3 From 4316b79e4321d4140164e42f228778e5bc66c84f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 10:25:07 -0600 Subject: block: kill legacy parts of timeout handling The only user of legacy timing now is BSG, which is invoked from the mq timeout handler. Kill the legacy code, and rename the q->rq_timed_out_fn to q->bsg_job_timeout_fn. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/blk-settings.c | 7 ---- block/blk-timeout.c | 99 ++++---------------------------------------------- block/blk.h | 1 - block/bsg-lib.c | 6 +-- include/linux/blkdev.h | 4 +- 6 files changed, 11 insertions(+), 107 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 700dd4587282..ccfe2a65cc22 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -656,7 +656,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, laptop_mode_timer_fn, 0); timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); INIT_WORK(&q->timeout_work, NULL); - INIT_LIST_HEAD(&q->timeout_list); INIT_LIST_HEAD(&q->icq_list); #ifdef CONFIG_BLK_CGROUP INIT_LIST_HEAD(&q->blkg_list); diff --git a/block/blk-settings.c b/block/blk-settings.c index 39c3c301a687..e3f07d94b18d 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -32,13 +32,6 @@ void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) } EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); -void 
blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) -{ - WARN_ON_ONCE(q->mq_ops); - q->rq_timed_out_fn = fn; -} -EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); - /** * blk_set_default_limits - reset limits to default values * @lim: the queue_limits structure to reset diff --git a/block/blk-timeout.c b/block/blk-timeout.c index f2cfd56e1606..6428d458072a 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -78,70 +78,6 @@ void blk_delete_timer(struct request *req) list_del_init(&req->timeout_list); } -static void blk_rq_timed_out(struct request *req) -{ - struct request_queue *q = req->q; - enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; - - if (q->rq_timed_out_fn) - ret = q->rq_timed_out_fn(req); - switch (ret) { - case BLK_EH_RESET_TIMER: - blk_add_timer(req); - blk_clear_rq_complete(req); - break; - case BLK_EH_DONE: - /* - * LLD handles this for now but in the future - * we can send a request msg to abort the command - * and we can move more of the generic scsi eh code to - * the blk layer. 
- */ - break; - default: - printk(KERN_ERR "block: bad eh return: %d\n", ret); - break; - } -} - -static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, - unsigned int *next_set) -{ - const unsigned long deadline = blk_rq_deadline(rq); - - if (time_after_eq(jiffies, deadline)) { - list_del_init(&rq->timeout_list); - - /* - * Check if we raced with end io completion - */ - if (!blk_mark_rq_complete(rq)) - blk_rq_timed_out(rq); - } else if (!*next_set || time_after(*next_timeout, deadline)) { - *next_timeout = deadline; - *next_set = 1; - } -} - -void blk_timeout_work(struct work_struct *work) -{ - struct request_queue *q = - container_of(work, struct request_queue, timeout_work); - unsigned long flags, next = 0; - struct request *rq, *tmp; - int next_set = 0; - - spin_lock_irqsave(q->queue_lock, flags); - - list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) - blk_rq_check_expired(rq, &next, &next_set); - - if (next_set) - mod_timer(&q->timeout, round_jiffies_up(next)); - - spin_unlock_irqrestore(q->queue_lock, flags); -} - /** * blk_abort_request -- Request request recovery for the specified command * @req: pointer to the request of interest @@ -153,20 +89,13 @@ void blk_timeout_work(struct work_struct *work) */ void blk_abort_request(struct request *req) { - if (req->q->mq_ops) { - /* - * All we need to ensure is that timeout scan takes place - * immediately and that scan sees the new timeout value. - * No need for fancy synchronizations. - */ - blk_rq_set_deadline(req, jiffies); - kblockd_schedule_work(&req->q->timeout_work); - } else { - if (blk_mark_rq_complete(req)) - return; - blk_delete_timer(req); - blk_rq_timed_out(req); - } + /* + * All we need to ensure is that timeout scan takes place + * immediately and that scan sees the new timeout value. + * No need for fancy synchronizations. 
+ */ + blk_rq_set_deadline(req, jiffies); + kblockd_schedule_work(&req->q->timeout_work); } EXPORT_SYMBOL_GPL(blk_abort_request); @@ -194,13 +123,6 @@ void blk_add_timer(struct request *req) struct request_queue *q = req->q; unsigned long expiry; - if (!q->mq_ops) - lockdep_assert_held(q->queue_lock); - - /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */ - if (!q->mq_ops && !q->rq_timed_out_fn) - return; - BUG_ON(!list_empty(&req->timeout_list)); /* @@ -213,13 +135,6 @@ void blk_add_timer(struct request *req) req->rq_flags &= ~RQF_TIMED_OUT; blk_rq_set_deadline(req, jiffies + req->timeout); - /* - * Only the non-mq case needs to add the request to a protected list. - * For the mq case we simply scan the tag map. - */ - if (!q->mq_ops) - list_add_tail(&req->timeout_list, &req->q->timeout_list); - /* * If the timer isn't already pending or this timeout is earlier * than an existing one, modify the timer. Round up to next nearest diff --git a/block/blk.h b/block/blk.h index e2604ae7ddfa..4ae6cacb4548 100644 --- a/block/blk.h +++ b/block/blk.h @@ -224,7 +224,6 @@ static inline bool bio_integrity_endio(struct bio *bio) } #endif /* CONFIG_BLK_DEV_INTEGRITY */ -void blk_timeout_work(struct work_struct *work); unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); void blk_delete_timer(struct request *); diff --git a/block/bsg-lib.c b/block/bsg-lib.c index faf20f4500c9..f38c7bc272c0 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -307,8 +307,8 @@ static enum blk_eh_timer_return bsg_timeout(struct request *rq, bool reserved) enum blk_eh_timer_return ret = BLK_EH_DONE; struct request_queue *q = rq->q; - if (q->rq_timed_out_fn) - ret = q->rq_timed_out_fn(rq); + if (q->bsg_job_timeout_fn) + ret = q->bsg_job_timeout_fn(rq); return ret; } @@ -357,9 +357,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, q->queuedata = dev; q->bsg_job_fn = job_fn; + q->bsg_job_timeout_fn = timeout; 
blk_queue_flag_set(QUEUE_FLAG_BIDI, q); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); - q->rq_timed_out_fn = timeout; ret = bsg_register_queue(q, dev, name, &bsg_transport_ops); if (ret) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c502a7f40e84..0364fc53f5c8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -441,7 +441,6 @@ struct request_queue { make_request_fn *make_request_fn; poll_q_fn *poll_fn; softirq_done_fn *softirq_done_fn; - rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; /* Called just after a request is allocated */ init_rq_fn *init_rq_fn; @@ -541,7 +540,6 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; - struct list_head timeout_list; struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP @@ -601,6 +599,7 @@ struct request_queue { #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; + rq_timed_out_fn *bsg_job_timeout_fn; struct bsg_class_device bsg_dev; #endif @@ -1156,7 +1155,6 @@ extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); -extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); -- cgit v1.2.3 From 1028e4b335665290dc563d5272f3c6b84e7fd66e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 09:47:17 -0600 Subject: bsg: move bsg-lib parts outside of request queue Get rid of the special bsg job fn and timeout handler, move them into a private bsg_set instead. Mostly from Christoph, with fixes for error handling and cleanups. 
Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/bsg-lib.c | 43 +++++++++++++++++++++++++++---------------- include/linux/blkdev.h | 5 ----- include/linux/bsg-lib.h | 5 ++++- 3 files changed, 31 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index f38c7bc272c0..192129856342 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -31,6 +31,12 @@ #define uptr64(val) ((void __user *)(uintptr_t)(val)) +struct bsg_set { + struct blk_mq_tag_set tag_set; + bsg_job_fn *job_fn; + bsg_timeout_fn *timeout_fn; +}; + static int bsg_transport_check_proto(struct sg_io_v4 *hdr) { if (hdr->protocol != BSG_PROTOCOL_SCSI || @@ -239,6 +245,8 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx, struct request_queue *q = hctx->queue; struct device *dev = q->queuedata; struct request *req = bd->rq; + struct bsg_set *bset = + container_of(q->tag_set, struct bsg_set, tag_set); int ret; blk_mq_start_request(req); @@ -249,7 +257,7 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx, if (!bsg_prepare_job(dev, req)) return BLK_STS_IOERR; - ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req)); + ret = bset->job_fn(blk_mq_rq_to_pdu(req)); if (ret) return BLK_STS_IOERR; @@ -292,25 +300,25 @@ static void bsg_exit_rq(struct blk_mq_tag_set *set, struct request *req, void bsg_remove_queue(struct request_queue *q) { if (q) { - struct blk_mq_tag_set *set = q->tag_set; + struct bsg_set *bset = + container_of(q->tag_set, struct bsg_set, tag_set); bsg_unregister_queue(q); blk_cleanup_queue(q); - blk_mq_free_tag_set(set); - kfree(set); + blk_mq_free_tag_set(&bset->tag_set); + kfree(bset); } } EXPORT_SYMBOL_GPL(bsg_remove_queue); static enum blk_eh_timer_return bsg_timeout(struct request *rq, bool reserved) { - enum blk_eh_timer_return ret = BLK_EH_DONE; - struct request_queue *q = rq->q; - - if (q->bsg_job_timeout_fn) - ret = q->bsg_job_timeout_fn(rq); + struct bsg_set *bset 
= + container_of(rq->q->tag_set, struct bsg_set, tag_set); - return ret; + if (!bset->timeout_fn) + return BLK_EH_DONE; + return bset->timeout_fn(rq); } static const struct blk_mq_ops bsg_mq_ops = { @@ -330,16 +338,21 @@ static const struct blk_mq_ops bsg_mq_ops = { * @dd_job_size: size of LLD data needed for each job */ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, rq_timed_out_fn *timeout, int dd_job_size) + bsg_job_fn *job_fn, bsg_timeout_fn *timeout, int dd_job_size) { + struct bsg_set *bset; struct blk_mq_tag_set *set; struct request_queue *q; int ret = -ENOMEM; - set = kzalloc(sizeof(*set), GFP_KERNEL); - if (!set) + bset = kzalloc(sizeof(*bset), GFP_KERNEL); + if (!bset) return ERR_PTR(-ENOMEM); + bset->job_fn = job_fn; + bset->timeout_fn = timeout; + + set = &bset->tag_set; set->ops = &bsg_mq_ops, set->nr_hw_queues = 1; set->queue_depth = 128; @@ -356,8 +369,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, } q->queuedata = dev; - q->bsg_job_fn = job_fn; - q->bsg_job_timeout_fn = timeout; blk_queue_flag_set(QUEUE_FLAG_BIDI, q); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); @@ -374,7 +385,7 @@ out_cleanup_queue: out_queue: blk_mq_free_tag_set(set); out_tag_set: - kfree(set); + kfree(bset); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(bsg_setup_queue); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0364fc53f5c8..877a3d235c45 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -312,7 +312,6 @@ typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); struct bio_vec; typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -typedef int (bsg_job_fn) (struct bsg_job *); typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); typedef void (exit_rq_fn)(struct request_queue *, struct request *); @@ -321,8 +320,6 @@ enum blk_eh_timer_return { BLK_EH_RESET_TIMER, /* reset timer and 
try again */ }; -typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); - enum blk_queue_state { Queue_down, Queue_up, @@ -598,8 +595,6 @@ struct request_queue { atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) - bsg_job_fn *bsg_job_fn; - rq_timed_out_fn *bsg_job_timeout_fn; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 9c9b134b1fa5..b356e0006731 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -31,6 +31,9 @@ struct device; struct scatterlist; struct request_queue; +typedef int (bsg_job_fn) (struct bsg_job *); +typedef enum blk_eh_timer_return (bsg_timeout_fn)(struct request *); + struct bsg_buffer { unsigned int payload_len; int sg_cnt; @@ -72,7 +75,7 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, rq_timed_out_fn *timeout, int dd_job_size); + bsg_job_fn *job_fn, bsg_timeout_fn *timeout, int dd_job_size); void bsg_remove_queue(struct request_queue *q); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); -- cgit v1.2.3 From db6d995235606191fa9db0c717e9d843200b71ea Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 2 Nov 2018 08:46:15 -0600 Subject: block: remove request_list code It's now dead code, nobody uses it. 
Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 47 ---------------------- block/blk-core.c | 75 ----------------------------------- block/blk-mq.c | 4 -- block/blk.h | 3 -- include/linux/blk-cgroup.h | 97 ---------------------------------------------- include/linux/blkdev.h | 34 ---------------- 6 files changed, 260 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 41b2470042d1..6c65791bc3fe 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -76,9 +76,6 @@ static void blkg_free(struct blkcg_gq *blkg) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); - if (blkg->blkcg != &blkcg_root) - blk_exit_rl(blkg->q, &blkg->rl); - blkg_rwstat_exit(&blkg->stat_ios); blkg_rwstat_exit(&blkg->stat_bytes); kfree(blkg); @@ -112,13 +109,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->blkcg = blkcg; atomic_set(&blkg->refcnt, 1); - /* root blkg uses @q->root_rl, init rl only for !root blkgs */ - if (blkcg != &blkcg_root) { - if (blk_init_rl(&blkg->rl, q, gfp_mask)) - goto err_free; - blkg->rl.blkg = blkg; - } - for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkg_policy_data *pd; @@ -377,7 +367,6 @@ static void blkg_destroy_all(struct request_queue *q) } q->root_blkg = NULL; - q->root_rl.blkg = NULL; } /* @@ -403,41 +392,6 @@ void __blkg_release_rcu(struct rcu_head *rcu_head) } EXPORT_SYMBOL_GPL(__blkg_release_rcu); -/* - * The next function used by blk_queue_for_each_rl(). It's a bit tricky - * because the root blkg uses @q->root_rl instead of its own rl. - */ -struct request_list *__blk_queue_next_rl(struct request_list *rl, - struct request_queue *q) -{ - struct list_head *ent; - struct blkcg_gq *blkg; - - /* - * Determine the current blkg list_head. The first entry is - * root_rl which is off @q->blkg_list and mapped to the head. 
- */ - if (rl == &q->root_rl) { - ent = &q->blkg_list; - /* There are no more block groups, hence no request lists */ - if (list_empty(ent)) - return NULL; - } else { - blkg = container_of(rl, struct blkcg_gq, rl); - ent = &blkg->q_node; - } - - /* walk to the next list_head, skip root blkcg */ - ent = ent->next; - if (ent == &q->root_blkg->q_node) - ent = ent->next; - if (ent == &q->blkg_list) - return NULL; - - blkg = container_of(ent, struct blkcg_gq, q_node); - return &blkg->rl; -} - static int blkcg_reset_stats(struct cgroup_subsys_state *css, struct cftype *cftype, u64 val) { @@ -1230,7 +1184,6 @@ int blkcg_init_queue(struct request_queue *q) if (IS_ERR(blkg)) goto err_unlock; q->root_blkg = blkg; - q->root_rl.blkg = blkg; spin_unlock_irq(q->queue_lock); rcu_read_unlock(); diff --git a/block/blk-core.c b/block/blk-core.c index ccfe2a65cc22..45f5c5898fd7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -450,81 +450,6 @@ void blk_cleanup_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_cleanup_queue); -/* Allocate memory local to the request queue */ -static void *alloc_request_simple(gfp_t gfp_mask, void *data) -{ - struct request_queue *q = data; - - return kmem_cache_alloc_node(request_cachep, gfp_mask, q->node); -} - -static void free_request_simple(void *element, void *data) -{ - kmem_cache_free(request_cachep, element); -} - -static void *alloc_request_size(gfp_t gfp_mask, void *data) -{ - struct request_queue *q = data; - struct request *rq; - - rq = kmalloc_node(sizeof(struct request) + q->cmd_size, gfp_mask, - q->node); - if (rq && q->init_rq_fn && q->init_rq_fn(q, rq, gfp_mask) < 0) { - kfree(rq); - rq = NULL; - } - return rq; -} - -static void free_request_size(void *element, void *data) -{ - struct request_queue *q = data; - - if (q->exit_rq_fn) - q->exit_rq_fn(q, element); - kfree(element); -} - -int blk_init_rl(struct request_list *rl, struct request_queue *q, - gfp_t gfp_mask) -{ - if (unlikely(rl->rq_pool) || q->mq_ops) - return 0; - - 
rl->q = q; - rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; - rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; - init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); - init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); - - if (q->cmd_size) { - rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, - alloc_request_size, free_request_size, - q, gfp_mask, q->node); - } else { - rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, - alloc_request_simple, free_request_simple, - q, gfp_mask, q->node); - } - if (!rl->rq_pool) - return -ENOMEM; - - if (rl != &q->root_rl) - WARN_ON_ONCE(!blk_get_queue(q)); - - return 0; -} - -void blk_exit_rl(struct request_queue *q, struct request_list *rl) -{ - if (rl->rq_pool) { - mempool_destroy(rl->rq_pool); - if (rl != &q->root_rl) - blk_put_queue(q); - } -} - struct request_queue *blk_alloc_queue(gfp_t gfp_mask) { return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL); diff --git a/block/blk-mq.c b/block/blk-mq.c index d106d7a970cc..2600cba56408 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -326,10 +326,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->end_io_data = NULL; rq->next_rq = NULL; -#ifdef CONFIG_BLK_CGROUP - rq->rl = NULL; -#endif - data->ctx->rq_dispatched[op_is_sync(op)]++; refcount_set(&rq->ref, 1); return rq; diff --git a/block/blk.h b/block/blk.h index 4ae6cacb4548..e925cf4fe4de 100644 --- a/block/blk.h +++ b/block/blk.h @@ -120,9 +120,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); -int blk_init_rl(struct request_list *rl, struct request_queue *q, - gfp_t gfp_mask); -void blk_exit_rl(struct request_queue *q, struct request_list *rl); void blk_exit_queue(struct request_queue *q); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6d766a19f2bb..1b299e025e83 
100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -122,9 +122,6 @@ struct blkcg_gq { /* all non-root blkcg_gq's are guaranteed to have access to parent */ struct blkcg_gq *parent; - /* request allocation list for this blkcg-q pair */ - struct request_list rl; - /* reference count */ atomic_t refcnt; @@ -515,94 +512,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -/** - * blk_get_rl - get request_list to use - * @q: request_queue of interest - * @bio: bio which will be attached to the allocated request (may be %NULL) - * - * The caller wants to allocate a request from @q to use for @bio. Find - * the request_list to use and obtain a reference on it. Should be called - * under queue_lock. This function is guaranteed to return non-%NULL - * request_list. - */ -static inline struct request_list *blk_get_rl(struct request_queue *q, - struct bio *bio) -{ - struct blkcg *blkcg; - struct blkcg_gq *blkg; - - rcu_read_lock(); - - blkcg = bio_blkcg(bio); - - /* bypass blkg lookup and use @q->root_rl directly for root */ - if (blkcg == &blkcg_root) - goto root_rl; - - /* - * Try to use blkg->rl. blkg lookup may fail under memory pressure - * or if either the blkcg or queue is going away. Fall back to - * root_rl in such cases. - */ - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) - goto root_rl; - - blkg_get(blkg); - rcu_read_unlock(); - return &blkg->rl; -root_rl: - rcu_read_unlock(); - return &q->root_rl; -} - -/** - * blk_put_rl - put request_list - * @rl: request_list to put - * - * Put the reference acquired by blk_get_rl(). Should be called under - * queue_lock. 
- */ -static inline void blk_put_rl(struct request_list *rl) -{ - if (rl->blkg->blkcg != &blkcg_root) - blkg_put(rl->blkg); -} - -/** - * blk_rq_set_rl - associate a request with a request_list - * @rq: request of interest - * @rl: target request_list - * - * Associate @rq with @rl so that accounting and freeing can know the - * request_list @rq came from. - */ -static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) -{ - rq->rl = rl; -} - -/** - * blk_rq_rl - return the request_list a request came from - * @rq: request of interest - * - * Return the request_list @rq is allocated from. - */ -static inline struct request_list *blk_rq_rl(struct request *rq) -{ - return rq->rl; -} - -struct request_list *__blk_queue_next_rl(struct request_list *rl, - struct request_queue *q); -/** - * blk_queue_for_each_rl - iterate through all request_lists of a request_queue - * - * Should be used under queue_lock. - */ -#define blk_queue_for_each_rl(rl, q) \ - for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) - static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp) { int ret; @@ -939,12 +848,6 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } -static inline struct request_list *blk_get_rl(struct request_queue *q, - struct bio *bio) { return &q->root_rl; } -static inline void blk_put_rl(struct request_list *rl) { } -static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } -static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } - static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 877a3d235c45..e0c661a95c39 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -58,22 +58,6 @@ struct blk_stat_callback; typedef 
void (rq_end_io_fn)(struct request *, blk_status_t); -struct request_list { - struct request_queue *q; /* the queue this rl belongs to */ -#ifdef CONFIG_BLK_CGROUP - struct blkcg_gq *blkg; /* blkg this request pool belongs to */ -#endif - /* - * count[], starved[], and wait[] are indexed by - * BLK_RW_SYNC/BLK_RW_ASYNC - */ - int count[2]; - int starved[2]; - mempool_t *rq_pool; - wait_queue_head_t wait[2]; - unsigned int flags; -}; - /* * request flags */ typedef __u32 __bitwise req_flags_t; @@ -259,10 +243,6 @@ struct request { /* for bidi */ struct request *next_rq; - -#ifdef CONFIG_BLK_CGROUP - struct request_list *rl; /* rl this rq is alloced from */ -#endif }; static inline bool blk_op_is_scsi(unsigned int op) @@ -312,8 +292,6 @@ typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); struct bio_vec; typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); -typedef void (exit_rq_fn)(struct request_queue *, struct request *); enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ @@ -427,22 +405,10 @@ struct request_queue { struct blk_queue_stats *stats; struct rq_qos *rq_qos; - /* - * If blkcg is not used, @q->root_rl serves all requests. If blkcg - * is used, root blkg allocates from @q->root_rl and all other - * blkgs from their own blkg->rl. Which one to use should be - * determined using bio_request_list(). 
- */ - struct request_list root_rl; - make_request_fn *make_request_fn; poll_q_fn *poll_fn; softirq_done_fn *softirq_done_fn; dma_drain_needed_fn *dma_drain_needed; - /* Called just after a request is allocated */ - init_rq_fn *init_rq_fn; - /* Called just before a request is freed */ - exit_rq_fn *exit_rq_fn; const struct blk_mq_ops *mq_ops; -- cgit v1.2.3 From 7d692330e7cd581ccfee982334bf06b236cb999a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Oct 2018 10:48:12 -0600 Subject: block: get rid of blk_queued_rq() No point in hiding what this does, just open code it in the one spot where we are still using it. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- include/linux/blkdev.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 2600cba56408..b49f5bd86f42 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -692,7 +692,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list) /* this request will be re-inserted to io scheduler queue */ blk_mq_sched_requeue_request(rq); - BUG_ON(blk_queued_rq(rq)); + BUG_ON(!list_empty(&rq->queuelist)); blk_mq_add_to_requeue_list(rq, true, kick_requeue_list); } EXPORT_SYMBOL(blk_mq_requeue_request); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e0c661a95c39..c675e2b5af62 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -673,8 +673,6 @@ static inline bool blk_account_rq(struct request *rq) #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) -/* rq->queuelist of dequeued request must be list_empty() */ -#define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -- cgit v1.2.3 From c7bb9ad1744ea14e61e5fff99ee5282709b0c9d9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 31 Oct 2018 09:43:30 
-0600 Subject: block: get rid of q->softirq_done_fn() With the legacy path gone, all we do is funnel it through the mq_ops->complete() operation. Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 17 ++++++++--------- block/blk-settings.c | 6 ------ block/blk-softirq.c | 4 ++-- include/linux/blk-mq.h | 3 ++- include/linux/blkdev.h | 3 --- 5 files changed, 12 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index b49f5bd86f42..5e7982918c54 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -546,13 +546,15 @@ EXPORT_SYMBOL(blk_mq_end_request); static void __blk_mq_complete_request_remote(void *data) { struct request *rq = data; + struct request_queue *q = rq->q; - rq->q->softirq_done_fn(rq); + q->mq_ops->complete(rq); } static void __blk_mq_complete_request(struct request *rq) { struct blk_mq_ctx *ctx = rq->mq_ctx; + struct request_queue *q = rq->q; bool shared = false; int cpu; @@ -568,18 +570,18 @@ static void __blk_mq_complete_request(struct request *rq) * So complete IO reqeust in softirq context in case of single queue * for not degrading IO performance by irqsoff latency. 
*/ - if (rq->q->nr_hw_queues == 1) { + if (q->nr_hw_queues == 1) { __blk_complete_request(rq); return; } - if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) { - rq->q->softirq_done_fn(rq); + if (!test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) { + q->mq_ops->complete(rq); return; } cpu = get_cpu(); - if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) + if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) shared = cpus_share_cache(cpu, ctx->cpu); if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { @@ -588,7 +590,7 @@ static void __blk_mq_complete_request(struct request *rq) rq->csd.flags = 0; smp_call_function_single_async(ctx->cpu, &rq->csd); } else { - rq->q->softirq_done_fn(rq); + q->mq_ops->complete(rq); } put_cpu(); } @@ -2701,9 +2703,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, */ q->poll_nsec = -1; - if (set->ops->complete) - blk_queue_softirq_done(q, set->ops->complete); - blk_mq_init_cpu_queues(q, set->nr_hw_queues); blk_mq_add_queue_tag_set(set, q); blk_mq_map_swqueue(q); diff --git a/block/blk-settings.c b/block/blk-settings.c index e3f07d94b18d..cca83590a1dc 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -20,12 +20,6 @@ EXPORT_SYMBOL(blk_max_low_pfn); unsigned long blk_max_pfn; -void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) -{ - q->softirq_done_fn = fn; -} -EXPORT_SYMBOL(blk_queue_softirq_done); - void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) { q->rq_timeout = timeout; diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 8ca0f6caf174..727d64436ec4 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -34,7 +34,7 @@ static __latent_entropy void blk_done_softirq(struct softirq_action *h) rq = list_entry(local_list.next, struct request, ipi_list); list_del_init(&rq->ipi_list); - rq->q->softirq_done_fn(rq); + rq->q->mq_ops->complete(rq); } } @@ -102,7 +102,7 @@ void __blk_complete_request(struct request *req) 
unsigned long flags; bool shared = false; - BUG_ON(!q->softirq_done_fn); + BUG_ON(!q->mq_ops->complete); local_irq_save(flags); cpu = smp_processor_id(); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 5c8418ebbfd6..9dd574e5436a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -115,6 +115,7 @@ typedef void (busy_tag_iter_fn)(struct request *, void *, bool); typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (map_queues_fn)(struct blk_mq_tag_set *set); typedef bool (busy_fn)(struct request_queue *); +typedef void (complete_fn)(struct request *); struct blk_mq_ops { @@ -142,7 +143,7 @@ struct blk_mq_ops { */ poll_fn *poll; - softirq_done_fn *complete; + complete_fn *complete; /* * Called when the block layer side of a hardware queue has been diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c675e2b5af62..d4104844d6bb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -290,7 +290,6 @@ typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); struct bio_vec; -typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); enum blk_eh_timer_return { @@ -407,7 +406,6 @@ struct request_queue { make_request_fn *make_request_fn; poll_q_fn *poll_fn; - softirq_done_fn *softirq_done_fn; dma_drain_needed_fn *dma_drain_needed; const struct blk_mq_ops *mq_ops; @@ -1113,7 +1111,6 @@ extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); -extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct 
request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); -- cgit v1.2.3 From 9cf2bab6307659b940da65d16dcc8f82c69f3a97 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 31 Oct 2018 17:01:22 -0600 Subject: block: kill request ->cpu member This was used for completion placement for the legacy path, but for mq we have rq->mq_ctx->cpu for that. Add a helper to get the request CPU assignment, as the mq_ctx type is private to blk-mq. Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- block/blk-merge.c | 2 -- block/blk-mq.c | 7 ++++++- block/blk-softirq.c | 2 +- drivers/scsi/bnx2i/bnx2i_hwi.c | 8 +------- drivers/scsi/csiostor/csio_scsi.c | 8 +------- drivers/scsi/qla2xxx/qla_os.c | 2 +- include/linux/blk-mq.h | 2 ++ include/linux/blkdev.h | 2 -- 9 files changed, 12 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index a14dab57ff8b..3daab9df24e0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -145,7 +145,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq) INIT_LIST_HEAD(&rq->queuelist); INIT_LIST_HEAD(&rq->timeout_list); - rq->cpu = -1; rq->q = q; rq->__sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); @@ -1770,7 +1769,6 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); */ static void __blk_rq_prep_clone(struct request *dst, struct request *src) { - dst->cpu = src->cpu; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); if (src->rq_flags & RQF_SPECIAL_PAYLOAD) { diff --git a/block/blk-merge.c b/block/blk-merge.c index a399b2fa8bc8..91b2af332a84 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -806,8 +806,6 @@ static struct request *attempt_merge(struct request_queue *q, blk_account_io_merge(next); req->ioprio = ioprio_best(req->ioprio, next->ioprio); - if (blk_rq_cpu_valid(next)) - req->cpu = next->cpu; /* * ownership of bio passed from next to req, return 'next' for diff 
--git a/block/blk-mq.c b/block/blk-mq.c index 5e7982918c54..67a2bafd4b29 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -297,7 +297,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->q = data->q; rq->mq_ctx = data->ctx; rq->rq_flags = rq_flags; - rq->cpu = -1; rq->cmd_flags = op; if (data->flags & BLK_MQ_REQ_PREEMPT) rq->rq_flags |= RQF_PREEMPT; @@ -3282,6 +3281,12 @@ static bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) return __blk_mq_poll(hctx, rq); } +unsigned int blk_mq_rq_cpu(struct request *rq) +{ + return rq->mq_ctx->cpu; +} +EXPORT_SYMBOL(blk_mq_rq_cpu); + static int __init blk_mq_init(void) { cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 727d64436ec4..1534066e306e 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -98,7 +98,7 @@ static int blk_softirq_cpu_dead(unsigned int cpu) void __blk_complete_request(struct request *req) { struct request_queue *q = req->q; - int cpu, ccpu = q->mq_ops ? 
req->mq_ctx->cpu : req->cpu; + int cpu, ccpu = req->mq_ctx->cpu; unsigned long flags; bool shared = false; diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index e9e669a6c2bc..6bad2689edd4 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1906,7 +1906,6 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session, struct iscsi_task *task; struct scsi_cmnd *sc; int rc = 0; - int cpu; spin_lock(&session->back_lock); task = iscsi_itt_to_task(bnx2i_conn->cls_conn->dd_data, @@ -1917,14 +1916,9 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session, } sc = task->sc; - if (!blk_rq_cpu_valid(sc->request)) - cpu = smp_processor_id(); - else - cpu = sc->request->cpu; - spin_unlock(&session->back_lock); - p = &per_cpu(bnx2i_percpu, cpu); + p = &per_cpu(bnx2i_percpu, blk_mq_rq_cpu(sc->request)); spin_lock(&p->p_work_lock); if (unlikely(!p->iothread)) { rc = -EINVAL; diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index 8c15b7acb4b7..a95debbea0e4 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -1780,16 +1780,10 @@ csio_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmnd) int nsge = 0; int rv = SCSI_MLQUEUE_HOST_BUSY, nr; int retval; - int cpu; struct csio_scsi_qset *sqset; struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); - if (!blk_rq_cpu_valid(cmnd->request)) - cpu = smp_processor_id(); - else - cpu = cmnd->request->cpu; - - sqset = &hw->sqset[ln->portid][cpu]; + sqset = &hw->sqset[ln->portid][blk_mq_rq_cpu(cmnd->request)]; nr = fc_remote_port_chkready(rport); if (nr) { diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 4ea9f2b4e04f..29dfd1bd164d 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1460,7 +1460,7 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type, goto eh_reset_failed; } err = 2; - if 
(do_reset(fcport, cmd->device->lun, cmd->request->cpu + 1) + if (do_reset(fcport, cmd->device->lun, blk_mq_rq_cpu(cmd->request) + 1) != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x800c, "do_reset failed for cmd=%p.\n", cmd); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9dd574e5436a..d83a26fb37e5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -300,6 +300,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); +unsigned int blk_mq_rq_cpu(struct request *rq); + /** * blk_mq_mark_complete() - Set request state to complete * @rq: request to set to complete state diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d4104844d6bb..c8fa4d3d7fee 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -130,7 +130,6 @@ struct request { struct request_queue *q; struct blk_mq_ctx *mq_ctx; - int cpu; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; @@ -669,7 +668,6 @@ static inline bool blk_account_rq(struct request *rq) return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); } -#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -- cgit v1.2.3 From a8908939af569ce2419f43fd56eeaf003bc3d85d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 16 Oct 2018 14:23:06 -0600 Subject: blk-mq: kill q->mq_map It's just a pointer to set->mq_map, use that instead. Move the assignment a bit earlier, so we always know it's valid. 
Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 13 ++++--------- block/blk-mq.h | 4 +++- include/linux/blkdev.h | 2 -- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 67a2bafd4b29..766facfa1f08 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2322,7 +2322,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) * If the cpu isn't present, the cpu is mapped to first hctx. */ for_each_possible_cpu(i) { - hctx_idx = q->mq_map[i]; + hctx_idx = set->mq_map[i]; /* unmapped hw queue can be remapped after CPU topo changed */ if (!set->tags[hctx_idx] && !__blk_mq_alloc_rq_map(set, hctx_idx)) { @@ -2332,7 +2332,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) * case, remap the current ctx to hctx[0] which * is guaranteed to always have tags allocated */ - q->mq_map[i] = 0; + set->mq_map[i] = 0; } ctx = per_cpu_ptr(q->queue_ctx, i); @@ -2430,8 +2430,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, struct request_queue *q) { - q->tag_set = set; - mutex_lock(&set->tag_list_lock); /* @@ -2468,8 +2466,6 @@ void blk_mq_release(struct request_queue *q) kobject_put(&hctx->kobj); } - q->mq_map = NULL; - kfree(q->queue_hw_ctx); /* @@ -2589,7 +2585,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, int node; struct blk_mq_hw_ctx *hctx; - node = blk_mq_hw_queue_to_node(q->mq_map, i); + node = blk_mq_hw_queue_to_node(set->mq_map, i); /* * If the hw queue has been mapped to another numa node, * we need to realloc the hctx. 
If allocation fails, fallback @@ -2666,8 +2662,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_hw_ctx) goto err_percpu; - q->mq_map = set->mq_map; - blk_mq_realloc_hw_ctxs(set, q); if (!q->nr_hw_queues) goto err_hctxs; @@ -2676,6 +2670,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); q->nr_queues = nr_cpu_ids; + q->tag_set = set; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; diff --git a/block/blk-mq.h b/block/blk-mq.h index 9497b47e2526..9536be06d022 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -75,7 +75,9 @@ extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int); static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, int cpu) { - return q->queue_hw_ctx[q->mq_map[cpu]]; + struct blk_mq_tag_set *set = q->tag_set; + + return q->queue_hw_ctx[set->mq_map[cpu]]; } /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c8fa4d3d7fee..2ae7465d68ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -409,8 +409,6 @@ struct request_queue { const struct blk_mq_ops *mq_ops; - unsigned int *mq_map; - /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; unsigned int nr_queues; -- cgit v1.2.3 From ed76e329d74a4b15ac0f5fd3adbd52ec0178a134 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 13:06:14 -0600 Subject: blk-mq: abstract out queue map This is in preparation for allowing multiple sets of maps per queue, if so desired. 
Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq-cpumap.c | 10 +++++----- block/blk-mq-pci.c | 10 +++++----- block/blk-mq-rdma.c | 4 ++-- block/blk-mq-virtio.c | 8 ++++---- block/blk-mq.c | 34 ++++++++++++++++++---------------- block/blk-mq.h | 8 ++++---- drivers/block/virtio_blk.c | 2 +- drivers/nvme/host/pci.c | 2 +- drivers/scsi/qla2xxx/qla_os.c | 5 +++-- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/smartpqi/smartpqi_init.c | 3 ++- drivers/scsi/virtio_scsi.c | 3 ++- include/linux/blk-mq-pci.h | 4 ++-- include/linux/blk-mq-virtio.h | 4 ++-- include/linux/blk-mq.h | 15 ++++++++++++--- 15 files changed, 64 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 3eb169f15842..6e6686c55984 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -30,10 +30,10 @@ static int get_first_sibling(unsigned int cpu) return cpu; } -int blk_mq_map_queues(struct blk_mq_tag_set *set) +int blk_mq_map_queues(struct blk_mq_queue_map *qmap) { - unsigned int *map = set->mq_map; - unsigned int nr_queues = set->nr_hw_queues; + unsigned int *map = qmap->mq_map; + unsigned int nr_queues = qmap->nr_queues; unsigned int cpu, first_sibling; for_each_possible_cpu(cpu) { @@ -62,12 +62,12 @@ EXPORT_SYMBOL_GPL(blk_mq_map_queues); * We have no quick way of doing reverse lookups. This is only used at * queue init time, so runtime isn't important. 
*/ -int blk_mq_hw_queue_to_node(unsigned int *mq_map, unsigned int index) +int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index) { int i; for_each_possible_cpu(i) { - if (index == mq_map[i]) + if (index == qmap->mq_map[i]) return local_memory_node(cpu_to_node(i)); } diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c index db644ec624f5..40333d60a850 100644 --- a/block/blk-mq-pci.c +++ b/block/blk-mq-pci.c @@ -31,26 +31,26 @@ * that maps a queue to the CPUs that have irq affinity for the corresponding * vector. */ -int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev, +int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, int offset) { const struct cpumask *mask; unsigned int queue, cpu; - for (queue = 0; queue < set->nr_hw_queues; queue++) { + for (queue = 0; queue < qmap->nr_queues; queue++) { mask = pci_irq_get_affinity(pdev, queue + offset); if (!mask) goto fallback; for_each_cpu(cpu, mask) - set->mq_map[cpu] = queue; + qmap->mq_map[cpu] = queue; } return 0; fallback: - WARN_ON_ONCE(set->nr_hw_queues > 1); - blk_mq_clear_mq_map(set); + WARN_ON_ONCE(qmap->nr_queues > 1); + blk_mq_clear_mq_map(qmap); return 0; } EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues); diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c index 996167f1de18..a71576aff3a5 100644 --- a/block/blk-mq-rdma.c +++ b/block/blk-mq-rdma.c @@ -41,12 +41,12 @@ int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set, goto fallback; for_each_cpu(cpu, mask) - set->mq_map[cpu] = queue; + set->map[0].mq_map[cpu] = queue; } return 0; fallback: - return blk_mq_map_queues(set); + return blk_mq_map_queues(&set->map[0]); } EXPORT_SYMBOL_GPL(blk_mq_rdma_map_queues); diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c index c3afbca11299..661fbfef480f 100644 --- a/block/blk-mq-virtio.c +++ b/block/blk-mq-virtio.c @@ -29,7 +29,7 @@ * that maps a queue to the CPUs that have irq affinity for the corresponding * vector. 
*/ -int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, +int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, struct virtio_device *vdev, int first_vec) { const struct cpumask *mask; @@ -38,17 +38,17 @@ int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, if (!vdev->config->get_vq_affinity) goto fallback; - for (queue = 0; queue < set->nr_hw_queues; queue++) { + for (queue = 0; queue < qmap->nr_queues; queue++) { mask = vdev->config->get_vq_affinity(vdev, first_vec + queue); if (!mask) goto fallback; for_each_cpu(cpu, mask) - set->mq_map[cpu] = queue; + qmap->mq_map[cpu] = queue; } return 0; fallback: - return blk_mq_map_queues(set); + return blk_mq_map_queues(qmap); } EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues); diff --git a/block/blk-mq.c b/block/blk-mq.c index 766facfa1f08..fac88d16988b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1975,7 +1975,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags; int node; - node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx); + node = blk_mq_hw_queue_to_node(&set->map[0], hctx_idx); if (node == NUMA_NO_NODE) node = set->numa_node; @@ -2031,7 +2031,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, size_t rq_size, left; int node; - node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx); + node = blk_mq_hw_queue_to_node(&set->map[0], hctx_idx); if (node == NUMA_NO_NODE) node = set->numa_node; @@ -2322,7 +2322,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) * If the cpu isn't present, the cpu is mapped to first hctx. 
*/ for_each_possible_cpu(i) { - hctx_idx = set->mq_map[i]; + hctx_idx = set->map[0].mq_map[i]; /* unmapped hw queue can be remapped after CPU topo changed */ if (!set->tags[hctx_idx] && !__blk_mq_alloc_rq_map(set, hctx_idx)) { @@ -2332,7 +2332,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) * case, remap the current ctx to hctx[0] which * is guaranteed to always have tags allocated */ - set->mq_map[i] = 0; + set->map[0].mq_map[i] = 0; } ctx = per_cpu_ptr(q->queue_ctx, i); @@ -2585,7 +2585,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, int node; struct blk_mq_hw_ctx *hctx; - node = blk_mq_hw_queue_to_node(set->mq_map, i); + node = blk_mq_hw_queue_to_node(&set->map[0], i); /* * If the hw queue has been mapped to another numa node, * we need to realloc the hctx. If allocation fails, fallback @@ -2791,18 +2791,18 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) * for (queue = 0; queue < set->nr_hw_queues; queue++) { * mask = get_cpu_mask(queue) * for_each_cpu(cpu, mask) - * set->mq_map[cpu] = queue; + * set->map.mq_map[cpu] = queue; * } * * When we need to remap, the table has to be cleared for * killing stale mapping since one CPU may not be mapped * to any hw queue. 
*/ - blk_mq_clear_mq_map(set); + blk_mq_clear_mq_map(&set->map[0]); return set->ops->map_queues(set); } else - return blk_mq_map_queues(set); + return blk_mq_map_queues(&set->map[0]); } /* @@ -2857,10 +2857,12 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) return -ENOMEM; ret = -ENOMEM; - set->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*set->mq_map), - GFP_KERNEL, set->numa_node); - if (!set->mq_map) + set->map[0].mq_map = kcalloc_node(nr_cpu_ids, + sizeof(*set->map[0].mq_map), + GFP_KERNEL, set->numa_node); + if (!set->map[0].mq_map) goto out_free_tags; + set->map[0].nr_queues = set->nr_hw_queues; ret = blk_mq_update_queue_map(set); if (ret) @@ -2876,8 +2878,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) return 0; out_free_mq_map: - kfree(set->mq_map); - set->mq_map = NULL; + kfree(set->map[0].mq_map); + set->map[0].mq_map = NULL; out_free_tags: kfree(set->tags); set->tags = NULL; @@ -2892,8 +2894,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) for (i = 0; i < nr_cpu_ids; i++) blk_mq_free_map_and_requests(set, i); - kfree(set->mq_map); - set->mq_map = NULL; + kfree(set->map[0].mq_map); + set->map[0].mq_map = NULL; kfree(set->tags); set->tags = NULL; @@ -3054,7 +3056,7 @@ fallback: pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n", nr_hw_queues, prev_nr_hw_queues); set->nr_hw_queues = prev_nr_hw_queues; - blk_mq_map_queues(set); + blk_mq_map_queues(&set->map[0]); goto fallback; } blk_mq_map_swqueue(q); diff --git a/block/blk-mq.h b/block/blk-mq.h index 9536be06d022..889f0069dd80 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -70,14 +70,14 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, /* * CPU -> queue mappings */ -extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int); +extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int); static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, int cpu) { struct blk_mq_tag_set *set = q->tag_set; - return 
q->queue_hw_ctx[set->mq_map[cpu]]; + return q->queue_hw_ctx[set->map[0].mq_map[cpu]]; } /* @@ -206,12 +206,12 @@ static inline void blk_mq_put_driver_tag(struct request *rq) __blk_mq_put_driver_tag(hctx, rq); } -static inline void blk_mq_clear_mq_map(struct blk_mq_tag_set *set) +static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap) { int cpu; for_each_possible_cpu(cpu) - set->mq_map[cpu] = 0; + qmap->mq_map[cpu] = 0; } #endif diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 086c6bb12baa..6e869d05f91e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -624,7 +624,7 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set) { struct virtio_blk *vblk = set->driver_data; - return blk_mq_virtio_map_queues(set, vblk->vdev, 0); + return blk_mq_virtio_map_queues(&set->map[0], vblk->vdev, 0); } #ifdef CONFIG_VIRTIO_BLK_SCSI diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c33bb201b884..49ad854d1b91 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -435,7 +435,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; - return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), + return blk_mq_pci_map_queues(&set->map[0], to_pci_dev(dev->dev), dev->num_vecs > 1 ? 
1 /* admin queue */ : 0); } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 29dfd1bd164d..fdf3e52ee908 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -6934,11 +6934,12 @@ static int qla2xxx_map_queues(struct Scsi_Host *shost) { int rc; scsi_qla_host_t *vha = (scsi_qla_host_t *)shost->hostdata; + struct blk_mq_queue_map *qmap = &shost->tag_set.map[0]; if (USER_CTRL_IRQ(vha->hw)) - rc = blk_mq_map_queues(&shost->tag_set); + rc = blk_mq_map_queues(qmap); else - rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev, 0); + rc = blk_mq_pci_map_queues(qmap, vha->hw->pdev, 0); return rc; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 651be30ba96a..ed81b8e74cfe 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1812,7 +1812,7 @@ static int scsi_map_queues(struct blk_mq_tag_set *set) if (shost->hostt->map_queues) return shost->hostt->map_queues(shost); - return blk_mq_map_queues(set); + return blk_mq_map_queues(&set->map[0]); } void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index a25a07a0b7f0..bac084260d80 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -5319,7 +5319,8 @@ static int pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev, 0); + return blk_mq_pci_map_queues(&shost->tag_set.map[0], + ctrl_info->pci_dev, 0); } static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info, diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 1c72db94270e..c3c95b314286 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -719,8 +719,9 @@ static void virtscsi_target_destroy(struct scsi_target *starget) static int virtscsi_map_queues(struct Scsi_Host 
*shost) { struct virtio_scsi *vscsi = shost_priv(shost); + struct blk_mq_queue_map *qmap = &shost->tag_set.map[0]; - return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2); + return blk_mq_virtio_map_queues(qmap, vscsi->vdev, 2); } /* diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h index 9f4c17f0d2d8..0b1f45c62623 100644 --- a/include/linux/blk-mq-pci.h +++ b/include/linux/blk-mq-pci.h @@ -2,10 +2,10 @@ #ifndef _LINUX_BLK_MQ_PCI_H #define _LINUX_BLK_MQ_PCI_H -struct blk_mq_tag_set; +struct blk_mq_queue_map; struct pci_dev; -int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev, +int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, int offset); #endif /* _LINUX_BLK_MQ_PCI_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h index 69b4da262c45..687ae287e1dc 100644 --- a/include/linux/blk-mq-virtio.h +++ b/include/linux/blk-mq-virtio.h @@ -2,10 +2,10 @@ #ifndef _LINUX_BLK_MQ_VIRTIO_H #define _LINUX_BLK_MQ_VIRTIO_H -struct blk_mq_tag_set; +struct blk_mq_queue_map; struct virtio_device; -int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, +int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, struct virtio_device *vdev, int first_vec); #endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d83a26fb37e5..176164888628 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -74,10 +74,19 @@ struct blk_mq_hw_ctx { struct srcu_struct srcu[0]; }; +struct blk_mq_queue_map { + unsigned int *mq_map; + unsigned int nr_queues; +}; + +enum { + HCTX_MAX_TYPES = 1, +}; + struct blk_mq_tag_set { - unsigned int *mq_map; + struct blk_mq_queue_map map[HCTX_MAX_TYPES]; const struct blk_mq_ops *ops; - unsigned int nr_hw_queues; + unsigned int nr_hw_queues; /* nr hw queues across maps */ unsigned int queue_depth; /* max hw supported */ unsigned int reserved_tags; unsigned int cmd_size; /* per-request extra data */ @@ 
-295,7 +304,7 @@ void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); -int blk_mq_map_queues(struct blk_mq_tag_set *set); +int blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); -- cgit v1.2.3 From f31967f0e455d08d3ea1d2f849bf62dafc92dbf4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 13:13:29 -0600 Subject: blk-mq: allow software queue to map to multiple hardware queues The mapping used to be dependent on just the CPU location, but now it's a tuple of (type, cpu) instead. This is a prep patch for allowing a single software queue to map to multiple hardware queues. No functional changes in this patch. This changes the software queue count to an unsigned short to save a bit of space. We can still support 64K-1 CPUs, which should be enough. Add a check to catch a wrap. 
Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 2 +- block/blk-mq.c | 22 ++++++++++++++++------ block/blk-mq.h | 2 +- block/kyber-iosched.c | 6 +++--- include/linux/blk-mq.h | 3 ++- 5 files changed, 23 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 68087bf71a61..bbabc3877d5a 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -109,7 +109,7 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { - unsigned idx = ctx->index_hw; + unsigned short idx = ctx->index_hw[hctx->type]; if (++idx == hctx->nr_ctx) idx = 0; diff --git a/block/blk-mq.c b/block/blk-mq.c index 67dec64440dd..31976bff8ad2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -75,14 +75,18 @@ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { - if (!sbitmap_test_bit(&hctx->ctx_map, ctx->index_hw)) - sbitmap_set_bit(&hctx->ctx_map, ctx->index_hw); + const int bit = ctx->index_hw[hctx->type]; + + if (!sbitmap_test_bit(&hctx->ctx_map, bit)) + sbitmap_set_bit(&hctx->ctx_map, bit); } static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { - sbitmap_clear_bit(&hctx->ctx_map, ctx->index_hw); + const int bit = ctx->index_hw[hctx->type]; + + sbitmap_clear_bit(&hctx->ctx_map, bit); } struct mq_inflight { @@ -955,7 +959,7 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr, struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *start) { - unsigned off = start ? start->index_hw : 0; + unsigned off = start ? 
start->index_hw[hctx->type] : 0; struct dispatch_rq_data data = { .hctx = hctx, .rq = NULL, @@ -2343,10 +2347,16 @@ static void blk_mq_map_swqueue(struct request_queue *q) ctx = per_cpu_ptr(q->queue_ctx, i); hctx = blk_mq_map_queue_type(q, 0, i); - + hctx->type = 0; cpumask_set_cpu(i, hctx->cpumask); - ctx->index_hw = hctx->nr_ctx; + ctx->index_hw[hctx->type] = hctx->nr_ctx; hctx->ctxs[hctx->nr_ctx++] = ctx; + + /* + * If the nr_ctx type overflows, we have exceeded the + * amount of sw queues we can support. + */ + BUG_ON(!hctx->nr_ctx); } mutex_unlock(&q->sysfs_lock); diff --git a/block/blk-mq.h b/block/blk-mq.h index 6a8f8b60d8ba..1821f448f7c4 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -17,7 +17,7 @@ struct blk_mq_ctx { } ____cacheline_aligned_in_smp; unsigned int cpu; - unsigned int index_hw; + unsigned short index_hw[HCTX_MAX_TYPES]; /* incremented at dispatch time */ unsigned long rq_dispatched[2]; diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 1fd83a91e749..de78e8aa7b0a 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -576,7 +576,7 @@ static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) { struct kyber_hctx_data *khd = hctx->sched_data; struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue); - struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw]; + struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]]; unsigned int sched_domain = kyber_sched_domain(bio->bi_opf); struct list_head *rq_list = &kcq->rq_list[sched_domain]; bool merged; @@ -602,7 +602,7 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx, list_for_each_entry_safe(rq, next, rq_list, queuelist) { unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags); - struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw]; + struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw[hctx->type]]; struct list_head *head = &kcq->rq_list[sched_domain]; spin_lock(&kcq->lock); @@ -611,7 +611,7 @@ static void 
kyber_insert_requests(struct blk_mq_hw_ctx *hctx, else list_move_tail(&rq->queuelist, head); sbitmap_set_bit(&khd->kcq_map[sched_domain], - rq->mq_ctx->index_hw); + rq->mq_ctx->index_hw[hctx->type]); blk_mq_sched_request_inserted(rq); spin_unlock(&kcq->lock); } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 176164888628..6c39d546c50b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -37,7 +37,8 @@ struct blk_mq_hw_ctx { struct blk_mq_ctx *dispatch_from; unsigned int dispatch_busy; - unsigned int nr_ctx; + unsigned short type; + unsigned short nr_ctx; struct blk_mq_ctx **ctxs; spinlock_t dispatch_wait_lock; -- cgit v1.2.3 From b3c661b15d5ab11d982e58bee23e05c1780528a1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 30 Oct 2018 10:36:06 -0600 Subject: blk-mq: support multiple hctx maps Add support for the tag set carrying multiple queue maps, and for the driver to inform blk-mq how many it wishes to support through setting set->nr_maps. This adds an mq_ops helper for drivers that support more than 1 map, mq_ops->rq_flags_to_type(). The function takes the request queue and the request/bio flags, and returns a queue map index for that. We then use the type information in blk_mq_map_queue() to index the map set. 
Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-mq.c | 92 +++++++++++++++++++++++++++++++++----------------- block/blk-mq.h | 33 +++++++++++++----- include/linux/blk-mq.h | 14 ++++++++ 3 files changed, 100 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 31976bff8ad2..2e730c95513f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2258,7 +2258,8 @@ static int blk_mq_init_hctx(struct request_queue *q, static void blk_mq_init_cpu_queues(struct request_queue *q, unsigned int nr_hw_queues) { - unsigned int i; + struct blk_mq_tag_set *set = q->tag_set; + unsigned int i, j; for_each_possible_cpu(i) { struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); @@ -2273,9 +2274,11 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, * Set local node, IFF we have more than one hw queue. If * not, we remain on the home node of the device */ - hctx = blk_mq_map_queue_type(q, 0, i); - if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) - hctx->numa_node = local_memory_node(cpu_to_node(i)); + for (j = 0; j < set->nr_maps; j++) { + hctx = blk_mq_map_queue_type(q, j, i); + if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) + hctx->numa_node = local_memory_node(cpu_to_node(i)); + } } } @@ -2310,7 +2313,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, static void blk_mq_map_swqueue(struct request_queue *q) { - unsigned int i, hctx_idx; + unsigned int i, j, hctx_idx; struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; struct blk_mq_tag_set *set = q->tag_set; @@ -2346,17 +2349,28 @@ static void blk_mq_map_swqueue(struct request_queue *q) } ctx = per_cpu_ptr(q->queue_ctx, i); - hctx = blk_mq_map_queue_type(q, 0, i); - hctx->type = 0; - cpumask_set_cpu(i, hctx->cpumask); - ctx->index_hw[hctx->type] = hctx->nr_ctx; - hctx->ctxs[hctx->nr_ctx++] = ctx; + for (j = 0; j < set->nr_maps; j++) { + hctx = 
blk_mq_map_queue_type(q, j, i); - /* - * If the nr_ctx type overflows, we have exceeded the - * amount of sw queues we can support. - */ - BUG_ON(!hctx->nr_ctx); + /* + * If the CPU is already set in the mask, then we've + * mapped this one already. This can happen if + * devices share queues across queue maps. + */ + if (cpumask_test_cpu(i, hctx->cpumask)) + continue; + + cpumask_set_cpu(i, hctx->cpumask); + hctx->type = j; + ctx->index_hw[hctx->type] = hctx->nr_ctx; + hctx->ctxs[hctx->nr_ctx++] = ctx; + + /* + * If the nr_ctx type overflows, we have exceeded the + * amount of sw queues we can support. + */ + BUG_ON(!hctx->nr_ctx); + } } mutex_unlock(&q->sysfs_lock); @@ -2524,6 +2538,7 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, memset(set, 0, sizeof(*set)); set->ops = ops; set->nr_hw_queues = 1; + set->nr_maps = 1; set->queue_depth = queue_depth; set->numa_node = NUMA_NO_NODE; set->flags = set_flags; @@ -2800,6 +2815,8 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) { if (set->ops->map_queues) { + int i; + /* * transport .map_queues is usually done in the following * way: @@ -2807,18 +2824,21 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) * for (queue = 0; queue < set->nr_hw_queues; queue++) { * mask = get_cpu_mask(queue) * for_each_cpu(cpu, mask) - * set->map.mq_map[cpu] = queue; + * set->map[x].mq_map[cpu] = queue; * } * * When we need to remap, the table has to be cleared for * killing stale mapping since one CPU may not be mapped * to any hw queue. 
*/ - blk_mq_clear_mq_map(&set->map[0]); + for (i = 0; i < set->nr_maps; i++) + blk_mq_clear_mq_map(&set->map[i]); return set->ops->map_queues(set); - } else + } else { + BUG_ON(set->nr_maps > 1); return blk_mq_map_queues(&set->map[0]); + } } /* @@ -2829,7 +2849,7 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) */ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) { - int ret; + int i, ret; BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS); @@ -2852,6 +2872,11 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) set->queue_depth = BLK_MQ_MAX_DEPTH; } + if (!set->nr_maps) + set->nr_maps = 1; + else if (set->nr_maps > HCTX_MAX_TYPES) + return -EINVAL; + /* * If a crashdump is active, then we are potentially in a very * memory constrained environment. Limit us to 1 queue and @@ -2873,12 +2898,14 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) return -ENOMEM; ret = -ENOMEM; - set->map[0].mq_map = kcalloc_node(nr_cpu_ids, - sizeof(*set->map[0].mq_map), - GFP_KERNEL, set->numa_node); - if (!set->map[0].mq_map) - goto out_free_tags; - set->map[0].nr_queues = set->nr_hw_queues; + for (i = 0; i < set->nr_maps; i++) { + set->map[i].mq_map = kcalloc_node(nr_cpu_ids, + sizeof(struct blk_mq_queue_map), + GFP_KERNEL, set->numa_node); + if (!set->map[i].mq_map) + goto out_free_mq_map; + set->map[i].nr_queues = set->nr_hw_queues; + } ret = blk_mq_update_queue_map(set); if (ret) @@ -2894,9 +2921,10 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) return 0; out_free_mq_map: - kfree(set->map[0].mq_map); - set->map[0].mq_map = NULL; -out_free_tags: + for (i = 0; i < set->nr_maps; i++) { + kfree(set->map[i].mq_map); + set->map[i].mq_map = NULL; + } kfree(set->tags); set->tags = NULL; return ret; @@ -2905,13 +2933,15 @@ EXPORT_SYMBOL(blk_mq_alloc_tag_set); void blk_mq_free_tag_set(struct blk_mq_tag_set *set) { - int i; + int i, j; for (i = 0; i < nr_cpu_ids; i++) blk_mq_free_map_and_requests(set, i); - kfree(set->map[0].mq_map); - 
set->map[0].mq_map = NULL; + for (j = 0; j < set->nr_maps; j++) { + kfree(set->map[j].mq_map); + set->map[j].mq_map = NULL; + } kfree(set->tags); set->tags = NULL; diff --git a/block/blk-mq.h b/block/blk-mq.h index 1821f448f7c4..053862270125 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -72,20 +72,37 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, */ extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int); -static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, - unsigned int flags, - unsigned int cpu) +/* + * blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue + * @q: request queue + * @hctx_type: the hctx type index + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + unsigned int hctx_type, + unsigned int cpu) { struct blk_mq_tag_set *set = q->tag_set; - return q->queue_hw_ctx[set->map[0].mq_map[cpu]]; + return q->queue_hw_ctx[set->map[hctx_type].mq_map[cpu]]; } -static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, - unsigned int hctx_type, - unsigned int cpu) +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) { - return blk_mq_map_queue(q, hctx_type, cpu); + int hctx_type = 0; + + if (q->mq_ops->rq_flags_to_type) + hctx_type = q->mq_ops->rq_flags_to_type(q, flags); + + return blk_mq_map_queue_type(q, hctx_type, cpu); } /* diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 6c39d546c50b..8994c95056a8 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -85,7 +85,14 @@ enum { }; struct blk_mq_tag_set { + /* + * map[] holds ctx -> hctx mappings, one map exists for each type + * that the driver wishes to support. 
There are no restrictions + * on maps being of the same size, and it's perfectly legal to + * share maps between types. + */ struct blk_mq_queue_map map[HCTX_MAX_TYPES]; + unsigned int nr_maps; /* nr entries in map[] */ const struct blk_mq_ops *ops; unsigned int nr_hw_queues; /* nr hw queues across maps */ unsigned int queue_depth; /* max hw supported */ @@ -109,6 +116,8 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +/* takes rq->cmd_flags as input, returns a hardware type index */ +typedef int (rq_flags_to_type_fn)(struct request_queue *, unsigned int); typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); @@ -134,6 +143,11 @@ struct blk_mq_ops { */ queue_rq_fn *queue_rq; + /* + * Return a queue map type for the given request/bio flags + */ + rq_flags_to_type_fn *rq_flags_to_type; + /* * Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the -- cgit v1.2.3 From ea4f995ee8b8f0578b3319949f2edd5d812fdb0a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 15:06:13 -0600 Subject: blk-mq: cache request hardware queue mapping We call blk_mq_map_queue() a lot, at least two times for each request per IO, sometimes more. Since we now have an indirect call as well in that function, cache the mapping so we don't have to re-call blk_mq_map_queue() for the same request multiple times. 
Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-flush.c | 12 ++++-------- block/blk-mq-debugfs.c | 4 +--- block/blk-mq-sched.c | 6 ++---- block/blk-mq-tag.c | 9 +-------- block/blk-mq.c | 22 +++++++++------------- block/blk-mq.h | 5 +---- include/linux/blkdev.h | 1 + 7 files changed, 19 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/block/blk-flush.c b/block/blk-flush.c index 77e9f5b2ee05..c53197dcdd70 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -215,7 +215,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) /* release the tag's ownership to the req cloned from */ spin_lock_irqsave(&fq->mq_flush_lock, flags); - hctx = blk_mq_map_queue(q, flush_rq->cmd_flags, flush_rq->mq_ctx->cpu); + hctx = flush_rq->mq_hctx; if (!q->elevator) { blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); flush_rq->tag = -1; @@ -262,7 +262,6 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, struct request *first_rq = list_first_entry(pending, struct request, flush.list); struct request *flush_rq = fq->flush_rq; - struct blk_mq_hw_ctx *hctx; /* C1 described at the top of this file */ if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending)) @@ -297,13 +296,12 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, * just for cheating put/get driver tag. 
*/ flush_rq->mq_ctx = first_rq->mq_ctx; + flush_rq->mq_hctx = first_rq->mq_hctx; if (!q->elevator) { fq->orig_rq = first_rq; flush_rq->tag = first_rq->tag; - hctx = blk_mq_map_queue(q, first_rq->cmd_flags, - first_rq->mq_ctx->cpu); - blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq); + blk_mq_tag_set_rq(flush_rq->mq_hctx, first_rq->tag, flush_rq); } else { flush_rq->internal_tag = first_rq->internal_tag; } @@ -320,13 +318,11 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, static void mq_flush_data_end_io(struct request *rq, blk_status_t error) { struct request_queue *q = rq->q; - struct blk_mq_hw_ctx *hctx; + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; struct blk_mq_ctx *ctx = rq->mq_ctx; unsigned long flags; struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx); - hctx = blk_mq_map_queue(q, rq->cmd_flags, ctx->cpu); - if (q->elevator) { WARN_ON(rq->tag < 0); blk_mq_put_driver_tag_hctx(hctx, rq); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index fac70c81b7de..cde19be36135 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -427,10 +427,8 @@ struct show_busy_params { static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved) { const struct show_busy_params *params = data; - struct blk_mq_hw_ctx *hctx; - hctx = blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); - if (hctx == params->hctx) + if (rq->mq_hctx == params->hctx) __blk_mq_debugfs_rq_show(params->m, list_entry_rq(&rq->queuelist)); } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index bbabc3877d5a..641df3f00632 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -366,9 +366,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; struct blk_mq_ctx *ctx = rq->mq_ctx; - struct blk_mq_hw_ctx *hctx; - - hctx = blk_mq_map_queue(q, rq->cmd_flags, ctx->cpu); + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; /* flush rq in 
flush machinery need to be dispatched directly */ if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) { @@ -407,7 +405,7 @@ void blk_mq_sched_insert_requests(struct request_queue *q, /* For list inserts, requests better be on the same hw queue */ rq = list_first_entry(list, struct request, queuelist); - hctx = blk_mq_map_queue(q, rq->cmd_flags, ctx->cpu); + hctx = rq->mq_hctx; e = hctx->queue->elevator; if (e && e->type->ops.insert_requests) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 478a959357f5..fb836d818b80 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -527,14 +527,7 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, */ u32 blk_mq_unique_tag(struct request *rq) { - struct request_queue *q = rq->q; - struct blk_mq_hw_ctx *hctx; - int hwq = 0; - - hctx = blk_mq_map_queue(q, rq->cmd_flags, rq->mq_ctx->cpu); - hwq = hctx->queue_num; - - return (hwq << BLK_MQ_UNIQUE_TAG_BITS) | + return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) | (rq->tag & BLK_MQ_UNIQUE_TAG_MASK); } EXPORT_SYMBOL(blk_mq_unique_tag); diff --git a/block/blk-mq.c b/block/blk-mq.c index ccf135cf41b0..6b2859d3ad23 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -300,6 +300,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, /* csd/requeue_work/fifo_time is initialized before use */ rq->q = data->q; rq->mq_ctx = data->ctx; + rq->mq_hctx = data->hctx; rq->rq_flags = rq_flags; rq->cmd_flags = op; if (data->flags & BLK_MQ_REQ_PREEMPT) @@ -472,10 +473,11 @@ static void __blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; struct blk_mq_ctx *ctx = rq->mq_ctx; - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->cmd_flags, ctx->cpu); + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; const int sched_tag = rq->internal_tag; blk_pm_mark_last_busy(rq); + rq->mq_hctx = NULL; if (rq->tag != -1) blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); if (sched_tag != -1) @@ -489,7 +491,7 @@ void 
blk_mq_free_request(struct request *rq) struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; struct blk_mq_ctx *ctx = rq->mq_ctx; - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->cmd_flags, ctx->cpu); + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; if (rq->rq_flags & RQF_ELVPRIV) { if (e && e->type->ops.finish_request) @@ -983,7 +985,7 @@ bool blk_mq_get_driver_tag(struct request *rq) { struct blk_mq_alloc_data data = { .q = rq->q, - .hctx = blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu), + .hctx = rq->mq_hctx, .flags = BLK_MQ_REQ_NOWAIT, .cmd_flags = rq->cmd_flags, }; @@ -1149,7 +1151,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, rq = list_first_entry(list, struct request, queuelist); - hctx = blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); + hctx = rq->mq_hctx; if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) break; @@ -1579,9 +1581,7 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, */ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue) { - struct blk_mq_ctx *ctx = rq->mq_ctx; - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, rq->cmd_flags, - ctx->cpu); + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; spin_lock(&hctx->lock); list_add_tail(&rq->queuelist, &hctx->dispatch); @@ -1790,9 +1790,7 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq) blk_status_t ret; int srcu_idx; blk_qc_t unused_cookie; - struct blk_mq_ctx *ctx = rq->mq_ctx; - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, rq->cmd_flags, - ctx->cpu); + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; hctx_lock(hctx, &srcu_idx); ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true); @@ -1917,9 +1915,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_put_ctx(data.ctx); if (same_queue_rq) { - data.hctx = blk_mq_map_queue(q, - same_queue_rq->cmd_flags, - same_queue_rq->mq_ctx->cpu); + data.hctx = 
same_queue_rq->mq_hctx; blk_mq_try_issue_directly(data.hctx, same_queue_rq, &cookie); } diff --git a/block/blk-mq.h b/block/blk-mq.h index 053862270125..facb6e9ddce4 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -223,13 +223,10 @@ static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx, static inline void blk_mq_put_driver_tag(struct request *rq) { - struct blk_mq_hw_ctx *hctx; - if (rq->tag == -1 || rq->internal_tag == -1) return; - hctx = blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); - __blk_mq_put_driver_tag(hctx, rq); + __blk_mq_put_driver_tag(rq->mq_hctx, rq); } static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2ae7465d68ab..9b1f470cc784 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -129,6 +129,7 @@ enum mq_rq_state { struct request { struct request_queue *q; struct blk_mq_ctx *mq_ctx; + struct blk_mq_hw_ctx *mq_hctx; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; -- cgit v1.2.3 From 843477d4cc5c4bb4e346c561ecd3b9d0bd67e8c8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Oct 2018 13:16:11 -0600 Subject: blk-mq: initial support for multiple queue maps Add a queue offset to the tag map. This enables users to map iteratively, for each queue map type they support. Bump maximum number of supported maps to 2, we're now fully able to support more than 1 map. 
Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-mq-cpumap.c | 9 +++++---- block/blk-mq-pci.c | 2 +- block/blk-mq-virtio.c | 2 +- include/linux/blk-mq.h | 3 ++- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 6e6686c55984..03a534820271 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -14,9 +14,10 @@ #include "blk.h" #include "blk-mq.h" -static int cpu_to_queue_index(unsigned int nr_queues, const int cpu) +static int cpu_to_queue_index(struct blk_mq_queue_map *qmap, + unsigned int nr_queues, const int cpu) { - return cpu % nr_queues; + return qmap->queue_offset + (cpu % nr_queues); } static int get_first_sibling(unsigned int cpu) @@ -44,11 +45,11 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap) * performace optimizations. */ if (cpu < nr_queues) { - map[cpu] = cpu_to_queue_index(nr_queues, cpu); + map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu); } else { first_sibling = get_first_sibling(cpu); if (first_sibling == cpu) - map[cpu] = cpu_to_queue_index(nr_queues, cpu); + map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu); else map[cpu] = map[first_sibling]; } diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c index 40333d60a850..1dce18553984 100644 --- a/block/blk-mq-pci.c +++ b/block/blk-mq-pci.c @@ -43,7 +43,7 @@ int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, goto fallback; for_each_cpu(cpu, mask) - qmap->mq_map[cpu] = queue; + qmap->mq_map[cpu] = qmap->queue_offset + queue; } return 0; diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c index 661fbfef480f..370827163835 100644 --- a/block/blk-mq-virtio.c +++ b/block/blk-mq-virtio.c @@ -44,7 +44,7 @@ int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, goto fallback; for_each_cpu(cpu, mask) - qmap->mq_map[cpu] = queue; + qmap->mq_map[cpu] = qmap->queue_offset + queue; } return 
0; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8994c95056a8..729ce0f00433 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -78,10 +78,11 @@ struct blk_mq_hw_ctx { struct blk_mq_queue_map { unsigned int *mq_map; unsigned int nr_queues; + unsigned int queue_offset; }; enum { - HCTX_MAX_TYPES = 1, + HCTX_MAX_TYPES = 2, }; struct blk_mq_tag_set { -- cgit v1.2.3 From d1e36282b0bbd5de6a9c4d5275e94ef3b3438f48 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 29 Aug 2018 10:36:56 -0600 Subject: block: add REQ_HIPRI and inherit it from IOCB_HIPRI We use IOCB_HIPRI to poll for IO in the caller instead of scheduling. This information is not available for (or after) IO submission. The driver may make different queue choices based on the type of IO, so make the fact that we will poll for this IO known to the lower layers as well. Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 ++ fs/direct-io.c | 2 ++ fs/iomap.c | 9 ++++++++- include/linux/blk_types.h | 4 +++- 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/block_dev.c b/fs/block_dev.c index a80b4f0ee7c4..c039abfb2052 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -232,6 +232,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, bio.bi_opf = dio_bio_write_op(iocb); task_io_account_write(ret); } + if (iocb->ki_flags & IOCB_HIPRI) + bio.bi_opf |= REQ_HIPRI; qc = submit_bio(&bio); for (;;) { diff --git a/fs/direct-io.c b/fs/direct-io.c index 722d17c88edb..ea07d5a34317 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1265,6 +1265,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, } else { dio->op = REQ_OP_READ; } + if (iocb->ki_flags & IOCB_HIPRI) + dio->op_flags |= REQ_HIPRI; /* * For AIO O_(D)SYNC writes we need to defer completions to a workqueue diff --git a/fs/iomap.c b/fs/iomap.c index 
64ce240217a1..f61d13dfdf09 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1553,6 +1553,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, unsigned len) { struct page *page = ZERO_PAGE(0); + int flags = REQ_SYNC | REQ_IDLE; struct bio *bio; bio = bio_alloc(GFP_KERNEL, 1); @@ -1561,9 +1562,12 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; + if (dio->iocb->ki_flags & IOCB_HIPRI) + flags |= REQ_HIPRI; + get_page(page); __bio_add_page(bio, page, len, 0); - bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE); + bio_set_op_attrs(bio, REQ_OP_WRITE, flags); atomic_inc(&dio->ref); return submit_bio(bio); @@ -1662,6 +1666,9 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, bio_set_pages_dirty(bio); } + if (dio->iocb->ki_flags & IOCB_HIPRI) + bio->bi_opf |= REQ_HIPRI; + iov_iter_advance(dio->submit.iter, n); dio->size += n; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1dcf652ba0aa..dbdbfbd6a987 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -323,6 +323,8 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ + __REQ_HIPRI, + /* for driver use */ __REQ_DRV, __REQ_SWAP, /* swapping request. 
*/ @@ -343,8 +345,8 @@ enum req_flag_bits { #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) - #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) +#define REQ_HIPRI (1ULL << __REQ_HIPRI) #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) -- cgit v1.2.3 From 4b04cc6a8f86c4842314def22332de1f15de8523 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 Nov 2018 12:44:33 -0700 Subject: nvme: add separate poll queue map Adds support for defining a variable number of poll queues, currently configurable with the 'poll_queues' module parameter. Defaults to a single poll queue. And now we finally have poll support without triggering interrupts! Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 97 ++++++++++++++++++++++++++++++++++++++++--------- include/linux/blk-mq.h | 2 +- 2 files changed, 81 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1987df13b73e..6aa86dfcb32c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -86,6 +86,10 @@ MODULE_PARM_DESC(write_queues, "Number of queues to use for writes. 
If not set, reads and writes " "will share a queue set."); +static int poll_queues = 1; +module_param_cb(poll_queues, &queue_count_ops, &poll_queues, 0644); +MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); + struct nvme_dev; struct nvme_queue; @@ -94,6 +98,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); enum { NVMEQ_TYPE_READ, NVMEQ_TYPE_WRITE, + NVMEQ_TYPE_POLL, NVMEQ_TYPE_NR, }; @@ -202,6 +207,7 @@ struct nvme_queue { u16 last_cq_head; u16 qid; u8 cq_phase; + u8 polled; u32 *dbbuf_sq_db; u32 *dbbuf_cq_db; u32 *dbbuf_sq_ei; @@ -250,7 +256,7 @@ static inline void _nvme_check_size(void) static unsigned int max_io_queues(void) { - return num_possible_cpus() + write_queues; + return num_possible_cpus() + write_queues + poll_queues; } static unsigned int max_queue_count(void) @@ -500,8 +506,15 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) offset = queue_irq_offset(dev); } + /* + * The poll queue(s) doesn't have an IRQ (and hence IRQ + * affinity), so use the regular blk-mq cpu mapping + */ map->queue_offset = qoff; - blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset); + if (i != NVMEQ_TYPE_POLL) + blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset); + else + blk_mq_map_queues(map); qoff += map->nr_queues; offset += map->nr_queues; } @@ -892,7 +905,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * We should not need to do this, but we're still using this to * ensure we can drain requests on a dying queue. 
*/ - if (unlikely(nvmeq->cq_vector < 0)) + if (unlikely(nvmeq->cq_vector < 0 && !nvmeq->polled)) return BLK_STS_IOERR; ret = nvme_setup_cmd(ns, req, &cmnd); @@ -921,6 +934,8 @@ out_free_cmd: static int nvme_rq_flags_to_type(struct request_queue *q, unsigned int flags) { + if ((flags & REQ_HIPRI) && test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return NVMEQ_TYPE_POLL; if ((flags & REQ_OP_MASK) == REQ_OP_READ) return NVMEQ_TYPE_READ; @@ -1094,7 +1109,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq, s16 vector) { struct nvme_command c; - int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; + int flags = NVME_QUEUE_PHYS_CONTIG; + + if (vector != -1) + flags |= NVME_CQ_IRQ_ENABLED; /* * Note: we (ab)use the fact that the prp fields survive if no data @@ -1106,7 +1124,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cqid = cpu_to_le16(qid); c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_cq.cq_flags = cpu_to_le16(flags); - c.create_cq.irq_vector = cpu_to_le16(vector); + if (vector != -1) + c.create_cq.irq_vector = cpu_to_le16(vector); + else + c.create_cq.irq_vector = 0; return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); } @@ -1348,13 +1369,14 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) int vector; spin_lock_irq(&nvmeq->cq_lock); - if (nvmeq->cq_vector == -1) { + if (nvmeq->cq_vector == -1 && !nvmeq->polled) { spin_unlock_irq(&nvmeq->cq_lock); return 1; } vector = nvmeq->cq_vector; nvmeq->dev->online_queues--; nvmeq->cq_vector = -1; + nvmeq->polled = false; spin_unlock_irq(&nvmeq->cq_lock); /* @@ -1366,7 +1388,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); - pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq); + if (vector != -1) + pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq); return 0; } @@ -1500,7 +1523,7 @@ static void 
nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) spin_unlock_irq(&nvmeq->cq_lock); } -static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) +static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) { struct nvme_dev *dev = nvmeq->dev; int result; @@ -1510,7 +1533,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) * A queue's vector matches the queue identifier unless the controller * has only one vector available. */ - vector = dev->num_vecs == 1 ? 0 : qid; + if (!polled) + vector = dev->num_vecs == 1 ? 0 : qid; + else + vector = -1; + result = adapter_alloc_cq(dev, qid, nvmeq, vector); if (result) return result; @@ -1527,15 +1554,20 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) * xxx' warning if the create CQ/SQ command times out. */ nvmeq->cq_vector = vector; + nvmeq->polled = polled; nvme_init_queue(nvmeq, qid); - result = queue_request_irq(nvmeq); - if (result < 0) - goto release_sq; + + if (vector != -1) { + result = queue_request_irq(nvmeq); + if (result < 0) + goto release_sq; + } return result; release_sq: nvmeq->cq_vector = -1; + nvmeq->polled = false; dev->online_queues--; adapter_delete_sq(dev, qid); release_cq: @@ -1686,7 +1718,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) static int nvme_create_io_queues(struct nvme_dev *dev) { - unsigned i, max; + unsigned i, max, rw_queues; int ret = 0; for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { @@ -1697,8 +1729,17 @@ static int nvme_create_io_queues(struct nvme_dev *dev) } max = min(dev->max_qid, dev->ctrl.queue_count - 1); + if (max != 1 && dev->io_queues[NVMEQ_TYPE_POLL]) { + rw_queues = dev->io_queues[NVMEQ_TYPE_READ] + + dev->io_queues[NVMEQ_TYPE_WRITE]; + } else { + rw_queues = max; + } + for (i = dev->online_queues; i <= max; i++) { - ret = nvme_create_queue(&dev->queues[i], i); + bool polled = i > rw_queues; + + ret = nvme_create_queue(&dev->queues[i], i, polled); if (ret) break; } @@ -1973,6 +2014,7 
@@ static int nvme_setup_host_mem(struct nvme_dev *dev) static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues) { unsigned int this_w_queues = write_queues; + unsigned int this_p_queues = poll_queues; /* * Setup read/write queue split @@ -1980,9 +2022,28 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues) if (nr_io_queues == 1) { dev->io_queues[NVMEQ_TYPE_READ] = 1; dev->io_queues[NVMEQ_TYPE_WRITE] = 0; + dev->io_queues[NVMEQ_TYPE_POLL] = 0; return; } + /* + * Configure number of poll queues, if set + */ + if (this_p_queues) { + /* + * We need at least one queue left. With just one queue, we'll + * have a single shared read/write set. + */ + if (this_p_queues >= nr_io_queues) { + this_w_queues = 0; + this_p_queues = nr_io_queues - 1; + } + + dev->io_queues[NVMEQ_TYPE_POLL] = this_p_queues; + nr_io_queues -= this_p_queues; + } else + dev->io_queues[NVMEQ_TYPE_POLL] = 0; + /* * If 'write_queues' is set, ensure it leaves room for at least * one read queue @@ -2099,11 +2160,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return -EIO; dev->num_vecs = result; - dev->max_qid = max(result - 1, 1); + result = max(result - 1, 1); + dev->max_qid = result + dev->io_queues[NVMEQ_TYPE_POLL]; - dev_info(dev->ctrl.device, "%d/%d read/write queues\n", + dev_info(dev->ctrl.device, "%d/%d/%d read/write/poll queues\n", dev->io_queues[NVMEQ_TYPE_READ], - dev->io_queues[NVMEQ_TYPE_WRITE]); + dev->io_queues[NVMEQ_TYPE_WRITE], + dev->io_queues[NVMEQ_TYPE_POLL]); /* * Should investigate if there's a performance win from allocating diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 729ce0f00433..9f5e93f40857 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -82,7 +82,7 @@ struct blk_mq_queue_map { }; enum { - HCTX_MAX_TYPES = 2, + HCTX_MAX_TYPES = 3, }; struct blk_mq_tag_set { -- cgit v1.2.3 From 24c9d423e86b17b25b4b510e81f10aa232fdaa60 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: 
Tue, 6 Nov 2018 10:39:15 -0800 Subject: EDAC, skx: Fix randconfig builds in a better way It was previously noted that Kconfig complained about unmet dependencies when trying to configure skx_edac together with CONFIG_ACPI=n. First fix for this checked for ACPI when doing select ACPI_ADXL but this required stub functions for the case where ACPI wasn't selected. It also allowed building a driver that didn't actually work for a system that has non-volatile DIMMs. Arnd Bergmann pointed out that the right fix is to make EDAC_SKX "depend on ACPI". Fixes: a324e9396ca3 ("EDAC, skx: Fix randconfig builds") Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov CC: "Rafael J. Wysocki" CC: Arnd Bergmann CC: Mauro Carvalho Chehab CC: linux-edac CC: qiuxu.zhuo@intel.com Link: http://lkml.kernel.org/r/20181106183914.GA26731@agluck-desk --- drivers/edac/Kconfig | 4 ++-- include/linux/adxl.h | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index ec2727b27556..e286b5b99003 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -231,10 +231,10 @@ config EDAC_SBRIDGE config EDAC_SKX tristate "Intel Skylake server Integrated MC" - depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG && ACPI depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y select DMI - select ACPI_ADXL if ACPI + select ACPI_ADXL help Support for error detection and correction the Intel Skylake server Integrated Memory Controllers. 
If your diff --git a/include/linux/adxl.h b/include/linux/adxl.h index 2d29f55923e3..2a629acb4c3f 100644 --- a/include/linux/adxl.h +++ b/include/linux/adxl.h @@ -7,12 +7,7 @@ #ifndef _LINUX_ADXL_H #define _LINUX_ADXL_H -#ifdef CONFIG_ACPI_ADXL const char * const *adxl_get_component_names(void); int adxl_decode(u64 addr, u64 component_values[]); -#else -static inline const char * const *adxl_get_component_names(void) { return NULL; } -static inline int adxl_decode(u64 addr, u64 component_values[]) { return -EOPNOTSUPP; } -#endif #endif /* _LINUX_ADXL_H */ -- cgit v1.2.3 From 60fb9567bf30937e6bedfa939d7c8fd4ee6a1b1c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:28 +0100 Subject: udp: implement complete book-keeping for encap_needed The *encap_needed static keys are enabled by UDP tunnels and several UDP encapsulation types, but they are never turned off. This can cause unneeded overall performance degradation for systems where such features are used transiently. This patch introduces complete book-keeping for such keys, decreasing the usage at socket destruction time, if needed, and avoiding that the same socket could increase the key usage multiple times. rfc v3 -> v1: - add socket lock around udp_tunnel_encap_enable() rfc v2 -> rfc v3: - use udp_tunnel_encap_enable() in setsockopt() Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/udp.h | 7 ++++++- include/net/udp_tunnel.h | 6 ++++++ net/ipv4/udp.c | 19 +++++++++++++------ net/ipv6/udp.c | 14 +++++++++----- 4 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 320d49d85484..a4dafff407fb 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -49,7 +49,12 @@ struct udp_sock { unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX?
*/ - no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ + no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ + encap_enabled:1; /* This socket enabled encap + * processing; UDP tunnels and + * different encapsulation layer set + * this + */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index fe680ab6b15a..3fbe56430e3b 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -165,6 +165,12 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) static inline void udp_tunnel_encap_enable(struct socket *sock) { + struct udp_sock *up = udp_sk(sock->sk); + + if (up->encap_enabled) + return; + + up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) if (sock->sk->sk_family == PF_INET6) ipv6_stub->udpv6_encap_enable(); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf73c9194bb6..f81409921e27 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -115,6 +115,7 @@ #include "udp_impl.h" #include #include +#include struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ -2395,11 +2396,15 @@ void udp_destroy_sock(struct sock *sk) bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); - if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { - void (*encap_destroy)(struct sock *sk); - encap_destroy = READ_ONCE(up->encap_destroy); - if (encap_destroy) - encap_destroy(sk); + if (static_branch_unlikely(&udp_encap_needed_key)) { + if (up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = READ_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + if (up->encap_enabled) + static_branch_disable(&udp_encap_needed_key); } } @@ -2444,7 +2449,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, /* FALLTHROUGH */ case UDP_ENCAP_L2TPINUDP: up->encap_type = val; - udp_encap_enable(); + lock_sock(sk); + 
udp_tunnel_encap_enable(sk->sk_socket); + release_sock(sk); break; default: err = -ENOPROTOOPT; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a25571c12a8a..bdf7e071a63b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1456,11 +1456,15 @@ void udpv6_destroy_sock(struct sock *sk) udp_v6_flush_pending_frames(sk); release_sock(sk); - if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { - void (*encap_destroy)(struct sock *sk); - encap_destroy = READ_ONCE(up->encap_destroy); - if (encap_destroy) - encap_destroy(sk); + if (static_branch_unlikely(&udpv6_encap_needed_key)) { + if (up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = READ_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + if (up->encap_enabled) + static_branch_disable(&udpv6_encap_needed_key); } inet6_destroy_sock(sk); -- cgit v1.2.3 From e20cf8d3f1f763ad28a9cb3b41305b8a8a42653e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:29 +0100 Subject: udp: implement GRO for plain UDP sockets. This is the RX counterpart of commit bec1f6f69736 ("udp: generate gso with UDP_SEGMENT"). When UDP_GRO is enabled, such socket is also eligible for GRO in the rx path: UDP segments directed to such socket are assembled into a larger GSO_UDP_L4 packet. The core UDP GRO support is enabled with setsockopt(UDP_GRO). Initial benchmark numbers: Before: udp rx: 1079 MB/s 769065 calls/s After: udp rx: 1466 MB/s 24877 calls/s This change introduces a side effect in respect to UDP tunnels: after a UDP tunnel creation, now the kernel performs a lookup per ingress UDP packet, while before such lookup happened only if the ingress packet carried a valid internal header csum. 
rfc v2 -> rfc v3: - fixed typos in macro name and comments - really enforce UDP_GRO_CNT_MAX, instead of UDP_GRO_CNT_MAX + 1 - acquire socket lock in UDP_GRO setsockopt rfc v1 -> rfc v2: - use a new option to enable UDP GRO - use static keys to protect the UDP GRO socket lookup Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/udp.h | 3 +- include/uapi/linux/udp.h | 1 + net/ipv4/udp.c | 8 ++++ net/ipv4/udp_offload.c | 109 +++++++++++++++++++++++++++++++++++++---------- net/ipv6/udp_offload.c | 6 +-- 5 files changed, 99 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index a4dafff407fb..f613b329852e 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -50,11 +50,12 @@ struct udp_sock { __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - encap_enabled:1; /* This socket enabled encap + encap_enabled:1, /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set * this */ + gro_enabled:1; /* Can accept GRO packets */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. 
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 09502de447f5..30baccb6c9c4 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -33,6 +33,7 @@ struct udphdr { #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ #define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ #define UDP_SEGMENT 103 /* Set GSO segmentation size */ +#define UDP_GRO 104 /* This socket can receive UDP GRO packets */ /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f81409921e27..9fc08b098ced 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2473,6 +2473,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, up->gso_size = val; break; + case UDP_GRO: + lock_sock(sk); + if (valbool) + udp_tunnel_encap_enable(sk->sk_socket); + up->gro_enabled = valbool; + release_sock(sk); + break; + /* * UDP-Lite's partial checksum coverage (RFC 3828). 
*/ diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 802f2bc00d69..0646d61f4fa8 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -343,6 +343,54 @@ out: return segs; } +#define UDP_GRO_CNT_MAX 64 +static struct sk_buff *udp_gro_receive_segment(struct list_head *head, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + struct sk_buff *pp = NULL; + struct udphdr *uh2; + struct sk_buff *p; + + /* requires non zero csum, for symmetry with GSO */ + if (!uh->check) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + /* pull encapsulating udp header */ + skb_gro_pull(skb, sizeof(struct udphdr)); + skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); + + list_for_each_entry(p, head, list) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + uh2 = udp_hdr(p); + + /* Match ports only, as csum is always non zero */ + if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + /* Terminate the flow on len mismatch or if it grow "too much". 
+ * Under small packet flood GRO count could elsewhere grow a lot + * leading to execessive truesize values + */ + if (!skb_gro_receive(p, skb) && + NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) + pp = p; + else if (uh->len != uh2->len) + pp = p; + + return pp; + } + + /* mismatch, but we never need to flush */ + return NULL; +} + struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, struct udphdr *uh, udp_lookup_t lookup) { @@ -353,23 +401,27 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, int flush = 1; struct sock *sk; + rcu_read_lock(); + sk = (*lookup)(skb, uh->source, uh->dest); + if (!sk) + goto out_unlock; + + if (udp_sk(sk)->gro_enabled) { + pp = call_gro_receive(udp_gro_receive_segment, head, skb); + rcu_read_unlock(); + return pp; + } + if (NAPI_GRO_CB(skb)->encap_mark || (skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && - !NAPI_GRO_CB(skb)->csum_valid)) - goto out; + !NAPI_GRO_CB(skb)->csum_valid) || + !udp_sk(sk)->gro_receive) + goto out_unlock; /* mark that this skb passed once through the tunnel gro layer */ NAPI_GRO_CB(skb)->encap_mark = 1; - rcu_read_lock(); - sk = (*lookup)(skb, uh->source, uh->dest); - - if (sk && udp_sk(sk)->gro_receive) - goto unflush; - goto out_unlock; - -unflush: flush = 0; list_for_each_entry(p, head, list) { @@ -394,7 +446,6 @@ unflush: out_unlock: rcu_read_unlock(); -out: skb_gro_flush_final(skb, pp, flush); return pp; } @@ -427,6 +478,19 @@ flush: return NULL; } +static int udp_gro_complete_segment(struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + + skb->csum_start = (unsigned char *)uh - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; + return 0; +} + int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup) { @@ -437,16 +501,21 @@ int udp_gro_complete(struct 
sk_buff *skb, int nhoff, uh->len = newlen; - /* Set encapsulation before calling into inner gro_complete() functions - * to make them set up the inner offsets. - */ - skb->encapsulation = 1; - rcu_read_lock(); sk = (*lookup)(skb, uh->source, uh->dest); - if (sk && udp_sk(sk)->gro_complete) + if (sk && udp_sk(sk)->gro_enabled) { + err = udp_gro_complete_segment(skb); + } else if (sk && udp_sk(sk)->gro_complete) { + skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM + : SKB_GSO_UDP_TUNNEL; + + /* Set encapsulation before calling into inner gro_complete() + * functions to make them set up the inner offsets. + */ + skb->encapsulation = 1; err = udp_sk(sk)->gro_complete(sk, skb, nhoff + sizeof(struct udphdr)); + } rcu_read_unlock(); if (skb->remcsum_offload) @@ -461,13 +530,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) const struct iphdr *iph = ip_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) { - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + if (uh->check) uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, iph->daddr, 0); - } else { - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; - } return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb); } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 1b8e161ac527..828b2457f97b 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) { - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + if (uh->check) uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, &ipv6h->daddr, 0); - } else { - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; - } return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb); } -- cgit v1.2.3 From bcd1665e3569b0a6f569514f023a41fc7df0b4a3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: 
Wed, 7 Nov 2018 12:38:30 +0100 Subject: udp: add support for UDP_GRO cmsg When UDP GRO is enabled, the UDP_GRO cmsg will carry the ingress datagram size. User-space can use such info to compute the original packets layout. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/udp.h | 11 +++++++++++ net/ipv4/udp.c | 4 ++++ net/ipv6/udp.c | 3 +++ 3 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index f613b329852e..e23d5024f42f 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -121,6 +121,17 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } +static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + int gso_size; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + gso_size = skb_shinfo(skb)->gso_size; + put_cmsg(msg, SOL_UDP, UDP_GRO, sizeof(gso_size), &gso_size); + } +} + #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9fc08b098ced..dddc6fe90f51 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1711,6 +1711,10 @@ try_again: memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); *addr_len = sizeof(*sin); } + + if (udp_sk(sk)->gro_enabled) + udp_cmsg_recv(msg, sk, skb); + if (inet->cmsg_flags) ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bdf7e071a63b..4c79dc5329bc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -417,6 +417,9 @@ try_again: *addr_len = sizeof(*sin6); } + if (udp_sk(sk)->gro_enabled) + udp_cmsg_recv(msg, sk, skb); + if (np->rxopt.all) ip6_datagram_recv_common_ctl(sk, msg, skb); -- cgit v1.2.3 From cf329aa42b6659204fee865bbce0ea20462552eb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:33 +0100 Subject: udp: cope with UDP GRO packet misdirection In some 
scenarios, the GRO engine can assemble an UDP GRO packet that ultimately lands on a non GRO-enabled socket. This patch tries to address the issue explicitly checking for the UDP socket features before enqueuing the packet, and eventually segmenting the unexpected GRO packet, as needed. We must also cope with re-insertion requests: after segmentation the UDP code calls the helper introduced by the previous patches, as needed. Segmentation is performed by a common helper, which takes care of updating socket and protocol stats in case of failure. rfc v3 -> v1 - fix compile issues with rxrpc - when gso_segment returns NULL, treat it as an error - added 'ipv4' argument to udp_rcv_segment() rfc v2 -> rfc v3 - moved udp_rcv_segment() into net/udp.h, account errors to socket and ns, always return NULL or segs list Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/udp.h | 6 ++++++ include/net/udp.h | 45 +++++++++++++++++++++++++++++++++++++-------- net/ipv4/udp.c | 23 ++++++++++++++++++++++- net/ipv6/udp.c | 24 +++++++++++++++++++++++- 4 files changed, 88 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index e23d5024f42f..0a9c54e76305 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -132,6 +132,12 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) +{ + return !udp_sk(sk)->gro_enabled && skb_is_gso(skb) && + skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4; +} + #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) diff --git a/include/net/udp.h b/include/net/udp.h index a496e441645e..eccca2325ee6 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -417,17 +417,24 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, } while(0) #if IS_ENABLED(CONFIG_IPV6) -#define __UDPX_INC_STATS(sk,
field) \ -do { \ - if ((sk)->sk_family == AF_INET) \ - __UDP_INC_STATS(sock_net(sk), field, 0); \ - else \ - __UDP6_INC_STATS(sock_net(sk), field, 0); \ -} while (0) +#define __UDPX_MIB(sk, ipv4) \ +({ \ + ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ + sock_net(sk)->mib.udp_statistics) : \ + (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_stats_in6 : \ + sock_net(sk)->mib.udp_stats_in6); \ +}) #else -#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0) +#define __UDPX_MIB(sk, ipv4) \ +({ \ + IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ + sock_net(sk)->mib.udp_statistics; \ +}) #endif +#define __UDPX_INC_STATS(sk, field) \ + __SNMP_INC_STATS(__UDPX_MIB(sk, (sk)->sk_family == AF_INET), field) + #ifdef CONFIG_PROC_FS struct udp_seq_afinfo { sa_family_t family; @@ -461,4 +468,26 @@ DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); #endif +static inline struct sk_buff *udp_rcv_segment(struct sock *sk, + struct sk_buff *skb, bool ipv4) +{ + struct sk_buff *segs; + + /* the GSO CB lays after the UDP one, no need to save and restore any + * CB fragment + */ + segs = __skb_gso_segment(skb, NETIF_F_SG, false); + if (unlikely(IS_ERR_OR_NULL(segs))) { + int segs_nr = skb_shinfo(skb)->gso_segs; + + atomic_add(segs_nr, &sk->sk_drops); + SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); + kfree_skb(skb); + return NULL; + } + + consume_skb(skb); + return segs; +} + #endif /* _UDP_H */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dddc6fe90f51..3488650b90ac 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1906,7 +1906,7 @@ EXPORT_SYMBOL(udp_encap_enable); * Note that in the success and error cases, the skb is assumed to * have either been requeued or freed. 
*/ -static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); @@ -2009,6 +2009,27 @@ drop: return -1; } +static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *next, *segs; + int ret; + + if (likely(!udp_unexpected_gso(sk, skb))) + return udp_queue_rcv_one_skb(sk, skb); + + BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_SGO_CB_OFFSET); + __skb_push(skb, -skb_mac_offset(skb)); + segs = udp_rcv_segment(sk, skb, true); + for (skb = segs; skb; skb = next) { + next = skb->next; + __skb_pull(skb, skb_transport_offset(skb)); + ret = udp_queue_rcv_one_skb(sk, skb); + if (ret > 0) + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, -ret); + } + return 0; +} + /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4c79dc5329bc..c55698d19d68 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -554,7 +554,7 @@ void udpv6_encap_enable(void) } EXPORT_SYMBOL(udpv6_encap_enable); -static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); @@ -637,6 +637,28 @@ drop: return -1; } +static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *next, *segs; + int ret; + + if (likely(!udp_unexpected_gso(sk, skb))) + return udpv6_queue_rcv_one_skb(sk, skb); + + __skb_push(skb, -skb_mac_offset(skb)); + segs = udp_rcv_segment(sk, skb, false); + for (skb = segs; skb; skb = next) { + next = skb->next; + __skb_pull(skb, skb_transport_offset(skb)); + + ret = udpv6_queue_rcv_one_skb(sk, skb); + if (ret > 0) + ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret, + true); + } + return 0; +} + static bool 
__udp_v6_is_mcast_sock(struct net *net, struct sock *sk, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, -- cgit v1.2.3 From 8572a1b4dbc0e03d7082d8e8f7a282c0f55c3ca5 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Tue, 6 Nov 2018 16:37:44 -0800 Subject: net: phy: bcm7xxx: Add entry for BCM7255 Add support for BCM7255 EPHY. Signed-off-by: Justin Chen Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 2 ++ include/linux/brcmphy.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index b2b6307d64a4..712224cc442d 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -650,6 +650,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) static struct phy_driver bcm7xxx_driver[] = { BCM7XXX_28NM_GPHY(PHY_ID_BCM7250, "Broadcom BCM7250"), + BCM7XXX_28NM_EPHY(PHY_ID_BCM7255, "Broadcom BCM7255"), BCM7XXX_28NM_EPHY(PHY_ID_BCM7260, "Broadcom BCM7260"), BCM7XXX_28NM_EPHY(PHY_ID_BCM7268, "Broadcom BCM7268"), BCM7XXX_28NM_EPHY(PHY_ID_BCM7271, "Broadcom BCM7271"), @@ -670,6 +671,7 @@ static struct phy_driver bcm7xxx_driver[] = { static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { { PHY_ID_BCM7250, 0xfffffff0, }, + { PHY_ID_BCM7255, 0xfffffff0, }, { PHY_ID_BCM7260, 0xfffffff0, }, { PHY_ID_BCM7268, 0xfffffff0, }, { PHY_ID_BCM7271, 0xfffffff0, }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 949e9af8d9d6..9cd00a37b8d3 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -28,6 +28,7 @@ #define PHY_ID_BCM89610 0x03625cd0 #define PHY_ID_BCM7250 0xae025280 +#define PHY_ID_BCM7255 0xae025120 #define PHY_ID_BCM7260 0xae025190 #define PHY_ID_BCM7268 0xae025090 #define PHY_ID_BCM7271 0xae0253b0 -- cgit v1.2.3 From a3320bcf28e07163354b0acfad874bf46209df63 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 7 Nov 
2018 08:15:58 +0100 Subject: net: phy: make phy_trigger_machine static phy_trigger_machine() is used in phy.c only, so we can make it static. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 33 ++++++++++++--------------------- include/linux/phy.h | 1 - 2 files changed, 12 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1d73ac3309ce..476578746d91 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -467,6 +467,18 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) } EXPORT_SYMBOL(phy_mii_ioctl); +static void phy_queue_state_machine(struct phy_device *phydev, + unsigned int secs) +{ + mod_delayed_work(system_power_efficient_wq, &phydev->state_queue, + secs * HZ); +} + +static void phy_trigger_machine(struct phy_device *phydev) +{ + phy_queue_state_machine(phydev, 0); +} + static int phy_config_aneg(struct phy_device *phydev) { if (phydev->drv->config_aneg) @@ -620,13 +632,6 @@ int phy_speed_up(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(phy_speed_up); -static void phy_queue_state_machine(struct phy_device *phydev, - unsigned int secs) -{ - mod_delayed_work(system_power_efficient_wq, &phydev->state_queue, - secs * HZ); -} - /** * phy_start_machine - start PHY state machine tracking * @phydev: the phy_device struct @@ -643,20 +648,6 @@ void phy_start_machine(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(phy_start_machine); -/** - * phy_trigger_machine - trigger the state machine to run - * - * @phydev: the phy_device struct - * - * Description: There has been a change in state which requires that the - * state machine runs. 
- */ - -void phy_trigger_machine(struct phy_device *phydev) -{ - phy_queue_state_machine(phydev, 0); -} - /** * phy_stop_machine - stop the PHY state machine tracking * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index 3ea87f774a76..9e4d49ef4bca 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1054,7 +1054,6 @@ void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); -void phy_trigger_machine(struct phy_device *phydev); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd); -- cgit v1.2.3 From c8accd5a0a6abfc0405a331afa5bfc06ee92623a Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Wed, 7 Nov 2018 18:07:02 +0100 Subject: net/vlan: introduce __vlan_hwaccel_clear_tag() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 83ea4df6ab81..c438fa0a1c6a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -461,6 +461,17 @@ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, return skb; } +/** + * __vlan_hwaccel_clear_tag - clear hardware accelerated VLAN info + * @skb: skbuff to clear + * + * Clears the VLAN information from @skb + */ +static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) +{ + skb->vlan_tci = 0; +} + /* * __vlan_hwaccel_push_inside - pushes vlan tag to the payload * @skb: skbuff to tag @@ -475,7 +486,7 @@ static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); if (likely(skb)) - skb->vlan_tci = 0; + __vlan_hwaccel_clear_tag(skb); return skb; } -- cgit v1.2.3 From e0a6b8097351255a2dbbb45274a8b9c52850cbb6 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Wed, 7 Nov 2018 18:07:02 +0100 Subject: net/vlan: introduce __vlan_hwaccel_copy_tag() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index c438fa0a1c6a..941da4bf3929 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -472,6 +472,19 @@ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) skb->vlan_tci = 0; } +/** + * __vlan_hwaccel_copy_tag - copy hardware accelerated VLAN info from another skb + * @dst: skbuff to copy to + * @src: skbuff to copy from + * + * Copies VLAN information from @src to @dst (for branchless code) + */ +static inline void __vlan_hwaccel_copy_tag(struct sk_buff *dst, const struct sk_buff *src) +{ + dst->vlan_proto = src->vlan_proto; + dst->vlan_tci = src->vlan_tci; +} + /* * __vlan_hwaccel_push_inside - pushes vlan tag to the payload * @skb: skbuff to tag -- cgit v1.2.3 From 9b319148cb34ecccacff09eca87765c87d5e19ff Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Wed, 7 Nov 2018 18:07:03 +0100 Subject: net/vlan: include the shift in skb_vlan_tag_get_prio() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 2 +- net/core/flow_dissector.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 941da4bf3929..b14bf87999aa 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -81,7 +81,7 @@ static inline bool is_vlan_dev(const struct net_device *dev) #define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) -#define skb_vlan_tag_get_prio(__skb) ((__skb)->vlan_tci & VLAN_PRIO_MASK) +#define skb_vlan_tag_get_prio(__skb) (((__skb)->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT) static inline int vlan_get_rx_ctag_filter_info(struct net_device *dev) { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 676f3ad629f9..56d1e9b73142 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -952,8 +952,7 @@ proto_again: if (!vlan) { key_vlan->vlan_id = skb_vlan_tag_get_id(skb); - key_vlan->vlan_priority = - (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT); + key_vlan->vlan_priority = skb_vlan_tag_get_prio(skb); } else { key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) & VLAN_VID_MASK; -- cgit v1.2.3 From 295d072a42fe1a654e765fffcaadb2f08a692dd0 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Wed, 7 Nov 2018 18:07:03 +0100 Subject: net/vlan: remove unused #define HAVE_VLAN_GET_TAG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b14bf87999aa..03b08ffded07 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -555,8 +555,6 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, } } -#define HAVE_VLAN_GET_TAG - /** * vlan_get_tag - get the VLAN ID from the skb * @skb: skbuff to query -- cgit v1.2.3 From 40c223efaa17e9bc3d964ee285967ebbe09c3e12 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 5 Oct 2018 18:36:33 +0300 Subject: regulator: core: Limit regulators coupling to a single couple Device tree binding was changed in a way that now max-spread values must be defined per regulator pair. Limit number of pairs in order to adapt to the new binding without changing regulators code. Signed-off-by: Dmitry Osipenko Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index a9c030192147..a05d37d0efa1 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -15,7 +15,7 @@ #ifndef __LINUX_REGULATOR_DRIVER_H_ #define __LINUX_REGULATOR_DRIVER_H_ -#define MAX_COUPLED 4 +#define MAX_COUPLED 2 #include #include -- cgit v1.2.3 From 85254bcf394f93a8955814da1eef4d477b63eb84 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 5 Oct 2018 18:36:35 +0300 Subject: regulator: core: Add new max_uV_step constraint On NVIDIA Tegra30 there is a requirement for regulator "A" to have voltage higher than voltage of regulator "B" by N microvolts, the N value changes depending on the voltage of regulator "B". This is similar to min-spread between voltages of regulators, the difference is that the spread value isn't fixed.
This means that extra carefulness is required for regulator "A" to drop its voltage without violating the requirement, hence its voltage should be changed in steps so that its couple "B" could follow (there is also max-spread requirement). Add new "max_uV_step" constraint that breaks voltage change into several steps, each step is limited by the max_uV_step value. Signed-off-by: Dmitry Osipenko Signed-off-by: Mark Brown --- drivers/regulator/core.c | 41 +++++++++++++++++++++++++++++++++++++++ drivers/regulator/of_regulator.c | 4 ++++ include/linux/regulator/machine.h | 3 +++ 3 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 089e8ad8ef57..ba03bdf3716f 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3191,6 +3191,36 @@ out: return ret; } +static int regulator_limit_voltage_step(struct regulator_dev *rdev, + int *current_uV, int *min_uV) +{ + struct regulation_constraints *constraints = rdev->constraints; + + /* Limit voltage change only if necessary */ + if (!constraints->max_uV_step || !_regulator_is_enabled(rdev)) + return 1; + + if (*current_uV < 0) { + *current_uV = _regulator_get_voltage(rdev); + + if (*current_uV < 0) + return *current_uV; + } + + if (abs(*current_uV - *min_uV) <= constraints->max_uV_step) + return 1; + + /* Clamp target voltage within the given step */ + if (*current_uV < *min_uV) + *min_uV = min(*current_uV + constraints->max_uV_step, + *min_uV); + else + *min_uV = max(*current_uV - constraints->max_uV_step, + *min_uV); + + return 0; +} + static int regulator_get_optimal_voltage(struct regulator_dev *rdev, int *current_uV, int *min_uV, int *max_uV, @@ -3302,6 +3332,17 @@ static int regulator_get_optimal_voltage(struct regulator_dev *rdev, desired_min_uV = possible_uV; finish: + /* Apply max_uV_step constraint if necessary */ + if (state == PM_SUSPEND_ON) { + ret = regulator_limit_voltage_step(rdev, current_uV, + &desired_min_uV); + if 
(ret < 0) + return ret; + + if (ret == 0) + done = false; + } + /* Set current_uV if wasn't done earlier in the code and if necessary */ if (n_coupled > 1 && *current_uV == -1) { diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index c4223b3e0dff..a732f09d207b 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -170,6 +170,10 @@ static void of_get_regulation_constraints(struct device_node *np, &pval)) constraints->max_spread = pval; + if (!of_property_read_u32(np, "regulator-max-step-microvolt", + &pval)) + constraints->max_uV_step = pval; + constraints->over_current_protection = of_property_read_bool(np, "regulator-over-current-protection"); diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index a459a5e973a7..1d34a70ffda2 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -158,6 +158,9 @@ struct regulation_constraints { /* used for coupled regulators */ int max_spread; + /* used for changing voltage in steps */ + int max_uV_step; + /* valid regulator operating modes for this machine */ unsigned int valid_modes_mask; -- cgit v1.2.3 From 7baa85727d0406ffd2b2303cd803a145aa35c505 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 8 Nov 2018 10:24:07 -0700 Subject: blk-mq-tag: change busy_iter_fn to return whether to continue or not We have this functionality in sbitmap, but we don't export it in blk-mq for users of the tags busy iteration. This can be useful for stopping the iteration, if the caller doesn't need to find more requests. 
Reviewed-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 7 +++++-- block/blk-mq-tag.c | 4 ++-- block/blk-mq.c | 16 +++++++++++----- drivers/block/mtip32xx/mtip32xx.c | 9 ++++++--- drivers/block/nbd.c | 3 ++- drivers/block/skd_main.c | 8 +++++--- drivers/nvme/host/core.c | 4 ++-- drivers/nvme/host/fc.c | 3 ++- drivers/nvme/host/nvme.h | 2 +- include/linux/blk-mq.h | 4 ++-- 10 files changed, 38 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index cde19be36135..f021f4817b80 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -422,15 +422,18 @@ struct show_busy_params { /* * Note: the state of a request may change while this function is in progress, - * e.g. due to a concurrent blk_mq_finish_request() call. + * e.g. due to a concurrent blk_mq_finish_request() call. Returns true to + * keep iterating requests. */ -static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved) +static bool hctx_show_busy_rq(struct request *rq, void *data, bool reserved) { const struct show_busy_params *params = data; if (rq->mq_hctx == params->hctx) __blk_mq_debugfs_rq_show(params->m, list_entry_rq(&rq->queuelist)); + + return true; } static int hctx_busy_show(void *data, struct seq_file *m) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index fb836d818b80..097e9a67d5f5 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -236,7 +236,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * test and set the bit before assigning ->rqs[]. 
*/ if (rq && rq->q == hctx->queue) - iter_data->fn(hctx, rq, iter_data->data, reserved); + return iter_data->fn(hctx, rq, iter_data->data, reserved); return true; } @@ -289,7 +289,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) */ rq = tags->rqs[bitnr]; if (rq && blk_mq_request_started(rq)) - iter_data->fn(rq, iter_data->data, reserved); + return iter_data->fn(rq, iter_data->data, reserved); return true; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 45c92b8d4795..4a622c832b31 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -94,7 +94,7 @@ struct mq_inflight { unsigned int *inflight; }; -static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, +static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq, void *priv, bool reserved) { @@ -109,6 +109,8 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, mi->inflight[0]++; if (mi->part->partno) mi->inflight[1]++; + + return true; } void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part, @@ -120,7 +122,7 @@ void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part, blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); } -static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx, +static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx, struct request *rq, void *priv, bool reserved) { @@ -128,6 +130,8 @@ static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx, if (rq->part == mi->part) mi->inflight[rq_data_dir(rq)]++; + + return true; } void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, @@ -821,7 +825,7 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next) return false; } -static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, +static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, struct request *rq, void *priv, bool reserved) { unsigned long *next = priv; @@ -831,7 +835,7 @@ static void blk_mq_check_expired(struct 
blk_mq_hw_ctx *hctx, * so we're not unnecessarilly synchronizing across CPUs. */ if (!blk_mq_req_expired(rq, next)) - return; + return true; /* * We have reason to believe the request may be expired. Take a @@ -843,7 +847,7 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, * timeout handler to posting a natural completion. */ if (!refcount_inc_not_zero(&rq->ref)) - return; + return true; /* * The request is now locked and cannot be reallocated underneath the @@ -855,6 +859,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, blk_mq_rq_timed_out(rq, reserved); if (refcount_dec_and_test(&rq->ref)) __blk_mq_free_request(rq); + + return true; } static void blk_mq_timeout_work(struct work_struct *work) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index a7daa8acbab3..947aa10107a6 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2720,7 +2720,7 @@ static void mtip_softirq_done_fn(struct request *rq) blk_mq_end_request(rq, cmd->status); } -static void mtip_abort_cmd(struct request *req, void *data, bool reserved) +static bool mtip_abort_cmd(struct request *req, void *data, bool reserved) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); struct driver_data *dd = data; @@ -2730,14 +2730,16 @@ static void mtip_abort_cmd(struct request *req, void *data, bool reserved) clear_bit(req->tag, dd->port->cmds_to_issue); cmd->status = BLK_STS_IOERR; mtip_softirq_done_fn(req); + return true; } -static void mtip_queue_cmd(struct request *req, void *data, bool reserved) +static bool mtip_queue_cmd(struct request *req, void *data, bool reserved) { struct driver_data *dd = data; set_bit(req->tag, dd->port->cmds_to_issue); blk_abort_request(req); + return true; } /* @@ -3920,12 +3922,13 @@ protocol_init_error: return rv; } -static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) +static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) { struct 
mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->status = BLK_STS_IOERR; blk_mq_complete_request(rq); + return true; } /* diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4d4d6129ff66..08696f5f00bb 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -734,12 +734,13 @@ static void recv_work(struct work_struct *work) kfree(args); } -static void nbd_clear_req(struct request *req, void *data, bool reserved) +static bool nbd_clear_req(struct request *req, void *data, bool reserved) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); cmd->status = BLK_STS_IOERR; blk_mq_complete_request(req); + return true; } static void nbd_clear_que(struct nbd_device *nbd) diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 2459dcc04b1c..a0196477165f 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -382,11 +382,12 @@ static void skd_log_skreq(struct skd_device *skdev, * READ/WRITE REQUESTS ***************************************************************************** */ -static void skd_inc_in_flight(struct request *rq, void *data, bool reserved) +static bool skd_inc_in_flight(struct request *rq, void *data, bool reserved) { int *count = data; count++; + return true; } static int skd_in_flight(struct skd_device *skdev) @@ -1887,13 +1888,13 @@ static void skd_isr_fwstate(struct skd_device *skdev) skd_skdev_state_to_str(skdev->state), skdev->state); } -static void skd_recover_request(struct request *req, void *data, bool reserved) +static bool skd_recover_request(struct request *req, void *data, bool reserved) { struct skd_device *const skdev = data; struct skd_request_context *skreq = blk_mq_rq_to_pdu(req); if (skreq->state != SKD_REQ_STATE_BUSY) - return; + return true; skd_log_skreq(skdev, skreq, "recover"); @@ -1904,6 +1905,7 @@ static void skd_recover_request(struct request *req, void *data, bool reserved) skreq->state = SKD_REQ_STATE_IDLE; skreq->status = BLK_STS_IOERR; blk_mq_complete_request(req); + return true; } static void 
skd_recover_requests(struct skd_device *skdev) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2e65be8b1387..f172d63db2b5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -268,14 +268,14 @@ void nvme_complete_rq(struct request *req) } EXPORT_SYMBOL_GPL(nvme_complete_rq); -void nvme_cancel_request(struct request *req, void *data, bool reserved) +bool nvme_cancel_request(struct request *req, void *data, bool reserved) { dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, "Cancelling I/O %d", req->tag); nvme_req(req)->status = NVME_SC_ABORT_REQ; blk_mq_complete_request(req); - + return true; } EXPORT_SYMBOL_GPL(nvme_cancel_request); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0b70c8bab045..98c3c77f48f6 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2386,7 +2386,7 @@ nvme_fc_complete_rq(struct request *rq) * status. The done path will return the io request back to the block * layer with an error status. 
*/ -static void +static bool nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) { struct nvme_ctrl *nctrl = data; @@ -2394,6 +2394,7 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); __nvme_fc_abort_op(ctrl, op); + return true; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index cee79cb388af..32a1f1cfdfb4 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -408,7 +408,7 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl) } void nvme_complete_rq(struct request *req); -void nvme_cancel_request(struct request *req, void *data, bool reserved); +bool nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9f5e93f40857..ff497dfcbbf9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -129,9 +129,9 @@ typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int); -typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, +typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); -typedef void (busy_tag_iter_fn)(struct request *, void *, bool); +typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (map_queues_fn)(struct blk_mq_tag_set *set); typedef bool (busy_fn)(struct request_queue *); -- cgit v1.2.3 From ae8799125d565c798e49dcab4bf182dbfc483524 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 8 Nov 2018 09:03:51 -0700 Subject: blk-mq: provide a helper to check if a queue is busy Returns true if the queue currently has requests pending, false if not. 
DM can use this to replace the atomic_inc/dec they do per device to see if a device is busy. Reviewed-by: Mike Snitzer Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq.c | 26 ++++++++++++++++++++++++++ include/linux/blk-mq.h | 2 ++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 4a622c832b31..4880e13e2394 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -790,6 +790,32 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) } EXPORT_SYMBOL(blk_mq_tag_to_rq); +static bool blk_mq_check_busy(struct blk_mq_hw_ctx *hctx, struct request *rq, + void *priv, bool reserved) +{ + /* + * If we find a request, we know the queue is busy. Return false + * to stop the iteration. + */ + if (rq->q == hctx->queue) { + bool *busy = priv; + + *busy = true; + return false; + } + + return true; +} + +bool blk_mq_queue_busy(struct request_queue *q) +{ + bool busy = false; + + blk_mq_queue_tag_busy_iter(q, blk_mq_check_busy, &busy); + return busy; +} +EXPORT_SYMBOL_GPL(blk_mq_queue_busy); + static void blk_mq_rq_timed_out(struct request *req, bool reserved) { req->rq_flags |= RQF_TIMED_OUT; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ff497dfcbbf9..929e8abc5535 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -250,6 +250,8 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); +bool blk_mq_queue_busy(struct request_queue *q); + enum { /* return when out of requests */ BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), -- cgit v1.2.3 From 5cf8114d6e90b3822be5eb6a2faedf99d1c08f77 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 8 Nov 2018 10:08:46 -0500 Subject: cpuset: Expose cpuset.cpus.subpartitions with cgroup_debug For debugging purpose, it will be useful to expose the content of the subparts_cpus as a 
read-only file to see if the code works correctly. However, subparts_cpus will not be used at all in most use cases. So adding a new cpuset file that clutters the cgroup directory may not be desirable. This is now being done by using the hidden "cgroup_debug" kernel command line option to expose a new "cpuset.cpus.subpartitions" file. That option was originally used by the debug controller to expose itself when configured into the kernel. This is now extended to set an internal flag used by cgroup_addrm_files(). A new CFTYPE_DEBUG flag can now be used to specify that a cgroup file should only be created when the "cgroup_debug" option is specified. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra (Intel) Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 1 + kernel/cgroup/cgroup-internal.h | 2 ++ kernel/cgroup/cgroup.c | 14 +++++++++++++- kernel/cgroup/cpuset.c | 11 +++++++++++ kernel/cgroup/debug.c | 4 +--- 5 files changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 5e1694fe035b..8fcbae1b8db0 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -92,6 +92,7 @@ enum { CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ + CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 75568fcf2180..c950864016e2 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -11,6 +11,8 @@ #define TRACE_CGROUP_PATH_LEN 1024 extern spinlock_t trace_cgroup_path_lock; extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; +extern bool cgroup_debug; +extern void __init enable_debug_cgroup(void); /* * cgroup_path() takes a spin
lock. It is good practice not to take diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 2e5d90dfcb49..ed7f0bfe6429 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -86,6 +86,7 @@ EXPORT_SYMBOL_GPL(css_set_lock); DEFINE_SPINLOCK(trace_cgroup_path_lock); char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; +bool cgroup_debug __read_mostly; /* * Protects cgroup_idr and css_idr so that IDs can be released without @@ -3639,7 +3640,8 @@ restart: continue; if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp)) continue; - + if ((cft->flags & CFTYPE_DEBUG) && !cgroup_debug) + continue; if (is_add) { ret = cgroup_add_file(css, cgrp, cft); if (ret) { @@ -5743,6 +5745,16 @@ static int __init cgroup_disable(char *str) } __setup("cgroup_disable=", cgroup_disable); +void __init __weak enable_debug_cgroup(void) { } + +static int __init enable_cgroup_debug(char *str) +{ + cgroup_debug = true; + enable_debug_cgroup(); + return 1; +} +__setup("cgroup_debug", enable_cgroup_debug); + /** * css_tryget_online_from_dir - get corresponding css from a cgroup dentry * @dentry: directory dentry of interest diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index c739fda805e0..b897314bab53 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2204,6 +2204,7 @@ typedef enum { FILE_MEMLIST, FILE_EFFECTIVE_CPULIST, FILE_EFFECTIVE_MEMLIST, + FILE_SUBPARTS_CPULIST, FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, FILE_MEM_HARDWALL, @@ -2382,6 +2383,9 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) case FILE_EFFECTIVE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); break; + case FILE_SUBPARTS_CPULIST: + seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->subparts_cpus)); + break; default: ret = -EINVAL; } @@ -2634,6 +2638,13 @@ static struct cftype dfl_files[] = { .flags = CFTYPE_NOT_ON_ROOT, }, + { + .name = "cpus.subpartitions", + .seq_show = cpuset_common_seq_show, + .private = FILE_SUBPARTS_CPULIST, + 
.flags = CFTYPE_DEBUG, + }, + { } /* terminate */ }; diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c index 9caeda610249..5f1b87330bee 100644 --- a/kernel/cgroup/debug.c +++ b/kernel/cgroup/debug.c @@ -373,11 +373,9 @@ struct cgroup_subsys debug_cgrp_subsys = { * On v2, debug is an implicit controller enabled by "cgroup_debug" boot * parameter. */ -static int __init enable_cgroup_debug(char *str) +void __init enable_debug_cgroup(void) { debug_cgrp_subsys.dfl_cftypes = debug_files; debug_cgrp_subsys.implicit_on_dfl = true; debug_cgrp_subsys.threaded = true; - return 1; } -__setup("cgroup_debug", enable_cgroup_debug); -- cgit v1.2.3 From 85a1f31d6392fb2c6726fcc4e072de008e3f0656 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 7 Nov 2018 20:46:51 +0100 Subject: net: phy: remove state PHY_AN After the recent changes in the state machine state PHY_AN isn't used any longer and can be removed. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 27 --------------------------- include/linux/phy.h | 19 +------------------ 2 files changed, 1 insertion(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 87ed000307b7..226824804208 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -50,7 +50,6 @@ static const char *phy_state_to_str(enum phy_state st) PHY_STATE_STR(READY) PHY_STATE_STR(PENDING) PHY_STATE_STR(UP) - PHY_STATE_STR(AN) PHY_STATE_STR(RUNNING) PHY_STATE_STR(NOLINK) PHY_STATE_STR(FORCING) @@ -944,32 +943,6 @@ void phy_state_machine(struct work_struct *work) case PHY_UP: needs_aneg = true; - phydev->link_timeout = PHY_AN_TIMEOUT; - - break; - case PHY_AN: - err = phy_read_status(phydev); - if (err < 0) - break; - - /* If the link is down, give up on negotiation for now */ - if (!phydev->link) { - phydev->state = PHY_NOLINK; - phy_link_down(phydev, true); - break; - } - - /* Check if negotiation is done. 
Break if there's an error */ - err = phy_aneg_done(phydev); - if (err < 0) - break; - - /* If AN is done, we're running */ - if (err > 0) { - phydev->state = PHY_RUNNING; - phy_link_up(phydev); - } else if (0 == phydev->link_timeout--) - needs_aneg = true; break; case PHY_NOLINK: if (!phy_polling_mode(phydev)) diff --git a/include/linux/phy.h b/include/linux/phy.h index 9e4d49ef4bca..2090277eac4f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -178,7 +178,6 @@ static inline const char *phy_modes(phy_interface_t interface) #define PHY_INIT_TIMEOUT 100000 #define PHY_STATE_TIME 1 #define PHY_FORCE_TIMEOUT 10 -#define PHY_AN_TIMEOUT 10 #define PHY_MAX_ADDR 32 @@ -297,24 +296,10 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); * * UP: The PHY and attached device are ready to do work. * Interrupts should be started here. - * - timer moves to AN - * - * AN: The PHY is currently negotiating the link state. Link is - * therefore down for now. phy_timer will set this state when it - * detects the state is UP. config_aneg will set this state - * whenever called with phydev->autoneg set to AUTONEG_ENABLE. - * - If autonegotiation finishes, but there's no link, it sets - * the state to NOLINK. - * - If aneg finishes with link, it sets the state to RUNNING, - * and calls adjust_link - * - If autonegotiation did not finish after an arbitrary amount - * of time, autonegotiation should be tried again if the PHY - * supports "magic" autonegotiation (back to AN) - * - If it didn't finish, and no magic_aneg, move to FORCING. + * - timer moves to NOLINK or RUNNING * * NOLINK: PHY is up, but not currently plugged in. 
* - If the timer notes that the link comes back, we move to RUNNING - * - config_aneg moves to AN * - phy_stop moves to HALTED * * FORCING: PHY is being configured with forced settings @@ -329,7 +314,6 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); * link state is polled every other cycle of this state machine, * which makes it every other second) * - irq will set CHANGELINK - * - config_aneg will set AN * - phy_stop moves to HALTED * * CHANGELINK: PHY experienced a change in link state @@ -353,7 +337,6 @@ enum phy_state { PHY_READY, PHY_PENDING, PHY_UP, - PHY_AN, PHY_RUNNING, PHY_NOLINK, PHY_FORCING, -- cgit v1.2.3 From a36e185e8c85523413c1ae3e03a0bdde5501f403 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:14 +0100 Subject: udp: Handle ICMP errors for tunnels with same destination port on both endpoints For both IPv4 and IPv6, if we can't match errors to a socket, try tunnels before ignoring them. Look up a socket with the original source and destination ports as found in the UDP packet inside the ICMP payload, this will work for tunnels that force the same destination port for both endpoints, i.e. VXLAN and GENEVE. Actually, lwtunnels could break this assumption if they are configured by an external control plane to have different destination ports on the endpoints: in this case, we won't be able to trace ICMP messages back to them. For IPv6 redirect messages, call ip6_redirect() directly with the output interface argument set to the interface we received the packet from (as it's the very interface we should build the exception on), otherwise the new nexthop will be rejected. There's no such need for IPv4. Tunnels can now export an encap_err_lookup() operation that indicates a match. Pass the packet to the lookup function, and if the tunnel driver reports a matching association, continue with regular ICMP error handling. 
v2: - Added newline between network and transport header sets in __udp{4,6}_lib_err_encap() (David Miller) - Removed redundant skb_reset_network_header(skb); in __udp4_lib_err_encap() - Removed redundant reassignment of iph in __udp4_lib_err_encap() (Sabrina Dubroca) - Edited comment to __udp{4,6}_lib_err_encap() to reflect the fact this won't work with lwtunnels configured to use asymmetric ports. By the way, it's VXLAN, not VxLAN (Jiri Benc) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/linux/udp.h | 1 + include/net/udp_tunnel.h | 3 ++ net/ipv4/udp.c | 79 +++++++++++++++++++++++++++++++++++++----- net/ipv4/udp_tunnel.c | 1 + net/ipv6/udp.c | 89 ++++++++++++++++++++++++++++++++++++++++++------ 5 files changed, 153 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 0a9c54e76305..2725c83395bf 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -77,6 +77,7 @@ struct udp_sock { * For encapsulation sockets. 
*/ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); + int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); /* GRO functions for UDP socket */ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 3fbe56430e3b..dc8d804af3b4 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -64,6 +64,8 @@ static inline int udp_sock_create(struct net *net, } typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); +typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, + struct sk_buff *skb); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, @@ -76,6 +78,7 @@ struct udp_tunnel_sock_cfg { /* Used for setting up udp_sock fields, see udp.h for details */ __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv; + udp_tunnel_encap_err_lookup_t encap_err_lookup; udp_tunnel_encap_destroy_t encap_destroy; udp_tunnel_gro_receive_t gro_receive; udp_tunnel_gro_complete_t gro_complete; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3488650b90ac..ce759b61f6cd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -583,6 +583,62 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, return true; } +DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +void udp_encap_enable(void) +{ + static_branch_enable(&udp_encap_needed_key); +} +EXPORT_SYMBOL(udp_encap_enable); + +/* Try to match ICMP errors to UDP tunnels by looking up a socket without + * reversing source and destination port: this will match tunnels that force the + * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that + * lwtunnels might actually break this assumption by being configured with + * different destination ports on endpoints, in this case we won't be able to + * trace ICMP messages back to them. 
+ * + * Then ask the tunnel implementation to match the error against a valid + * association. + * + * Return the socket if we have a match. + */ +static struct sock *__udp4_lib_err_encap(struct net *net, + const struct iphdr *iph, + struct udphdr *uh, + struct udp_table *udptable, + struct sk_buff *skb) +{ + int (*lookup)(struct sock *sk, struct sk_buff *skb); + int network_offset, transport_offset; + struct udp_sock *up; + struct sock *sk; + + sk = __udp4_lib_lookup(net, iph->daddr, uh->source, + iph->saddr, uh->dest, skb->dev->ifindex, 0, + udptable, NULL); + if (!sk) + return NULL; + + network_offset = skb_network_offset(skb); + transport_offset = skb_transport_offset(skb); + + /* Network header needs to point to the outer IPv4 header inside ICMP */ + skb_reset_network_header(skb); + + /* Transport header needs to point to the UDP header */ + skb_set_transport_header(skb, iph->ihl << 2); + + up = udp_sk(sk); + lookup = READ_ONCE(up->encap_err_lookup); + if (!lookup || lookup(sk, skb)) + sk = NULL; + + skb_set_transport_header(skb, transport_offset); + skb_set_network_header(skb, network_offset); + + return sk; +} + /* * This routine is called by the ICMP module when it gets some * sort of error condition. 
If err < 0 then the socket should @@ -601,6 +657,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + bool tunnel = false; struct sock *sk; int harderr; int err; @@ -610,8 +667,15 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); if (!sk) { - __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return; /* No socket for error */ + /* No socket for error: try tunnels before discarding */ + if (static_branch_unlikely(&udp_encap_needed_key)) + sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb); + + if (!sk) { + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); + return; + } + tunnel = true; } err = 0; @@ -654,6 +718,10 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) * RFC1122: OK. Passes ICMP errors back to application, as per * 4.1.3.3. 
*/ + if (tunnel) { + /* ...not for tunnels though: we don't have a sending socket */ + goto out; + } if (!inet->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; @@ -1891,13 +1959,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); -void udp_encap_enable(void) -{ - static_branch_enable(&udp_encap_needed_key); -} -EXPORT_SYMBOL(udp_encap_enable); - /* returns: * -1: error * 0: success diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 6539ff15e9a3..d0c412fc56ad 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -68,6 +68,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->encap_type = cfg->encap_type; udp_sk(sk)->encap_rcv = cfg->encap_rcv; + udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup; udp_sk(sk)->encap_destroy = cfg->encap_destroy; udp_sk(sk)->gro_receive = cfg->gro_receive; udp_sk(sk)->gro_complete = cfg->gro_complete; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c55698d19d68..1216c920f945 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -462,6 +462,61 @@ csum_copy_err: goto try_again; } +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +void udpv6_encap_enable(void) +{ + static_branch_enable(&udpv6_encap_needed_key); +} +EXPORT_SYMBOL(udpv6_encap_enable); + +/* Try to match ICMP errors to UDP tunnels by looking up a socket without + * reversing source and destination port: this will match tunnels that force the + * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that + * lwtunnels might actually break this assumption by being configured with + * different destination ports on endpoints, in this case we won't be able to + * trace ICMP messages back to them. + * + * Then ask the tunnel implementation to match the error against a valid + * association. + * + * Return the socket if we have a match. 
+ */ +static struct sock *__udp6_lib_err_encap(struct net *net, + const struct ipv6hdr *hdr, int offset, + struct udphdr *uh, + struct udp_table *udptable, + struct sk_buff *skb) +{ + int (*lookup)(struct sock *sk, struct sk_buff *skb); + int network_offset, transport_offset; + struct udp_sock *up; + struct sock *sk; + + sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, + &hdr->saddr, uh->dest, + inet6_iif(skb), 0, udptable, skb); + if (!sk) + return NULL; + + network_offset = skb_network_offset(skb); + transport_offset = skb_transport_offset(skb); + + /* Network header needs to point to the outer IPv6 header inside ICMP */ + skb_reset_network_header(skb); + + /* Transport header needs to point to the UDP header */ + skb_set_transport_header(skb, offset); + + up = udp_sk(sk); + lookup = READ_ONCE(up->encap_err_lookup); + if (!lookup || lookup(sk, skb)) + sk = NULL; + + skb_set_transport_header(skb, transport_offset); + skb_set_network_header(skb, network_offset); + return sk; +} + void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info, struct udp_table *udptable) @@ -471,6 +526,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, const struct in6_addr *saddr = &hdr->saddr; const struct in6_addr *daddr = &hdr->daddr; struct udphdr *uh = (struct udphdr *)(skb->data+offset); + bool tunnel = false; struct sock *sk; int harderr; int err; @@ -479,9 +535,18 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, skb); if (!sk) { - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); - return; + /* No socket for error: try tunnels before discarding */ + if (static_branch_unlikely(&udpv6_encap_needed_key)) { + sk = __udp6_lib_err_encap(net, hdr, offset, uh, + udptable, skb); + } + + if (!sk) { + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + 
ICMP6_MIB_INERRORS); + return; + } + tunnel = true; } harderr = icmpv6_err_convert(type, code, &err); @@ -495,10 +560,19 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, harderr = 1; } if (type == NDISC_REDIRECT) { - ip6_sk_redirect(skb, sk); + if (tunnel) { + ip6_redirect(skb, sock_net(sk), inet6_iif(skb), + sk->sk_mark, sk->sk_uid); + } else { + ip6_sk_redirect(skb, sk); + } goto out; } + /* Tunnels don't have an application socket: don't pass errors back */ + if (tunnel) + goto out; + if (!np->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; @@ -547,13 +621,6 @@ static __inline__ void udpv6_err(struct sk_buff *skb, __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); -void udpv6_encap_enable(void) -{ - static_branch_enable(&udpv6_encap_needed_key); -} -EXPORT_SYMBOL(udpv6_encap_enable); - static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); -- cgit v1.2.3 From c74d90c11c05bdfd78f8e29ee96b8a6f23daea99 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 7 Nov 2018 20:31:37 +0200 Subject: net/mlx5: Fix offsets of ifc reserved fields Fix wrong offsets of reserved fields in ifc file. Issues found using pahole. 
Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 34e17e6f8942..6f64e814cc10 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -349,7 +349,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reformat_l3_tunnel_to_l2[0x1]; u8 reformat_l2_to_l3_tunnel[0x1]; u8 reformat_and_modify_action[0x1]; - u8 reserved_at_14[0xb]; + u8 reserved_at_15[0xb]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; @@ -586,7 +586,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 fdb_multi_path_to_table[0x1]; u8 reserved_at_1d[0x1]; u8 multi_fdb_encap[0x1]; - u8 reserved_at_1e[0x1e1]; + u8 reserved_at_1f[0x1e1]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; @@ -829,7 +829,7 @@ struct mlx5_ifc_vector_calc_cap_bits { struct mlx5_ifc_calc_op calc2; struct mlx5_ifc_calc_op calc3; - u8 reserved_at_e0[0x720]; + u8 reserved_at_c0[0x720]; }; enum { @@ -5567,7 +5567,7 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { struct mlx5_ifc_modify_nic_vport_field_select_bits { u8 reserved_at_0[0x12]; u8 affiliation[0x1]; - u8 reserved_at_e[0x1]; + u8 reserved_at_13[0x1]; u8 disable_uc_local_lb[0x1]; u8 disable_mc_local_lb[0x1]; u8 node_guid[0x1]; @@ -9028,7 +9028,7 @@ struct mlx5_ifc_dcbx_param_bits { u8 dcbx_cee_cap[0x1]; u8 dcbx_ieee_cap[0x1]; u8 dcbx_standby_cap[0x1]; - u8 reserved_at_0[0x5]; + u8 reserved_at_3[0x5]; u8 port_number[0x8]; u8 reserved_at_10[0xa]; u8 max_application_table_size[6]; -- cgit v1.2.3 From e7946760de5852f32c4e52ce47f37e85346981b9 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Thu, 8 Nov 2018 22:27:54 +0200 Subject: net: core: dev_addr_lists: add auxiliary func to handle reference address updates In order to avoid all table update, and only remove or 
add new address, the auxiliary function exists, named __hw_addr_sync_dev(). It allows end driver do nothing when nothing changed and add/rm when concrete address is firstly added or lastly removed. But it doesn't include cases when an address of real device or vlan was reused by other vlans or vlan/macvlan devices. For handling events when address was reused/unreused the patch adds new auxiliary routine - __hw_addr_ref_sync_dev(). It allows to do nothing when nothing was changed and do updates only for an address being added/reused/deleted/unreused. Thus, clone address changes for vlans can be mirrored in the table. The function is exclusive with __hw_addr_sync_dev(). It's the responsibility of the end driver to identify address vlan device, if it needs so. Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 +++++ net/core/dev_addr_lists.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 857f8abf7b91..487fa5e0e165 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4068,6 +4068,16 @@ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)); +int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*sync)(struct net_device *, + const unsigned char *, int), + int (*unsync)(struct net_device *, + const unsigned char *, int)); +void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *, int)); void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index d884d8f5f0e5..81a8cd4ea3bd 100644 --- 
a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -277,6 +277,103 @@ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, } EXPORT_SYMBOL(__hw_addr_sync_dev); +/** + * __hw_addr_ref_sync_dev - Synchronize device's multicast address list taking + * into account references + * @list: address list to synchronize + * @dev: device to sync + * @sync: function to call if address or reference on it should be added + * @unsync: function to call if address or some reference on it should removed + * + * This function is intended to be called from the ndo_set_rx_mode + * function of devices that require explicit address or references on it + * add/remove notifications. The unsync function may be NULL in which case + * the addresses or references on it requiring removal will simply be + * removed without any notification to the device. That is responsibility of + * the driver to identify and distribute address or references on it between + * internal address tables. + **/ +int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*sync)(struct net_device *, + const unsigned char *, int), + int (*unsync)(struct net_device *, + const unsigned char *, int)) +{ + struct netdev_hw_addr *ha, *tmp; + int err, ref_cnt; + + /* first go through and flush out any unsynced/stale entries */ + list_for_each_entry_safe(ha, tmp, &list->list, list) { + /* sync if address is not used */ + if ((ha->sync_cnt << 1) <= ha->refcount) + continue; + + /* if fails defer unsyncing address */ + ref_cnt = ha->refcount - ha->sync_cnt; + if (unsync && unsync(dev, ha->addr, ref_cnt)) + continue; + + ha->refcount = (ref_cnt << 1) + 1; + ha->sync_cnt = ref_cnt; + __hw_addr_del_entry(list, ha, false, false); + } + + /* go through and sync updated/new entries to the list */ + list_for_each_entry_safe(ha, tmp, &list->list, list) { + /* sync if address added or reused */ + if ((ha->sync_cnt << 1) >= ha->refcount) + continue; + + ref_cnt = ha->refcount - 
ha->sync_cnt; + err = sync(dev, ha->addr, ref_cnt); + if (err) + return err; + + ha->refcount = ref_cnt << 1; + ha->sync_cnt = ref_cnt; + } + + return 0; +} +EXPORT_SYMBOL(__hw_addr_ref_sync_dev); + +/** + * __hw_addr_ref_unsync_dev - Remove synchronized addresses and references on + * it from device + * @list: address list to remove synchronized addresses (references on it) from + * @dev: device to sync + * @unsync: function to call if address and references on it should be removed + * + * Remove all addresses that were added to the device by + * __hw_addr_ref_sync_dev(). This function is intended to be called from the + * ndo_stop or ndo_open functions on devices that require explicit address (or + * references on it) add/remove notifications. If the unsync function pointer + * is NULL then this function can be used to just reset the sync_cnt for the + * addresses in the list. + **/ +void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *, int)) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (!ha->sync_cnt) + continue; + + /* if fails defer unsyncing address */ + if (unsync && unsync(dev, ha->addr, ha->sync_cnt)) + continue; + + ha->refcount -= ha->sync_cnt - 1; + ha->sync_cnt = 0; + __hw_addr_del_entry(list, ha, false, false); + } +} +EXPORT_SYMBOL(__hw_addr_ref_unsync_dev); + /** * __hw_addr_unsync_dev - Remove synchronized addresses from device * @list: address list to remove synchronized addresses from -- cgit v1.2.3 From 960abf68d2023f0d0b08c6f5d05971630496cfb0 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Thu, 8 Nov 2018 22:27:55 +0200 Subject: net: 8021q: vlan_core: allow use list of vlans for real device It's redundancy for the drivers to hold the list of vlans when absolutely the same list exists in vlan core. 
In most cases it's needed only to traverse the vlan devices, their vids and sync some settings with h/w, so add API to simplify this. At least some of these drivers also can benefit: grep "for_each.*vid" -r drivers/net/ethernet/ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c: drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c: drivers/net/ethernet/qlogic/qlge/qlge_main.c: drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c: drivers/net/ethernet/via/via-rhine.c: drivers/net/ethernet/via/via-velocity.c: drivers/net/ethernet/intel/igb/igb_main.c: drivers/net/ethernet/intel/ice/ice_main.c: drivers/net/ethernet/intel/e1000/e1000_main.c: drivers/net/ethernet/intel/i40e/i40e_main.c: drivers/net/ethernet/intel/e1000e/netdev.c: drivers/net/ethernet/intel/igbvf/netdev.c: drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c: drivers/net/ethernet/intel/ixgb/ixgb_main.c: drivers/net/ethernet/intel/ixgbe/ixgbe_main.c: drivers/net/ethernet/amd/xgbe/xgbe-dev.c: drivers/net/ethernet/emulex/benet/be_main.c: drivers/net/ethernet/neterion/vxge/vxge-main.c: drivers/net/ethernet/adaptec/starfire.c: drivers/net/ethernet/brocade/bna/bnad.c: Reviewed-by: Grygorii Strashko Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 11 +++++++++++ net/8021q/vlan_core.c | 27 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 03b08ffded07..1be5230921b5 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -133,6 +133,9 @@ struct vlan_pcpu_stats { extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id); +extern int vlan_for_each(struct net_device *dev, + int (*action)(struct net_device *dev, int vid, + void *arg), void *arg); extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern __be16 vlan_dev_vlan_proto(const struct net_device *dev); @@ -236,6 +239,14 @@ __vlan_find_dev_deep_rcu(struct net_device *real_dev, return NULL; } +static inline int +vlan_for_each(struct net_device *dev, + int (*action)(struct net_device *dev, int vid, void *arg), + void *arg) +{ + return 0; +} + static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { BUG(); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 4f60e86f4b8d..6308b5427a66 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -223,6 +223,33 @@ static int vlan_kill_rx_filter_info(struct net_device *dev, __be16 proto, u16 vi return -ENODEV; } +int vlan_for_each(struct net_device *dev, + int (*action)(struct net_device *dev, int vid, void *arg), + void *arg) +{ + struct vlan_vid_info *vid_info; + struct vlan_info *vlan_info; + struct net_device *vdev; + int ret; + + ASSERT_RTNL(); + + vlan_info = rtnl_dereference(dev->vlan_info); + if (!vlan_info) + return 0; + + list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + vdev = vlan_group_get_device(&vlan_info->grp, vid_info->proto, + vid_info->vid); + ret = action(vdev, vid_info->vid, arg); + if (ret) + return ret; + } + + return 0; +} 
+EXPORT_SYMBOL(vlan_for_each); + int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto) { struct net_device *real_dev = vlan_info->real_dev; -- cgit v1.2.3 From 309ba859b95085f61f4f2a154df6be9cb9713a12 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 Jul 2018 14:36:49 -0700 Subject: rcu: Eliminate synchronize_rcu_mult() Now that synchronize_rcu() waits for both RCU read-side critical sections and preempt-disabled regions of code, the sole caller of synchronize_rcu_mult() can be replaced by synchronize_rcu(). This patch makes this change and removes synchronize_rcu_mult(). Note that _wait_rcu_gp() still supports synchronize_rcu_mult(), and thus might be simplified in the future to take only a single call_rcu() function rather than the current list of them. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate_wait.h | 17 ----------------- kernel/rcu/update.c | 6 ++---- kernel/sched/core.c | 2 +- 3 files changed, 3 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 8a16c3eb3dd0..c0578ba23c1a 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -31,21 +31,4 @@ do { \ #define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) -/** - * synchronize_rcu_mult - Wait concurrently for multiple grace periods - * @...: List of call_rcu() functions for different grace periods to wait on - * - * This macro waits concurrently for multiple types of RCU grace periods. - * For example, synchronize_rcu_mult(call_rcu, call_rcu_tasks) would wait - * on concurrent RCU and RCU-tasks grace periods. Waiting on a give SRCU - * domain requires you to write a wrapper function for that SRCU domain's - * call_srcu() function, supplying the corresponding srcu_struct. - * - * If Tiny RCU, tell _wait_rcu_gp() does not bother waiting for RCU, - * given that anywhere synchronize_rcu_mult() can be called is automatically - * a grace period. 
- */ -#define synchronize_rcu_mult(...) \ - _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) - #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f203b94f6b5b..c729ca5e6ee2 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -335,8 +335,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, /* Initialize and register callbacks for each crcu_array element. */ for (i = 0; i < n; i++) { if (checktiny && - (crcu_array[i] == call_rcu || - crcu_array[i] == call_rcu_bh)) { + (crcu_array[i] == call_rcu)) { might_sleep(); continue; } @@ -352,8 +351,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, /* Wait for all callbacks to be invoked. */ for (i = 0; i < n; i++) { if (checktiny && - (crcu_array[i] == call_rcu || - crcu_array[i] == call_rcu_bh)) + (crcu_array[i] == call_rcu)) continue; for (j = 0; j < i; j++) if (crcu_array[j] == crcu_array[i]) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f12225f26b70..ea12ebc57840 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5788,7 +5788,7 @@ int sched_cpu_deactivate(unsigned int cpu) * * Do sync before park smpboot threads to take care the rcu boost case. */ - synchronize_rcu_mult(call_rcu, call_rcu_sched); + synchronize_rcu(); if (!sched_smp_initialized) return 0; -- cgit v1.2.3 From f3e763c3e544b73ae5c4a3842cedb9ff6ca37715 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 3 Sep 2018 12:45:45 -0700 Subject: srcu: Fix kernel-doc missing notation Fix kernel-doc warnings for missing parameter descriptions: ../include/linux/srcu.h:175: warning: Function parameter or member 'p' not described in 'srcu_dereference_notrace' ../include/linux/srcu.h:175: warning: Function parameter or member 'sp' not described in 'srcu_dereference_notrace' Fixes: 0b764a6e4e19d ("srcu: Add notrace variant of srcu_dereference") Signed-off-by: Randy Dunlap Cc: Lai Jiangshan Cc: "Paul E. 
McKenney" Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 67135d4a8a30..ebd5f1511690 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -171,6 +171,9 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) /** * srcu_dereference_notrace - no tracing and no lockdep calls from here + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. */ #define srcu_dereference_notrace(p, sp) srcu_dereference_check((p), (sp), 1) -- cgit v1.2.3 From 144552c786925314c1e7cb8f91a71dae1aca8798 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Thu, 4 Oct 2018 20:24:17 -0700 Subject: of: overlay: add tests to validate kfrees from overlay removal Add checks: - attempted kfree due to refcount reaching zero before overlay is removed - properties linked to an overlay node when the node is removed - node refcount > one during node removal in a changeset destroy, if the node was created by the changeset After applying this patch, several validation warnings will be reported from the devicetree unittest during boot due to pre-existing devicetree bugs. 
The warnings will be similar to: OF: ERROR: of_node_release(), unexpected properties in /testcase-data/overlay-node/test-bus/test-unittest11 OF: ERROR: memory leak, expected refcount 1 instead of 2, of_node_get()/of_node_put() unbalanced - destroy cset entry: attach overlay node /testcase-data-2/substation@100/ hvac-medium-2 Tested-by: Alan Tull Signed-off-by: Frank Rowand --- drivers/of/dynamic.c | 29 +++++++++++++++++++++++++++++ drivers/of/overlay.c | 1 + include/linux/of.h | 15 ++++++++++----- 3 files changed, 40 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index f4f8ed9b5454..12c3f9a15e94 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -330,6 +330,25 @@ void of_node_release(struct kobject *kobj) if (!of_node_check_flag(node, OF_DYNAMIC)) return; + if (of_node_check_flag(node, OF_OVERLAY)) { + + if (!of_node_check_flag(node, OF_OVERLAY_FREE_CSET)) { + /* premature refcount of zero, do not free memory */ + pr_err("ERROR: memory leak before free overlay changeset, %pOF\n", + node); + return; + } + + /* + * If node->properties non-empty then properties were added + * to this node either by different overlay that has not + * yet been removed, or by a non-overlay mechanism. 
+ */ + if (node->properties) + pr_err("ERROR: %s(), unexpected properties in %pOF\n", + __func__, node); + } + property_list_free(node->properties); property_list_free(node->deadprops); @@ -434,6 +453,16 @@ struct device_node *__of_node_dup(const struct device_node *np, static void __of_changeset_entry_destroy(struct of_changeset_entry *ce) { + if (ce->action == OF_RECONFIG_ATTACH_NODE && + of_node_check_flag(ce->np, OF_OVERLAY)) { + if (kref_read(&ce->np->kobj.kref) > 1) { + pr_err("ERROR: memory leak, expected refcount 1 instead of %d, of_node_get()/of_node_put() unbalanced - destroy cset entry: attach overlay node %pOF\n", + kref_read(&ce->np->kobj.kref), ce->np); + } else { + of_node_set_flag(ce->np, OF_OVERLAY_FREE_CSET); + } + } + of_node_put(ce->np); list_del(&ce->node); kfree(ce); diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 42b1f73ac5f6..f5fc8859a7ee 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -373,6 +373,7 @@ static int add_changeset_node(struct overlay_changeset *ovcs, return -ENOMEM; tchild->parent = target_node; + of_node_set_flag(tchild, OF_OVERLAY); ret = of_changeset_attach_node(&ovcs->cset, tchild); if (ret) diff --git a/include/linux/of.h b/include/linux/of.h index a5aee3c438ad..664cd5573ae2 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -138,11 +138,16 @@ extern struct device_node *of_aliases; extern struct device_node *of_stdout; extern raw_spinlock_t devtree_lock; -/* flag descriptions (need to be visible even when !CONFIG_OF) */ -#define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ -#define OF_DETACHED 2 /* node has been detached from the device tree */ -#define OF_POPULATED 3 /* device already created for the node */ -#define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ +/* + * struct device_node flag descriptions + * (need to be visible even when !CONFIG_OF) + */ +#define OF_DYNAMIC 1 /* (and properties) allocated via kmalloc */ 
+#define OF_DETACHED 2 /* detached from the device tree */ +#define OF_POPULATED 3 /* device already created */ +#define OF_POPULATED_BUS 4 /* platform bus created for children */ +#define OF_OVERLAY 5 /* allocated for an overlay */ +#define OF_OVERLAY_FREE_CSET 6 /* in overlay cset being freed */ #define OF_BAD_ADDR ((u64)-1) -- cgit v1.2.3 From 6f75118800acf77f8ad6afec61ca1b2349ade371 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Thu, 4 Oct 2018 20:32:04 -0700 Subject: of: overlay: validate overlay properties #address-cells and #size-cells If overlay properties #address-cells or #size-cells are already in the live devicetree for any given node, then the values in the overlay must match the values in the live tree. If the properties are already in the live tree then there is no need to create a changeset entry to add them since they must have the same value. This reduces the memory used by the changeset and eliminates a possible memory leak. Tested-by: Alan Tull Signed-off-by: Frank Rowand --- drivers/of/overlay.c | 32 +++++++++++++++++++++++++++++--- include/linux/of.h | 6 ++++++ 2 files changed, 35 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 15be3da34fef..72bf00adb9c8 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -287,7 +287,12 @@ err_free_target_path: * @target may be either in the live devicetree or in a new subtree that * is contained in the changeset. * - * Some special properties are not updated (no error returned). + * Some special properties are not added or updated (no error returned): + * "name", "phandle", "linux,phandle". + * + * Properties "#address-cells" and "#size-cells" are not updated if they + * are already in the live tree, but if present in the live tree, the values + * in the overlay must match the values in the live tree. * * Update of property in symbols node is not allowed. 
* @@ -300,6 +305,7 @@ static int add_changeset_property(struct overlay_changeset *ovcs, { struct property *new_prop = NULL, *prop; int ret = 0; + bool check_for_non_overlay_node = false; if (!of_prop_cmp(overlay_prop->name, "name") || !of_prop_cmp(overlay_prop->name, "phandle") || @@ -322,12 +328,32 @@ static int add_changeset_property(struct overlay_changeset *ovcs, if (!new_prop) return -ENOMEM; - if (!prop) + if (!prop) { + check_for_non_overlay_node = true; ret = of_changeset_add_property(&ovcs->cset, target->np, new_prop); - else + } else if (!of_prop_cmp(prop->name, "#address-cells")) { + if (!of_prop_val_eq(prop, new_prop)) { + pr_err("ERROR: changing value of #address-cells is not allowed in %pOF\n", + target->np); + ret = -EINVAL; + } + } else if (!of_prop_cmp(prop->name, "#size-cells")) { + if (!of_prop_val_eq(prop, new_prop)) { + pr_err("ERROR: changing value of #size-cells is not allowed in %pOF\n", + target->np); + ret = -EINVAL; + } + } else { + check_for_non_overlay_node = true; ret = of_changeset_update_property(&ovcs->cset, target->np, new_prop); + } + + if (check_for_non_overlay_node && + !of_node_check_flag(target->np, OF_OVERLAY)) + pr_err("WARNING: memory leak will occur if overlay removed, property: %pOF/%s\n", + target->np, new_prop->name); if (ret) { kfree(new_prop->name); diff --git a/include/linux/of.h b/include/linux/of.h index 664cd5573ae2..18ac8921e90c 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -990,6 +990,12 @@ static inline int of_map_rid(struct device_node *np, u32 rid, #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) #endif +static inline int of_prop_val_eq(struct property *p1, struct property *p2) +{ + return p1->length == p2->length && + !memcmp(p1->value, p2->value, (size_t)p1->length); +} + #if defined(CONFIG_OF) && defined(CONFIG_NUMA) extern int of_node_to_nid(struct device_node *np); #else -- cgit v1.2.3 From e647815a4d3b3be9d85b5750ed0f2947fd78fac7 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 8 Nov 
2018 04:08:42 -0500 Subject: bpf: let verifier to calculate and record max_pkt_offset In check_packet_access, update max_pkt_offset after the offset has passed __check_packet_access. It should be safe to use u32 for max_pkt_offset as explained in code comment. Also, when there is tail call, the max_pkt_offset of the called program is unknown, so conservatively set max_pkt_offset to MAX_PACKET_OFF for such case. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 + kernel/bpf/verifier.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 33014ae73103..b6a296e01f6a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -293,6 +293,7 @@ struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; u32 max_ctx_offset; + u32 max_pkt_offset; u32 stack_depth; u32 id; u32 func_cnt; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1971ca325fb4..75dab40b19a3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1455,6 +1455,17 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, verbose(env, "R%d offset is outside of the packet\n", regno); return err; } + + /* __check_packet_access has made sure "off + size - 1" is within u16. + * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff, + * otherwise find_good_pkt_pointers would have refused to set range info + * that __check_packet_access would have rejected this pkt access. + * Therefore, "off + reg->umax_value + size - 1" won't overflow u32. 
+ */ + env->prog->aux->max_pkt_offset = + max_t(u32, env->prog->aux->max_pkt_offset, + off + reg->umax_value + size - 1); + return err; } @@ -6138,6 +6149,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) */ prog->cb_access = 1; env->prog->aux->stack_depth = MAX_BPF_STACK; + env->prog->aux->max_pkt_offset = MAX_PACKET_OFF; /* mark bpf_tail_call as different opcode to avoid * conditional branch in the interpeter for every normal -- cgit v1.2.3 From 801f87469ee8d97af5997ef52188bb0e1908b110 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Aug 2018 10:48:13 +0200 Subject: netlink: add nl_set_extack_cookie_u64() Add a helper function nl_set_extack_cookie_u64() to use a u64 as the netlink extended ACK cookie, to avoid having to open-code it in any users of the cookie. A u64 should be sufficient for most subsystems though we allow for up to 20 bytes right now. This also matches the cookies in nl80211 where I intend to use this. Signed-off-by: Johannes Berg Acked-by: David S. 
Miller Signed-off-by: Johannes Berg --- include/linux/netlink.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 4da90a6ab536..0b83dbae0a57 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -110,6 +110,15 @@ struct netlink_ext_ack { } \ } while (0) +static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, + u64 cookie) +{ + u64 __cookie = cookie; + + memcpy(extack->cookie, &__cookie, sizeof(__cookie)); + extack->cookie_len = sizeof(__cookie); +} + extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); -- cgit v1.2.3 From dbdaee7aa6e61f56aac61b71a7807e76f92cc895 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 25 Oct 2018 15:48:53 -0400 Subject: {nl,mac}80211: report gate connectivity in station info Capture the current state of gate connectivity from the mesh formation field in mesh config whenever we receive a beacon, and report that via GET_STATION. This allows applications doing mesh peering in userspace to make peering decisions based on peers' current upstream connectivity. 
Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ include/net/cfg80211.h | 3 +++ include/uapi/linux/nl80211.h | 3 +++ net/mac80211/mesh_plink.c | 3 +++ net/mac80211/sta_info.c | 4 +++- net/mac80211/sta_info.h | 2 ++ net/wireless/nl80211.c | 1 + 7 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 0ef67f837ae1..407d6fd66fa9 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -812,6 +812,8 @@ enum mesh_config_capab_flags { IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL = 0x40, }; +#define IEEE80211_MESHCONF_FORM_CONNECTED_TO_GATE 0x1 + /** * mesh channel switch parameters element's flag indicator * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c21c5c70a2fd..24d2db8e082d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1296,6 +1296,7 @@ struct cfg80211_tid_stats { * @rx_beacon: number of beacons received from this peer * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received * from this peer + * @connected_to_gate: true if mesh STA has a path to mesh gate * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. @@ -1350,6 +1351,8 @@ struct station_info { u64 rx_beacon; u64 rx_duration; u8 rx_beacon_signal_avg; + u8 connected_to_gate; + struct cfg80211_tid_stats *pertid; s8 ack_signal; s8 avg_ack_signal; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e45b88925783..ff6005edf32f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3116,6 +3116,8 @@ enum nl80211_sta_bss_param { * with an FCS error (u32, from this station). This count may not include * some packets with an FCS error due to TA corruption. 
Hence this counter * might not be fully accurate. + * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a + * mesh gate * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3158,6 +3160,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_ACK_SIGNAL_AVG, NL80211_STA_INFO_RX_MPDUS, NL80211_STA_INFO_FCS_ERROR_COUNT, + NL80211_STA_INFO_CONNECTED_TO_GATE, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 5b5b0f95ffd1..5f45a2b273df 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -590,6 +590,9 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, if (!sta) goto out; + sta->mesh->connected_to_gate = elems->mesh_config->meshconf_form & + IEEE80211_MESHCONF_FORM_CONNECTED_TO_GATE; + if (mesh_peer_accepts_plinks(elems) && sta->mesh->plink_state == NL80211_PLINK_LISTEN && sdata->u.mesh.accepting_plinks && diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 11b7ae691db0..c4a8f115ed33 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2264,7 +2264,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, BIT_ULL(NL80211_STA_INFO_PLINK_STATE) | BIT_ULL(NL80211_STA_INFO_LOCAL_PM) | BIT_ULL(NL80211_STA_INFO_PEER_PM) | - BIT_ULL(NL80211_STA_INFO_NONPEER_PM); + BIT_ULL(NL80211_STA_INFO_NONPEER_PM) | + BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE); sinfo->llid = sta->mesh->llid; sinfo->plid = sta->mesh->plid; @@ -2276,6 +2277,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->local_pm = sta->mesh->local_pm; sinfo->peer_pm = sta->mesh->peer_pm; sinfo->nonpeer_pm = sta->mesh->nonpeer_pm; + sinfo->connected_to_gate = sta->mesh->connected_to_gate; #endif } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 9a04327d71d1..8eb29041be54 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -364,6 
+364,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8) * @nonpeer_pm: STA power save mode towards non-peer neighbors * @processed_beacon: set to true after peer rates and capabilities are * processed + * @connected_to_gate: true if mesh STA has a path to a mesh gate * @fail_avg: moving percentage of failed MSDUs */ struct mesh_sta { @@ -381,6 +382,7 @@ struct mesh_sta { u8 plink_retries; bool processed_beacon; + bool connected_to_gate; enum nl80211_plink_state plink_state; u32 plink_timeout; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5e7178954d61..f231059242cc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4883,6 +4883,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(LOCAL_PM, local_pm, u32); PUT_SINFO(PEER_PM, peer_pm, u32); PUT_SINFO(NONPEER_PM, nonpeer_pm, u32); + PUT_SINFO(CONNECTED_TO_GATE, connected_to_gate, u8); if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); -- cgit v1.2.3 From 347a28b586802d09604a149c1a1f6de5dccbe6fa Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 30 Oct 2018 12:35:45 +0100 Subject: writeback: don't decrement wb->refcnt if !wb->bdi This happened while running in qemu-system-aarch64, the AMBA PL011 UART driver when enabling CONFIG_DEBUG_TEST_DRIVER_REMOVE. arch_initcall(pl011_init) came before subsys_initcall(default_bdi_init), devtmpfs' handle_remove() crashes because the reference count is a NULL pointer only because wb->bdi hasn't been initialized yet. Rework so that wb_put have an extra check if wb->bdi before decrement wb->refcnt and also add a WARN_ON_ONCE to get a warning if it happens again in other drivers. 
Fixes: 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Anders Roxell Signed-off-by: Greg Kroah-Hartman --- include/linux/backing-dev-defs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 9a6bc0951cfa..c31157135598 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -258,6 +258,14 @@ static inline void wb_get(struct bdi_writeback *wb) */ static inline void wb_put(struct bdi_writeback *wb) { + if (WARN_ON_ONCE(!wb->bdi)) { + /* + * A driver bug might cause a file to be removed before bdi was + * initialized. + */ + return; + } + if (wb != &wb->bdi->wb) percpu_ref_put(&wb->refcnt); } -- cgit v1.2.3 From d6e1935819db0c91ce4a5af82466f3ab50d17346 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 30 Oct 2018 15:11:04 -0700 Subject: serial: core: Allow processing sysrq at port unlock time Right now serial drivers process sysrq keys deep in their character receiving code. This means that they've already grabbed their port->lock spinlock. This can end up getting in the way if we've got to do serial stuff (especially kgdb) in response to the sysrq. Serial drivers have various hacks in them to handle this. Looking at '8250_port.c' you can see that the console_write() skips locking if we're in the sysrq handler. Looking at 'msm_serial.c' you can see that the port lock is dropped around uart_handle_sysrq_char(). It turns out that these hacks aren't exactly perfect. If you have lockdep turned on and use something like the 8250_port hack you'll get a splat that looks like: WARNING: possible circular locking dependency detected [...] is trying to acquire lock: ... (console_owner){-.-.}, at: console_unlock+0x2e0/0x5e4 but task is already holding lock: ...
(&port_lock_key){-.-.}, at: serial8250_handle_irq+0x30/0xe4 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&port_lock_key){-.-.}: _raw_spin_lock_irqsave+0x58/0x70 serial8250_console_write+0xa8/0x250 univ8250_console_write+0x40/0x4c console_unlock+0x528/0x5e4 register_console+0x2c4/0x3b0 uart_add_one_port+0x350/0x478 serial8250_register_8250_port+0x350/0x3a8 dw8250_probe+0x67c/0x754 platform_drv_probe+0x58/0xa4 really_probe+0x150/0x294 driver_probe_device+0xac/0xe8 __driver_attach+0x98/0xd0 bus_for_each_dev+0x84/0xc8 driver_attach+0x2c/0x34 bus_add_driver+0xf0/0x1ec driver_register+0xb4/0x100 __platform_driver_register+0x60/0x6c dw8250_platform_driver_init+0x20/0x28 ... -> #0 (console_owner){-.-.}: lock_acquire+0x1e8/0x214 console_unlock+0x35c/0x5e4 vprintk_emit+0x230/0x274 vprintk_default+0x7c/0x84 vprintk_func+0x190/0x1bc printk+0x80/0xa0 __handle_sysrq+0x104/0x21c handle_sysrq+0x30/0x3c serial8250_read_char+0x15c/0x18c serial8250_rx_chars+0x34/0x74 serial8250_handle_irq+0x9c/0xe4 dw8250_handle_irq+0x98/0xcc serial8250_interrupt+0x50/0xe8 ... other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&port_lock_key); lock(console_owner); lock(&port_lock_key); lock(console_owner); *** DEADLOCK *** The hack used in 'msm_serial.c' doesn't cause the above splats but it seems a bit ugly to unlock / lock our spinlock deep in our irq handler. It seems like we could defer processing the sysrq until the end of the interrupt handler right after we've unlocked the port. With this scheme if a whole batch of sysrq characters comes in one irq then we won't handle them all, but that seems like it should be a fine compromise. 
Signed-off-by: Douglas Anderson Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 047fa67d039b..78de9d929762 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -175,6 +175,7 @@ struct uart_port { struct console *cons; /* struct console, if any */ #if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(SUPPORT_SYSRQ) unsigned long sysrq; /* sysrq timeout */ + unsigned int sysrq_ch; /* char for sysrq */ #endif /* flags must be updated while holding port mutex */ @@ -485,8 +486,42 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) } return 0; } +static inline int +uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +{ + if (port->sysrq) { + if (ch && time_before(jiffies, port->sysrq)) { + port->sysrq_ch = ch; + port->sysrq = 0; + return 1; + } + port->sysrq = 0; + } + return 0; +} +static inline void +uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + int sysrq_ch; + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, irqflags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} #else -#define uart_handle_sysrq_char(port,ch) ({ (void)port; 0; }) +static inline int +uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } +static inline int +uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } +static inline void +uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + spin_unlock_irqrestore(&port->lock, irqflags); +} #endif /* -- cgit v1.2.3 From 3e6f88068314ffdba61a19f48ab0118f50424348 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 30 Oct 2018 15:11:06 -0700 Subject: serial: core: Include console.h from serial_core.h In the static inline function 
uart_handle_break() in serial_core.h we dereference port->cons. That gives an error unless console.h is also included. This error hasn't shown up till now because everyone who has defined SUPPORT_SYSRQ has also included console.h, but it's a bit ugly to make this requirement. Let's make the include explicit. Signed-off-by: Douglas Anderson Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 78de9d929762..5fe2b037e833 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -22,6 +22,7 @@ #include <linux/bitops.h> #include <linux/compiler.h> +#include <linux/console.h> #include <linux/interrupt.h> #include <linux/circ_buf.h> #include <linux/spinlock.h> -- cgit v1.2.3 From 9d037ad707ed6069fbea4e38e6ee37e027b13f1d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 19:37:44 +0100 Subject: block: remove req->timeout_list Unused now that the legacy request path is gone. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/blk-mq.c | 1 - block/blk-timeout.c | 12 ------------ block/blk.h | 2 -- include/linux/blkdev.h | 2 -- 5 files changed, 18 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 3daab9df24e0..fdc0ad2686c4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -144,7 +144,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq) memset(rq, 0, sizeof(*rq)); INIT_LIST_HEAD(&rq->queuelist); - INIT_LIST_HEAD(&rq->timeout_list); rq->q = q; rq->__sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); diff --git a/block/blk-mq.c b/block/blk-mq.c index 4880e13e2394..411be60d0cb6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -327,7 +327,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->extra_len = 0; rq->__deadline = 0; - INIT_LIST_HEAD(&rq->timeout_list); rq->timeout = 0; rq->end_io = NULL; diff --git a/block/blk-timeout.c b/block/blk-timeout.c index
6428d458072a..006cff4390c0 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -68,16 +68,6 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr, #endif /* CONFIG_FAIL_IO_TIMEOUT */ -/* - * blk_delete_timer - Delete/cancel timer for a given function. - * @req: request that we are canceling timer for - * - */ -void blk_delete_timer(struct request *req) -{ - list_del_init(&req->timeout_list); -} - /** * blk_abort_request -- Request request recovery for the specified command * @req: pointer to the request of interest @@ -123,8 +113,6 @@ void blk_add_timer(struct request *req) struct request_queue *q = req->q; unsigned long expiry; - BUG_ON(!list_empty(&req->timeout_list)); - /* * Some LLDs, like scsi, peek at the timeout to prevent a * command from being retried forever. diff --git a/block/blk.h b/block/blk.h index 78ae94886acf..41b64e6e101b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -222,8 +222,6 @@ static inline bool bio_integrity_endio(struct bio *bio) unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); -void blk_delete_timer(struct request *); - bool bio_attempt_front_merge(struct request_queue *q, struct request *req, struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9b1f470cc784..dc2a6f625ecb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -228,8 +228,6 @@ struct request { /* access through blk_rq_set_deadline, blk_rq_deadline */ unsigned long __deadline; - struct list_head timeout_list; - union { struct __call_single_data csd; u64 fifo_time; -- cgit v1.2.3 From 4c96499c39e31b5a12f37c2396a5f81d1b6be1ab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 19:39:03 +0100 Subject: USB: remove the unused struct hcd_timeout definition No users of this type anywhere in the tree. 
Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 97e2ddec18b1..7dc3a411bece 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -235,11 +235,6 @@ static inline struct usb_hcd *bus_to_hcd(struct usb_bus *bus) return container_of(bus, struct usb_hcd, self); } -struct hcd_timeout { /* timeouts we allocate */ - struct list_head timeout_list; - struct timer_list timer; -}; - /*-------------------------------------------------------------------------*/ -- cgit v1.2.3 From 1ae367a2451e0b249074461d2d8ac76d8e929a53 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 6 Nov 2018 18:07:37 -0600 Subject: of/pdt: Remove unused of_pdt_build_more function ptr There are no users of of_pdt_build_more since 2012, so remove it. Cc: Frank Rowand Signed-off-by: Rob Herring --- drivers/of/pdt.c | 5 ----- include/linux/of_pdt.h | 2 -- 2 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c index 013e65de074a..4fc0fd96ed04 100644 --- a/drivers/of/pdt.c +++ b/drivers/of/pdt.c @@ -21,8 +21,6 @@ static struct of_pdt_ops *of_pdt_prom_ops __initdata; -void __initdata (*of_pdt_build_more)(struct device_node *dp); - #if defined(CONFIG_SPARC) unsigned int of_pdt_unique_id __initdata; @@ -208,9 +206,6 @@ static struct device_node * __init of_pdt_build_tree(struct device_node *parent, dp->child = of_pdt_build_tree(dp, of_pdt_prom_ops->getchild(node)); - if (of_pdt_build_more) - of_pdt_build_more(dp); - node = of_pdt_prom_ops->getsibling(node); } diff --git a/include/linux/of_pdt.h b/include/linux/of_pdt.h index d0b183ab65c6..89e4eb076a01 100644 --- a/include/linux/of_pdt.h +++ b/include/linux/of_pdt.h @@ -35,6 +35,4 @@ extern void *prom_early_alloc(unsigned long size); /* for building the device tree */ extern void 
of_pdt_build_devicetree(phandle root_node, struct of_pdt_ops *ops); -extern void (*of_pdt_build_more)(struct device_node *dp); - #endif /* _LINUX_OF_PDT_H */ -- cgit v1.2.3 From 86131d933f9a9502d877fb37b90a856e6a8a7ed8 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 5 Nov 2018 15:39:07 +0800 Subject: power: supply: core: Add one field to present the battery internal resistance Add one field for 'struct power_supply_battery_info' to present the battery factory internal resistance. Signed-off-by: Baolin Wang Reviewed-by: Linus Walleij Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 3 +++ include/linux/power_supply.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index e85361878450..307e0995ca3c 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -579,6 +579,7 @@ int power_supply_get_battery_info(struct power_supply *psy, info->charge_term_current_ua = -EINVAL; info->constant_charge_current_max_ua = -EINVAL; info->constant_charge_voltage_max_uv = -EINVAL; + info->factory_internal_resistance_uohm = -EINVAL; if (!psy->of_node) { dev_warn(&psy->dev, "%s currently only supports devicetree\n", @@ -616,6 +617,8 @@ int power_supply_get_battery_info(struct power_supply *psy, &info->constant_charge_current_max_ua); of_property_read_u32(battery_np, "constant_charge_voltage_max_microvolt", &info->constant_charge_voltage_max_uv); + of_property_read_u32(battery_np, "factory-internal-resistance-micro-ohms", + &info->factory_internal_resistance_uohm); return 0; } diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f80769175c56..d089566828be 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -326,6 +326,7 @@ struct power_supply_battery_info { int charge_term_current_ua; /* microAmps */ int constant_charge_current_max_ua; /* 
microAmps */ int constant_charge_voltage_max_uv; /* microVolts */ + int factory_internal_resistance_uohm; /* microOhms */ }; extern struct atomic_notifier_head power_supply_notifier; -- cgit v1.2.3 From 3afb50d7125bcdbf71df843134e96ceffc78c8b8 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 5 Nov 2018 15:39:09 +0800 Subject: power: supply: core: Add some helpers to use the battery OCV capacity table We have introduced some battery properties to present the OCV table temperatures and OCV capacity table values. Thus this patch add OCV temperature and OCV table for battery information, as well as providing some helper functions to use the OCV capacity table for users. Signed-off-by: Baolin Wang Reviewed-by: Linus Walleij Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 137 ++++++++++++++++++++++++++++++- include/linux/power_supply.h | 19 +++++ 2 files changed, 155 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 307e0995ca3c..93007cb202f0 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -570,7 +570,7 @@ int power_supply_get_battery_info(struct power_supply *psy, { struct device_node *battery_np; const char *value; - int err; + int err, len, index; info->energy_full_design_uwh = -EINVAL; info->charge_full_design_uah = -EINVAL; @@ -581,6 +581,12 @@ int power_supply_get_battery_info(struct power_supply *psy, info->constant_charge_voltage_max_uv = -EINVAL; info->factory_internal_resistance_uohm = -EINVAL; + for (index = 0; index < POWER_SUPPLY_OCV_TEMP_MAX; index++) { + info->ocv_table[index] = NULL; + info->ocv_temp[index] = -EINVAL; + info->ocv_table_size[index] = -EINVAL; + } + if (!psy->of_node) { dev_warn(&psy->dev, "%s currently only supports devicetree\n", __func__); @@ -620,10 +626,139 @@ int power_supply_get_battery_info(struct power_supply *psy, 
of_property_read_u32(battery_np, "factory-internal-resistance-micro-ohms", &info->factory_internal_resistance_uohm); + len = of_property_count_u32_elems(battery_np, "ocv-capacity-celsius"); + if (len < 0 && len != -EINVAL) { + return len; + } else if (len > POWER_SUPPLY_OCV_TEMP_MAX) { + dev_err(&psy->dev, "Too many temperature values\n"); + return -EINVAL; + } else if (len > 0) { + of_property_read_u32_array(battery_np, "ocv-capacity-celsius", + info->ocv_temp, len); + } + + for (index = 0; index < len; index++) { + struct power_supply_battery_ocv_table *table; + char *propname; + const __be32 *list; + int i, tab_len, size; + + propname = kasprintf(GFP_KERNEL, "ocv-capacity-table-%d", index); + list = of_get_property(battery_np, propname, &size); + if (!list || !size) { + dev_err(&psy->dev, "failed to get %s\n", propname); + kfree(propname); + power_supply_put_battery_info(psy, info); + return -EINVAL; + } + + kfree(propname); + tab_len = size / (2 * sizeof(__be32)); + info->ocv_table_size[index] = tab_len; + + table = info->ocv_table[index] = + devm_kcalloc(&psy->dev, tab_len, sizeof(*table), GFP_KERNEL); + if (!info->ocv_table[index]) { + power_supply_put_battery_info(psy, info); + return -ENOMEM; + } + + for (i = 0; i < tab_len; i++) { + table[i].ocv = be32_to_cpu(*list++); + table[i].capacity = be32_to_cpu(*list++); + } + } + return 0; } EXPORT_SYMBOL_GPL(power_supply_get_battery_info); +void power_supply_put_battery_info(struct power_supply *psy, + struct power_supply_battery_info *info) +{ + int i; + + for (i = 0; i < POWER_SUPPLY_OCV_TEMP_MAX; i++) { + if (info->ocv_table[i]) + devm_kfree(&psy->dev, info->ocv_table[i]); + } +} +EXPORT_SYMBOL_GPL(power_supply_put_battery_info); + +/** + * power_supply_ocv2cap_simple() - find the battery capacity + * @table: Pointer to battery OCV lookup table + * @table_len: OCV table length + * @ocv: Current OCV value + * + * This helper function is used to look up battery capacity according to + * current OCV value from 
one OCV table, and the OCV table must be ordered + * descending. + * + * Return: the battery capacity. + */ +int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, + int table_len, int ocv) +{ + int i, cap, tmp; + + for (i = 0; i < table_len; i++) + if (ocv > table[i].ocv) + break; + + if (i > 0 && i < table_len) { + tmp = (table[i - 1].capacity - table[i].capacity) * + (ocv - table[i].ocv); + tmp /= table[i - 1].ocv - table[i].ocv; + cap = tmp + table[i].capacity; + } else if (i == 0) { + cap = table[0].capacity; + } else { + cap = table[table_len - 1].capacity; + } + + return cap; +} +EXPORT_SYMBOL_GPL(power_supply_ocv2cap_simple); + +struct power_supply_battery_ocv_table * +power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, + int temp, int *table_len) +{ + int best_temp_diff = INT_MAX, temp_diff; + u8 i, best_index = 0; + + if (!info->ocv_table[0]) + return NULL; + + for (i = 0; i < POWER_SUPPLY_OCV_TEMP_MAX; i++) { + temp_diff = abs(info->ocv_temp[i] - temp); + + if (temp_diff < best_temp_diff) { + best_temp_diff = temp_diff; + best_index = i; + } + } + + *table_len = info->ocv_table_size[best_index]; + return info->ocv_table[best_index]; +} +EXPORT_SYMBOL_GPL(power_supply_find_ocv2cap_table); + +int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, + int ocv, int temp) +{ + struct power_supply_battery_ocv_table *table; + int table_len; + + table = power_supply_find_ocv2cap_table(info, temp, &table_len); + if (!table) + return -EINVAL; + + return power_supply_ocv2cap_simple(table, table_len, ocv); +} +EXPORT_SYMBOL_GPL(power_supply_batinfo_ocv2cap); + int power_supply_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index d089566828be..84fe93f674a0 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -309,6 +309,13 @@ struct power_supply_info { 
int use_for_apm; }; +struct power_supply_battery_ocv_table { + int ocv; /* microVolts */ + int capacity; /* percent */ +}; + +#define POWER_SUPPLY_OCV_TEMP_MAX 20 + /* * This is the recommended struct to manage static battery parameters, * populated by power_supply_get_battery_info(). Most platform drivers should @@ -327,6 +334,9 @@ struct power_supply_battery_info { int constant_charge_current_max_ua; /* microAmps */ int constant_charge_voltage_max_uv; /* microVolts */ int factory_internal_resistance_uohm; /* microOhms */ + int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX];/* celsius */ + struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; + int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; }; extern struct atomic_notifier_head power_supply_notifier; @@ -350,6 +360,15 @@ devm_power_supply_get_by_phandle(struct device *dev, const char *property) extern int power_supply_get_battery_info(struct power_supply *psy, struct power_supply_battery_info *info); +extern void power_supply_put_battery_info(struct power_supply *psy, + struct power_supply_battery_info *info); +extern int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, + int table_len, int ocv); +extern struct power_supply_battery_ocv_table * +power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, + int temp, int *table_len); +extern int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, + int ocv, int temp); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); extern int power_supply_set_input_current_limit_from_supplier( -- cgit v1.2.3 From 535ac5d3fe63b9ea1dda379f606f9d0d377d7184 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 14:42:35 +0100 Subject: ide: cleanup ->prep_rq calling convention The return value is just used as a binary yes/no decision, so switch it to a bool instead of the old BLKPREP_* values returned as an int. 
Also clean up a few related comments. Reviewed-by: Bart Van Assche Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/ide/ide-cd.c | 22 +++++++++++----------- drivers/ide/ide-disk.c | 8 ++++---- drivers/ide/ide-io.c | 4 ++-- include/linux/ide.h | 2 +- 4 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 4ecaf2ace4cb..69c1aede5f93 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -527,8 +527,8 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) return false; } -/* standard prep_rq_fn that builds 10 byte cmds */ -static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) +/* standard prep_rq that builds 10 byte cmds */ +static bool ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) { int hard_sect = queue_logical_block_size(q); long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); @@ -554,14 +554,14 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) req->cmd[7] = (blocks >> 8) & 0xff; req->cmd[8] = blocks & 0xff; req->cmd_len = 10; - return BLKPREP_OK; + return true; } /* * Most of the SCSI commands are supported directly by ATAPI devices. * This transform handles the few exceptions. 
*/ -static int ide_cdrom_prep_pc(struct request *rq) +static bool ide_cdrom_prep_pc(struct request *rq) { u8 *c = scsi_req(rq)->cmd; @@ -575,7 +575,7 @@ static int ide_cdrom_prep_pc(struct request *rq) c[1] &= 0xe0; c[0] += (READ_10 - READ_6); scsi_req(rq)->cmd_len = 10; - return BLKPREP_OK; + return true; } /* @@ -585,13 +585,13 @@ static int ide_cdrom_prep_pc(struct request *rq) */ if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) { scsi_req(rq)->result = ILLEGAL_REQUEST; - return BLKPREP_KILL; + return false; } - return BLKPREP_OK; + return true; } -static int ide_cdrom_prep_fn(ide_drive_t *drive, struct request *rq) +static bool ide_cdrom_prep_rq(ide_drive_t *drive, struct request *rq) { if (!blk_rq_is_passthrough(rq)) { scsi_req_init(scsi_req(rq)); @@ -600,7 +600,7 @@ static int ide_cdrom_prep_fn(ide_drive_t *drive, struct request *rq) } else if (blk_rq_is_scsi(rq)) return ide_cdrom_prep_pc(rq); - return 0; + return true; } static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) @@ -818,7 +818,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) * We may be retrying this request after an error. Fix up any * weirdness which might be present in the request packet. 
*/ - ide_cdrom_prep_fn(drive, rq); + ide_cdrom_prep_rq(drive, rq); } /* fs requests *must* be hardware frame aligned */ @@ -1521,7 +1521,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) ide_debug_log(IDE_DBG_PROBE, "enter"); - drive->prep_rq = ide_cdrom_prep_fn; + drive->prep_rq = ide_cdrom_prep_rq; blk_queue_dma_alignment(q, 31); blk_queue_update_dma_pad(q, 15); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index f8567c8c9dd1..724db9af0d82 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -427,12 +427,12 @@ static void ide_disk_unlock_native_capacity(ide_drive_t *drive) drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */ } -static int idedisk_prep_fn(ide_drive_t *drive, struct request *rq) +static bool idedisk_prep_rq(ide_drive_t *drive, struct request *rq) { struct ide_cmd *cmd; if (req_op(rq) != REQ_OP_FLUSH) - return BLKPREP_OK; + return true; if (rq->special) { cmd = rq->special; @@ -458,7 +458,7 @@ static int idedisk_prep_fn(ide_drive_t *drive, struct request *rq) rq->special = cmd; cmd->rq = rq; - return BLKPREP_OK; + return true; } ide_devset_get(multcount, mult_count); @@ -547,7 +547,7 @@ static void update_flush(ide_drive_t *drive) if (barrier) { wc = true; - drive->prep_rq = idedisk_prep_fn; + drive->prep_rq = idedisk_prep_rq; } } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 5093c605c91c..64e72640acf8 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -326,7 +326,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) goto kill_rq; } - if (drive->prep_rq && drive->prep_rq(drive, rq)) + if (drive->prep_rq && !drive->prep_rq(drive, rq)) return ide_stopped; if (ata_pm_request(rq)) @@ -508,7 +508,7 @@ repeat: /* * we know that the queue isn't empty, but this can happen - * if the q->prep_rq_fn() decides to kill a request + * if ->prep_rq() decides to kill a request */ if (!rq) { rq = bd->rq; diff --git a/include/linux/ide.h b/include/linux/ide.h index 
079f8bc0b0f4..272704ff21ee 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -529,7 +529,7 @@ struct ide_drive_s { struct request_queue *queue; /* request queue */ - int (*prep_rq)(struct ide_drive_s *, struct request *); + bool (*prep_rq)(struct ide_drive_s *, struct request *); struct blk_mq_tag_set tag_set; -- cgit v1.2.3 From 0e17e06cbf7ede285ab74bab44d888b40c21f828 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 14:42:41 +0100 Subject: block: remove the BLKPREP_* values. Unused now. Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dc2a6f625ecb..e67ad2dd025e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -776,16 +776,6 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) return q->nr_requests; } -/* - * q->prep_rq_fn return values - */ -enum { - BLKPREP_OK, /* serve it */ - BLKPREP_KILL, /* fatal error, kill, return -EIO */ - BLKPREP_DEFER, /* leave on queue */ - BLKPREP_INVALID, /* invalid command, kill, return -EREMOTEIO */ -}; - extern unsigned long blk_max_low_pfn, blk_max_pfn; /* -- cgit v1.2.3 From 361800876f80da3915c46e388fc682532228b2c3 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 9 Nov 2018 11:14:44 +0100 Subject: ptp: add PTP_SYS_OFFSET_EXTENDED ioctl The PTP_SYS_OFFSET ioctl, which can be used to measure the offset between a PHC and the system clock, includes the total time that the driver needs to read the PHC timestamp. This typically involves reading of multiple PCI registers (sometimes in multiple iterations) and the register that contains the lowest bits of the timestamp is not read in the middle between the two readings of the system clock. This asymmetry causes the measured offset to have a significant error. 
Introduce a new ioctl, driver function, and helper functions, which allow the reading of the lowest register to be isolated from the other readings in order to reduce the asymmetry. The ioctl returns three timestamps for each measurement: - system time right before reading the lowest bits of the PHC timestamp - PHC time - system time immediately after reading the lowest bits of the PHC timestamp Cc: Richard Cochran Cc: Jacob Keller Cc: Marcelo Tosatti Signed-off-by: Miroslav Lichvar Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 33 +++++++++++++++++++++++++++++++++ include/linux/ptp_clock_kernel.h | 31 +++++++++++++++++++++++++++++++ include/uapi/linux/ptp_clock.h | 12 ++++++++++++ 3 files changed, 76 insertions(+) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 3c681bed5703..aad0d36cf5c0 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -122,10 +122,12 @@ int ptp_open(struct posix_clock *pc, fmode_t fmode) long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_sys_offset_extended *extoff = NULL; struct ptp_sys_offset_precise precise_offset; struct system_device_crosststamp xtstamp; struct ptp_clock_info *ops = ptp->info; struct ptp_sys_offset *sysoff = NULL; + struct ptp_system_timestamp sts; struct ptp_clock_request req; struct ptp_clock_caps caps; struct ptp_clock_time *pct; @@ -211,6 +213,36 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EFAULT; break; + case PTP_SYS_OFFSET_EXTENDED: + if (!ptp->info->gettimex64) { + err = -EOPNOTSUPP; + break; + } + extoff = memdup_user((void __user *)arg, sizeof(*extoff)); + if (IS_ERR(extoff)) { + err = PTR_ERR(extoff); + extoff = NULL; + break; + } + if (extoff->n_samples > PTP_MAX_SAMPLES) { + err = -EINVAL; + break; + } + for (i = 0; i < extoff->n_samples; i++) { + err = 
ptp->info->gettimex64(ptp->info, &ts, &sts); + if (err) + goto out; + extoff->ts[i][0].sec = sts.pre_ts.tv_sec; + extoff->ts[i][0].nsec = sts.pre_ts.tv_nsec; + extoff->ts[i][1].sec = ts.tv_sec; + extoff->ts[i][1].nsec = ts.tv_nsec; + extoff->ts[i][2].sec = sts.post_ts.tv_sec; + extoff->ts[i][2].nsec = sts.post_ts.tv_nsec; + } + if (copy_to_user((void __user *)arg, extoff, sizeof(*extoff))) + err = -EFAULT; + break; + case PTP_SYS_OFFSET: sysoff = memdup_user((void __user *)arg, sizeof(*sysoff)); if (IS_ERR(sysoff)) { @@ -284,6 +316,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) } out: + kfree(extoff); kfree(sysoff); return err; } diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 51349d124ee5..a1ec0448e341 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -39,6 +39,15 @@ struct ptp_clock_request { }; struct system_device_crosststamp; + +/** + * struct ptp_system_timestamp - system time corresponding to a PHC timestamp + */ +struct ptp_system_timestamp { + struct timespec64 pre_ts; + struct timespec64 post_ts; +}; + /** * struct ptp_clock_info - decribes a PTP hardware clock * @@ -75,6 +84,14 @@ struct system_device_crosststamp; * @gettime64: Reads the current time from the hardware clock. * parameter ts: Holds the result. * + * @gettimex64: Reads the current time from the hardware clock and optionally + * also the system clock. + * parameter ts: Holds the PHC timestamp. + * parameter sts: If not NULL, it holds a pair of timestamps from + * the system clock. The first reading is made right before + * reading the lowest bits of the PHC timestamp and the second + * reading immediately follows that. + * * @getcrosststamp: Reads the current time from the hardware clock and * system clock simultaneously. 
* parameter cts: Contains timestamp (device,system) pair, @@ -124,6 +141,8 @@ struct ptp_clock_info { int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); + int (*gettimex64)(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts); int (*getcrosststamp)(struct ptp_clock_info *ptp, struct system_device_crosststamp *cts); int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); @@ -247,4 +266,16 @@ static inline int ptp_schedule_worker(struct ptp_clock *ptp, #endif +static inline void ptp_read_system_prets(struct ptp_system_timestamp *sts) +{ + if (sts) + ktime_get_real_ts64(&sts->pre_ts); +} + +static inline void ptp_read_system_postts(struct ptp_system_timestamp *sts) +{ + if (sts) + ktime_get_real_ts64(&sts->post_ts); +} + #endif diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 3039bf6a742e..d73d83950265 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -84,6 +84,16 @@ struct ptp_sys_offset { struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; }; +struct ptp_sys_offset_extended { + unsigned int n_samples; /* Desired number of measurements. */ + unsigned int rsv[3]; /* Reserved for future use. */ + /* + * Array of [system, phc, system] time stamps. The kernel will provide + * 3*n_samples time stamps. + */ + struct ptp_clock_time ts[PTP_MAX_SAMPLES][3]; +}; + struct ptp_sys_offset_precise { struct ptp_clock_time device; struct ptp_clock_time sys_realtime; @@ -136,6 +146,8 @@ struct ptp_pin_desc { #define PTP_PIN_SETFUNC _IOW(PTP_CLK_MAGIC, 7, struct ptp_pin_desc) #define PTP_SYS_OFFSET_PRECISE \ _IOWR(PTP_CLK_MAGIC, 8, struct ptp_sys_offset_precise) +#define PTP_SYS_OFFSET_EXTENDED \ + _IOW(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured. 
*/ -- cgit v1.2.3 From 916444df305ef5b8a7d824aac7dd2aeba3a4db3b Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 9 Nov 2018 11:14:45 +0100 Subject: ptp: deprecate gettime64() in favor of gettimex64() When a driver provides gettimex64(), use it in the PTP_SYS_OFFSET ioctl and POSIX clock's gettime() instead of gettime64(). Drivers should provide only one of the functions. Cc: Richard Cochran Cc: Jacob Keller Signed-off-by: Miroslav Lichvar Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 5 ++++- drivers/ptp/ptp_clock.c | 5 ++++- include/linux/ptp_clock_kernel.h | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index aad0d36cf5c0..797fab33bb98 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -260,7 +260,10 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) pct->sec = ts.tv_sec; pct->nsec = ts.tv_nsec; pct++; - err = ptp->info->gettime64(ptp->info, &ts); + if (ops->gettimex64) + err = ops->gettimex64(ops, &ts, NULL); + else + err = ops->gettime64(ops, &ts); if (err) goto out; pct->sec = ts.tv_sec; diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 5419a89d300e..40fda23e4b05 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -117,7 +117,10 @@ static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp) struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); int err; - err = ptp->info->gettime64(ptp->info, tp); + if (ptp->info->gettimex64) + err = ptp->info->gettimex64(ptp->info, tp, NULL); + else + err = ptp->info->gettime64(ptp->info, tp); return err; } diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index a1ec0448e341..7121bbe76979 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -82,6 +82,8 @@ struct ptp_system_timestamp { * parameter delta: Desired change 
in nanoseconds. * * @gettime64: Reads the current time from the hardware clock. + * This method is deprecated. New drivers should implement + * the @gettimex64 method instead. * parameter ts: Holds the result. * * @gettimex64: Reads the current time from the hardware clock and optionally -- cgit v1.2.3 From 695bce8fd8e994999f40ee279e2fa9979cbae87a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 9 Nov 2018 18:35:52 +0100 Subject: net: phy: improve struct phy_device member interrupts handling As a heritage from the very early days of phylib member interrupts is defined as u32 even though it's just a flag whether interrupts are enabled. So we can change it to a bitfield member. In addition change the code dealing with this member in a way that it's clear we're dealing with a bool value. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 4 ++-- include/linux/phy.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index dd5bff955128..8dac890f32bf 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -115,9 +115,9 @@ static int phy_clear_interrupt(struct phy_device *phydev) * * Returns 0 on success or < 0 on error. */ -static int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) +static int phy_config_interrupt(struct phy_device *phydev, bool interrupts) { - phydev->interrupts = interrupts; + phydev->interrupts = interrupts ? 
1 : 0; if (phydev->drv->config_intr) return phydev->drv->config_intr(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index 2090277eac4f..3299ec6e69f3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -263,8 +263,8 @@ static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); -#define PHY_INTERRUPT_DISABLED 0x0 -#define PHY_INTERRUPT_ENABLED 0x80000000 +#define PHY_INTERRUPT_DISABLED false +#define PHY_INTERRUPT_ENABLED true /* PHY state machine states: * @@ -410,6 +410,9 @@ struct phy_device { /* The most recently read link state */ unsigned link:1; + /* Interrupts are enabled */ + unsigned interrupts:1; + enum phy_state state; u32 dev_flags; @@ -425,9 +428,6 @@ struct phy_device { int pause; int asym_pause; - /* Enabled Interrupts */ - u32 interrupts; - /* Union of PHY and Attached devices' supported modes */ /* See mii.h for more info */ u32 supported; -- cgit v1.2.3 From 457937bd2e8e70d3a37eee3eaa45d86d169a6762 Mon Sep 17 00:00:00 2001 From: Kyle Roeschley Date: Fri, 9 Nov 2018 12:48:03 -0600 Subject: net: phy: leds: Don't make our own link speed names The phy core provides a handy phy_speed_to_str() helper, so use that instead of doing our own formatting of the different known link speeds. To do this, increase PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE to 11 so we can fit 'Unsupported' if necessary. Signed-off-by: Kyle Roeschley Signed-off-by: David S. 
Miller --- drivers/net/phy/phy_led_triggers.c | 15 ++------------- include/linux/phy_led_triggers.h | 2 +- 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index 491efc1bf5c4..263385b75bba 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -67,7 +67,7 @@ void phy_led_trigger_change_speed(struct phy_device *phy) EXPORT_SYMBOL_GPL(phy_led_trigger_change_speed); static void phy_led_trigger_format_name(struct phy_device *phy, char *buf, - size_t size, char *suffix) + size_t size, const char *suffix) { snprintf(buf, size, PHY_ID_FMT ":%s", phy->mdio.bus->id, phy->mdio.addr, suffix); @@ -77,20 +77,9 @@ static int phy_led_trigger_register(struct phy_device *phy, struct phy_led_trigger *plt, unsigned int speed) { - char name_suffix[PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE]; - plt->speed = speed; - - if (speed < SPEED_1000) - snprintf(name_suffix, sizeof(name_suffix), "%dMbps", speed); - else if (speed == SPEED_2500) - snprintf(name_suffix, sizeof(name_suffix), "2.5Gbps"); - else - snprintf(name_suffix, sizeof(name_suffix), "%dGbps", - DIV_ROUND_CLOSEST(speed, 1000)); - phy_led_trigger_format_name(phy, plt->name, sizeof(plt->name), - name_suffix); + phy_speed_to_str(speed)); plt->trigger.name = plt->name; return led_trigger_register(&plt->trigger); diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h index b37b05bfd1a6..4587ce362535 100644 --- a/include/linux/phy_led_triggers.h +++ b/include/linux/phy_led_triggers.h @@ -20,7 +20,7 @@ struct phy_device; #include <linux/leds.h> #include <linux/phy.h> -#define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE 10 +#define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE 11 #define PHY_LINK_LED_TRIGGER_NAME_SIZE (MII_BUS_ID_SIZE + \ FIELD_SIZEOF(struct mdio_device, addr)+\ -- cgit v1.2.3 From 22ce0a7ccf23d55d1fdaa2974002f8b5ae765665 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 10 Nov 2018 09:30:49
+0100 Subject: ide: don't use req->special Just replace it with a field of the same name in struct ide_req. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/ide/ide-atapi.c | 4 ++-- drivers/ide/ide-cd.c | 4 ++-- drivers/ide/ide-devsets.c | 4 ++-- drivers/ide/ide-disk.c | 6 +++--- drivers/ide/ide-eh.c | 2 +- drivers/ide/ide-floppy.c | 2 +- drivers/ide/ide-io.c | 14 +++++++++----- drivers/ide/ide-park.c | 4 ++-- drivers/ide/ide-pm.c | 12 ++++++------ drivers/ide/ide-tape.c | 2 +- drivers/ide/ide-taskfile.c | 2 +- include/linux/ide.h | 1 + 12 files changed, 31 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 33210bc67618..da58020a144e 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -94,7 +94,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_MISC; - rq->special = (char *)pc; + ide_req(rq)->special = pc; if (buf && bufflen) { error = blk_rq_map_kern(drive->queue, rq, buf, bufflen, @@ -244,7 +244,7 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special) return -ENOMEM; } - sense_rq->special = special; + ide_req(sense_rq)->special = special; drive->sense_rq_armed = false; drive->hwif->rq = NULL; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 69c1aede5f93..1f03884a6808 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -211,12 +211,12 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* - * For ATA_PRIV_SENSE, "rq->special" points to the original + * For ATA_PRIV_SENSE, "ide_req(rq)->special" points to the original * failed request. Also, the sense data should be read * directly from rq which might be different from the original * sense buffer if it got copied during mapping. 
*/ - struct request *failed = (struct request *)rq->special; + struct request *failed = ide_req(rq)->special; void *sense = bio_data(rq->bio); if (failed) { diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index f4f8afdf8bbe..f2f93ed40356 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -171,7 +171,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, scsi_req(rq)->cmd_len = 5; scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC; *(int *)&scsi_req(rq)->cmd[1] = arg; - rq->special = setting->set; + ide_req(rq)->special = setting->set; blk_execute_rq(q, NULL, rq, 0); ret = scsi_req(rq)->result; @@ -182,7 +182,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq) { - int err, (*setfunc)(ide_drive_t *, int) = rq->special; + int err, (*setfunc)(ide_drive_t *, int) = ide_req(rq)->special; err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]); if (err) diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 724db9af0d82..197912af5c2f 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -434,8 +434,8 @@ static bool idedisk_prep_rq(ide_drive_t *drive, struct request *rq) if (req_op(rq) != REQ_OP_FLUSH) return true; - if (rq->special) { - cmd = rq->special; + if (ide_req(rq)->special) { + cmd = ide_req(rq)->special; memset(cmd, 0, sizeof(*cmd)); } else { cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); @@ -455,7 +455,7 @@ static bool idedisk_prep_rq(ide_drive_t *drive, struct request *rq) rq->cmd_flags &= ~REQ_OP_MASK; rq->cmd_flags |= REQ_OP_DRV_OUT; ide_req(rq)->type = ATA_PRIV_TASKFILE; - rq->special = cmd; + ide_req(rq)->special = cmd; cmd->rq = rq; return true; diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 47d5f3379748..e1323e058454 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -125,7 +125,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat) 
/* retry only "normal" I/O: */ if (blk_rq_is_passthrough(rq)) { if (ata_taskfile_request(rq)) { - struct ide_cmd *cmd = rq->special; + struct ide_cmd *cmd = ide_req(rq)->special; if (cmd) ide_complete_cmd(drive, cmd, stat, err); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index a8df300f949c..780d33ccc5d8 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -276,7 +276,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, switch (ide_req(rq)->type) { case ATA_PRIV_MISC: case ATA_PRIV_SENSE: - pc = (struct ide_atapi_pc *)rq->special; + pc = (struct ide_atapi_pc *)ide_req(rq)->special; break; default: BUG(); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 64e72640acf8..94e9c79c41cf 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -111,7 +111,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err) } if (rq && ata_taskfile_request(rq)) { - struct ide_cmd *orig_cmd = rq->special; + struct ide_cmd *orig_cmd = ide_req(rq)->special; if (cmd->tf_flags & IDE_TFLAG_DYN) kfree(orig_cmd); @@ -261,7 +261,7 @@ EXPORT_SYMBOL_GPL(ide_init_sg_cmd); static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq) { - struct ide_cmd *cmd = rq->special; + struct ide_cmd *cmd = ide_req(rq)->special; if (cmd) { if (cmd->protocol == ATA_PROT_PIO) { @@ -352,7 +352,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (ata_taskfile_request(rq)) return execute_drive_cmd(drive, rq); else if (ata_pm_request(rq)) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; #ifdef DEBUG_PM printk("%s: start_power_step(step: %d)\n", drive->name, pm->pm_step); @@ -460,16 +460,20 @@ blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, ide_drive_t *drive = hctx->queue->queuedata; ide_hwif_t *hwif = drive->hwif; struct ide_host *host = hwif->host; - struct request *rq = NULL; + struct request *rq = bd->rq; 
ide_startstop_t startstop; + if (!(rq->rq_flags & RQF_DONTPREP)) { + rq->rq_flags |= RQF_DONTPREP; + ide_req(rq)->special = NULL; + } + /* HLD do_request() callback might sleep, make sure it's okay */ might_sleep(); if (ide_lock_host(host, hwif)) return BLK_STS_DEV_RESOURCE; - rq = bd->rq; blk_mq_start_request(rq); spin_lock_irq(&hwif->lock); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index de9e85cf74d1..102aa3bc3e7f 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -36,7 +36,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd[0] = REQ_PARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; - rq->special = &timeout; + ide_req(rq)->special = &timeout; blk_execute_rq(q, NULL, rq, 1); rc = scsi_req(rq)->result ? -EIO : 0; blk_put_request(rq); @@ -67,7 +67,7 @@ ide_startstop_t ide_do_park_unpark(ide_drive_t *drive, struct request *rq) memset(&cmd, 0, sizeof(cmd)); if (scsi_req(rq)->cmd[0] == REQ_PARK_HEADS) { - drive->sleep = *(unsigned long *)rq->special; + drive->sleep = *(unsigned long *)ide_req(rq)->special; drive->dev_flags |= IDE_DFLAG_SLEEPING; tf->command = ATA_CMD_IDLEIMMEDIATE; tf->feature = 0x44; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index ea10507e5190..a8c53c98252d 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -21,7 +21,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0); ide_req(rq)->type = ATA_PRIV_PM_SUSPEND; - rq->special = &rqpm; + ide_req(rq)->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; if (mesg.event == PM_EVENT_PRETHAW) mesg.event = PM_EVENT_FREEZE; @@ -82,7 +82,7 @@ int generic_ide_resume(struct device *dev) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT); ide_req(rq)->type = ATA_PRIV_PM_RESUME; - rq->special = &rqpm; + ide_req(rq)->special = &rqpm; 
rqpm.pm_step = IDE_PM_START_RESUME; rqpm.pm_state = PM_EVENT_ON; @@ -101,7 +101,7 @@ int generic_ide_resume(struct device *dev) void ide_complete_power_step(ide_drive_t *drive, struct request *rq) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; #ifdef DEBUG_PM printk(KERN_INFO "%s: complete_power_step(step: %d)\n", @@ -131,7 +131,7 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq) ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; struct ide_cmd cmd = { }; switch (pm->pm_step) { @@ -203,7 +203,7 @@ out_do_tf: void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; unsigned long flags; ide_complete_power_step(drive, rq); @@ -228,7 +228,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { - struct ide_pm_state *pm = rq->special; + struct ide_pm_state *pm = ide_req(rq)->special; if (blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_PM_SUSPEND && diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 34c1165226a4..db1a65f4b490 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -639,7 +639,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, goto out; } if (req->cmd[13] & REQ_IDETAPE_PC1) { - pc = (struct ide_atapi_pc *)rq->special; + pc = (struct ide_atapi_pc *)ide_req(rq)->special; req->cmd[13] &= ~(REQ_IDETAPE_PC1); req->cmd[13] |= REQ_IDETAPE_PC2; goto out; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index c21d5c50ae3a..17b2e379e872 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -440,7 +440,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, 
goto put_req; } - rq->special = cmd; + ide_req(rq)->special = cmd; cmd->rq = rq; blk_execute_rq(drive->queue, NULL, rq, 0); diff --git a/include/linux/ide.h b/include/linux/ide.h index 272704ff21ee..e7d29ae633cd 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -50,6 +50,7 @@ struct ide_request { struct scsi_request sreq; u8 sense[SCSI_SENSE_BUFFERSIZE]; u8 type; + void *special; }; static inline struct ide_request *ide_req(struct request *rq) -- cgit v1.2.3 From 1385d755cfb42f596ef1cf9f5c761010ff3b34e7 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:25 +0000 Subject: bpf: pass a struct with offload callbacks to bpf_offload_dev_create() For passing device functions for offloaded eBPF programs, there used to be no place where to store the pointer without making the non-offloaded programs pay a memory price. As a consequence, three functions were called with ndo_bpf() through specific commands. Now that we have struct bpf_offload_dev, and since none of those operations rely on RTNL, we can turn these three commands into hooks inside the struct bpf_prog_offload_ops, and pass them as part of bpf_offload_dev_create(). This commit effectively passes a pointer to the struct to bpf_offload_dev_create(). We temporarily have two struct bpf_prog_offload_ops instances, one under offdev->ops and one under offload->dev_ops. The next patches will make the transition towards the former, so that offload->dev_ops can be removed, and callbacks relying on ndo_bpf() added to offdev->ops as well. While at it, rename "nfp_bpf_analyzer_ops" as "nfp_bpf_dev_ops" (and similarly for netdevsim). 
Suggested-by: Jakub Kicinski Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 2 +- drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 +- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 4 ++-- drivers/net/netdevsim/bpf.c | 6 +++--- include/linux/bpf.h | 3 ++- kernel/bpf/offload.c | 5 ++++- 6 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 6243af0ab025..dccae0319204 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -465,7 +465,7 @@ static int nfp_bpf_init(struct nfp_app *app) app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf); } - bpf->bpf_dev = bpf_offload_dev_create(); + bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops); err = PTR_ERR_OR_ZERO(bpf->bpf_dev); if (err) goto err_free_neutral_maps; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index abdd93d14439..941277936475 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -513,7 +513,7 @@ int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int nfp_bpf_finalize(struct bpf_verifier_env *env); -extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops; +extern const struct bpf_prog_offload_ops nfp_bpf_dev_ops; struct netdev_bpf; struct nfp_app; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index dc548bb4089e..2fca996a7e77 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -209,7 +209,7 @@ nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, goto err_free; nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog); - 
bpf->verifier.ops = &nfp_bpf_analyzer_ops; + bpf->verifier.ops = &nfp_bpf_dev_ops; return 0; @@ -602,7 +602,7 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, return 0; } -const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = { +const struct bpf_prog_offload_ops nfp_bpf_dev_ops = { .insn_hook = nfp_verify_insn, .finalize = nfp_bpf_finalize, }; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index cb3518474f0e..135aee864162 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -91,7 +91,7 @@ static int nsim_bpf_finalize(struct bpf_verifier_env *env) return 0; } -static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = { +static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { .insn_hook = nsim_bpf_verify_insn, .finalize = nsim_bpf_finalize, }; @@ -547,7 +547,7 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) if (err) return err; - bpf->verifier.ops = &nsim_bpf_analyzer_ops; + bpf->verifier.ops = &nsim_bpf_dev_ops; return 0; case BPF_OFFLOAD_TRANSLATE: state = bpf->offload.prog->aux->offload->dev_priv; @@ -599,7 +599,7 @@ int nsim_bpf_init(struct netdevsim *ns) if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs)) return -ENOMEM; - ns->sdev->bpf_dev = bpf_offload_dev_create(); + ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops); err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev); if (err) return err; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b6a296e01f6a..c0197c37b2b2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -692,7 +692,8 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map); -struct bpf_offload_dev *bpf_offload_dev_create(void); +struct bpf_offload_dev * +bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops); void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev); int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, 
struct net_device *netdev); diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 8e93c47f0779..d513fbf9ca53 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -33,6 +33,7 @@ static DECLARE_RWSEM(bpf_devs_lock); struct bpf_offload_dev { + const struct bpf_prog_offload_ops *ops; struct list_head netdevs; }; @@ -655,7 +656,8 @@ unlock: } EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister); -struct bpf_offload_dev *bpf_offload_dev_create(void) +struct bpf_offload_dev * +bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops) { struct bpf_offload_dev *offdev; int err; @@ -673,6 +675,7 @@ struct bpf_offload_dev *bpf_offload_dev_create(void) if (!offdev) return ERR_PTR(-ENOMEM); + offdev->ops = ops; INIT_LIST_HEAD(&offdev->netdevs); return offdev; -- cgit v1.2.3 From 341b3e7b7b89315c43d262da3199098bcf9bbe57 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:26 +0000 Subject: bpf: call verify_insn from its callback in struct bpf_offload_dev We intend to remove the dev_ops in struct bpf_prog_offload, and to only keep the ops in struct bpf_offload_dev instead, which is accessible from more locations for passing function pointers. But dev_ops is used for calling the verify_insn hook. Switch to the newly added ops in struct bpf_prog_offload instead. To avoid table lookups for each eBPF instruction to verify, we remember the offdev attached to a netdev and modify bpf_offload_find_netdev() to avoid performing more than once a lookup for a given offload object. 
Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + kernel/bpf/offload.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c0197c37b2b2..672714cd904f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -273,6 +273,7 @@ struct bpf_prog_offload_ops { struct bpf_prog_offload { struct bpf_prog *prog; struct net_device *netdev; + struct bpf_offload_dev *offdev; void *dev_priv; struct list_head offloads; bool dev_state; diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index d513fbf9ca53..2cd3c0d0417b 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -107,6 +107,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) err = -EINVAL; goto err_unlock; } + offload->offdev = ondev->offdev; prog->aux->offload = offload; list_add_tail(&offload->offloads, &ondev->progs); dev_put(offload->netdev); @@ -167,7 +168,8 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, down_read(&bpf_devs_lock); offload = env->prog->aux->offload; if (offload) - ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); + ret = offload->offdev->ops->insn_hook(env, insn_idx, + prev_insn_idx); up_read(&bpf_devs_lock); return ret; -- cgit v1.2.3 From 00db12c3d141356a4d1e6b6f688e0d5ed3b1f757 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:28 +0000 Subject: bpf: call verifier_prep from its callback in struct bpf_offload_dev In a way similar to the change previously brought to the verify_insn hook and to the finalize callback, switch to the newly added ops in struct bpf_prog_offload for calling the functions used to prepare driver verifiers. Since the dev_ops pointer in struct bpf_prog_offload is no longer used by any callback, we can now remove it from struct bpf_prog_offload. 
Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 11 ++++---- drivers/net/netdevsim/bpf.c | 32 +++++++++++++----------- include/linux/bpf.h | 2 +- include/linux/netdevice.h | 6 ----- kernel/bpf/offload.c | 22 +++++++--------- 5 files changed, 32 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 2fca996a7e77..16a3a9c55852 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -188,10 +188,11 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) } static int -nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf) +nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_verifier_env *env) { - struct bpf_prog *prog = bpf->verifier.prog; + struct nfp_net *nn = netdev_priv(netdev); + struct bpf_prog *prog = env->prog; + struct nfp_app *app = nn->app; struct nfp_prog *nfp_prog; int ret; @@ -209,7 +210,6 @@ nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, goto err_free; nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog); - bpf->verifier.ops = &nfp_bpf_dev_ops; return 0; @@ -422,8 +422,6 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) { switch (bpf->command) { - case BPF_OFFLOAD_VERIFIER_PREP: - return nfp_bpf_verifier_prep(app, nn, bpf); case BPF_OFFLOAD_TRANSLATE: return nfp_bpf_translate(nn, bpf->offload.prog); case BPF_OFFLOAD_DESTROY: @@ -605,4 +603,5 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, const struct bpf_prog_offload_ops nfp_bpf_dev_ops = { .insn_hook = nfp_verify_insn, .finalize = nfp_bpf_finalize, + .prepare = nfp_bpf_verifier_prep, }; diff --git a/drivers/net/netdevsim/bpf.c 
b/drivers/net/netdevsim/bpf.c index 135aee864162..d045b7d666d9 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -91,11 +91,6 @@ static int nsim_bpf_finalize(struct bpf_verifier_env *env) return 0; } -static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { - .insn_hook = nsim_bpf_verify_insn, - .finalize = nsim_bpf_finalize, -}; - static bool nsim_xdp_offload_active(struct netdevsim *ns) { return ns->xdp_hw.prog; @@ -263,6 +258,17 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) return 0; } +static int +nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_verifier_env *env) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (!ns->bpf_bind_accept) + return -EOPNOTSUPP; + + return nsim_bpf_create_prog(ns, env->prog); +} + static void nsim_bpf_destroy_prog(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state; @@ -275,6 +281,12 @@ static void nsim_bpf_destroy_prog(struct bpf_prog *prog) kfree(state); } +static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { + .insn_hook = nsim_bpf_verify_insn, + .finalize = nsim_bpf_finalize, + .prepare = nsim_bpf_verifier_prep, +}; + static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) { if (bpf->prog && bpf->prog->aux->offload) { @@ -539,16 +551,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) ASSERT_RTNL(); switch (bpf->command) { - case BPF_OFFLOAD_VERIFIER_PREP: - if (!ns->bpf_bind_accept) - return -EOPNOTSUPP; - - err = nsim_bpf_create_prog(ns, bpf->verifier.prog); - if (err) - return err; - - bpf->verifier.ops = &nsim_bpf_dev_ops; - return 0; case BPF_OFFLOAD_TRANSLATE: state = bpf->offload.prog->aux->offload->dev_priv; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 672714cd904f..f250494a4f56 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -268,6 +268,7 @@ struct bpf_prog_offload_ops { int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int 
(*finalize)(struct bpf_verifier_env *env); + int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env); }; struct bpf_prog_offload { @@ -277,7 +278,6 @@ struct bpf_prog_offload { void *dev_priv; struct list_head offloads; bool dev_state; - const struct bpf_prog_offload_ops *dev_ops; void *jited_image; u32 jited_len; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 857f8abf7b91..0fa2c2744928 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -863,7 +863,6 @@ enum bpf_netdev_command { XDP_QUERY_PROG, XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. */ - BPF_OFFLOAD_VERIFIER_PREP, BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY, BPF_OFFLOAD_MAP_ALLOC, @@ -891,11 +890,6 @@ struct netdev_bpf { /* flags with which program was installed */ u32 prog_flags; }; - /* BPF_OFFLOAD_VERIFIER_PREP */ - struct { - struct bpf_prog *prog; - const struct bpf_prog_offload_ops *ops; /* callee set */ - } verifier; /* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */ struct { struct bpf_prog *prog; diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 2c88cb4ddfd8..1f7ac00a494d 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -142,21 +142,17 @@ static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) { - struct netdev_bpf data = {}; - int err; - - data.verifier.prog = env->prog; + struct bpf_prog_offload *offload; + int ret = -ENODEV; - rtnl_lock(); - err = __bpf_offload_ndo(env->prog, BPF_OFFLOAD_VERIFIER_PREP, &data); - if (err) - goto exit_unlock; + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) + ret = offload->offdev->ops->prepare(offload->netdev, env); + offload->dev_state = !ret; + up_read(&bpf_devs_lock); - env->prog->aux->offload->dev_ops = data.verifier.ops; - env->prog->aux->offload->dev_state = true; -exit_unlock: - rtnl_unlock(); - return 
err; + return ret; } int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, -- cgit v1.2.3 From b07ade27e93360197e453e5ca80eebdc9099dcb5 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:29 +0000 Subject: bpf: pass translate() as a callback and remove its ndo_bpf subcommand As part of the transition from ndo_bpf() to callbacks attached to struct bpf_offload_dev for some of the eBPF offload operations, move the functions related to code translation to the struct and remove the subcommand that was used to call them through the NDO. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 11 +++-------- drivers/net/netdevsim/bpf.c | 14 +++++++++----- include/linux/bpf.h | 1 + include/linux/netdevice.h | 3 +-- kernel/bpf/offload.c | 14 +++++++------- 5 files changed, 21 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 16a3a9c55852..8653a2189c19 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -33,9 +33,6 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, struct nfp_bpf_neutral_map *record; int err; - /* Map record paths are entered via ndo, update side is protected. */ - ASSERT_RTNL(); - /* Reuse path - other offloaded program is already tracking this map. 
*/ record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id, nfp_bpf_maps_neutral_params); @@ -84,8 +81,6 @@ nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog) bool freed = false; int i; - ASSERT_RTNL(); - for (i = 0; i < nfp_prog->map_records_cnt; i++) { if (--nfp_prog->map_records[i]->count) { nfp_prog->map_records[i] = NULL; @@ -219,9 +214,10 @@ err_free: return ret; } -static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) +static int nfp_bpf_translate(struct net_device *netdev, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + struct nfp_net *nn = netdev_priv(netdev); unsigned int max_instr; int err; @@ -422,8 +418,6 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) { switch (bpf->command) { - case BPF_OFFLOAD_TRANSLATE: - return nfp_bpf_translate(nn, bpf->offload.prog); case BPF_OFFLOAD_DESTROY: return nfp_bpf_destroy(nn, bpf->offload.prog); case BPF_OFFLOAD_MAP_ALLOC: @@ -604,4 +598,5 @@ const struct bpf_prog_offload_ops nfp_bpf_dev_ops = { .insn_hook = nfp_verify_insn, .finalize = nfp_bpf_finalize, .prepare = nfp_bpf_verifier_prep, + .translate = nfp_bpf_translate, }; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index d045b7d666d9..30c2cd516d1c 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -269,6 +269,14 @@ nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_verifier_env *env) return nsim_bpf_create_prog(ns, env->prog); } +static int nsim_bpf_translate(struct net_device *dev, struct bpf_prog *prog) +{ + struct nsim_bpf_bound_prog *state = prog->aux->offload->dev_priv; + + state->state = "xlated"; + return 0; +} + static void nsim_bpf_destroy_prog(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state; @@ -285,6 +293,7 @@ static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { .insn_hook = 
nsim_bpf_verify_insn, .finalize = nsim_bpf_finalize, .prepare = nsim_bpf_verifier_prep, + .translate = nsim_bpf_translate, }; static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) @@ -551,11 +560,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) ASSERT_RTNL(); switch (bpf->command) { - case BPF_OFFLOAD_TRANSLATE: - state = bpf->offload.prog->aux->offload->dev_priv; - - state->state = "xlated"; - return 0; case BPF_OFFLOAD_DESTROY: nsim_bpf_destroy_prog(bpf->offload.prog); return 0; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f250494a4f56..d1eb3c8a3fa9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -269,6 +269,7 @@ struct bpf_prog_offload_ops { int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env); + int (*translate)(struct net_device *netdev, struct bpf_prog *prog); }; struct bpf_prog_offload { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0fa2c2744928..27499127e038 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -863,7 +863,6 @@ enum bpf_netdev_command { XDP_QUERY_PROG, XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. 
*/ - BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY, BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, @@ -890,7 +889,7 @@ struct netdev_bpf { /* flags with which program was installed */ u32 prog_flags; }; - /* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */ + /* BPF_OFFLOAD_DESTROY */ struct { struct bpf_prog *prog; } offload; diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 1f7ac00a494d..ae0167366c12 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -219,14 +219,14 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog) static int bpf_prog_offload_translate(struct bpf_prog *prog) { - struct netdev_bpf data = {}; - int ret; - - data.offload.prog = prog; + struct bpf_prog_offload *offload; + int ret = -ENODEV; - rtnl_lock(); - ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data); - rtnl_unlock(); + down_read(&bpf_devs_lock); + offload = prog->aux->offload; + if (offload) + ret = offload->offdev->ops->translate(offload->netdev, prog); + up_read(&bpf_devs_lock); return ret; } -- cgit v1.2.3 From eb9119471efbf730c8f830f706026b486eb701dd Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:30 +0000 Subject: bpf: pass destroy() as a callback and remove its ndo_bpf subcommand As part of the transition from ndo_bpf() to callbacks attached to struct bpf_offload_dev for some of the eBPF offload operations, move the functions related to program destruction to the struct and remove the subcommand that was used to call them through the NDO. Remove function __bpf_offload_ndo(), which is no longer used. 
Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 7 ++----- drivers/net/netdevsim/bpf.c | 4 +--- include/linux/bpf.h | 1 + include/linux/netdevice.h | 5 ----- kernel/bpf/offload.c | 24 +----------------------- 5 files changed, 5 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 8653a2189c19..91085cc3c843 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -238,15 +238,13 @@ static int nfp_bpf_translate(struct net_device *netdev, struct bpf_prog *prog) return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog); } -static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) +static void nfp_bpf_destroy(struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; kvfree(nfp_prog->prog); nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog); nfp_prog_free(nfp_prog); - - return 0; } /* Atomic engine requires values to be in big endian, we need to byte swap @@ -418,8 +416,6 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap) int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) { switch (bpf->command) { - case BPF_OFFLOAD_DESTROY: - return nfp_bpf_destroy(nn, bpf->offload.prog); case BPF_OFFLOAD_MAP_ALLOC: return nfp_bpf_map_alloc(app->priv, bpf->offmap); case BPF_OFFLOAD_MAP_FREE: @@ -599,4 +595,5 @@ const struct bpf_prog_offload_ops nfp_bpf_dev_ops = { .finalize = nfp_bpf_finalize, .prepare = nfp_bpf_verifier_prep, .translate = nfp_bpf_translate, + .destroy = nfp_bpf_destroy, }; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 30c2cd516d1c..33e3d54c3a0a 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -294,6 +294,7 @@ static const struct 
bpf_prog_offload_ops nsim_bpf_dev_ops = { .finalize = nsim_bpf_finalize, .prepare = nsim_bpf_verifier_prep, .translate = nsim_bpf_translate, + .destroy = nsim_bpf_destroy_prog, }; static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) @@ -560,9 +561,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) ASSERT_RTNL(); switch (bpf->command) { - case BPF_OFFLOAD_DESTROY: - nsim_bpf_destroy_prog(bpf->offload.prog); - return 0; case XDP_QUERY_PROG: return xdp_attachment_query(&ns->xdp, bpf); case XDP_QUERY_PROG_HW: diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d1eb3c8a3fa9..867d2801db64 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -270,6 +270,7 @@ struct bpf_prog_offload_ops { int (*finalize)(struct bpf_verifier_env *env); int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env); int (*translate)(struct net_device *netdev, struct bpf_prog *prog); + void (*destroy)(struct bpf_prog *prog); }; struct bpf_prog_offload { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 27499127e038..17d52a647fe5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -863,7 +863,6 @@ enum bpf_netdev_command { XDP_QUERY_PROG, XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. 
*/ - BPF_OFFLOAD_DESTROY, BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, XDP_QUERY_XSK_UMEM, @@ -889,10 +888,6 @@ struct netdev_bpf { /* flags with which program was installed */ u32 prog_flags; }; - /* BPF_OFFLOAD_DESTROY */ - struct { - struct bpf_prog *prog; - } offload; /* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */ struct { struct bpf_offloaded_map *offmap; diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index ae0167366c12..d665e75a0ac3 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -123,23 +123,6 @@ err_maybe_put: return err; } -static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, - struct netdev_bpf *data) -{ - struct bpf_prog_offload *offload = prog->aux->offload; - struct net_device *netdev; - - ASSERT_RTNL(); - - if (!offload) - return -ENODEV; - netdev = offload->netdev; - - data->command = cmd; - - return netdev->netdev_ops->ndo_bpf(netdev, data); -} - int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) { struct bpf_prog_offload *offload; @@ -192,12 +175,9 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env) static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_prog_offload *offload = prog->aux->offload; - struct netdev_bpf data = {}; - - data.offload.prog = prog; if (offload->dev_state) - WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data)); + offload->offdev->ops->destroy(prog); /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ bpf_prog_free_id(prog, true); @@ -209,12 +189,10 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) void bpf_prog_offload_destroy(struct bpf_prog *prog) { - rtnl_lock(); down_write(&bpf_devs_lock); if (prog->aux->offload) __bpf_prog_offload_destroy(prog); up_write(&bpf_devs_lock); - rtnl_unlock(); } static int bpf_prog_offload_translate(struct bpf_prog *prog) -- cgit v1.2.3 From a40a26322a83d4a26a99ad2616cbd77394c19587 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 
13:03:31 +0000 Subject: bpf: pass prog instead of env to bpf_prog_offload_verifier_prep() Function bpf_prog_offload_verifier_prep(), called from the kernel BPF verifier to run a driver-specific callback for preparing for the verification step for offloaded programs, takes a pointer to a struct bpf_verifier_env object. However, no driver callback needs the whole structure at this time: the two drivers supporting this, nfp and netdevsim, only need a pointer to the struct bpf_prog instance held by env. Update the callback accordingly, on kernel side and in these two drivers. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 3 +-- drivers/net/netdevsim/bpf.c | 4 ++-- include/linux/bpf.h | 2 +- include/linux/bpf_verifier.h | 2 +- kernel/bpf/offload.c | 6 +++--- kernel/bpf/verifier.c | 2 +- 6 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 91085cc3c843..e6b26d2f651d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -183,10 +183,9 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) } static int -nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_verifier_env *env) +nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_prog *prog) { struct nfp_net *nn = netdev_priv(netdev); - struct bpf_prog *prog = env->prog; struct nfp_app *app = nn->app; struct nfp_prog *nfp_prog; int ret; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 33e3d54c3a0a..560bdaf1c98b 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -259,14 +259,14 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) } static int -nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_verifier_env *env) 
+nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_prog *prog) { struct netdevsim *ns = netdev_priv(dev); if (!ns->bpf_bind_accept) return -EOPNOTSUPP; - return nsim_bpf_create_prog(ns, env->prog); + return nsim_bpf_create_prog(ns, prog); } static int nsim_bpf_translate(struct net_device *dev, struct bpf_prog *prog) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 867d2801db64..888111350d0e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -268,7 +268,7 @@ struct bpf_prog_offload_ops { int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); - int (*prepare)(struct net_device *netdev, struct bpf_verifier_env *env); + int (*prepare)(struct net_device *netdev, struct bpf_prog *prog); int (*translate)(struct net_device *netdev, struct bpf_prog *prog); void (*destroy)(struct bpf_prog *prog); }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d93e89761a8b..11f5df1092d9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -245,7 +245,7 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) return cur_func(env)->regs; } -int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); +int bpf_prog_offload_verifier_prep(struct bpf_prog *prog); int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int bpf_prog_offload_finalize(struct bpf_verifier_env *env); diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index d665e75a0ac3..397d206e184b 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -123,15 +123,15 @@ err_maybe_put: return err; } -int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) +int bpf_prog_offload_verifier_prep(struct bpf_prog *prog) { struct bpf_prog_offload *offload; int ret = -ENODEV; down_read(&bpf_devs_lock); - offload = env->prog->aux->offload; + offload = prog->aux->offload; if (offload) - ret = 
offload->offdev->ops->prepare(offload->netdev, env); + ret = offload->offdev->ops->prepare(offload->netdev, prog); offload->dev_state = !ret; up_read(&bpf_devs_lock); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 75dab40b19a3..8d0977980cfa 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6368,7 +6368,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) goto skip_full_check; if (bpf_prog_is_dev_bound(env->prog->aux)) { - ret = bpf_prog_offload_verifier_prep(env); + ret = bpf_prog_offload_verifier_prep(env->prog); if (ret) goto skip_full_check; } -- cgit v1.2.3 From 16a8cb5cffd0a2929ae97bc258d2d9c92a4e7f6d Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 9 Nov 2018 13:03:32 +0000 Subject: bpf: do not pass netdev to translate() and prepare() offload callbacks The kernel functions to prepare verifier and translate for offloaded program retrieve "offload" from "prog", and "netdev" from "offload". Then both "prog" and "netdev" are passed to the callbacks. Simplify this by letting the drivers retrieve the net device themselves from the offload object attached to prog - if they need it at all. There is currently no need to pass the netdev as an argument to those functions. 
Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 9 ++++----- drivers/net/netdevsim/bpf.c | 7 +++---- include/linux/bpf.h | 4 ++-- kernel/bpf/offload.c | 4 ++-- 4 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index e6b26d2f651d..f0283854fade 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -182,10 +182,9 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) kfree(nfp_prog); } -static int -nfp_bpf_verifier_prep(struct net_device *netdev, struct bpf_prog *prog) +static int nfp_bpf_verifier_prep(struct bpf_prog *prog) { - struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev); struct nfp_app *app = nn->app; struct nfp_prog *nfp_prog; int ret; @@ -213,10 +212,10 @@ err_free: return ret; } -static int nfp_bpf_translate(struct net_device *netdev, struct bpf_prog *prog) +static int nfp_bpf_translate(struct bpf_prog *prog) { + struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev); struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - struct nfp_net *nn = netdev_priv(netdev); unsigned int max_instr; int err; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 560bdaf1c98b..6a5b7bd9a1f9 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -258,10 +258,9 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) return 0; } -static int -nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_prog *prog) +static int nsim_bpf_verifier_prep(struct bpf_prog *prog) { - struct netdevsim *ns = netdev_priv(dev); + struct netdevsim *ns = netdev_priv(prog->aux->offload->netdev); if (!ns->bpf_bind_accept) return -EOPNOTSUPP; @@ -269,7 +268,7 @@ 
nsim_bpf_verifier_prep(struct net_device *dev, struct bpf_prog *prog) return nsim_bpf_create_prog(ns, prog); } -static int nsim_bpf_translate(struct net_device *dev, struct bpf_prog *prog) +static int nsim_bpf_translate(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state = prog->aux->offload->dev_priv; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 888111350d0e..987815152629 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -268,8 +268,8 @@ struct bpf_prog_offload_ops { int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); - int (*prepare)(struct net_device *netdev, struct bpf_prog *prog); - int (*translate)(struct net_device *netdev, struct bpf_prog *prog); + int (*prepare)(struct bpf_prog *prog); + int (*translate)(struct bpf_prog *prog); void (*destroy)(struct bpf_prog *prog); }; diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 397d206e184b..52c5617e3716 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -131,7 +131,7 @@ int bpf_prog_offload_verifier_prep(struct bpf_prog *prog) down_read(&bpf_devs_lock); offload = prog->aux->offload; if (offload) - ret = offload->offdev->ops->prepare(offload->netdev, prog); + ret = offload->offdev->ops->prepare(prog); offload->dev_state = !ret; up_read(&bpf_devs_lock); @@ -203,7 +203,7 @@ static int bpf_prog_offload_translate(struct bpf_prog *prog) down_read(&bpf_devs_lock); offload = prog->aux->offload; if (offload) - ret = offload->offdev->ops->translate(offload->netdev, prog); + ret = offload->offdev->ops->translate(prog); up_read(&bpf_devs_lock); return ret; -- cgit v1.2.3 From 46f53a65d2de3e1591636c22b626b09d8684fd71 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Sat, 10 Nov 2018 22:15:13 -0800 Subject: bpf: Allow narrow loads with offset > 0 Currently BPF verifier allows narrow loads for a context field only with offset zero. E.g. 
if there is a __u32 field then only the following loads are permitted: * off=0, size=1 (narrow); * off=0, size=2 (narrow); * off=0, size=4 (full). On the other hand, LLVM can generate a load with a non-zero offset that makes sense from the program-logic point of view, but the verifier doesn't accept it. E.g. tools/testing/selftests/bpf/sendmsg4_prog.c has code: #define DST_IP4 0xC0A801FEU /* 192.168.1.254 */ ... if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) && where ctx is struct bpf_sock_addr. Some versions of LLVM can produce the following byte code for it: 8: 71 12 07 00 00 00 00 00 r2 = *(u8 *)(r1 + 7) 9: 67 02 00 00 18 00 00 00 r2 <<= 24 10: 18 03 00 00 00 00 00 fe 00 00 00 00 00 00 00 00 r3 = 4261412864 ll 12: 5d 32 07 00 00 00 00 00 if r2 != r3 goto +7 where `*(u8 *)(r1 + 7)` means a narrow load for ctx->user_ip4 with size=1 and offset=3 (7 - sizeof(ctx->user_family) = 3). This load is currently rejected by the verifier. The verifier code that rejects such loads is in bpf_ctx_narrow_access_ok(), which means that any is_valid_access implementation that uses the function works this way, e.g. bpf_skb_is_valid_access() for __sk_buff or sock_addr_is_valid_access() for bpf_sock_addr. The patch makes such loads supported. The offset can be in [0; size_default) but has to be a multiple of the load size. E.g. for a __u32 field the following loads are supported now: * off=0, size=1 (narrow); * off=1, size=1 (narrow); * off=2, size=1 (narrow); * off=3, size=1 (narrow); * off=0, size=2 (narrow); * off=2, size=2 (narrow); * off=0, size=4 (full). 
Reported-by: Yonghong Song Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 16 +--------------- kernel/bpf/verifier.c | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index de629b706d1d..cc17f5f32fbb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -668,24 +668,10 @@ static inline u32 bpf_ctx_off_adjust_machine(u32 size) return size; } -static inline bool bpf_ctx_narrow_align_ok(u32 off, u32 size_access, - u32 size_default) -{ - size_default = bpf_ctx_off_adjust_machine(size_default); - size_access = bpf_ctx_off_adjust_machine(size_access); - -#ifdef __LITTLE_ENDIAN - return (off & (size_default - 1)) == 0; -#else - return (off & (size_default - 1)) + size_access == size_default; -#endif -} - static inline bool bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) { - return bpf_ctx_narrow_align_ok(off, size, size_default) && - size <= size_default && (size & (size - 1)) == 0; + return size <= size_default && (size & (size - 1)) == 0; } #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8d0977980cfa..b5222aa61d54 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5718,10 +5718,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) int i, cnt, size, ctx_field_size, delta = 0; const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; + u32 target_size, size_default, off; struct bpf_prog *new_prog; enum bpf_access_type type; bool is_narrower_load; - u32 target_size; if (ops->gen_prologue || env->seen_direct_write) { if (!ops->gen_prologue) { @@ -5814,9 +5814,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) * we will apply proper mask to the result. 
*/ is_narrower_load = size < ctx_field_size; + size_default = bpf_ctx_off_adjust_machine(ctx_field_size); + off = insn->off; if (is_narrower_load) { - u32 size_default = bpf_ctx_off_adjust_machine(ctx_field_size); - u32 off = insn->off; u8 size_code; if (type == BPF_WRITE) { @@ -5844,12 +5844,23 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } if (is_narrower_load && size < target_size) { - if (ctx_field_size <= 4) + u8 shift = (off & (size_default - 1)) * 8; + + if (ctx_field_size <= 4) { + if (shift) + insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, + insn->dst_reg, + shift); insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, (1 << size * 8) - 1); - else + } else { + if (shift) + insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, + insn->dst_reg, + shift); insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, (1 << size * 8) - 1); + } } new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); -- cgit v1.2.3 From 9be92baa4772a315ff258f59d87a8427d5015a7c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 8 Nov 2018 06:32:44 +0000 Subject: dmaengine: sh: convert to SPDX identifiers This patch updates license to use SPDX-License-Identifier instead of verbose license text. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Vinod Koul --- drivers/dma/sh/Kconfig | 1 + include/linux/shdma-base.h | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig index 6e0685f1a838..1c4675425a1e 100644 --- a/drivers/dma/sh/Kconfig +++ b/drivers/dma/sh/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # DMA engine configuration for sh # diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index d927647e6350..6dfd05ef5c2d 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -1,4 +1,5 @@ -/* +/* SPDX-License-Identifier: GPL-2.0 + * * Dmaengine driver base library for DMA controllers, found on SH-based SoCs * * extracted from shdma.c and headers @@ -7,10 +8,6 @@ * Copyright (C) 2009 Nobuhiro Iwamatsu * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. - * - * This is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. */ #ifndef SHDMA_BASE_H -- cgit v1.2.3 From bc822e80170d672dd8ff0d07c521cf72f491cb6c Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Nov 2018 13:45:10 +0000 Subject: dmaengine: sa11x0: unexport sa11x0_dma_filter_fn and clean up As we now have no users of sa11x0_dma_filter_fn() in the tree, we can unexport this function, and remove the now unused header file. 
Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/sa11x0-dma.c | 21 ++++++++------------- include/linux/sa11x0-dma.h | 24 ------------------------ 2 files changed, 8 insertions(+), 37 deletions(-) delete mode 100644 include/linux/sa11x0-dma.h (limited to 'include/linux') diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c index b31d07c7d93c..784d5f1a473b 100644 --- a/drivers/dma/sa11x0-dma.c +++ b/drivers/dma/sa11x0-dma.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -830,6 +829,14 @@ static const struct dma_slave_map sa11x0_dma_map[] = { { "sa11x0-ssp", "rx", "Ser4SSPRc" }, }; +static bool sa11x0_dma_filter_fn(struct dma_chan *chan, void *param) +{ + struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); + const char *p = param; + + return !strcmp(c->name, p); +} + static int sa11x0_dma_init_dmadev(struct dma_device *dmadev, struct device *dev) { @@ -1087,18 +1094,6 @@ static struct platform_driver sa11x0_dma_driver = { .remove = sa11x0_dma_remove, }; -bool sa11x0_dma_filter_fn(struct dma_chan *chan, void *param) -{ - if (chan->device->dev->driver == &sa11x0_dma_driver.driver) { - struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); - const char *p = param; - - return !strcmp(c->name, p); - } - return false; -} -EXPORT_SYMBOL(sa11x0_dma_filter_fn); - static int __init sa11x0_dma_init(void) { return platform_driver_register(&sa11x0_dma_driver); diff --git a/include/linux/sa11x0-dma.h b/include/linux/sa11x0-dma.h deleted file mode 100644 index 65839a58b8e5..000000000000 --- a/include/linux/sa11x0-dma.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * SA11x0 DMA Engine support - * - * Copyright (C) 2012 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#ifndef __LINUX_SA11X0_DMA_H -#define __LINUX_SA11X0_DMA_H - -struct dma_chan; - -#if defined(CONFIG_DMA_SA11X0) || defined(CONFIG_DMA_SA11X0_MODULE) -bool sa11x0_dma_filter_fn(struct dma_chan *, void *); -#else -static inline bool sa11x0_dma_filter_fn(struct dma_chan *c, void *d) -{ - return false; -} -#endif - -#endif -- cgit v1.2.3 From a4307c0ec66131e722a8fa0f1da09646c46ee924 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 9 Nov 2018 18:17:22 +0100 Subject: net: phy: remove flag PHY_HAS_INTERRUPT from driver configs Now that flag PHY_HAS_INTERRUPT has been replaced with a check for callbacks config_intr and ack_interrupt, we can remove setting this flag from all driver configs. Last but not least remove flag PHY_HAS_INTERRUPT completely. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/amd.c | 1 - drivers/net/phy/aquantia.c | 6 ------ drivers/net/phy/at803x.c | 3 --- drivers/net/phy/bcm63xx.c | 4 ++-- drivers/net/phy/bcm87xx.c | 2 -- drivers/net/phy/broadcom.c | 16 ---------------- drivers/net/phy/cicada.c | 2 -- drivers/net/phy/davicom.c | 4 ---- drivers/net/phy/dp83640.c | 1 - drivers/net/phy/dp83822.c | 1 - drivers/net/phy/dp83848.c | 1 - drivers/net/phy/dp83867.c | 1 - drivers/net/phy/dp83tc811.c | 1 - drivers/net/phy/icplus.c | 1 - drivers/net/phy/intel-xway.c | 10 ---------- drivers/net/phy/lxt.c | 2 -- drivers/net/phy/marvell.c | 15 --------------- drivers/net/phy/meson-gxl.c | 2 +- drivers/net/phy/micrel.c | 14 -------------- drivers/net/phy/microchip.c | 1 - drivers/net/phy/microchip_t1.c | 1 - drivers/net/phy/mscc.c | 6 ------ drivers/net/phy/national.c | 1 - drivers/net/phy/qsemi.c | 1 - drivers/net/phy/realtek.c | 7 ------- drivers/net/phy/smsc.c | 7 +------ drivers/net/phy/ste10Xp.c | 2 -- drivers/net/phy/vitesse.c | 9 --------- include/linux/phy.h | 5 ++--- 29 files changed, 6 insertions(+), 121 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/amd.c 
b/drivers/net/phy/amd.c index 6fe5dc9201d0..9d0504f3e3b2 100644 --- a/drivers/net/phy/amd.c +++ b/drivers/net/phy/amd.c @@ -66,7 +66,6 @@ static struct phy_driver am79c_driver[] = { { .name = "AM79C874", .phy_id_mask = 0xfffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = am79c_config_init, .ack_interrupt = am79c_ack_interrupt, .config_intr = am79c_config_intr, diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c index 632472cab3bb..efc0fbde97a1 100644 --- a/drivers/net/phy/aquantia.c +++ b/drivers/net/phy/aquantia.c @@ -116,7 +116,6 @@ static struct phy_driver aquantia_driver[] = { .phy_id_mask = 0xfffffff0, .name = "Aquantia AQ1202", .features = PHY_10GBIT_FULL_FEATURES, - .flags = PHY_HAS_INTERRUPT, .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, @@ -128,7 +127,6 @@ static struct phy_driver aquantia_driver[] = { .phy_id_mask = 0xfffffff0, .name = "Aquantia AQ2104", .features = PHY_10GBIT_FULL_FEATURES, - .flags = PHY_HAS_INTERRUPT, .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, @@ -140,7 +138,6 @@ static struct phy_driver aquantia_driver[] = { .phy_id_mask = 0xfffffff0, .name = "Aquantia AQR105", .features = PHY_10GBIT_FULL_FEATURES, - .flags = PHY_HAS_INTERRUPT, .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, @@ -152,7 +149,6 @@ static struct phy_driver aquantia_driver[] = { .phy_id_mask = 0xfffffff0, .name = "Aquantia AQR106", .features = PHY_10GBIT_FULL_FEATURES, - .flags = PHY_HAS_INTERRUPT, .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, @@ -164,7 +160,6 @@ static struct phy_driver aquantia_driver[] = { .phy_id_mask = 0xfffffff0, .name = "Aquantia AQR107", .features = PHY_10GBIT_FULL_FEATURES, - .flags = PHY_HAS_INTERRUPT, .aneg_done = genphy_c45_aneg_done, 
.config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, @@ -176,7 +171,6 @@ static struct phy_driver aquantia_driver[] = { .phy_id_mask = 0xfffffff0, .name = "Aquantia AQR405", .features = PHY_10GBIT_FULL_FEATURES, - .flags = PHY_HAS_INTERRUPT, .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index e74a047a846e..f9432d053a22 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -379,7 +379,6 @@ static struct phy_driver at803x_driver[] = { .suspend = at803x_suspend, .resume = at803x_resume, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, }, { @@ -395,7 +394,6 @@ static struct phy_driver at803x_driver[] = { .suspend = at803x_suspend, .resume = at803x_resume, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, }, { @@ -410,7 +408,6 @@ static struct phy_driver at803x_driver[] = { .suspend = at803x_suspend, .resume = at803x_resume, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .aneg_done = at803x_aneg_done, .ack_interrupt = &at803x_ack_interrupt, .config_intr = &at803x_config_intr, diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index d95bffdec4c1..6a547b87ff04 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -69,7 +69,7 @@ static struct phy_driver bcm63xx_driver[] = { .phy_id_mask = 0xfffffc00, .name = "Broadcom BCM63XX (1)", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT | PHY_IS_INTERNAL, + .flags = PHY_IS_INTERNAL, .config_init = bcm63xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm63xx_config_intr, @@ -78,7 +78,7 @@ static struct phy_driver bcm63xx_driver[] = { .phy_id = 0x002bdc00, .phy_id_mask = 0xfffffc00, .features = PHY_BASIC_FEATURES, - 
.flags = PHY_HAS_INTERRUPT | PHY_IS_INTERNAL, + .flags = PHY_IS_INTERNAL, .config_init = bcm63xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm63xx_config_intr, diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index f7ebdcff53e4..64d5ba7bf94f 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -193,7 +193,6 @@ static struct phy_driver bcm87xx_driver[] = { .phy_id = PHY_ID_BCM8706, .phy_id_mask = 0xffffffff, .name = "Broadcom BCM8706", - .flags = PHY_HAS_INTERRUPT, .config_init = bcm87xx_config_init, .config_aneg = bcm87xx_config_aneg, .read_status = bcm87xx_read_status, @@ -205,7 +204,6 @@ static struct phy_driver bcm87xx_driver[] = { .phy_id = PHY_ID_BCM8727, .phy_id_mask = 0xffffffff, .name = "Broadcom BCM8727", - .flags = PHY_HAS_INTERRUPT, .config_init = bcm87xx_config_init, .config_aneg = bcm87xx_config_aneg, .read_status = bcm87xx_read_status, diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index e86ea105c802..c73e265cd907 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -589,7 +589,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5411", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -598,7 +597,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5421", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -607,7 +605,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54210E", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -616,7 +613,6 @@ static 
struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5461", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -625,7 +621,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54612E", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -634,7 +629,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54616S", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -643,7 +637,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5464", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -652,7 +645,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5481", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .config_aneg = bcm5481_config_aneg, .ack_interrupt = bcm_phy_ack_intr, @@ -662,7 +654,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54810", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .config_aneg = bcm5481_config_aneg, .ack_interrupt = bcm_phy_ack_intr, @@ -672,7 +663,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5482", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm5482_config_init, .read_status = bcm5482_read_status, .ack_interrupt = 
bcm_phy_ack_intr, @@ -682,7 +672,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM50610", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -691,7 +680,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM50610M", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -700,7 +688,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM57780", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, @@ -709,7 +696,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCMAC131", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = brcm_fet_config_init, .ack_interrupt = brcm_fet_ack_interrupt, .config_intr = brcm_fet_config_intr, @@ -718,7 +704,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5241", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = brcm_fet_config_init, .ack_interrupt = brcm_fet_ack_interrupt, .config_intr = brcm_fet_config_intr, @@ -737,7 +722,6 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM89610", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c index c05af00bf4b6..fea61c81bda9 100644 --- a/drivers/net/phy/cicada.c +++ b/drivers/net/phy/cicada.c @@ -108,7 +108,6 @@ static struct phy_driver 
cis820x_driver[] = { .name = "Cicada Cis8201", .phy_id_mask = 0x000ffff0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &cis820x_config_init, .ack_interrupt = &cis820x_ack_interrupt, .config_intr = &cis820x_config_intr, @@ -117,7 +116,6 @@ static struct phy_driver cis820x_driver[] = { .name = "Cicada Cis8204", .phy_id_mask = 0x000fffc0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &cis820x_config_init, .ack_interrupt = &cis820x_ack_interrupt, .config_intr = &cis820x_config_intr, diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c index 5ee99b3b428c..97162008f42b 100644 --- a/drivers/net/phy/davicom.c +++ b/drivers/net/phy/davicom.c @@ -150,7 +150,6 @@ static struct phy_driver dm91xx_driver[] = { .name = "Davicom DM9161E", .phy_id_mask = 0x0ffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = dm9161_config_init, .config_aneg = dm9161_config_aneg, .ack_interrupt = dm9161_ack_interrupt, @@ -160,7 +159,6 @@ static struct phy_driver dm91xx_driver[] = { .name = "Davicom DM9161B/C", .phy_id_mask = 0x0ffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = dm9161_config_init, .config_aneg = dm9161_config_aneg, .ack_interrupt = dm9161_ack_interrupt, @@ -170,7 +168,6 @@ static struct phy_driver dm91xx_driver[] = { .name = "Davicom DM9161A", .phy_id_mask = 0x0ffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = dm9161_config_init, .config_aneg = dm9161_config_aneg, .ack_interrupt = dm9161_ack_interrupt, @@ -180,7 +177,6 @@ static struct phy_driver dm91xx_driver[] = { .name = "Davicom DM9131", .phy_id_mask = 0x0ffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, } }; diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index edd4d44a386d..18b41bc345ab 100644 --- a/drivers/net/phy/dp83640.c +++ 
b/drivers/net/phy/dp83640.c @@ -1521,7 +1521,6 @@ static struct phy_driver dp83640_driver = { .phy_id_mask = 0xfffffff0, .name = "NatSemi DP83640", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = dp83640_probe, .remove = dp83640_remove, .soft_reset = dp83640_soft_reset, diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 6e8a2a4f3a6e..24c7f149f3e6 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -318,7 +318,6 @@ static struct phy_driver dp83822_driver[] = { .phy_id_mask = 0xfffffff0, .name = "TI DP83822", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = dp83822_config_init, .soft_reset = dp83822_phy_reset, .get_wol = dp83822_get_wol, diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c index 6e8e42361fd5..a6b55909d1dc 100644 --- a/drivers/net/phy/dp83848.c +++ b/drivers/net/phy/dp83848.c @@ -108,7 +108,6 @@ MODULE_DEVICE_TABLE(mdio, dp83848_tbl); .phy_id_mask = 0xfffffff0, \ .name = _name, \ .features = PHY_BASIC_FEATURES, \ - .flags = PHY_HAS_INTERRUPT, \ \ .soft_reset = genphy_soft_reset, \ .config_init = _config_init, \ diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index b3935778b19f..da6a67d47ce9 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -334,7 +334,6 @@ static struct phy_driver dp83867_driver[] = { .phy_id_mask = 0xfffffff0, .name = "TI DP83867", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = dp83867_config_init, .soft_reset = dp83867_phy_reset, diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c index 78cad134a79e..da13356999e5 100644 --- a/drivers/net/phy/dp83tc811.c +++ b/drivers/net/phy/dp83tc811.c @@ -346,7 +346,6 @@ static struct phy_driver dp83811_driver[] = { .phy_id_mask = 0xfffffff0, .name = "TI DP83TC811", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = dp83811_config_init, .config_aneg = dp83811_config_aneg, 
.soft_reset = dp83811_phy_reset, diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index 791587a49215..21ce68964204 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -234,7 +234,6 @@ static struct phy_driver icplus_driver[] = { .name = "ICPlus IP101A/G", .phy_id_mask = 0x0ffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .ack_interrupt = ip101a_g_ack_interrupt, .config_init = &ip101a_g_config_init, .suspend = genphy_suspend, diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c index 7d936fb61c22..fc0f5024a29e 100644 --- a/drivers/net/phy/intel-xway.c +++ b/drivers/net/phy/intel-xway.c @@ -242,7 +242,6 @@ static struct phy_driver xway_gphy[] = { .phy_id_mask = 0xffffffff, .name = "Intel XWAY PHY11G (PEF 7071/PEF 7072) v1.3", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .config_aneg = xway_gphy14_config_aneg, .ack_interrupt = xway_gphy_ack_interrupt, @@ -255,7 +254,6 @@ static struct phy_driver xway_gphy[] = { .phy_id_mask = 0xffffffff, .name = "Intel XWAY PHY22F (PEF 7061) v1.3", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .config_aneg = xway_gphy14_config_aneg, .ack_interrupt = xway_gphy_ack_interrupt, @@ -268,7 +266,6 @@ static struct phy_driver xway_gphy[] = { .phy_id_mask = 0xffffffff, .name = "Intel XWAY PHY11G (PEF 7071/PEF 7072) v1.4", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .config_aneg = xway_gphy14_config_aneg, .ack_interrupt = xway_gphy_ack_interrupt, @@ -281,7 +278,6 @@ static struct phy_driver xway_gphy[] = { .phy_id_mask = 0xffffffff, .name = "Intel XWAY PHY22F (PEF 7061) v1.4", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .config_aneg = xway_gphy14_config_aneg, .ack_interrupt = xway_gphy_ack_interrupt, @@ -294,7 +290,6 @@ static struct phy_driver 
xway_gphy[] = { .phy_id_mask = 0xffffffff, .name = "Intel XWAY PHY11G (PEF 7071/PEF 7072) v1.5 / v1.6", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, @@ -306,7 +301,6 @@ static struct phy_driver xway_gphy[] = { .phy_id_mask = 0xffffffff, .name = "Intel XWAY PHY22F (PEF 7061) v1.5 / v1.6", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, @@ -318,7 +312,6 @@ static struct phy_driver xway_gphy[] = { .phy_id_mask = 0xffffffff, .name = "Intel XWAY PHY11G (xRX v1.1 integrated)", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, @@ -330,7 +323,6 @@ static struct phy_driver xway_gphy[] = { .phy_id_mask = 0xffffffff, .name = "Intel XWAY PHY22F (xRX v1.1 integrated)", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, @@ -342,7 +334,6 @@ static struct phy_driver xway_gphy[] = { .phy_id_mask = 0xffffffff, .name = "Intel XWAY PHY11G (xRX v1.2 integrated)", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, @@ -354,7 +345,6 @@ static struct phy_driver xway_gphy[] = { .phy_id_mask = 0xffffffff, .name = "Intel XWAY PHY22F (xRX v1.2 integrated)", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = xway_gphy_config_init, .ack_interrupt = xway_gphy_ack_interrupt, .did_interrupt = xway_gphy_did_interrupt, diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index c14b254b2879..c9e2c84c25c0 100644 
--- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -257,7 +257,6 @@ static struct phy_driver lxt97x_driver[] = { .name = "LXT970", .phy_id_mask = 0xfffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = lxt970_config_init, .ack_interrupt = lxt970_ack_interrupt, .config_intr = lxt970_config_intr, @@ -266,7 +265,6 @@ static struct phy_driver lxt97x_driver[] = { .name = "LXT971", .phy_id_mask = 0xfffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .ack_interrupt = lxt971_ack_interrupt, .config_intr = lxt971_config_intr, }, { diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index cbec296107bd..463c616a7281 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -2005,7 +2005,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1101", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &marvell_config_init, .config_aneg = &m88e1101_config_aneg, @@ -2024,7 +2023,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1112", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1111_config_init, .config_aneg = &marvell_config_aneg, @@ -2043,7 +2041,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1111", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1111_config_init, .config_aneg = &marvell_config_aneg, @@ -2063,7 +2060,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1118", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1118_config_init, .config_aneg = &m88e1118_config_aneg, @@ -2082,7 +2078,6 @@ static struct phy_driver marvell_drivers[] = { 
.phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1121R", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = &m88e1121_probe, .config_init = &marvell_config_init, .config_aneg = &m88e1121_config_aneg, @@ -2103,7 +2098,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1318S", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1318_config_init, .config_aneg = &m88e1318_config_aneg, @@ -2126,7 +2120,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1145", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1145_config_init, .config_aneg = &m88e1101_config_aneg, @@ -2146,7 +2139,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1149R", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1149_config_init, .config_aneg = &m88e1118_config_aneg, @@ -2165,7 +2157,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1240", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1111_config_init, .config_aneg = &marvell_config_aneg, @@ -2184,7 +2175,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1116R", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1116r_config_init, .ack_interrupt = &marvell_ack_interrupt, @@ -2202,7 +2192,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1510", .features = PHY_GBIT_FIBRE_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = &m88e1510_probe, .config_init = &m88e1510_config_init, .config_aneg = &m88e1510_config_aneg, 
@@ -2226,7 +2215,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1540", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = m88e1510_probe, .config_init = &marvell_config_init, .config_aneg = &m88e1510_config_aneg, @@ -2248,7 +2236,6 @@ static struct phy_driver marvell_drivers[] = { .name = "Marvell 88E1545", .probe = m88e1510_probe, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &marvell_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, @@ -2268,7 +2255,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E3016", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e3016_config_init, .aneg_done = &marvell_aneg_done, @@ -2289,7 +2275,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E6390", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = m88e6390_probe, .config_init = &marvell_config_init, .config_aneg = &m88e1510_config_aneg, diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c index ddc2c5ea3787..b03bcf2c388a 100644 --- a/drivers/net/phy/meson-gxl.c +++ b/drivers/net/phy/meson-gxl.c @@ -232,7 +232,7 @@ static struct phy_driver meson_gxl_phy[] = { .phy_id_mask = 0xfffffff0, .name = "Meson GXL Internal PHY", .features = PHY_BASIC_FEATURES, - .flags = PHY_IS_INTERNAL | PHY_HAS_INTERRUPT, + .flags = PHY_IS_INTERNAL, .config_init = meson_gxl_config_init, .aneg_done = genphy_aneg_done, .read_status = meson_gxl_read_status, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 9265dea79412..cb5783905a25 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -918,7 +918,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KS8737", .features = 
PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .driver_data = &ks8737_type, .config_init = kszphy_config_init, .ack_interrupt = kszphy_ack_interrupt, @@ -930,7 +929,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = 0x00ffffff, .name = "Micrel KSZ8021 or KSZ8031", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .driver_data = &ksz8021_type, .probe = kszphy_probe, .config_init = kszphy_config_init, @@ -946,7 +944,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = 0x00ffffff, .name = "Micrel KSZ8031", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .driver_data = &ksz8021_type, .probe = kszphy_probe, .config_init = kszphy_config_init, @@ -962,7 +959,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8041", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .driver_data = &ksz8041_type, .probe = kszphy_probe, .config_init = ksz8041_config_init, @@ -979,7 +975,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8041RNLI", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .driver_data = &ksz8041_type, .probe = kszphy_probe, .config_init = kszphy_config_init, @@ -995,7 +990,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8051", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .driver_data = &ksz8051_type, .probe = kszphy_probe, .config_init = kszphy_config_init, @@ -1011,7 +1005,6 @@ static struct phy_driver ksphy_driver[] = { .name = "Micrel KSZ8001 or KS8721", .phy_id_mask = 0x00fffffc, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .driver_data = &ksz8041_type, .probe = kszphy_probe, .config_init = kszphy_config_init, @@ -1027,7 +1020,6 @@ static struct phy_driver ksphy_driver[] = { .name = "Micrel KSZ8081 or KSZ8091", .phy_id_mask = MICREL_PHY_ID_MASK, .features = PHY_BASIC_FEATURES, - .flags = 
PHY_HAS_INTERRUPT, .driver_data = &ksz8081_type, .probe = kszphy_probe, .config_init = kszphy_config_init, @@ -1043,7 +1035,6 @@ static struct phy_driver ksphy_driver[] = { .name = "Micrel KSZ8061", .phy_id_mask = MICREL_PHY_ID_MASK, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = kszphy_config_init, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, @@ -1054,7 +1045,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = 0x000ffffe, .name = "Micrel KSZ9021 Gigabit PHY", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .driver_data = &ksz9021_type, .probe = kszphy_probe, .config_init = ksz9021_config_init, @@ -1072,7 +1062,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ9031 Gigabit PHY", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .driver_data = &ksz9021_type, .probe = kszphy_probe, .config_init = ksz9031_config_init, @@ -1089,7 +1078,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Microchip KSZ9131 Gigabit PHY", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .driver_data = &ksz9021_type, .probe = kszphy_probe, .config_init = ksz9131_config_init, @@ -1115,7 +1103,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ886X Switch", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = kszphy_config_init, .suspend = genphy_suspend, .resume = genphy_resume, @@ -1124,7 +1111,6 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8795", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, .read_status = ksz8873mll_read_status, diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 04b12e34da58..7557bebd5d7f 100644 --- a/drivers/net/phy/microchip.c 
+++ b/drivers/net/phy/microchip.c @@ -346,7 +346,6 @@ static struct phy_driver microchip_phy_driver[] = { .name = "Microchip LAN88xx", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = lan88xx_probe, .remove = lan88xx_remove, diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index c600a8509d60..3d09b471632c 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -47,7 +47,6 @@ static struct phy_driver microchip_t1_phy_driver[] = { .name = "Microchip LAN87xx T1", .features = PHY_BASIC_T1_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = genphy_config_init, .config_aneg = genphy_config_aneg, diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index a2e59f4f6f01..62269e578718 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -1833,7 +1833,6 @@ static struct phy_driver vsc85xx_driver[] = { .name = "Microsemi FE VSC8530", .phy_id_mask = 0xfffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, @@ -1859,7 +1858,6 @@ static struct phy_driver vsc85xx_driver[] = { .name = "Microsemi VSC8531", .phy_id_mask = 0xfffffff0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, @@ -1885,7 +1883,6 @@ static struct phy_driver vsc85xx_driver[] = { .name = "Microsemi FE VSC8540 SyncE", .phy_id_mask = 0xfffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, @@ -1911,7 +1908,6 @@ static struct phy_driver vsc85xx_driver[] = { .name = "Microsemi VSC8541 SyncE", .phy_id_mask = 0xfffffff0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, 
.config_aneg = &vsc85xx_config_aneg, @@ -1937,7 +1933,6 @@ static struct phy_driver vsc85xx_driver[] = { .name = "Microsemi GE VSC8574 SyncE", .phy_id_mask = 0xfffffff0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .soft_reset = &genphy_soft_reset, .config_init = &vsc8584_config_init, .config_aneg = &vsc85xx_config_aneg, @@ -1964,7 +1959,6 @@ static struct phy_driver vsc85xx_driver[] = { .name = "Microsemi GE VSC8584 SyncE", .phy_id_mask = 0xfffffff0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .soft_reset = &genphy_soft_reset, .config_init = &vsc8584_config_init, .config_aneg = &vsc85xx_config_aneg, diff --git a/drivers/net/phy/national.c b/drivers/net/phy/national.c index 2b1e336961f9..139bed2c8ab4 100644 --- a/drivers/net/phy/national.c +++ b/drivers/net/phy/national.c @@ -134,7 +134,6 @@ static struct phy_driver dp83865_driver[] = { { .phy_id_mask = 0xfffffff0, .name = "NatSemi DP83865", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = ns_config_init, .ack_interrupt = ns_ack_interrupt, .config_intr = ns_config_intr, diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c index 889a4dce1648..cfe2313dbefd 100644 --- a/drivers/net/phy/qsemi.c +++ b/drivers/net/phy/qsemi.c @@ -116,7 +116,6 @@ static struct phy_driver qs6612_driver[] = { { .name = "QS6612", .phy_id_mask = 0xfffffff0, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = qs6612_config_init, .ack_interrupt = qs6612_ack_interrupt, .config_intr = qs6612_config_intr, diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 7b1c89b3833c..0f8e5b1c9cb6 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -217,13 +217,11 @@ static struct phy_driver realtek_drvs[] = { .name = "RTL8201CP Ethernet", .phy_id_mask = 0x0000ffff, .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, }, { .phy_id = 0x001cc816, .name = "RTL8201F Fast Ethernet", .phy_id_mask = 0x001fffff, .features = 
PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .ack_interrupt = &rtl8201_ack_interrupt, .config_intr = &rtl8201_config_intr, .suspend = genphy_suspend, @@ -243,7 +241,6 @@ static struct phy_driver realtek_drvs[] = { .name = "RTL8211B Gigabit Ethernet", .phy_id_mask = 0x001fffff, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .ack_interrupt = &rtl821x_ack_interrupt, .config_intr = &rtl8211b_config_intr, .read_mmd = &genphy_read_mmd_unsupported, @@ -263,7 +260,6 @@ static struct phy_driver realtek_drvs[] = { .name = "RTL8211DN Gigabit Ethernet", .phy_id_mask = 0x001fffff, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .ack_interrupt = rtl821x_ack_interrupt, .config_intr = rtl8211e_config_intr, .suspend = genphy_suspend, @@ -273,7 +269,6 @@ static struct phy_driver realtek_drvs[] = { .name = "RTL8211E Gigabit Ethernet", .phy_id_mask = 0x001fffff, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .ack_interrupt = &rtl821x_ack_interrupt, .config_intr = &rtl8211e_config_intr, .suspend = genphy_suspend, @@ -283,7 +278,6 @@ static struct phy_driver realtek_drvs[] = { .name = "RTL8211F Gigabit Ethernet", .phy_id_mask = 0x001fffff, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &rtl8211f_config_init, .ack_interrupt = &rtl8211f_ack_interrupt, .config_intr = &rtl8211f_config_intr, @@ -296,7 +290,6 @@ static struct phy_driver realtek_drvs[] = { .name = "RTL8366RB Gigabit Ethernet", .phy_id_mask = 0x001fffff, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &rtl8366rb_config_init, .suspend = genphy_suspend, .resume = genphy_resume, diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index c328208388da..f9477ff55545 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -219,7 +219,6 @@ static struct phy_driver smsc_phy_driver[] = { .name = "SMSC LAN83C185", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = smsc_phy_probe, @@ -239,7 +238,6 
@@ static struct phy_driver smsc_phy_driver[] = { .name = "SMSC LAN8187", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = smsc_phy_probe, @@ -264,7 +262,6 @@ static struct phy_driver smsc_phy_driver[] = { .name = "SMSC LAN8700", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = smsc_phy_probe, @@ -290,7 +287,6 @@ static struct phy_driver smsc_phy_driver[] = { .name = "SMSC LAN911x Internal PHY", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = smsc_phy_probe, @@ -309,7 +305,7 @@ static struct phy_driver smsc_phy_driver[] = { .name = "SMSC LAN8710/LAN8720", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT | PHY_RST_AFTER_CLK_EN, + .flags = PHY_RST_AFTER_CLK_EN, .probe = smsc_phy_probe, @@ -335,7 +331,6 @@ static struct phy_driver smsc_phy_driver[] = { .name = "SMSC LAN8740", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .probe = smsc_phy_probe, diff --git a/drivers/net/phy/ste10Xp.c b/drivers/net/phy/ste10Xp.c index 2fe9a87b55b5..33d733684f5b 100644 --- a/drivers/net/phy/ste10Xp.c +++ b/drivers/net/phy/ste10Xp.c @@ -87,7 +87,6 @@ static struct phy_driver ste10xp_pdriver[] = { .phy_id_mask = 0xfffffff0, .name = "STe101p", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = ste10Xp_config_init, .ack_interrupt = ste10Xp_ack_interrupt, .config_intr = ste10Xp_config_intr, @@ -98,7 +97,6 @@ static struct phy_driver ste10xp_pdriver[] = { .phy_id_mask = 0xffffffff, .name = "STe100p", .features = PHY_BASIC_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = ste10Xp_config_init, .ack_interrupt = ste10Xp_ack_interrupt, .config_intr = ste10Xp_config_intr, diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index fbf9ad429593..4ca513feba0e 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -399,7 +399,6 @@ static struct phy_driver vsc82xx_driver[] = { .name = "Vitesse VSC8234", .phy_id_mask = 0x000ffff0, .features = 
PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, .ack_interrupt = &vsc824x_ack_interrupt, @@ -409,7 +408,6 @@ static struct phy_driver vsc82xx_driver[] = { .name = "Vitesse VSC8244", .phy_id_mask = 0x000fffc0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, .ack_interrupt = &vsc824x_ack_interrupt, @@ -419,7 +417,6 @@ static struct phy_driver vsc82xx_driver[] = { .name = "Vitesse VSC8514", .phy_id_mask = 0x000ffff0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, .ack_interrupt = &vsc824x_ack_interrupt, @@ -429,7 +426,6 @@ static struct phy_driver vsc82xx_driver[] = { .name = "Vitesse VSC8572", .phy_id_mask = 0x000ffff0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, .ack_interrupt = &vsc824x_ack_interrupt, @@ -439,7 +435,6 @@ static struct phy_driver vsc82xx_driver[] = { .name = "Vitesse VSC8574", .phy_id_mask = 0x000ffff0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, .ack_interrupt = &vsc824x_ack_interrupt, @@ -449,7 +444,6 @@ static struct phy_driver vsc82xx_driver[] = { .name = "Vitesse VSC8601", .phy_id_mask = 0x000ffff0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &vsc8601_config_init, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, @@ -494,7 +488,6 @@ static struct phy_driver vsc82xx_driver[] = { .name = "Vitesse VSC8662", .phy_id_mask = 0x000ffff0, .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, .config_aneg = &vsc82x4_config_aneg, .ack_interrupt = &vsc824x_ack_interrupt, @@ -505,7 +498,6 @@ static struct phy_driver 
vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, .name = "Vitesse VSC8221", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &vsc8221_config_init, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, @@ -515,7 +507,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, .name = "Vitesse VSC8211", .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, .config_init = &vsc8221_config_init, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, diff --git a/include/linux/phy.h b/include/linux/phy.h index 3299ec6e69f3..59bb31ee132f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -66,9 +66,8 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_ini #define PHY_POLL -1 #define PHY_IGNORE_INTERRUPT -2 -#define PHY_HAS_INTERRUPT 0x00000001 -#define PHY_IS_INTERNAL 0x00000002 -#define PHY_RST_AFTER_CLK_EN 0x00000004 +#define PHY_IS_INTERNAL 0x00000001 +#define PHY_RST_AFTER_CLK_EN 0x00000002 #define MDIO_DEVICE_IS_PHY 0x80000000 /* Interface Mode definitions */ -- cgit v1.2.3 From 8deeb6309cc447b9b35939558f18e2164dd110df Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 9 Nov 2018 18:55:50 +0100 Subject: net: phy: don't set state PHY_CHANGELINK in phy_change State PHY_CHANGELINK isn't needed here, we can call the state machine directly. We just have to remove the check for phy_polling_mode() to make this work also in interrupt mode. Removing this check doesn't cause any overhead because when not polling the state machine is called only if required by some event. Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 8 -------- include/linux/phy.h | 7 ++----- 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 8dac890f32bf..da41420dfd11 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -738,11 +738,6 @@ static irqreturn_t phy_change(struct phy_device *phydev) goto phy_err; } - mutex_lock(&phydev->lock); - if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) - phydev->state = PHY_CHANGELINK; - mutex_unlock(&phydev->lock); - /* reschedule state queue work to run as soon as possible */ phy_trigger_machine(phydev); @@ -946,9 +941,6 @@ void phy_state_machine(struct work_struct *work) break; case PHY_NOLINK: case PHY_RUNNING: - if (!phy_polling_mode(phydev)) - break; - /* fall through */ case PHY_CHANGELINK: case PHY_RESUMING: err = phy_check_link_status(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index 59bb31ee132f..7db07e69c88f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -298,7 +298,7 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); * - timer moves to NOLINK or RUNNING * * NOLINK: PHY is up, but not currently plugged in. 
- * - If the timer notes that the link comes back, we move to RUNNING + * - irq or timer will set RUNNING if link comes back * - phy_stop moves to HALTED * * FORCING: PHY is being configured with forced settings @@ -309,10 +309,7 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); * * RUNNING: PHY is currently up, running, and possibly sending * and/or receiving packets - * - timer will set CHANGELINK if we're polling (this ensures the - * link state is polled every other cycle of this state machine, - * which makes it every other second) - * - irq will set CHANGELINK + * - irq or timer will set NOLINK if link goes down * - phy_stop moves to HALTED * * CHANGELINK: PHY experienced a change in link state -- cgit v1.2.3 From d73a2156bdad6bdf7e0c42051c5ebbea11f6271e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 9 Nov 2018 18:56:52 +0100 Subject: net: phy: simplify phy_mac_interrupt and related functions When using phy_mac_interrupt() the irq number is set to PHY_IGNORE_INTERRUPT, therefore phy_interrupt_is_valid() returns false. As a result phy_change() effectively just calls phy_trigger_machine() when called from phy_mac_interrupt() via phy_change_work(). So we can call phy_trigger_machine() from phy_mac_interrupt() directly and remove some now unneeded code. Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 14 +------------- drivers/net/phy/phy_device.c | 1 - include/linux/phy.h | 3 --- 3 files changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index da41420dfd11..ce1e8130a38f 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -750,18 +750,6 @@ phy_err: return IRQ_NONE; } -/** - * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes - * @work: work_struct that describes the work to be done - */ -void phy_change_work(struct work_struct *work) -{ - struct phy_device *phydev = - container_of(work, struct phy_device, phy_queue); - - phy_change(phydev); -} - /** * phy_interrupt - PHY interrupt handler * @irq: interrupt line @@ -1005,7 +993,7 @@ void phy_state_machine(struct work_struct *work) void phy_mac_interrupt(struct phy_device *phydev) { /* Trigger a state machine change */ - queue_work(system_power_efficient_wq, &phydev->phy_queue); + phy_trigger_machine(phydev); } EXPORT_SYMBOL(phy_mac_interrupt); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 00a46218c3a2..0f56d408b033 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -587,7 +587,6 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, mutex_init(&dev->lock); INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine); - INIT_WORK(&dev->phy_queue, phy_change_work); /* Request the appropriate module unconditionally; don't * bother trying to do so only if it isn't already loaded, diff --git a/include/linux/phy.h b/include/linux/phy.h index 7db07e69c88f..17d1f64723e4 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -369,7 +369,6 @@ struct phy_c45_device_ids { * giving up on the current attempt at acquiring a link * irq: IRQ number of the PHY's interrupt (-1 if none) * phy_timer: The timer for handling the state machine - * phy_queue: A work_queue for the phy_mac_interrupt 
* attached_dev: The attached enet driver's device instance ptr * adjust_link: Callback for the enet controller to respond to * changes in the link state. @@ -454,7 +453,6 @@ struct phy_device { void *priv; /* Interrupt and Polling infrastructure */ - struct work_struct phy_queue; struct delayed_work state_queue; struct mutex lock; @@ -1029,7 +1027,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_state_machine(struct work_struct *work); -void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); -- cgit v1.2.3 From aa2af2eb447c9a21c8c9e8d2336672bb620cf900 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 10 Nov 2018 00:39:14 +0100 Subject: net: phy: add macros for PHYID matching Add macros for PHYID matching to be used in PHY driver configs. By using these macros some boilerplate code can be avoided. Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 17d1f64723e4..03005c65e02d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -651,6 +651,10 @@ struct phy_driver { #define PHY_ANY_ID "MATCH ANY PHY" #define PHY_ANY_UID 0xffffffff +#define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 0) +#define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 4) +#define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 10) + /* A Structure for boards to register fixups with the PHY Lib */ struct phy_fixup { struct list_head list; -- cgit v1.2.3 From 899a3cbbf77a2a3d6d53d67ff6f10ad59eb03605 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 10 Nov 2018 23:40:50 +0100 Subject: net: phy: remove states PHY_STARTING and PHY_PENDING Both states aren't used. Most likely they result from an idea that never materialized. So remove them. Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 7 ------- include/linux/phy.h | 22 ++-------------------- 2 files changed, 2 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 083977d2f187..627e66ab60eb 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -46,9 +46,7 @@ static const char *phy_state_to_str(enum phy_state st) { switch (st) { PHY_STATE_STR(DOWN) - PHY_STATE_STR(STARTING) PHY_STATE_STR(READY) - PHY_STATE_STR(PENDING) PHY_STATE_STR(UP) PHY_STATE_STR(RUNNING) PHY_STATE_STR(NOLINK) @@ -852,9 +850,6 @@ void phy_start(struct phy_device *phydev) mutex_lock(&phydev->lock); switch (phydev->state) { - case PHY_STARTING: - phydev->state = PHY_PENDING; - break; case PHY_READY: phydev->state = PHY_UP; break; @@ -902,9 +897,7 @@ void phy_state_machine(struct work_struct *work) switch (phydev->state) { case PHY_DOWN: - case PHY_STARTING: case PHY_READY: - case PHY_PENDING: break; case PHY_UP: needs_aneg = true; diff --git a/include/linux/phy.h b/include/linux/phy.h index 03005c65e02d..a5bcb4aaa48e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -270,29 +270,13 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); * DOWN: PHY device and driver are not ready for anything. probe * should be called if and only if the PHY is in this state, * given that the PHY device exists. - * - PHY driver probe function will, depending on the PHY, set - * the state to STARTING or READY - * - * STARTING: PHY device is coming up, and the ethernet driver is - * not ready. PHY drivers may set this in the probe function. - * If they do, they are responsible for making sure the state is - * eventually set to indicate whether the PHY is UP or READY, - * depending on the state when the PHY is done starting up. 
- * - PHY driver will set the state to READY - * - start will set the state to PENDING + * - PHY driver probe function will set the state to READY * * READY: PHY is ready to send and receive packets, but the * controller is not. By default, PHYs which do not implement - * probe will be set to this state by phy_probe(). If the PHY - * driver knows the PHY is ready, and the PHY state is STARTING, - * then it sets this STATE. + * probe will be set to this state by phy_probe(). * - start will set the state to UP * - * PENDING: PHY device is coming up, but the ethernet driver is - * ready. phy_start will set this state if the PHY state is - * STARTING. - * - PHY driver will set the state to UP when the PHY is ready - * * UP: The PHY and attached device are ready to do work. * Interrupts should be started here. * - timer moves to NOLINK or RUNNING @@ -329,9 +313,7 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); */ enum phy_state { PHY_DOWN = 0, - PHY_STARTING, PHY_READY, - PHY_PENDING, PHY_UP, PHY_RUNNING, PHY_NOLINK, -- cgit v1.2.3 From 3c1bcc8614db10803f1f57ef0295363917448cb2 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 10 Nov 2018 23:43:33 +0100 Subject: net: ethernet: Convert phydev advertize and supported from u32 to link mode There are a few MAC/PHYs combinations which now support > 1Gbps. These may need to make use of link modes with bits > 31. Thus their supported PHY features or advertised features cannot be implemented using the current bitmap in a u32. Convert to using a linkmode bitmap, which can support all the currently devices link modes, and is future proof as more modes are added. Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/dsa/mt7530.c | 3 +- drivers/net/ethernet/aeroflex/greth.c | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 41 +++-- drivers/net/ethernet/apm/xgene-v2/mdio.c | 22 +-- drivers/net/ethernet/arc/emac_main.c | 3 +- drivers/net/ethernet/broadcom/b44.c | 12 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 5 +- drivers/net/ethernet/broadcom/tg3.c | 44 ++++-- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 7 +- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 5 +- drivers/net/ethernet/freescale/fman/mac.c | 2 +- drivers/net/ethernet/freescale/gianfar.c | 18 ++- drivers/net/ethernet/freescale/ucc_geth.c | 7 +- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 6 +- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- .../ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 13 +- drivers/net/ethernet/ibm/emac/core.c | 9 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 21 +-- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 +- drivers/net/ethernet/nxp/lpc_eth.c | 2 - drivers/net/ethernet/realtek/r8169.c | 2 +- drivers/net/ethernet/socionext/sni_ave.c | 2 +- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 12 +- drivers/net/ethernet/toshiba/tc35815.c | 29 ++-- drivers/net/phy/aquantia.c | 9 +- drivers/net/phy/bcm63xx.c | 2 +- drivers/net/phy/bcm87xx.c | 8 +- drivers/net/phy/fixed_phy.c | 19 ++- drivers/net/phy/marvell.c | 50 +++--- drivers/net/phy/marvell10g.c | 33 ++-- drivers/net/phy/micrel.c | 17 +- drivers/net/phy/phy-c45.c | 7 +- drivers/net/phy/phy-core.c | 38 +++-- drivers/net/phy/phy.c | 154 ++++++++++++------ drivers/net/phy/phy_device.c | 175 ++++++++++++++------- drivers/net/phy/phylink.c | 19 +-- drivers/net/usb/lan78xx.c | 27 ++-- include/linux/mii.h | 14 +- include/linux/phy.h | 18 ++- 39 files changed, 536 insertions(+), 330 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index a5de9bffe5be..74547f43b938 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -658,7 
+658,8 @@ static void mt7530_adjust_link(struct dsa_switch *ds, int port, if (phydev->asym_pause) rmt_adv |= LPA_PAUSE_ASYM; - lcl_adv = ethtool_adv_to_lcl_adv_t(phydev->advertising); + lcl_adv = linkmode_adv_to_lcl_adv_t( + phydev->advertising); flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); if (flowctrl & FLOW_CTRL_TX) diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 7c9348a26cbb..91fc64c1145e 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1283,7 +1283,7 @@ static int greth_mdio_probe(struct net_device *dev) else phy_set_max_speed(phy, SPEED_100); - phy->advertising = phy->supported; + linkmode_copy(phy->advertising, phy->supported); greth->link = 0; greth->speed = 0; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 151bdb629e8a..128cd648ba99 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -857,6 +857,7 @@ static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata) static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; struct xgbe_phy_data *phy_data = pdata->phy_data; unsigned int phy_id = phy_data->phydev->phy_id; @@ -878,9 +879,15 @@ static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) phy_write(phy_data->phydev, 0x04, 0x0d01); phy_write(phy_data->phydev, 0x00, 0x9140); - phy_data->phydev->supported = PHY_10BT_FEATURES | - PHY_100BT_FEATURES | - PHY_1000BT_FEATURES; + linkmode_set_bit_array(phy_10_100_features_array, + ARRAY_SIZE(phy_10_100_features_array), + supported); + linkmode_set_bit_array(phy_gbit_features_array, + ARRAY_SIZE(phy_gbit_features_array), + supported); + + linkmode_copy(phy_data->phydev->supported, supported); + phy_support_asym_pause(phy_data->phydev); netif_dbg(pdata, drv, pdata->netdev, @@ -891,6 +898,7 @@ static bool 
xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; struct xgbe_phy_data *phy_data = pdata->phy_data; struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom; unsigned int phy_id = phy_data->phydev->phy_id; @@ -951,9 +959,13 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata) reg = phy_read(phy_data->phydev, 0x00); phy_write(phy_data->phydev, 0x00, reg & ~0x00800); - phy_data->phydev->supported = (PHY_10BT_FEATURES | - PHY_100BT_FEATURES | - PHY_1000BT_FEATURES); + linkmode_set_bit_array(phy_10_100_features_array, + ARRAY_SIZE(phy_10_100_features_array), + supported); + linkmode_set_bit_array(phy_gbit_features_array, + ARRAY_SIZE(phy_gbit_features_array), + supported); + linkmode_copy(phy_data->phydev->supported, supported); phy_support_asym_pause(phy_data->phydev); netif_dbg(pdata, drv, pdata->netdev, @@ -976,7 +988,6 @@ static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata) struct ethtool_link_ksettings *lks = &pdata->phy.lks; struct xgbe_phy_data *phy_data = pdata->phy_data; struct phy_device *phydev; - u32 advertising; int ret; /* If we already have a PHY, just return */ @@ -1036,9 +1047,8 @@ static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata) xgbe_phy_external_phy_quirks(pdata); - ethtool_convert_link_mode_to_legacy_u32(&advertising, - lks->link_modes.advertising); - phydev->advertising &= advertising; + linkmode_and(phydev->advertising, phydev->advertising, + lks->link_modes.advertising); phy_start_aneg(phy_data->phydev); @@ -1497,7 +1507,7 @@ static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata) if (!phy_data->phydev) return; - lcl_adv = ethtool_adv_to_lcl_adv_t(phy_data->phydev->advertising); + lcl_adv = linkmode_adv_to_lcl_adv_t(phy_data->phydev->advertising); if (phy_data->phydev->pause) { XGBE_SET_LP_ADV(lks, Pause); @@ -1815,7 +1825,6 @@ static int 
xgbe_phy_an_config(struct xgbe_prv_data *pdata) { struct ethtool_link_ksettings *lks = &pdata->phy.lks; struct xgbe_phy_data *phy_data = pdata->phy_data; - u32 advertising; int ret; ret = xgbe_phy_find_phy_device(pdata); @@ -1825,12 +1834,10 @@ static int xgbe_phy_an_config(struct xgbe_prv_data *pdata) if (!phy_data->phydev) return 0; - ethtool_convert_link_mode_to_legacy_u32(&advertising, - lks->link_modes.advertising); - phy_data->phydev->autoneg = pdata->phy.autoneg; - phy_data->phydev->advertising = phy_data->phydev->supported & - advertising; + linkmode_and(phy_data->phydev->advertising, + phy_data->phydev->supported, + lks->link_modes.advertising); if (pdata->phy.autoneg != AUTONEG_ENABLE) { phy_data->phydev->speed = pdata->phy.speed; diff --git a/drivers/net/ethernet/apm/xgene-v2/mdio.c b/drivers/net/ethernet/apm/xgene-v2/mdio.c index f5fe3bb2e59d..53529cd85162 100644 --- a/drivers/net/ethernet/apm/xgene-v2/mdio.c +++ b/drivers/net/ethernet/apm/xgene-v2/mdio.c @@ -109,6 +109,7 @@ void xge_mdio_remove(struct net_device *ndev) int xge_mdio_config(struct net_device *ndev) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct xge_pdata *pdata = netdev_priv(ndev); struct device *dev = &pdata->pdev->dev; struct mii_bus *mdio_bus; @@ -148,16 +149,17 @@ int xge_mdio_config(struct net_device *ndev) goto err; } - phydev->supported &= ~(SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_1000baseT_Half | - SUPPORTED_AUI | - SUPPORTED_MII | - SUPPORTED_FIBRE | - SUPPORTED_BNC); - phydev->advertising = phydev->supported; + linkmode_set_bit_array(phy_10_100_features_array, + ARRAY_SIZE(phy_10_100_features_array), + mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_AUI_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_BNC_BIT, mask); + + 
linkmode_andnot(phydev->supported, phydev->supported, mask); + linkmode_copy(phydev->advertising, phydev->supported); pdata->phy_speed = SPEED_UNKNOWN; return 0; diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index bd277b0dc615..4406325fdd9f 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -432,7 +432,8 @@ static int arc_emac_open(struct net_device *ndev) phy_dev->autoneg = AUTONEG_ENABLE; phy_dev->speed = 0; phy_dev->duplex = 0; - phy_dev->advertising &= phy_dev->supported; + linkmode_and(phy_dev->advertising, phy_dev->advertising, + phy_dev->supported); priv->last_rx_bd = 0; diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index e445ab724827..f44808959ff3 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2248,6 +2248,7 @@ static void b44_adjust_link(struct net_device *dev) static int b44_register_phy_one(struct b44 *bp) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct mii_bus *mii_bus; struct ssb_device *sdev = bp->sdev; struct phy_device *phydev; @@ -2303,11 +2304,12 @@ static int b44_register_phy_one(struct b44 *bp) } /* mask with MAC supported features */ - phydev->supported &= (SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_Autoneg | - SUPPORTED_MII); - phydev->advertising = phydev->supported; + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask); + linkmode_and(phydev->supported, phydev->supported, mask); + linkmode_copy(phydev->advertising, phydev->supported); bp->old_link = 0; bp->phy_addr = phydev->mdio.addr; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index a6cbaca37e94..aceb9b7b55bd 100644 --- 
a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -226,7 +226,8 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) * capabilities, use that knowledge to also configure the * Reverse MII interface correctly. */ - if (dev->phydev->supported & PHY_1000BT_FEATURES) + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + dev->phydev->supported)) port_ctrl = PORT_MODE_EXT_RVMII_50; else port_ctrl = PORT_MODE_EXT_RVMII_25; @@ -317,7 +318,7 @@ int bcmgenet_mii_probe(struct net_device *dev) return ret; } - phydev->advertising = phydev->supported; + linkmode_copy(phydev->advertising, phydev->supported); /* The internal PHY has its link interrupts routed to the * Ethernet MAC ISRs. On GENETv5 there is a hardware issue diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ce44d208e137..79b881d9cdb0 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -2157,7 +2157,8 @@ static void tg3_phy_start(struct tg3 *tp) phydev->speed = tp->link_config.speed; phydev->duplex = tp->link_config.duplex; phydev->autoneg = tp->link_config.autoneg; - phydev->advertising = tp->link_config.advertising; + ethtool_convert_legacy_u32_to_link_mode( + phydev->advertising, tp->link_config.advertising); } phy_start(phydev); @@ -4057,8 +4058,9 @@ static int tg3_power_down_prepare(struct tg3 *tp) do_low_power = false; if ((tp->phy_flags & TG3_PHYFLG_IS_CONNECTED) && !(tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising) = { 0, }; struct phy_device *phydev; - u32 phyid, advertising; + u32 phyid; phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); @@ -4067,25 +4069,33 @@ static int tg3_power_down_prepare(struct tg3 *tp) tp->link_config.speed = phydev->speed; tp->link_config.duplex = phydev->duplex; tp->link_config.autoneg = phydev->autoneg; - tp->link_config.advertising = phydev->advertising; - - advertising = 
ADVERTISED_TP | - ADVERTISED_Pause | - ADVERTISED_Autoneg | - ADVERTISED_10baseT_Half; + ethtool_convert_link_mode_to_legacy_u32( + &tp->link_config.advertising, + phydev->advertising); + + linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, advertising); + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, + advertising); + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + advertising); + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + advertising); if (tg3_flag(tp, ENABLE_ASF) || device_should_wake) { - if (tg3_flag(tp, WOL_SPEED_100MB)) - advertising |= - ADVERTISED_100baseT_Half | - ADVERTISED_100baseT_Full | - ADVERTISED_10baseT_Full; - else - advertising |= ADVERTISED_10baseT_Full; + if (tg3_flag(tp, WOL_SPEED_100MB)) { + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + advertising); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + advertising); + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + advertising); + } else { + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + advertising); + } } - phydev->advertising = advertising; - + linkmode_copy(phydev->advertising, advertising); phy_start_aneg(phydev); phyid = phydev->drv->phy_id & phydev->drv->phy_id_mask; diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 4b3aecf98f2a..5359c1021f42 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1080,8 +1080,11 @@ static int octeon_mgmt_open(struct net_device *netdev) /* Set the mode of the interface, RGMII/MII. 
*/ if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && netdev->phydev) { union cvmx_agl_prtx_ctl agl_prtx_ctl; - int rgmii_mode = (netdev->phydev->supported & - (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)) != 0; + int rgmii_mode = + (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + netdev->phydev->supported) | + linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + netdev->phydev->supported)) != 0; agl_prtx_ctl.u64 = cvmx_read_csr(p->agl_prt_ctl); agl_prtx_ctl.s.mode = rgmii_mode ? 0 : 1; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 6e0f47f2c8a3..9510c9d78858 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2475,6 +2475,7 @@ static void dpaa_adjust_link(struct net_device *net_dev) static int dpaa_phy_init(struct net_device *net_dev) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct mac_device *mac_dev; struct phy_device *phy_dev; struct dpaa_priv *priv; @@ -2491,7 +2492,9 @@ static int dpaa_phy_init(struct net_device *net_dev) } /* Remove any features not supported by the controller */ - phy_dev->supported &= mac_dev->if_support; + ethtool_convert_legacy_u32_to_link_mode(mask, mac_dev->if_support); + linkmode_and(phy_dev->supported, phy_dev->supported, mask); + phy_support_asym_pause(phy_dev); mac_dev->phy_dev = phy_dev; diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index d79e4e009d63..71f4205f14e7 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -393,7 +393,7 @@ void fman_get_pause_cfg(struct mac_device *mac_dev, bool *rx_pause, */ /* get local capabilities */ - lcl_adv = ethtool_adv_to_lcl_adv_t(phy_dev->advertising); + lcl_adv = linkmode_adv_to_lcl_adv_t(phy_dev->advertising); /* get link partner capabilities */ rmt_adv = 0; diff --git a/drivers/net/ethernet/freescale/gianfar.c 
b/drivers/net/ethernet/freescale/gianfar.c index 3c8da1a18ba0..0e102c764b13 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1784,14 +1784,20 @@ static phy_interface_t gfar_get_interface(struct net_device *dev) */ static int init_phy(struct net_device *dev) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct gfar_private *priv = netdev_priv(dev); - uint gigabit_support = - priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ? - GFAR_SUPPORTED_GBIT : 0; phy_interface_t interface; struct phy_device *phydev; struct ethtool_eee edata; + linkmode_set_bit_array(phy_10_100_features_array, + ARRAY_SIZE(phy_10_100_features_array), + mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask); + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mask); + priv->oldlink = 0; priv->oldspeed = 0; priv->oldduplex = -1; @@ -1809,8 +1815,8 @@ static int init_phy(struct net_device *dev) gfar_configure_serdes(dev); /* Remove any features not supported by the controller */ - phydev->supported &= (GFAR_SUPPORTED | gigabit_support); - phydev->advertising = phydev->supported; + linkmode_and(phydev->supported, phydev->supported, mask); + linkmode_copy(phydev->advertising, phydev->supported); /* Add support for flow control */ phy_support_asym_pause(phydev); @@ -3656,7 +3662,7 @@ static u32 gfar_get_flowctrl_cfg(struct gfar_private *priv) if (phydev->asym_pause) rmt_adv |= LPA_PAUSE_ASYM; - lcl_adv = ethtool_adv_to_lcl_adv_t(phydev->advertising); + lcl_adv = linkmode_adv_to_lcl_adv_t(phydev->advertising); flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); if (flowctrl & FLOW_CTRL_TX) val |= MACCFG1_TX_FLOW; diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 32e02700feaa..2e978cb8b28c 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ 
b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1742,12 +1742,7 @@ static int init_phy(struct net_device *dev) if (priv->phy_interface == PHY_INTERFACE_MODE_SGMII) uec_configure_serdes(dev); - phy_set_max_speed(phydev, SPEED_100); - - if (priv->max_speed == SPEED_1000) - phydev->supported |= ADVERTISED_1000baseT_Full; - - phydev->advertising = phydev->supported; + phy_set_max_speed(phydev, priv->max_speed); priv->phydev = phydev; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 28e907831b0e..c62378c07e70 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1163,6 +1163,7 @@ static void hns_nic_adjust_link(struct net_device *ndev) */ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; struct phy_device *phy_dev = h->phy_dev; int ret; @@ -1180,8 +1181,9 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (unlikely(ret)) return -ENODEV; - phy_dev->supported &= h->if_support; - phy_dev->advertising = phy_dev->supported; + ethtool_convert_legacy_u32_to_link_mode(supported, h->if_support); + linkmode_and(phy_dev->supported, phy_dev->supported, supported); + linkmode_copy(phy_dev->advertising, phy_dev->supported); if (h->phy_if == PHY_INTERFACE_MODE_XGMII) phy_dev->autoneg = false; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index ab90108db1c9..43bfc730a62d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -6582,7 +6582,7 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev) if (!phydev->link || !phydev->autoneg) return 0; - local_advertising = ethtool_adv_to_lcl_adv_t(phydev->advertising); + local_advertising = linkmode_adv_to_lcl_adv_t(phydev->advertising); if (phydev->pause) 
remote_advertising = LPA_PAUSE_CAP; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 03018638f701..741cb3b9519d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -195,12 +195,13 @@ int hclge_mac_connect_phy(struct hclge_dev *hdev) { struct net_device *netdev = hdev->vport[0].nic.netdev; struct phy_device *phydev = hdev->hw.mac.phydev; + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; int ret; if (!phydev) return 0; - phydev->supported &= ~SUPPORTED_FIBRE; + linkmode_clear_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, phydev->supported); ret = phy_connect_direct(netdev, phydev, hclge_mac_adjust_link, @@ -210,7 +211,15 @@ int hclge_mac_connect_phy(struct hclge_dev *hdev) return ret; } - phydev->supported &= HCLGE_PHY_SUPPORTED_FEATURES; + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, mask); + linkmode_set_bit_array(phy_10_100_features_array, + ARRAY_SIZE(phy_10_100_features_array), + mask); + linkmode_set_bit_array(phy_gbit_features_array, + ARRAY_SIZE(phy_gbit_features_array), + mask); + linkmode_and(phydev->supported, phydev->supported, mask); phy_support_asym_pause(phydev); return 0; diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 760b2ad8e295..209255495bc9 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2455,7 +2455,8 @@ static void emac_adjust_link(struct net_device *ndev) dev->phy.duplex = phy->duplex; dev->phy.pause = phy->pause; dev->phy.asym_pause = phy->asym_pause; - dev->phy.advertising = phy->advertising; + ethtool_convert_link_mode_to_legacy_u32(&dev->phy.advertising, + phy->advertising); } static int emac_mii_bus_read(struct mii_bus *bus, int addr, int regnum) @@ -2490,7 +2491,8 @@ static int emac_mdio_phy_start_aneg(struct mii_phy *phy, 
phy_dev->autoneg = phy->autoneg; phy_dev->speed = phy->speed; phy_dev->duplex = phy->duplex; - phy_dev->advertising = phy->advertising; + ethtool_convert_legacy_u32_to_link_mode(phy_dev->advertising, + phy->advertising); return phy_start_aneg(phy_dev); } @@ -2624,7 +2626,8 @@ static int emac_dt_phy_connect(struct emac_instance *dev, dev->phy.def->phy_id_mask = dev->phy_dev->drv->phy_id_mask; dev->phy.def->name = dev->phy_dev->drv->name; dev->phy.def->ops = &emac_dt_mdio_phy_ops; - dev->phy.features = dev->phy_dev->supported; + ethtool_convert_link_mode_to_legacy_u32(&dev->phy.features, + dev->phy_dev->supported); dev->phy.address = dev->phy_dev->mdio.addr; dev->phy.mode = dev->phy_dev->interface; return 0; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 1e9bcbdc6a90..2f427271a793 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1499,23 +1499,16 @@ mv643xx_eth_get_link_ksettings_phy(struct mv643xx_eth_private *mp, struct ethtool_link_ksettings *cmd) { struct net_device *dev = mp->dev; - u32 supported, advertising; phy_ethtool_ksettings_get(dev->phydev, cmd); /* * The MAC does not support 1000baseT_Half. 
*/ - ethtool_convert_link_mode_to_legacy_u32(&supported, - cmd->link_modes.supported); - ethtool_convert_link_mode_to_legacy_u32(&advertising, - cmd->link_modes.advertising); - supported &= ~SUPPORTED_1000baseT_Half; - advertising &= ~ADVERTISED_1000baseT_Half; - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + cmd->link_modes.supported); + linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + cmd->link_modes.advertising); return 0; } @@ -3031,10 +3024,12 @@ static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex) phy->autoneg = AUTONEG_ENABLE; phy->speed = 0; phy->duplex = 0; - phy->advertising = phy->supported | ADVERTISED_Autoneg; + linkmode_copy(phy->advertising, phy->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phy->advertising); } else { phy->autoneg = AUTONEG_DISABLE; - phy->advertising = 0; + linkmode_zero(phy->advertising); phy->speed = speed; phy->duplex = duplex; } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 7dbfdac4067a..399f565dd85a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -243,7 +243,7 @@ static void mtk_phy_link_adjust(struct net_device *dev) if (dev->phydev->asym_pause) rmt_adv |= LPA_PAUSE_ASYM; - lcl_adv = ethtool_adv_to_lcl_adv_t(dev->phydev->advertising); + lcl_adv = linkmode_adv_to_lcl_adv_t(dev->phydev->advertising); flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); if (flowctrl & FLOW_CTRL_TX) @@ -353,8 +353,9 @@ static int mtk_phy_connect(struct net_device *dev) phy_set_max_speed(dev->phydev, SPEED_1000); phy_support_asym_pause(dev->phydev); - dev->phydev->advertising = dev->phydev->supported | - ADVERTISED_Autoneg; + linkmode_copy(dev->phydev->advertising, dev->phydev->supported); + 
linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + dev->phydev->advertising); phy_start_aneg(dev->phydev); of_node_put(np); diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 25382f8fbb70..bd8695a4faaa 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -783,8 +783,6 @@ static int lpc_mii_probe(struct net_device *ndev) phy_set_max_speed(phydev, SPEED_100); - phydev->advertising = phydev->supported; - pldat->link = 0; pldat->speed = 0; pldat->duplex = -1; diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 1fd01688d37b..56de045268f8 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6584,7 +6584,7 @@ static int r8169_phy_connect(struct rtl8169_private *tp) phy_set_max_speed(phydev, SPEED_100); /* Ensure to advertise everything, incl. pause */ - phydev->advertising = phydev->supported; + linkmode_copy(phydev->advertising, phydev->supported); phy_attached_info(phydev); diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 6732f5cbde08..9e7391faa1dc 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1117,7 +1117,7 @@ static void ave_phy_adjust_link(struct net_device *ndev) if (phydev->asym_pause) rmt_adv |= LPA_PAUSE_ASYM; - lcl_adv = ethtool_adv_to_lcl_adv_t(phydev->advertising); + lcl_adv = linkmode_adv_to_lcl_adv_t(phydev->advertising); cap = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); if (cap & FLOW_CTRL_TX) txcr |= AVE_TXCR_FLOCTR; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 5710864fa809..d1f61c25d82b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -458,8 +458,10 @@ stmmac_get_pauseparam(struct net_device *netdev, if (!adv_lp.pause) 
return; } else { - if (!(netdev->phydev->supported & SUPPORTED_Pause) || - !(netdev->phydev->supported & SUPPORTED_Asym_Pause)) + if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + netdev->phydev->supported) || + !linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + netdev->phydev->supported)) return; } @@ -487,8 +489,10 @@ stmmac_set_pauseparam(struct net_device *netdev, if (!adv_lp.pause) return -EOPNOTSUPP; } else { - if (!(phy->supported & SUPPORTED_Pause) || - !(phy->supported & SUPPORTED_Asym_Pause)) + if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phy->supported) || + !linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phy->supported)) return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 6a71c2c0f17d..c50a9772f4af 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -607,9 +607,9 @@ static void tc_handle_link_change(struct net_device *dev) static int tc_mii_probe(struct net_device *dev) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct tc35815_local *lp = netdev_priv(dev); struct phy_device *phydev; - u32 dropmask; phydev = phy_find_first(lp->mii_bus); if (!phydev) { @@ -630,17 +630,22 @@ static int tc_mii_probe(struct net_device *dev) /* mask with MAC supported features */ phy_set_max_speed(phydev, SPEED_100); - dropmask = 0; - if (options.speed == 10) - dropmask |= SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full; - else if (options.speed == 100) - dropmask |= SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full; - if (options.duplex == 1) - dropmask |= SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Full; - else if (options.duplex == 2) - dropmask |= SUPPORTED_10baseT_Half | SUPPORTED_100baseT_Half; - phydev->supported &= ~dropmask; - phydev->advertising = phydev->supported; + if (options.speed == 10) { + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask); +
} else if (options.speed == 100) { + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, mask); + } + if (options.duplex == 1) { + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask); + } else if (options.duplex == 2) { + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask); + } + linkmode_andnot(phydev->supported, phydev->supported, mask); + linkmode_copy(phydev->advertising, phydev->supported); lp->link = 0; lp->speed = 0; diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c index efc0fbde97a1..beb3309bb0f0 100644 --- a/drivers/net/phy/aquantia.c +++ b/drivers/net/phy/aquantia.c @@ -25,15 +25,10 @@ #define PHY_ID_AQR107 0x03a1b4e0 #define PHY_ID_AQR405 0x03a1b4b0 -#define PHY_AQUANTIA_FEATURES (SUPPORTED_10000baseT_Full | \ - SUPPORTED_1000baseT_Full | \ - SUPPORTED_100baseT_Full | \ - PHY_DEFAULT_FEATURES) - static int aquantia_config_aneg(struct phy_device *phydev) { - phydev->supported = PHY_AQUANTIA_FEATURES; - phydev->advertising = phydev->supported; + linkmode_copy(phydev->supported, phy_10gbit_features); + linkmode_copy(phydev->advertising, phydev->supported); return 0; } diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index 6a547b87ff04..a88dd14a25c0 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -43,7 +43,7 @@ static int bcm63xx_config_init(struct phy_device *phydev) int reg, err; /* ASYM_PAUSE bit is marked RO in datasheet, so don't cheat */ - phydev->supported |= SUPPORTED_Pause; + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); reg = phy_read(phydev, MII_BCM63XX_IR); if (reg < 0) diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index 64d5ba7bf94f..1b350183bffb 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -86,8 +86,12 @@ static int
bcm87xx_of_reg_init(struct phy_device *phydev) static int bcm87xx_config_init(struct phy_device *phydev) { - phydev->supported = SUPPORTED_10000baseR_FEC; - phydev->advertising = ADVERTISED_10000baseR_FEC; + linkmode_zero(phydev->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, + phydev->supported); + linkmode_zero(phydev->advertising); + linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, + phydev->advertising); phydev->state = PHY_NOLINK; phydev->autoneg = AUTONEG_DISABLE; diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 67b260877f30..f7fb62712cd8 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -223,14 +223,23 @@ struct phy_device *fixed_phy_register(unsigned int irq, switch (status->speed) { case SPEED_1000: - phy->supported = PHY_1000BT_FEATURES; - break; + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + phy->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + phy->supported); + /* fall through */ case SPEED_100: - phy->supported = PHY_100BT_FEATURES; - break; + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + phy->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + phy->supported); + /* fall through */ case SPEED_10: default: - phy->supported = PHY_10BT_FEATURES; + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + phy->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + phy->supported); } ret = phy_device_register(phy); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 463c616a7281..96f33831ea99 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -491,25 +491,26 @@ static int m88e1318_config_aneg(struct phy_device *phydev) } /** - * ethtool_adv_to_fiber_adv_t - * @ethadv: the ethtool advertisement settings + * linkmode_adv_to_fiber_adv_t + * @advertise: the linkmode advertisement settings * - * A small helper function that translates ethtool advertisement - * 
settings to phy autonegotiation advertisements for the - * MII_ADV register for fiber link. + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the MII_ADV + * register for fiber link. */ -static inline u32 ethtool_adv_to_fiber_adv_t(u32 ethadv) +static inline u32 linkmode_adv_to_fiber_adv_t(unsigned long *advertise) { u32 result = 0; - if (ethadv & ADVERTISED_1000baseT_Half) + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, advertise)) result |= ADVERTISE_FIBER_1000HALF; - if (ethadv & ADVERTISED_1000baseT_Full) + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, advertise)) result |= ADVERTISE_FIBER_1000FULL; - if ((ethadv & ADVERTISE_PAUSE_ASYM) && (ethadv & ADVERTISE_PAUSE_CAP)) + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertise) && + linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertise)) result |= LPA_PAUSE_ASYM_FIBER; - else if (ethadv & ADVERTISE_PAUSE_CAP) + else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertise)) result |= (ADVERTISE_PAUSE_FIBER & (~ADVERTISE_PAUSE_ASYM_FIBER)); @@ -530,14 +531,13 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev) int changed = 0; int err; int adv, oldadv; - u32 advertise; if (phydev->autoneg != AUTONEG_ENABLE) return genphy_setup_forced(phydev); /* Only allow advertising what this PHY supports */ - phydev->advertising &= phydev->supported; - advertise = phydev->advertising; + linkmode_and(phydev->advertising, phydev->advertising, + phydev->supported); /* Setup fiber advertisement */ adv = phy_read(phydev, MII_ADVERTISE); @@ -547,7 +547,7 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev) oldadv = adv; adv &= ~(ADVERTISE_FIBER_1000HALF | ADVERTISE_FIBER_1000FULL | LPA_PAUSE_FIBER); - adv |= ethtool_adv_to_fiber_adv_t(advertise); + adv |= linkmode_adv_to_fiber_adv_t(phydev->advertising); if (adv != oldadv) { err = phy_write(phydev, MII_ADVERTISE, adv); @@ -879,8 +879,14 
@@ static int m88e1510_config_init(struct phy_device *phydev) * so disable Pause support. */ pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause; - phydev->supported &= ~pause; - phydev->advertising &= ~pause; + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->supported); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->supported); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->advertising); } return m88e1318_config_init(phydev); @@ -1235,7 +1241,8 @@ static int marvell_read_status(struct phy_device *phydev) int err; /* Check the fiber mode first */ - if (phydev->supported & SUPPORTED_FIBRE && + if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported) && phydev->interface != PHY_INTERFACE_MODE_SGMII) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) @@ -1278,7 +1285,8 @@ static int marvell_suspend(struct phy_device *phydev) int err; /* Suspend the fiber mode first */ - if (!(phydev->supported & SUPPORTED_FIBRE)) { + if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; @@ -1312,7 +1320,8 @@ static int marvell_resume(struct phy_device *phydev) int err; /* Resume the fiber mode first */ - if (!(phydev->supported & SUPPORTED_FIBRE)) { + if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; @@ -1463,7 +1472,8 @@ error: static int marvell_get_sset_count(struct phy_device *phydev) { - if (phydev->supported & SUPPORTED_FIBRE) + if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) return ARRAY_SIZE(marvell_hw_stats); else return ARRAY_SIZE(marvell_hw_stats) - NB_FIBER_STATS; diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 1c9d039eec63..d939dce16b35 100644 --- 
a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -252,7 +252,6 @@ static int mv3310_resume(struct phy_device *phydev) static int mv3310_config_init(struct phy_device *phydev) { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; - u32 mask; int val; /* Check that the PHY interface type is compatible */ @@ -336,13 +335,9 @@ static int mv3310_config_init(struct phy_device *phydev) } } - if (!ethtool_convert_link_mode_to_legacy_u32(&mask, supported)) - phydev_warn(phydev, - "PHY supports (%*pb) more modes than phylib supports, some modes not supported.\n", - __ETHTOOL_LINK_MODE_MASK_NBITS, supported); - - phydev->supported &= mask; - phydev->advertising &= phydev->supported; + linkmode_copy(phydev->supported, supported); + linkmode_and(phydev->advertising, phydev->advertising, + phydev->supported); return 0; } @@ -350,7 +345,7 @@ static int mv3310_config_init(struct phy_device *phydev) static int mv3310_config_aneg(struct phy_device *phydev) { bool changed = false; - u32 advertising; + u16 reg; int ret; /* We don't support manual MDI control */ @@ -364,31 +359,35 @@ static int mv3310_config_aneg(struct phy_device *phydev) return genphy_c45_an_disable_aneg(phydev); } - phydev->advertising &= phydev->supported; - advertising = phydev->advertising; + linkmode_and(phydev->advertising, phydev->advertising, + phydev->supported); ret = mv3310_modify(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, - ethtool_adv_to_mii_adv_t(advertising)); + linkmode_adv_to_mii_adv_t(phydev->advertising)); if (ret < 0) return ret; if (ret > 0) changed = true; + reg = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); ret = mv3310_modify(phydev, MDIO_MMD_AN, MV_AN_CTRL1000, - ADVERTISE_1000FULL | ADVERTISE_1000HALF, - ethtool_adv_to_mii_ctrl1000_t(advertising)); + ADVERTISE_1000FULL | ADVERTISE_1000HALF, reg); if (ret < 0) return ret; if (ret > 0) changed = true; /* 10G control register */ + if 
(linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + phydev->advertising)) + reg = MDIO_AN_10GBT_CTRL_ADV10G; + else + reg = 0; + ret = mv3310_modify(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, - MDIO_AN_10GBT_CTRL_ADV10G, - advertising & ADVERTISED_10000baseT_Full ? - MDIO_AN_10GBT_CTRL_ADV10G : 0); + MDIO_AN_10GBT_CTRL_ADV10G, reg); if (ret < 0) return ret; if (ret > 0) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index cb5783905a25..c33384710d26 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -311,17 +311,22 @@ static int kszphy_config_init(struct phy_device *phydev) static int ksz8041_config_init(struct phy_device *phydev) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + struct device_node *of_node = phydev->mdio.dev.of_node; /* Limit supported and advertised modes in fiber mode */ if (of_property_read_bool(of_node, "micrel,fiber-mode")) { phydev->dev_flags |= MICREL_PHY_FXEN; - phydev->supported &= SUPPORTED_100baseT_Full | - SUPPORTED_100baseT_Half; - phydev->supported |= SUPPORTED_FIBRE; - phydev->advertising &= ADVERTISED_100baseT_Full | - ADVERTISED_100baseT_Half; - phydev->advertising |= ADVERTISED_FIBRE; + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask); + + linkmode_and(phydev->supported, phydev->supported, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported); + linkmode_and(phydev->advertising, phydev->advertising, mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->advertising); phydev->autoneg = AUTONEG_DISABLE; } diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index d7636ff03bc7..a19f4dfa7470 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -304,8 +304,11 @@ EXPORT_SYMBOL_GPL(gen10g_no_soft_reset); int gen10g_config_init(struct phy_device *phydev) { /* Temporarily just say we support everything */ - phydev->supported = 
SUPPORTED_10000baseT_Full; - phydev->advertising = SUPPORTED_10000baseT_Full; + linkmode_zero(phydev->supported); + + linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + phydev->supported); + linkmode_copy(phydev->advertising, phydev->supported); return 0; } diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index c7da4cbb1103..9d192b660b07 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -129,7 +129,6 @@ static const struct phy_setting settings[] = { * @speed: speed to match * @duplex: duplex to match * @mask: allowed link modes - * @maxbit: bit size of link modes * @exact: an exact match is required * * Search the settings array for a setting that matches the speed and @@ -143,14 +142,14 @@ static const struct phy_setting settings[] = { * they all fail, %NULL will be returned. */ const struct phy_setting * -phy_lookup_setting(int speed, int duplex, const unsigned long *mask, - size_t maxbit, bool exact) +phy_lookup_setting(int speed, int duplex, const unsigned long *mask, bool exact) { const struct phy_setting *p, *match = NULL, *last = NULL; int i; for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) { - if (p->bit < maxbit && test_bit(p->bit, mask)) { + if (p->bit < __ETHTOOL_LINK_MODE_MASK_NBITS && + test_bit(p->bit, mask)) { last = p; if (p->speed == speed && p->duplex == duplex) { /* Exact match for speed and duplex */ @@ -175,13 +174,13 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask, EXPORT_SYMBOL_GPL(phy_lookup_setting); size_t phy_speeds(unsigned int *speeds, size_t size, - unsigned long *mask, size_t maxbit) + unsigned long *mask) { size_t count; int i; for (i = 0, count = 0; i < ARRAY_SIZE(settings) && count < size; i++) - if (settings[i].bit < maxbit && + if (settings[i].bit < __ETHTOOL_LINK_MODE_MASK_NBITS && test_bit(settings[i].bit, mask) && (count == 0 || speeds[count - 1] != settings[i].speed)) speeds[count++] = settings[i].speed; @@ -199,27 +198,38 @@ size_t 
phy_speeds(unsigned int *speeds, size_t size, */ void phy_resolve_aneg_linkmode(struct phy_device *phydev) { - u32 common = phydev->lp_advertising & phydev->advertising; + __ETHTOOL_DECLARE_LINK_MODE_MASK(common); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp); - if (common & ADVERTISED_10000baseT_Full) { + ethtool_convert_legacy_u32_to_link_mode(lp, phydev->lp_advertising); + + linkmode_and(common, lp, phydev->advertising); + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, common)) { phydev->speed = SPEED_10000; phydev->duplex = DUPLEX_FULL; - } else if (common & ADVERTISED_1000baseT_Full) { + } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + common)) { phydev->speed = SPEED_1000; phydev->duplex = DUPLEX_FULL; - } else if (common & ADVERTISED_1000baseT_Half) { + } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + common)) { phydev->speed = SPEED_1000; phydev->duplex = DUPLEX_HALF; - } else if (common & ADVERTISED_100baseT_Full) { + } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + common)) { phydev->speed = SPEED_100; phydev->duplex = DUPLEX_FULL; - } else if (common & ADVERTISED_100baseT_Half) { + } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + common)) { phydev->speed = SPEED_100; phydev->duplex = DUPLEX_HALF; - } else if (common & ADVERTISED_10baseT_Full) { + } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + common)) { phydev->speed = SPEED_10; phydev->duplex = DUPLEX_FULL; - } else if (common & ADVERTISED_10baseT_Half) { + } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + common)) { phydev->speed = SPEED_10; phydev->duplex = DUPLEX_HALF; } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 627e66ab60eb..ecc8a7d5306c 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -179,11 +179,9 @@ EXPORT_SYMBOL(phy_aneg_done); * settings were found. 
*/ static const struct phy_setting * -phy_find_valid(int speed, int duplex, u32 supported) +phy_find_valid(int speed, int duplex, unsigned long *supported) { - unsigned long mask = supported; - - return phy_lookup_setting(speed, duplex, &mask, BITS_PER_LONG, false); + return phy_lookup_setting(speed, duplex, supported, false); } /** @@ -200,9 +198,7 @@ unsigned int phy_supported_speeds(struct phy_device *phy, unsigned int *speeds, unsigned int size) { - unsigned long supported = phy->supported; - - return phy_speeds(speeds, size, &supported, BITS_PER_LONG); + return phy_speeds(speeds, size, phy->supported); } /** @@ -214,11 +210,10 @@ unsigned int phy_supported_speeds(struct phy_device *phy, * * Description: Returns true if there is a valid setting, false otherwise. */ -static inline bool phy_check_valid(int speed, int duplex, u32 features) +static inline bool phy_check_valid(int speed, int duplex, + unsigned long *features) { - unsigned long mask = features; - - return !!phy_lookup_setting(speed, duplex, &mask, BITS_PER_LONG, true); + return !!phy_lookup_setting(speed, duplex, features, true); } /** @@ -232,13 +227,13 @@ static inline bool phy_check_valid(int speed, int duplex, u32 features) static void phy_sanitize_settings(struct phy_device *phydev) { const struct phy_setting *setting; - u32 features = phydev->supported; /* Sanitize settings based on PHY capabilities */ - if ((features & SUPPORTED_Autoneg) == 0) + if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->supported)) phydev->autoneg = AUTONEG_DISABLE; - setting = phy_find_valid(phydev->speed, phydev->duplex, features); + setting = phy_find_valid(phydev->speed, phydev->duplex, + phydev->supported); if (setting) { phydev->speed = setting->speed; phydev->duplex = setting->duplex; @@ -264,13 +259,15 @@ static void phy_sanitize_settings(struct phy_device *phydev) */ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); u32 speed
= ethtool_cmd_speed(cmd); if (cmd->phy_address != phydev->mdio.addr) return -EINVAL; /* We make sure that we don't pass unsupported values in to the PHY */ - cmd->advertising &= phydev->supported; + ethtool_convert_legacy_u32_to_link_mode(advertising, cmd->advertising); + linkmode_and(advertising, advertising, phydev->supported); /* Verify the settings we care about. */ if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE) @@ -291,12 +288,14 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) phydev->speed = speed; - phydev->advertising = cmd->advertising; + linkmode_copy(phydev->advertising, advertising); if (AUTONEG_ENABLE == cmd->autoneg) - phydev->advertising |= ADVERTISED_Autoneg; + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->advertising); else - phydev->advertising &= ~ADVERTISED_Autoneg; + linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->advertising); phydev->duplex = cmd->duplex; @@ -312,19 +311,18 @@ EXPORT_SYMBOL(phy_ethtool_sset); int phy_ethtool_ksettings_set(struct phy_device *phydev, const struct ethtool_link_ksettings *cmd) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); u8 autoneg = cmd->base.autoneg; u8 duplex = cmd->base.duplex; u32 speed = cmd->base.speed; - u32 advertising; if (cmd->base.phy_address != phydev->mdio.addr) return -EINVAL; - ethtool_convert_link_mode_to_legacy_u32(&advertising, - cmd->link_modes.advertising); + linkmode_copy(advertising, cmd->link_modes.advertising); /* We make sure that we don't pass unsupported values in to the PHY */ - advertising &= phydev->supported; + linkmode_and(advertising, advertising, phydev->supported); /* Verify the settings we care about. 
*/ if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) @@ -345,12 +343,14 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, phydev->speed = speed; - phydev->advertising = advertising; + linkmode_copy(phydev->advertising, advertising); if (autoneg == AUTONEG_ENABLE) - phydev->advertising |= ADVERTISED_Autoneg; + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->advertising); else - phydev->advertising &= ~ADVERTISED_Autoneg; + linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->advertising); phydev->duplex = duplex; @@ -366,11 +366,8 @@ EXPORT_SYMBOL(phy_ethtool_ksettings_set); void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd) { - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - phydev->supported); - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - phydev->advertising); + linkmode_copy(cmd->link_modes.supported, phydev->supported); + linkmode_copy(cmd->link_modes.advertising, phydev->advertising); ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, phydev->lp_advertising); @@ -442,7 +439,8 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) } break; case MII_ADVERTISE: - phydev->advertising = mii_adv_to_ethtool_adv_t(val); + mii_adv_to_linkmode_adv_t(phydev->advertising, + val); change_autoneg = true; break; default: @@ -604,20 +602,38 @@ static int phy_poll_aneg_done(struct phy_device *phydev) */ int phy_speed_down(struct phy_device *phydev, bool sync) { - u32 adv = phydev->lp_advertising & phydev->supported; - u32 adv_old = phydev->advertising; + __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); + __ETHTOOL_DECLARE_LINK_MODE_MASK(adv); int ret; if (phydev->autoneg != AUTONEG_ENABLE) return 0; - if (adv & PHY_10BT_FEATURES) - phydev->advertising &= ~(PHY_100BT_FEATURES | - PHY_1000BT_FEATURES); - else if (adv & PHY_100BT_FEATURES) - phydev->advertising &= ~PHY_1000BT_FEATURES; + linkmode_copy(adv_old, 
phydev->advertising); + ethtool_convert_legacy_u32_to_link_mode(adv, phydev->lp_advertising); + linkmode_and(adv, adv, phydev->supported); + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, adv) || + linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, adv)) { + linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + phydev->advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + phydev->advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + phydev->advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + phydev->advertising); + } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + adv) || + linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + adv)) { + linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + phydev->advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + phydev->advertising); + } - if (phydev->advertising == adv_old) + if (linkmode_equal(phydev->advertising, adv_old)) return 0; ret = phy_config_aneg(phydev); @@ -636,15 +652,30 @@ EXPORT_SYMBOL_GPL(phy_speed_down); */ int phy_speed_up(struct phy_device *phydev) { - u32 mask = PHY_10BT_FEATURES | PHY_100BT_FEATURES | PHY_1000BT_FEATURES; - u32 adv_old = phydev->advertising; + __ETHTOOL_DECLARE_LINK_MODE_MASK(all_speeds) = { 0, }; + __ETHTOOL_DECLARE_LINK_MODE_MASK(not_speeds); + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); + __ETHTOOL_DECLARE_LINK_MODE_MASK(speeds); + + linkmode_copy(adv_old, phydev->advertising); if (phydev->autoneg != AUTONEG_ENABLE) return 0; - phydev->advertising = (adv_old & ~mask) | (phydev->supported & mask); + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, all_speeds); + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, all_speeds); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, all_speeds); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, all_speeds); + 
linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, all_speeds); + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, all_speeds); - if (phydev->advertising == adv_old) + linkmode_andnot(not_speeds, adv_old, all_speeds); + linkmode_copy(supported, phydev->supported); + linkmode_and(speeds, supported, all_speeds); + linkmode_or(phydev->advertising, not_speeds, speeds); + + if (linkmode_equal(phydev->advertising, adv_old)) return 0; return phy_config_aneg(phydev); @@ -973,6 +1004,30 @@ void phy_mac_interrupt(struct phy_device *phydev) } EXPORT_SYMBOL(phy_mac_interrupt); +static void mmd_eee_adv_to_linkmode(unsigned long *advertising, u16 eee_adv) +{ + linkmode_zero(advertising); + + if (eee_adv & MDIO_EEE_100TX) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + advertising); + if (eee_adv & MDIO_EEE_1000T) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + advertising); + if (eee_adv & MDIO_EEE_10GT) + linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + advertising); + if (eee_adv & MDIO_EEE_1000KX) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + advertising); + if (eee_adv & MDIO_EEE_10GKX4) + linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + advertising); + if (eee_adv & MDIO_EEE_10GKR) + linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + advertising); +} + /** * phy_init_eee - init and check the EEE feature * @phydev: target phy_device struct @@ -991,9 +1046,12 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) /* According to 802.3az,the EEE is supported only in full duplex-mode. 
*/ if (phydev->duplex == DUPLEX_FULL) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(common); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp); + __ETHTOOL_DECLARE_LINK_MODE_MASK(adv); int eee_lp, eee_cap, eee_adv; - u32 lp, cap, adv; int status; + u32 cap; /* Read phy status to properly get the right settings */ status = phy_read_status(phydev); @@ -1020,9 +1078,11 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) if (eee_adv <= 0) goto eee_exit_err; - adv = mmd_eee_adv_to_ethtool_adv_t(eee_adv); - lp = mmd_eee_adv_to_ethtool_adv_t(eee_lp); - if (!phy_check_valid(phydev->speed, phydev->duplex, lp & adv)) + mmd_eee_adv_to_linkmode(adv, eee_adv); + mmd_eee_adv_to_linkmode(lp, eee_lp); + linkmode_and(common, adv, lp); + + if (!phy_check_valid(phydev->speed, phydev->duplex, common)) goto eee_exit_err; if (clk_stop_enable) { diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0f56d408b033..09a1c2d835b2 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -66,10 +66,12 @@ static const int phy_basic_ports_array[] = { ETHTOOL_LINK_MODE_TP_BIT, ETHTOOL_LINK_MODE_MII_BIT, }; +EXPORT_SYMBOL_GPL(phy_basic_ports_array); static const int phy_fibre_port_array[] = { ETHTOOL_LINK_MODE_FIBRE_BIT, }; +EXPORT_SYMBOL_GPL(phy_fibre_port_array); static const int phy_all_ports_features_array[] = { ETHTOOL_LINK_MODE_Autoneg_BIT, @@ -80,27 +82,32 @@ static const int phy_all_ports_features_array[] = { ETHTOOL_LINK_MODE_BNC_BIT, ETHTOOL_LINK_MODE_Backplane_BIT, }; +EXPORT_SYMBOL_GPL(phy_all_ports_features_array); -static const int phy_10_100_features_array[] = { +const int phy_10_100_features_array[4] = { ETHTOOL_LINK_MODE_10baseT_Half_BIT, ETHTOOL_LINK_MODE_10baseT_Full_BIT, ETHTOOL_LINK_MODE_100baseT_Half_BIT, ETHTOOL_LINK_MODE_100baseT_Full_BIT, }; +EXPORT_SYMBOL_GPL(phy_10_100_features_array); -static const int phy_basic_t1_features_array[] = { +const int phy_basic_t1_features_array[2] = { ETHTOOL_LINK_MODE_TP_BIT, 
ETHTOOL_LINK_MODE_100baseT_Full_BIT, }; +EXPORT_SYMBOL_GPL(phy_basic_t1_features_array); -static const int phy_gbit_features_array[] = { +const int phy_gbit_features_array[2] = { ETHTOOL_LINK_MODE_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT, }; +EXPORT_SYMBOL_GPL(phy_gbit_features_array); -static const int phy_10gbit_features_array[] = { +const int phy_10gbit_features_array[1] = { ETHTOOL_LINK_MODE_10000baseT_Full_BIT, }; +EXPORT_SYMBOL_GPL(phy_10gbit_features_array); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_10gbit_full_features); @@ -1441,8 +1448,13 @@ static int genphy_config_advert(struct phy_device *phydev) int err, changed = 0; /* Only allow advertising what this PHY supports */ - phydev->advertising &= phydev->supported; - advertise = phydev->advertising; + linkmode_and(phydev->advertising, phydev->advertising, + phydev->supported); + if (!ethtool_convert_link_mode_to_legacy_u32(&advertise, + phydev->advertising)) + phydev_warn(phydev, "PHY advertising (%*pb) more modes than genphy supports, some modes not advertised.\n", + __ETHTOOL_LINK_MODE_MASK_NBITS, + phydev->advertising); /* Setup standard advertisement */ adv = phy_read(phydev, MII_ADVERTISE); @@ -1481,10 +1493,11 @@ static int genphy_config_advert(struct phy_device *phydev) oldadv = adv; adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF); - if (phydev->supported & (SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full)) { + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + phydev->supported) || + linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + phydev->supported)) adv |= ethtool_adv_to_mii_ctrl1000_t(advertise); - } if (adv != oldadv) changed = 1; @@ -1692,8 +1705,10 @@ int genphy_read_status(struct phy_device *phydev) phydev->lp_advertising = 0; if (AUTONEG_ENABLE == phydev->autoneg) { - if (phydev->supported & (SUPPORTED_1000baseT_Half - | SUPPORTED_1000baseT_Full)) { + if 
(linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + phydev->supported) || + linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + phydev->supported)) { lpagb = phy_read(phydev, MII_STAT1000); if (lpagb < 0) return lpagb; @@ -1800,11 +1815,13 @@ EXPORT_SYMBOL(genphy_soft_reset); int genphy_config_init(struct phy_device *phydev) { int val; - u32 features; + __ETHTOOL_DECLARE_LINK_MODE_MASK(features) = { 0, }; - features = (SUPPORTED_TP | SUPPORTED_MII - | SUPPORTED_AUI | SUPPORTED_FIBRE | - SUPPORTED_BNC | SUPPORTED_Pause | SUPPORTED_Asym_Pause); + linkmode_set_bit_array(phy_basic_ports_array, + ARRAY_SIZE(phy_basic_ports_array), + features); + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, features); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, features); /* Do we support autonegotiation? */ val = phy_read(phydev, MII_BMSR); @@ -1812,16 +1829,16 @@ int genphy_config_init(struct phy_device *phydev) return val; if (val & BMSR_ANEGCAPABLE) - features |= SUPPORTED_Autoneg; + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, features); if (val & BMSR_100FULL) - features |= SUPPORTED_100baseT_Full; + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, features); if (val & BMSR_100HALF) - features |= SUPPORTED_100baseT_Half; + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, features); if (val & BMSR_10FULL) - features |= SUPPORTED_10baseT_Full; + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, features); if (val & BMSR_10HALF) - features |= SUPPORTED_10baseT_Half; + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, features); if (val & BMSR_ESTATEN) { val = phy_read(phydev, MII_ESTATUS); @@ -1829,13 +1846,15 @@ int genphy_config_init(struct phy_device *phydev) return val; if (val & ESTATUS_1000_TFULL) - features |= SUPPORTED_1000baseT_Full; + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + features); if (val & ESTATUS_1000_THALF) - features |= SUPPORTED_1000baseT_Half; + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + 
features); } - phydev->supported &= features; - phydev->advertising &= features; + linkmode_and(phydev->supported, phydev->supported, features); + linkmode_and(phydev->advertising, phydev->advertising, features); return 0; } @@ -1879,20 +1898,37 @@ EXPORT_SYMBOL(genphy_loopback); static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) { - phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | - PHY_10BT_FEATURES); + __ETHTOOL_DECLARE_LINK_MODE_MASK(speeds) = { 0, }; + + linkmode_set_bit_array(phy_10_100_features_array, + ARRAY_SIZE(phy_10_100_features_array), + speeds); + linkmode_set_bit_array(phy_gbit_features_array, + ARRAY_SIZE(phy_gbit_features_array), + speeds); + + linkmode_andnot(phydev->supported, phydev->supported, speeds); switch (max_speed) { default: return -ENOTSUPP; case SPEED_1000: - phydev->supported |= PHY_1000BT_FEATURES; + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + phydev->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + phydev->supported); /* fall through */ case SPEED_100: - phydev->supported |= PHY_100BT_FEATURES; + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + phydev->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + phydev->supported); /* fall through */ case SPEED_10: - phydev->supported |= PHY_10BT_FEATURES; + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + phydev->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + phydev->supported); } return 0; @@ -1906,7 +1942,7 @@ int phy_set_max_speed(struct phy_device *phydev, u32 max_speed) if (err) return err; - phydev->advertising = phydev->supported; + linkmode_copy(phydev->advertising, phydev->supported); return 0; } @@ -1923,10 +1959,8 @@ EXPORT_SYMBOL(phy_set_max_speed); */ void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode) { - WARN_ON(link_mode > 31); - - phydev->supported &= ~BIT(link_mode); - phydev->advertising = phydev->supported; + 
linkmode_clear_bit(link_mode, phydev->supported); + linkmode_copy(phydev->advertising, phydev->supported); } EXPORT_SYMBOL(phy_remove_link_mode); @@ -1939,9 +1973,9 @@ EXPORT_SYMBOL(phy_remove_link_mode); */ void phy_support_sym_pause(struct phy_device *phydev) { - phydev->supported &= ~SUPPORTED_Asym_Pause; - phydev->supported |= SUPPORTED_Pause; - phydev->advertising = phydev->supported; + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); + linkmode_copy(phydev->advertising, phydev->supported); } EXPORT_SYMBOL(phy_support_sym_pause); @@ -1953,8 +1987,9 @@ EXPORT_SYMBOL(phy_support_sym_pause); */ void phy_support_asym_pause(struct phy_device *phydev) { - phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - phydev->advertising = phydev->supported; + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported); + linkmode_copy(phydev->advertising, phydev->supported); } EXPORT_SYMBOL(phy_support_asym_pause); @@ -1972,12 +2007,13 @@ EXPORT_SYMBOL(phy_support_asym_pause); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg) { - phydev->supported &= ~SUPPORTED_Pause; + linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); if (rx && tx && autoneg) - phydev->supported |= SUPPORTED_Pause; + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->supported); - phydev->advertising = phydev->supported; + linkmode_copy(phydev->advertising, phydev->supported); } EXPORT_SYMBOL(phy_set_sym_pause); @@ -1994,20 +2030,29 @@ EXPORT_SYMBOL(phy_set_sym_pause); */ void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx) { - u16 oldadv = phydev->advertising; - u16 newadv = oldadv &= ~(SUPPORTED_Pause | SUPPORTED_Asym_Pause); + __ETHTOOL_DECLARE_LINK_MODE_MASK(oldadv); - if (rx) - newadv |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - if (tx) - newadv ^= 
SUPPORTED_Asym_Pause; + linkmode_copy(oldadv, phydev->advertising); - if (oldadv != newadv) { - phydev->advertising = newadv; + linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->advertising); - if (phydev->autoneg) - phy_start_aneg(phydev); + if (rx) { + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->advertising); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->advertising); } + + if (tx) + linkmode_change_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->advertising); + + if (!linkmode_equal(oldadv, phydev->advertising) && + phydev->autoneg) + phy_start_aneg(phydev); } EXPORT_SYMBOL(phy_set_asym_pause); @@ -2023,8 +2068,10 @@ EXPORT_SYMBOL(phy_set_asym_pause); bool phy_validate_pause(struct phy_device *phydev, struct ethtool_pauseparam *pp) { - if (!(phydev->supported & SUPPORTED_Pause) || - (!(phydev->supported & SUPPORTED_Asym_Pause) && + if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->supported) || + (!linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->supported) && pp->rx_pause != pp->tx_pause)) return false; return true; @@ -2112,9 +2159,9 @@ static int phy_probe(struct device *dev) * or both of these values */ ethtool_convert_link_mode_to_legacy_u32(&features, phydrv->features); - phydev->supported = features; + linkmode_copy(phydev->supported, phydrv->features); of_set_phy_supported(phydev); - phydev->advertising = phydev->supported; + linkmode_copy(phydev->advertising, phydev->supported); /* Get the EEE modes we want to prohibit. 
We will ask * the PHY stop advertising these mode later on @@ -2134,14 +2181,22 @@ static int phy_probe(struct device *dev) */ if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features) || test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydrv->features)) { - phydev->supported &= ~(SUPPORTED_Pause | SUPPORTED_Asym_Pause); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->supported); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->supported); if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features)) - phydev->supported |= SUPPORTED_Pause; + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->supported); if (test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydrv->features)) - phydev->supported |= SUPPORTED_Asym_Pause; + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->supported); } else { - phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->supported); } /* Set the state to READY by default */ diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 9b8dd0d0ee42..e7becc7379d7 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -191,8 +191,7 @@ static int phylink_parse_fixedlink(struct phylink *pl, phylink_validate(pl, pl->supported, &pl->link_config); s = phy_lookup_setting(pl->link_config.speed, pl->link_config.duplex, - pl->supported, - __ETHTOOL_LINK_MODE_MASK_NBITS, true); + pl->supported, true); linkmode_zero(pl->supported); phylink_set(pl->supported, MII); if (s) { @@ -634,13 +633,11 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) { struct phylink_link_state config; __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); - u32 advertising; int ret; memset(&config, 0, sizeof(config)); - ethtool_convert_legacy_u32_to_link_mode(supported, phy->supported); - ethtool_convert_legacy_u32_to_link_mode(config.advertising, - 
phy->advertising); + linkmode_copy(supported, phy->supported); + linkmode_copy(config.advertising, phy->advertising); config.interface = pl->link_config.interface; /* @@ -673,15 +670,14 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) linkmode_copy(pl->link_config.advertising, config.advertising); /* Restrict the phy advertisement according to the MAC support. */ - ethtool_convert_link_mode_to_legacy_u32(&advertising, config.advertising); - phy->advertising = advertising; + linkmode_copy(phy->advertising, config.advertising); mutex_unlock(&pl->state_mutex); mutex_unlock(&phy->lock); netdev_dbg(pl->netdev, - "phy: setting supported %*pb advertising 0x%08x\n", + "phy: setting supported %*pb advertising %*pb\n", __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported, - phy->advertising); + __ETHTOOL_LINK_MODE_MASK_NBITS, phy->advertising); phy_start_machine(phy); if (phy->irq > 0) @@ -1088,8 +1084,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, * duplex. */ s = phy_lookup_setting(kset->base.speed, kset->base.duplex, - pl->supported, - __ETHTOOL_LINK_MODE_MASK_NBITS, false); + pl->supported, false); if (!s) return -EINVAL; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index be1917be28f2..3c8bdac78866 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1586,18 +1587,17 @@ static int lan78xx_set_pause(struct net_device *net, dev->fc_request_control |= FLOW_CTRL_TX; if (ecmd.base.autoneg) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(fc) = { 0, }; u32 mii_adv; - u32 advertising; - ethtool_convert_link_mode_to_legacy_u32( - &advertising, ecmd.link_modes.advertising); - - advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, + ecmd.link_modes.advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ecmd.link_modes.advertising); mii_adv = 
(u32)mii_advertise_flowctrl(dev->fc_request_control); - advertising |= mii_adv_to_ethtool_adv_t(mii_adv); - - ethtool_convert_legacy_u32_to_link_mode( - ecmd.link_modes.advertising, advertising); + mii_adv_to_linkmode_adv_t(fc, mii_adv); + linkmode_or(ecmd.link_modes.advertising, fc, + ecmd.link_modes.advertising); phy_ethtool_ksettings_set(phydev, &ecmd); } @@ -2095,6 +2095,7 @@ static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev) static int lan78xx_phy_init(struct lan78xx_net *dev) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(fc) = { 0, }; int ret; u32 mii_adv; struct phy_device *phydev; @@ -2158,9 +2159,13 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) /* support both flow controls */ dev->fc_request_control = (FLOW_CTRL_RX | FLOW_CTRL_TX); - phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->advertising); mii_adv = (u32)mii_advertise_flowctrl(dev->fc_request_control); - phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv); + mii_adv_to_linkmode_adv_t(fc, mii_adv); + linkmode_or(phydev->advertising, fc, phydev->advertising); if (phydev->mdio.dev.of_node) { u32 reg; diff --git a/include/linux/mii.h b/include/linux/mii.h index 2da85b02e1c0..aaa458bbef2a 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -385,19 +385,21 @@ static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising, } /** - * ethtool_adv_to_lcl_adv_t - * @advertising:pointer to ethtool advertising + * linkmode_adv_to_lcl_adv_t + * @advertising:pointer to linkmode advertising * - * A small helper function that translates ethtool advertising to LVL + * A small helper function that translates linkmode advertising to LVL * pause capabilities. 
*/ -static inline u32 ethtool_adv_to_lcl_adv_t(u32 advertising) +static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising) { u32 lcl_adv = 0; - if (advertising & ADVERTISED_Pause) + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + advertising)) lcl_adv |= ADVERTISE_PAUSE_CAP; - if (advertising & ADVERTISED_Asym_Pause) + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + advertising)) lcl_adv |= ADVERTISE_PAUSE_ASYM; return lcl_adv; diff --git a/include/linux/phy.h b/include/linux/phy.h index a5bcb4aaa48e..cbc66ac3b560 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -58,6 +58,11 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_ini #define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features) #define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) +extern const int phy_10_100_features_array[4]; +extern const int phy_basic_t1_features_array[2]; +extern const int phy_gbit_features_array[2]; +extern const int phy_10gbit_features_array[1]; + /* * Set phydev->irq to PHY_POLL if interrupts are not supported, * or not desired for this PHY.
Set to PHY_IGNORE_INTERRUPT if @@ -405,10 +410,11 @@ struct phy_device { int pause; int asym_pause; - /* Union of PHY and Attached devices' supported modes */ - /* See mii.h for more info */ - u32 supported; - u32 advertising; + /* Union of PHY and Attached devices' supported link modes */ + /* See ethtool.h for more info */ + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); + u32 lp_advertising; /* Energy efficient ethernet modes which should be prohibited */ @@ -660,9 +666,9 @@ struct phy_setting { const struct phy_setting * phy_lookup_setting(int speed, int duplex, const unsigned long *mask, - size_t maxbit, bool exact); + bool exact); size_t phy_speeds(unsigned int *speeds, size_t size, - unsigned long *mask, size_t maxbit); + unsigned long *mask); void phy_resolve_aneg_linkmode(struct phy_device *phydev); -- cgit v1.2.3 From c0ec3c2736774c69bf5c641aea7712132c0f0eba Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 10 Nov 2018 23:43:34 +0100 Subject: net: phy: Convert u32 phydev->lp_advertising to linkmode Convert phy drivers to report the link partner advertised modes using a linkmode bitmap. This allows them to report the higher speeds which don't fit in a u32. Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/lxt.c | 4 ++-- drivers/net/phy/marvell.c | 26 ++++++++++++-------------- drivers/net/phy/marvell10g.c | 4 ++-- drivers/net/phy/phy-c45.c | 5 +++-- drivers/net/phy/phy-core.c | 13 ++++++------- drivers/net/phy/phy.c | 8 +++----- drivers/net/phy/phy_device.c | 8 ++++---- drivers/net/phy/uPD60620.c | 6 +++--- include/linux/mii.h | 36 ++++++++++++++++++++++++++++++++++++ include/linux/phy.h | 3 +-- 10 files changed, 72 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index c9e2c84c25c0..c8bb29ae1a2a 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -177,7 +177,7 @@ static int lxt973a2_read_status(struct phy_device *phydev) */ } while (lpa == adv && retry--); - phydev->lp_advertising = mii_lpa_to_ethtool_lpa_t(lpa); + mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, lpa); lpa &= adv; @@ -218,7 +218,7 @@ static int lxt973a2_read_status(struct phy_device *phydev) phydev->speed = SPEED_10; phydev->pause = phydev->asym_pause = 0; - phydev->lp_advertising = 0; + linkmode_zero(phydev->lp_advertising); } return 0; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 96f33831ea99..36a0db86c6f4 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1049,22 +1049,21 @@ static int m88e1145_config_init(struct phy_device *phydev) } /** - * fiber_lpa_to_ethtool_lpa_t + * fiber_lpa_to_linkmode_lpa_t + * @advertising: the linkmode advertisement settings * @lpa: value of the MII_LPA register for fiber link * * A small helper function that translates MII_LPA - * bits to ethtool LP advertisement settings. + * bits to linkmode LP advertisement settings. 
*/ -static u32 fiber_lpa_to_ethtool_lpa_t(u32 lpa) +static void fiber_lpa_to_linkmode_lpa_t(unsigned long *advertising, u32 lpa) { - u32 result = 0; - if (lpa & LPA_FIBER_1000HALF) - result |= ADVERTISED_1000baseT_Half; + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + advertising); if (lpa & LPA_FIBER_1000FULL) - result |= ADVERTISED_1000baseT_Full; - - return result; + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + advertising); } /** @@ -1140,9 +1139,8 @@ static int marvell_read_status_page_an(struct phy_device *phydev, } if (!fiber) { - phydev->lp_advertising = - mii_stat1000_to_ethtool_lpa_t(lpagb) | - mii_lpa_to_ethtool_lpa_t(lpa); + mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, lpa); + mii_stat1000_to_linkmode_lpa_t(phydev->lp_advertising, lpagb); if (phydev->duplex == DUPLEX_FULL) { phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; @@ -1150,7 +1148,7 @@ static int marvell_read_status_page_an(struct phy_device *phydev, } } else { /* The fiber link is only 1000M capable */ - phydev->lp_advertising = fiber_lpa_to_ethtool_lpa_t(lpa); + fiber_lpa_to_linkmode_lpa_t(phydev->lp_advertising, lpa); if (phydev->duplex == DUPLEX_FULL) { if (!(lpa & LPA_PAUSE_FIBER)) { @@ -1189,7 +1187,7 @@ static int marvell_read_status_page_fixed(struct phy_device *phydev) phydev->pause = 0; phydev->asym_pause = 0; - phydev->lp_advertising = 0; + linkmode_zero(phydev->lp_advertising); return 0; } diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index d939dce16b35..6f6e886fc836 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -457,7 +457,7 @@ static int mv3310_read_status(struct phy_device *phydev) phydev->speed = SPEED_UNKNOWN; phydev->duplex = DUPLEX_UNKNOWN; - phydev->lp_advertising = 0; + linkmode_zero(phydev->lp_advertising); phydev->link = 0; phydev->pause = 0; phydev->asym_pause = 0; @@ -490,7 +490,7 @@ static int mv3310_read_status(struct phy_device *phydev) if (val < 0) return val; - 
phydev->lp_advertising |= mii_stat1000_to_ethtool_lpa_t(val); + mii_stat1000_to_linkmode_lpa_t(phydev->lp_advertising, val); if (phydev->autoneg == AUTONEG_ENABLE) phy_resolve_aneg_linkmode(phydev); diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index a19f4dfa7470..03af927fa5ad 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -181,7 +181,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev) if (val < 0) return val; - phydev->lp_advertising = mii_lpa_to_ethtool_lpa_t(val); + mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, val); phydev->pause = val & LPA_PAUSE_CAP ? 1 : 0; phydev->asym_pause = val & LPA_PAUSE_ASYM ? 1 : 0; @@ -191,7 +191,8 @@ int genphy_c45_read_lpa(struct phy_device *phydev) return val; if (val & MDIO_AN_10GBT_STAT_LP10G) - phydev->lp_advertising |= ADVERTISED_10000baseT_Full; + linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + phydev->lp_advertising); return 0; } diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 9d192b660b07..2c3a13d1c421 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -199,11 +199,8 @@ size_t phy_speeds(unsigned int *speeds, size_t size, void phy_resolve_aneg_linkmode(struct phy_device *phydev) { __ETHTOOL_DECLARE_LINK_MODE_MASK(common); - __ETHTOOL_DECLARE_LINK_MODE_MASK(lp); - ethtool_convert_legacy_u32_to_link_mode(lp, phydev->lp_advertising); - - linkmode_and(common, lp, phydev->advertising); + linkmode_and(common, phydev->lp_advertising, phydev->advertising); if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, common)) { phydev->speed = SPEED_10000; @@ -235,9 +232,11 @@ void phy_resolve_aneg_linkmode(struct phy_device *phydev) } if (phydev->duplex == DUPLEX_FULL) { - phydev->pause = !!(phydev->lp_advertising & ADVERTISED_Pause); - phydev->asym_pause = !!(phydev->lp_advertising & - ADVERTISED_Asym_Pause); + phydev->pause = linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->lp_advertising); + 
phydev->asym_pause = linkmode_test_bit( + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->lp_advertising); } } EXPORT_SYMBOL_GPL(phy_resolve_aneg_linkmode); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ecc8a7d5306c..d73873334e47 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -368,9 +368,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, { linkmode_copy(cmd->link_modes.supported, phydev->supported); linkmode_copy(cmd->link_modes.advertising, phydev->advertising); - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, - phydev->lp_advertising); + linkmode_copy(cmd->link_modes.lp_advertising, phydev->lp_advertising); cmd->base.speed = phydev->speed; cmd->base.duplex = phydev->duplex; @@ -549,7 +547,7 @@ int phy_start_aneg(struct phy_device *phydev) phy_sanitize_settings(phydev); /* Invalidate LP advertising flags */ - phydev->lp_advertising = 0; + linkmode_zero(phydev->lp_advertising); err = phy_config_aneg(phydev); if (err < 0) @@ -610,7 +608,7 @@ int phy_speed_down(struct phy_device *phydev, bool sync) return 0; linkmode_copy(adv_old, phydev->advertising); - ethtool_convert_legacy_u32_to_link_mode(adv, phydev->lp_advertising); + linkmode_copy(adv, phydev->lp_advertising); linkmode_and(adv, adv, phydev->supported); if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, adv) || diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 09a1c2d835b2..55202a0ac476 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1702,7 +1702,7 @@ int genphy_read_status(struct phy_device *phydev) if (err) return err; - phydev->lp_advertising = 0; + linkmode_zero(phydev->lp_advertising); if (AUTONEG_ENABLE == phydev->autoneg) { if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, @@ -1725,8 +1725,8 @@ int genphy_read_status(struct phy_device *phydev) return -ENOLINK; } - phydev->lp_advertising = - mii_stat1000_to_ethtool_lpa_t(lpagb); + 
mii_stat1000_to_linkmode_lpa_t(phydev->lp_advertising, + lpagb); common_adv_gb = lpagb & adv << 2; } @@ -1734,7 +1734,7 @@ int genphy_read_status(struct phy_device *phydev) if (lpa < 0) return lpa; - phydev->lp_advertising |= mii_lpa_to_ethtool_lpa_t(lpa); + mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, lpa); adv = phy_read(phydev, MII_ADVERTISE); if (adv < 0) diff --git a/drivers/net/phy/uPD60620.c b/drivers/net/phy/uPD60620.c index 55f48ee3595a..1e4fc42e4629 100644 --- a/drivers/net/phy/uPD60620.c +++ b/drivers/net/phy/uPD60620.c @@ -47,7 +47,7 @@ static int upd60620_read_status(struct phy_device *phydev) return phy_state; phydev->link = 0; - phydev->lp_advertising = 0; + linkmode_zero(phydev->lp_advertising); phydev->pause = 0; phydev->asym_pause = 0; @@ -70,8 +70,8 @@ static int upd60620_read_status(struct phy_device *phydev) if (phy_state < 0) return phy_state; - phydev->lp_advertising - = mii_lpa_to_ethtool_lpa_t(phy_state); + mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, + phy_state); if (phydev->duplex == DUPLEX_FULL) { if (phy_state & LPA_PAUSE_CAP) diff --git a/include/linux/mii.h b/include/linux/mii.h index aaa458bbef2a..e7112e878bb0 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -287,6 +287,25 @@ static inline u32 mii_stat1000_to_ethtool_lpa_t(u32 lpa) return result; } +/** + * mii_stat1000_to_linkmode_lpa_t + * @advertising: target the linkmode advertisement settings + * @adv: value of the MII_STAT1000 register + * + * A small helper function that translates MII_STAT1000 bits, when in + * 1000Base-T mode, to linkmode advertisement settings. 
+ */ +static inline void mii_stat1000_to_linkmode_lpa_t(unsigned long *advertising, + u32 lpa) +{ + if (lpa & LPA_1000HALF) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + advertising); + if (lpa & LPA_1000FULL) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + advertising); +} + /** * ethtool_adv_to_mii_adv_x * @ethadv: the ethtool advertisement settings @@ -384,6 +403,23 @@ static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising, linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising); } +/** + * mii_lpa_to_linkmode_lpa_t + * @lpa: value of the MII_LPA register + * + * A small helper function that translates MII_LPA bits, when in + * 1000Base-T mode, to linkmode LP advertisement settings. + */ +static inline void mii_lpa_to_linkmode_lpa_t(unsigned long *lp_advertising, + u32 lpa) +{ + if (lpa & LPA_LPACK) + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + lp_advertising); + + mii_adv_to_linkmode_adv_t(lp_advertising, lpa); +} + /** * linkmode_adv_to_lcl_adv_t * @advertising:pointer to linkmode advertising diff --git a/include/linux/phy.h b/include/linux/phy.h index cbc66ac3b560..8f927246acdb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -414,8 +414,7 @@ struct phy_device { /* See ethtool.h for more info */ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); - - u32 lp_advertising; + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); /* Energy efficient ethernet modes which should be prohibited */ u32 eee_broken_modes; -- cgit v1.2.3 From fe1919147c69c3b820f801eb99bcc50cec0fb5a5 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 10 Nov 2018 23:43:35 +0100 Subject: net: phy: Fixup kerneldoc markup. Add missing markup for function parameters Signed-off-by: Andrew Lunn Signed-off-by: David S.
Miller --- include/linux/mii.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index e7112e878bb0..fb7ae4ae8ce3 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -209,7 +209,7 @@ static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv) /** * linkmode_adv_to_mii_ctrl1000_t - * advertising: the linkmode advertisement settings + * @advertising: the linkmode advertisement settings * * A small helper function that translates linkmode advertisement * settings to phy autonegotiation advertisements for the -- cgit v1.2.3 From 9206eb0bc5679d06d2f54b9db86fe2b9a55e07e4 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 11 Nov 2018 20:31:21 +0100 Subject: PCI: add USR vendor id and use it in r8169 and w6692 driver The PCI vendor id of U.S. Robotics isn't defined in pci_ids.h so far, only ISDN driver w6692 has a private definition. Move the definition to pci_ids.h and use it in the r8169 driver too. Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- drivers/isdn/hardware/mISDN/w6692.c | 3 --- drivers/net/ethernet/realtek/r8169.c | 2 +- include/linux/pci_ids.h | 2 ++ 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c index 5acf6ab67cd3..6f60aced11c5 100644 --- a/drivers/isdn/hardware/mISDN/w6692.c +++ b/drivers/isdn/hardware/mISDN/w6692.c @@ -52,10 +52,7 @@ static const struct w6692map w6692_map[] = {W6692_USR, "USR W6692"} }; -#ifndef PCI_VENDOR_ID_USR -#define PCI_VENDOR_ID_USR 0x16ec #define PCI_DEVICE_ID_USR_6692 0x3409 -#endif struct w6692_ch { struct bchannel bch; diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 56de045268f8..b3010cc51cdd 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -224,7 +224,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4300), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4302), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_AT, 0xc107), 0, 0, RTL_CFG_0 }, - { PCI_DEVICE(0x16ec, 0x0116), 0, 0, RTL_CFG_0 }, + { PCI_DEVICE(PCI_VENDOR_ID_USR, 0x0116), 0, 0, RTL_CFG_0 }, { PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, 0x0024, 0, 0, RTL_CFG_0 }, { 0x0001, 0x8168, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 69f0abe1ba1a..144de2e89531 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2359,6 +2359,8 @@ #define PCI_VENDOR_ID_SYNOPSYS 0x16c3 +#define PCI_VENDOR_ID_USR 0x16ec + #define PCI_VENDOR_ID_VITESSE 0x1725 #define PCI_DEVICE_ID_VITESSE_VSC7174 0x7174 -- cgit v1.2.3 From 3a379bbcea0af6280e1ca0d1edfcf4e68cde6ee0 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 19 Jul 2017 11:52:29 +0200 Subject: i3c: Add core I3C infrastructure Add core infrastructure to support I3C in Linux and document it. This infrastructure adds basic I3C support. 
Advanced features will be added afterwards. There are a few design choices that are worth mentioning because they impact the way I3C device drivers can interact with their devices: - all functions used to send I3C/I2C frames must be called in non-atomic context. Mainly done this way to ease implementation, but this is not set in stone, and if anyone needs async support, new functions can be added later on. - the bus element is a separate object, but it's tightly coupled with the master object. We thus have a 1:1 relationship between i3c_bus and i3c_master_controller objects, and if 2 master controllers are connected to the same bus and both exposed to the same Linux instance they will appear as two distinct busses, and devices on this bus will be exposed twice. - I2C backward compatibility has been designed to be transparent to I2C drivers and the I2C subsystem. The I3C master just registers an I2C adapter which creates a new I2C bus. I'd say that, from a representation PoV it's not ideal because what should appear as a single I3C bus exposing I3C and I2C devices here appears as 2 different buses connected to each other through the parenting (the I3C master is the parent of the I2C and I3C busses). On the other hand, I don't see a better solution if we want something that is not invasive. Missing features: - I3C HDR modes are not supported - no support for multi-master and the associated concepts (mastership handover, support for secondary masters, ...) - I2C devices can only be described using DT because this is the only use case I have. However, the framework can easily be extended with ACPI and board info support - I3C slave framework. This has been completely omitted, but shouldn't have a huge impact on the I3C framework because I3C slaves don't see the whole bus, it's only about handling master requests and generating IBIs. 
Some of the struct, constant and enum definitions could be shared, but most of the I3C slave framework logic will be different Signed-off-by: Boris Brezillon Reviewed-by: Arnd Bergmann Acked-by: Greg Kroah-Hartman --- drivers/Kconfig | 2 + drivers/Makefile | 2 +- drivers/i3c/Kconfig | 24 + drivers/i3c/Makefile | 4 + drivers/i3c/device.c | 233 ++++ drivers/i3c/internals.h | 26 + drivers/i3c/master.c | 2661 +++++++++++++++++++++++++++++++++++++++ drivers/i3c/master/Kconfig | 0 drivers/i3c/master/Makefile | 0 include/linux/i3c/ccc.h | 385 ++++++ include/linux/i3c/device.h | 331 +++++ include/linux/i3c/master.h | 648 ++++++++++ include/linux/mod_devicetable.h | 17 + 13 files changed, 4332 insertions(+), 1 deletion(-) create mode 100644 drivers/i3c/Kconfig create mode 100644 drivers/i3c/Makefile create mode 100644 drivers/i3c/device.c create mode 100644 drivers/i3c/internals.h create mode 100644 drivers/i3c/master.c create mode 100644 drivers/i3c/master/Kconfig create mode 100644 drivers/i3c/master/Makefile create mode 100644 include/linux/i3c/ccc.h create mode 100644 include/linux/i3c/device.h create mode 100644 include/linux/i3c/master.h (limited to 'include/linux') diff --git a/drivers/Kconfig b/drivers/Kconfig index ab4d43923c4d..8395bc515996 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -57,6 +57,8 @@ source "drivers/char/Kconfig" source "drivers/i2c/Kconfig" +source "drivers/i3c/Kconfig" + source "drivers/spi/Kconfig" source "drivers/spmi/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 578f469f72fb..e1ce029d28fd 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -111,7 +111,7 @@ obj-$(CONFIG_SERIO) += input/serio/ obj-$(CONFIG_GAMEPORT) += input/gameport/ obj-$(CONFIG_INPUT) += input/ obj-$(CONFIG_RTC_LIB) += rtc/ -obj-y += i2c/ media/ +obj-y += i2c/ i3c/ media/ obj-$(CONFIG_PPS) += pps/ obj-y += ptp/ obj-$(CONFIG_W1) += w1/ diff --git a/drivers/i3c/Kconfig b/drivers/i3c/Kconfig new file mode 100644 index 000000000000..30a441506f61 --- 
/dev/null +++ b/drivers/i3c/Kconfig @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 + +menuconfig I3C + tristate "I3C support" + select I2C + help + I3C is a serial protocol standardized by the MIPI alliance. + + It's supposed to be backward compatible with I2C while providing + support for high speed transfers and native interrupt support + without the need for extra pins. + + The I3C protocol also standardizes the slave device types and is + mainly designed to communicate with sensors. + + If you want I3C support, you should say Y here and also to the + specific driver for your bus adapter(s) below. + + This I3C support can also be built as a module. If so, the module + will be called i3c. + +if I3C +source "drivers/i3c/master/Kconfig" +endif # I3C diff --git a/drivers/i3c/Makefile b/drivers/i3c/Makefile new file mode 100644 index 000000000000..11982efbc6d9 --- /dev/null +++ b/drivers/i3c/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +i3c-y := device.o master.o +obj-$(CONFIG_I3C) += i3c.o +obj-$(CONFIG_I3C) += master/ diff --git a/drivers/i3c/device.c b/drivers/i3c/device.c new file mode 100644 index 000000000000..69cc040c3a1c --- /dev/null +++ b/drivers/i3c/device.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Cadence Design Systems Inc. + * + * Author: Boris Brezillon + */ + +#include +#include +#include +#include +#include +#include + +#include "internals.h" + +/** + * i3c_device_do_priv_xfers() - do I3C SDR private transfers directed to a + * specific device + * + * @dev: device with which the transfers should be done + * @xfers: array of transfers + * @nxfers: number of transfers + * + * Initiate one or several private SDR transfers with @dev. + * + * This function can sleep and thus cannot be called in atomic context. + * + * Return: 0 in case of success, a negative error code otherwise.
+ */ +int i3c_device_do_priv_xfers(struct i3c_device *dev, + struct i3c_priv_xfer *xfers, + int nxfers) +{ + int ret, i; + + if (nxfers < 1) + return 0; + + for (i = 0; i < nxfers; i++) { + if (!xfers[i].len || !xfers[i].data.in) + return -EINVAL; + } + + i3c_bus_normaluse_lock(dev->bus); + ret = i3c_dev_do_priv_xfers_locked(dev->desc, xfers, nxfers); + i3c_bus_normaluse_unlock(dev->bus); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_device_do_priv_xfers); + +/** + * i3c_device_get_info() - get I3C device information + * + * @dev: device we want information on + * @info: the information object to fill in + * + * Retrieve I3C dev info. + */ +void i3c_device_get_info(struct i3c_device *dev, + struct i3c_device_info *info) +{ + if (!info) + return; + + i3c_bus_normaluse_lock(dev->bus); + if (dev->desc) + *info = dev->desc->info; + i3c_bus_normaluse_unlock(dev->bus); +} +EXPORT_SYMBOL_GPL(i3c_device_get_info); + +/** + * i3c_device_disable_ibi() - Disable IBIs coming from a specific device + * @dev: device on which IBIs should be disabled + * + * This function disables IBIs coming from a specific device and waits for + * all pending IBIs to be processed. + * + * Return: 0 in case of success, a negative error code otherwise. + */ +int i3c_device_disable_ibi(struct i3c_device *dev) +{ + int ret = -ENOENT; + + i3c_bus_normaluse_lock(dev->bus); + if (dev->desc) { + mutex_lock(&dev->desc->ibi_lock); + ret = i3c_dev_disable_ibi_locked(dev->desc); + mutex_unlock(&dev->desc->ibi_lock); + } + i3c_bus_normaluse_unlock(dev->bus); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_device_disable_ibi); + +/** + * i3c_device_enable_ibi() - Enable IBIs coming from a specific device + * @dev: device on which IBIs should be enabled + * + * This function enables IBIs coming from a specific device and waits for + * all pending IBIs to be processed. This should be called on a device + * where i3c_device_request_ibi() has succeeded.
+ * + * Note that IBIs from this device might be received before this function + * returns to its caller. + * + * Return: 0 in case of success, a negative error code otherwise. + */ +int i3c_device_enable_ibi(struct i3c_device *dev) +{ + int ret = -ENOENT; + + i3c_bus_normaluse_lock(dev->bus); + if (dev->desc) { + mutex_lock(&dev->desc->ibi_lock); + ret = i3c_dev_enable_ibi_locked(dev->desc); + mutex_unlock(&dev->desc->ibi_lock); + } + i3c_bus_normaluse_unlock(dev->bus); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_device_enable_ibi); + +/** + * i3c_device_request_ibi() - Request an IBI + * @dev: device for which we should enable IBIs + * @req: setup requested for this IBI + * + * This function is responsible for pre-allocating all resources needed to + * process IBIs coming from @dev. When this function returns, the IBI is not + * enabled until i3c_device_enable_ibi() is called. + * + * Return: 0 in case of success, a negative error code otherwise. + */ +int i3c_device_request_ibi(struct i3c_device *dev, + const struct i3c_ibi_setup *req) +{ + int ret = -ENOENT; + + if (!req->handler || !req->num_slots) + return -EINVAL; + + i3c_bus_normaluse_lock(dev->bus); + if (dev->desc) { + mutex_lock(&dev->desc->ibi_lock); + ret = i3c_dev_request_ibi_locked(dev->desc, req); + mutex_unlock(&dev->desc->ibi_lock); + } + i3c_bus_normaluse_unlock(dev->bus); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_device_request_ibi); + +/** + * i3c_device_free_ibi() - Free all resources needed for IBI handling + * @dev: device on which you want to release IBI resources + * + * This function is responsible for de-allocating resources previously + * allocated by i3c_device_request_ibi(). It should be called after disabling + * IBIs with i3c_device_disable_ibi().
+ */ +void i3c_device_free_ibi(struct i3c_device *dev) +{ + i3c_bus_normaluse_lock(dev->bus); + if (dev->desc) { + mutex_lock(&dev->desc->ibi_lock); + i3c_dev_free_ibi_locked(dev->desc); + mutex_unlock(&dev->desc->ibi_lock); + } + i3c_bus_normaluse_unlock(dev->bus); +} +EXPORT_SYMBOL_GPL(i3c_device_free_ibi); + +/** + * i3cdev_to_dev() - Returns the device embedded in @i3cdev + * @i3cdev: I3C device + * + * Return: a pointer to a device object. + */ +struct device *i3cdev_to_dev(struct i3c_device *i3cdev) +{ + return &i3cdev->dev; +} +EXPORT_SYMBOL_GPL(i3cdev_to_dev); + +/** + * dev_to_i3cdev() - Returns the I3C device containing @dev + * @dev: device object + * + * Return: a pointer to an I3C device object. + */ +struct i3c_device *dev_to_i3cdev(struct device *dev) +{ + return container_of(dev, struct i3c_device, dev); +} +EXPORT_SYMBOL_GPL(dev_to_i3cdev); + +/** + * i3c_driver_register_with_owner() - register an I3C device driver + * + * @drv: driver to register + * @owner: module that owns this driver + * + * Register @drv to the core. + * + * Return: 0 in case of success, a negative error code otherwise. + */ +int i3c_driver_register_with_owner(struct i3c_driver *drv, struct module *owner) +{ + drv->driver.owner = owner; + drv->driver.bus = &i3c_bus_type; + + return driver_register(&drv->driver); +} +EXPORT_SYMBOL_GPL(i3c_driver_register_with_owner); + +/** + * i3c_driver_unregister() - unregister an I3C device driver + * + * @drv: driver to unregister + * + * Unregister @drv. + */ +void i3c_driver_unregister(struct i3c_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL_GPL(i3c_driver_unregister); diff --git a/drivers/i3c/internals.h b/drivers/i3c/internals.h new file mode 100644 index 000000000000..86b7b44cfca2 --- /dev/null +++ b/drivers/i3c/internals.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Cadence Design Systems Inc.
+ * + * Author: Boris Brezillon + */ + +#ifndef I3C_INTERNALS_H +#define I3C_INTERNALS_H + +#include + +extern struct bus_type i3c_bus_type; + +void i3c_bus_normaluse_lock(struct i3c_bus *bus); +void i3c_bus_normaluse_unlock(struct i3c_bus *bus); + +int i3c_dev_do_priv_xfers_locked(struct i3c_dev_desc *dev, + struct i3c_priv_xfer *xfers, + int nxfers); +int i3c_dev_disable_ibi_locked(struct i3c_dev_desc *dev); +int i3c_dev_enable_ibi_locked(struct i3c_dev_desc *dev); +int i3c_dev_request_ibi_locked(struct i3c_dev_desc *dev, + const struct i3c_ibi_setup *req); +void i3c_dev_free_ibi_locked(struct i3c_dev_desc *dev); +#endif /* I3C_INTERNALS_H */ diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c new file mode 100644 index 000000000000..0ea7bb045fad --- /dev/null +++ b/drivers/i3c/master.c @@ -0,0 +1,2661 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Cadence Design Systems Inc. + * + * Author: Boris Brezillon + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internals.h" + +static DEFINE_IDR(i3c_bus_idr); +static DEFINE_MUTEX(i3c_core_lock); + +/** + * i3c_bus_maintenance_lock - Lock the bus for a maintenance operation + * @bus: I3C bus to take the lock on + * + * This function takes the bus lock so that no other operations can occur on + * the bus. This is needed for all kinds of bus maintenance operations, like + * - enabling/disabling slave events + * - re-triggering DAA + * - changing the dynamic address of a device + * - relinquishing mastership + * - ... + * + * The reason for this kind of locking is that we don't want drivers and core + * logic to rely on I3C device information that could be changed behind their + * back.
+ */ +static void i3c_bus_maintenance_lock(struct i3c_bus *bus) +{ + down_write(&bus->lock); +} + +/** + * i3c_bus_maintenance_unlock - Release the bus lock after a maintenance + * operation + * @bus: I3C bus to release the lock on + * + * Should be called when the bus maintenance operation is done. See + * i3c_bus_maintenance_lock() for more details on what these maintenance + * operations are. + */ +static void i3c_bus_maintenance_unlock(struct i3c_bus *bus) +{ + up_write(&bus->lock); +} + +/** + * i3c_bus_normaluse_lock - Lock the bus for a normal operation + * @bus: I3C bus to take the lock on + * + * This function takes the bus lock for any operation that is not a maintenance + * operation (see i3c_bus_maintenance_lock() for a non-exhaustive list of + * maintenance operations). Basically all communications with I3C devices are + * normal operations (HDR, SDR transfers or CCC commands that do not change bus + * state or I3C dynamic address). + * + * Note that this lock is not guaranteeing serialization of normal operations. + * In other words, transfer requests passed to the I3C master can be submitted + * in parallel and I3C master drivers have to use their own locking to make + * sure two different communications are not inter-mixed, or access to the + * output/input queue is not done while the engine is busy. + */ +void i3c_bus_normaluse_lock(struct i3c_bus *bus) +{ + down_read(&bus->lock); +} + +/** + * i3c_bus_normaluse_unlock - Release the bus lock after a normal operation + * @bus: I3C bus to release the lock on + * + * Should be called when a normal operation is done. See + * i3c_bus_normaluse_lock() for more details on what these normal operations + * are. 
+ */ +void i3c_bus_normaluse_unlock(struct i3c_bus *bus) +{ + up_read(&bus->lock); +} + +static struct i3c_master_controller *dev_to_i3cmaster(struct device *dev) +{ + return container_of(dev, struct i3c_master_controller, dev); +} + +static const struct device_type i3c_device_type; + +static struct i3c_bus *dev_to_i3cbus(struct device *dev) +{ + struct i3c_master_controller *master; + + if (dev->type == &i3c_device_type) + return dev_to_i3cdev(dev)->bus; + + master = dev_to_i3cmaster(dev); + + return &master->bus; +} + +static struct i3c_dev_desc *dev_to_i3cdesc(struct device *dev) +{ + struct i3c_master_controller *master; + + if (dev->type == &i3c_device_type) + return dev_to_i3cdev(dev)->desc; + + master = container_of(dev, struct i3c_master_controller, dev); + + return master->this; +} + +static ssize_t bcr_show(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct i3c_bus *bus = dev_to_i3cbus(dev); + struct i3c_dev_desc *desc; + ssize_t ret; + + i3c_bus_normaluse_lock(bus); + desc = dev_to_i3cdesc(dev); + ret = sprintf(buf, "%x\n", desc->info.bcr); + i3c_bus_normaluse_unlock(bus); + + return ret; +} +static DEVICE_ATTR_RO(bcr); + +static ssize_t dcr_show(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct i3c_bus *bus = dev_to_i3cbus(dev); + struct i3c_dev_desc *desc; + ssize_t ret; + + i3c_bus_normaluse_lock(bus); + desc = dev_to_i3cdesc(dev); + ret = sprintf(buf, "%x\n", desc->info.dcr); + i3c_bus_normaluse_unlock(bus); + + return ret; +} +static DEVICE_ATTR_RO(dcr); + +static ssize_t pid_show(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct i3c_bus *bus = dev_to_i3cbus(dev); + struct i3c_dev_desc *desc; + ssize_t ret; + + i3c_bus_normaluse_lock(bus); + desc = dev_to_i3cdesc(dev); + ret = sprintf(buf, "%llx\n", desc->info.pid); + i3c_bus_normaluse_unlock(bus); + + return ret; +} +static DEVICE_ATTR_RO(pid); + +static ssize_t dynamic_address_show(struct device *dev, + struct 
device_attribute *da, + char *buf) +{ + struct i3c_bus *bus = dev_to_i3cbus(dev); + struct i3c_dev_desc *desc; + ssize_t ret; + + i3c_bus_normaluse_lock(bus); + desc = dev_to_i3cdesc(dev); + ret = sprintf(buf, "%02x\n", desc->info.dyn_addr); + i3c_bus_normaluse_unlock(bus); + + return ret; +} +static DEVICE_ATTR_RO(dynamic_address); + +static const char * const hdrcap_strings[] = { + "hdr-ddr", "hdr-tsp", "hdr-tsl", +}; + +static ssize_t hdrcap_show(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct i3c_bus *bus = dev_to_i3cbus(dev); + struct i3c_dev_desc *desc; + ssize_t offset = 0, ret; + unsigned long caps; + int mode; + + i3c_bus_normaluse_lock(bus); + desc = dev_to_i3cdesc(dev); + caps = desc->info.hdr_cap; + for_each_set_bit(mode, &caps, 8) { + if (mode >= ARRAY_SIZE(hdrcap_strings)) + break; + + if (!hdrcap_strings[mode]) + continue; + + ret = sprintf(buf + offset, offset ? " %s" : "%s", + hdrcap_strings[mode]); + if (ret < 0) + goto out; + + offset += ret; + } + + ret = sprintf(buf + offset, "\n"); + if (ret < 0) + goto out; + + ret = offset + ret; + +out: + i3c_bus_normaluse_unlock(bus); + + return ret; +} +static DEVICE_ATTR_RO(hdrcap); + +static struct attribute *i3c_device_attrs[] = { + &dev_attr_bcr.attr, + &dev_attr_dcr.attr, + &dev_attr_pid.attr, + &dev_attr_dynamic_address.attr, + &dev_attr_hdrcap.attr, + NULL, +}; +ATTRIBUTE_GROUPS(i3c_device); + +static int i3c_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct i3c_device *i3cdev = dev_to_i3cdev(dev); + struct i3c_device_info devinfo; + u16 manuf, part, ext; + + i3c_device_get_info(i3cdev, &devinfo); + manuf = I3C_PID_MANUF_ID(devinfo.pid); + part = I3C_PID_PART_ID(devinfo.pid); + ext = I3C_PID_EXTRA_INFO(devinfo.pid); + + if (I3C_PID_RND_LOWER_32BITS(devinfo.pid)) + return add_uevent_var(env, "MODALIAS=i3c:dcr%02Xmanuf%04X", + devinfo.dcr, manuf); + + return add_uevent_var(env, + "MODALIAS=i3c:dcr%02Xmanuf%04Xpart%04xext%04x", + devinfo.dcr, manuf, 
part, ext); +} + +static const struct device_type i3c_device_type = { + .groups = i3c_device_groups, + .uevent = i3c_device_uevent, +}; + +static const struct i3c_device_id * +i3c_device_match_id(struct i3c_device *i3cdev, + const struct i3c_device_id *id_table) +{ + struct i3c_device_info devinfo; + const struct i3c_device_id *id; + + i3c_device_get_info(i3cdev, &devinfo); + + /* + * The lower 32bits of the provisional ID is just filled with a random + * value, try to match using DCR info. + */ + if (!I3C_PID_RND_LOWER_32BITS(devinfo.pid)) { + u16 manuf = I3C_PID_MANUF_ID(devinfo.pid); + u16 part = I3C_PID_PART_ID(devinfo.pid); + u16 ext_info = I3C_PID_EXTRA_INFO(devinfo.pid); + + /* First try to match by manufacturer/part ID. */ + for (id = id_table; id->match_flags != 0; id++) { + if ((id->match_flags & I3C_MATCH_MANUF_AND_PART) != + I3C_MATCH_MANUF_AND_PART) + continue; + + if (manuf != id->manuf_id || part != id->part_id) + continue; + + if ((id->match_flags & I3C_MATCH_EXTRA_INFO) && + ext_info != id->extra_info) + continue; + + return id; + } + } + + /* Fallback to DCR match. 
*/ + for (id = id_table; id->match_flags != 0; id++) { + if ((id->match_flags & I3C_MATCH_DCR) && + id->dcr == devinfo.dcr) + return id; + } + + return NULL; +} + +static int i3c_device_match(struct device *dev, struct device_driver *drv) +{ + struct i3c_device *i3cdev; + struct i3c_driver *i3cdrv; + + if (dev->type != &i3c_device_type) + return 0; + + i3cdev = dev_to_i3cdev(dev); + i3cdrv = drv_to_i3cdrv(drv); + if (i3c_device_match_id(i3cdev, i3cdrv->id_table)) + return 1; + + return 0; +} + +static int i3c_device_probe(struct device *dev) +{ + struct i3c_device *i3cdev = dev_to_i3cdev(dev); + struct i3c_driver *driver = drv_to_i3cdrv(dev->driver); + + return driver->probe(i3cdev); +} + +static int i3c_device_remove(struct device *dev) +{ + struct i3c_device *i3cdev = dev_to_i3cdev(dev); + struct i3c_driver *driver = drv_to_i3cdrv(dev->driver); + int ret; + + ret = driver->remove(i3cdev); + if (ret) + return ret; + + i3c_device_free_ibi(i3cdev); + + return ret; +} + +struct bus_type i3c_bus_type = { + .name = "i3c", + .match = i3c_device_match, + .probe = i3c_device_probe, + .remove = i3c_device_remove, +}; + +/* + * Return the status bits stored for @addr in the bus address-slot bitmap + * (bit offset addr * 2). Addresses above I2C_MAX_ADDR are reported as + * reserved. + */ +static enum i3c_addr_slot_status +i3c_bus_get_addr_slot_status(struct i3c_bus *bus, u16 addr) +{ + unsigned long status; + int bitpos = addr * 2; + + if (addr > I2C_MAX_ADDR) + return I3C_ADDR_SLOT_RSVD; + + /* + * Keep the bitmap word as an unsigned long: narrowing it to int before + * the shift would drop the slots stored in the upper half of a 64-bit + * word. + */ + status = bus->addrslots[bitpos / BITS_PER_LONG]; + status >>= bitpos % BITS_PER_LONG; + + return status & I3C_ADDR_SLOT_STATUS_MASK; +} + +/* + * Store @status for @addr in the bus address-slot bitmap. Addresses above + * I2C_MAX_ADDR are silently ignored. + */ +static void i3c_bus_set_addr_slot_status(struct i3c_bus *bus, u16 addr, + enum i3c_addr_slot_status status) +{ + int bitpos = addr * 2; + unsigned long *ptr; + + if (addr > I2C_MAX_ADDR) + return; + + ptr = bus->addrslots + (bitpos / BITS_PER_LONG); + /* + * Widen before shifting: the shift count can reach BITS_PER_LONG - 2, + * which is undefined behaviour on an int-sized operand. + */ + *ptr &= ~((unsigned long)I3C_ADDR_SLOT_STATUS_MASK << + (bitpos % BITS_PER_LONG)); + *ptr |= (unsigned long)status << (bitpos % BITS_PER_LONG); +} + +static bool i3c_bus_dev_addr_is_avail(struct i3c_bus *bus, u8 addr) +{ + enum i3c_addr_slot_status status; + + status = i3c_bus_get_addr_slot_status(bus, addr); + +
return status == I3C_ADDR_SLOT_FREE; +} + +static int i3c_bus_get_free_addr(struct i3c_bus *bus, u8 start_addr) +{ + enum i3c_addr_slot_status status; + u8 addr; + + for (addr = start_addr; addr < I3C_MAX_ADDR; addr++) { + status = i3c_bus_get_addr_slot_status(bus, addr); + if (status == I3C_ADDR_SLOT_FREE) + return addr; + } + + return -ENOMEM; +} + +static void i3c_bus_init_addrslots(struct i3c_bus *bus) +{ + int i; + + /* Addresses 0 to 7 are reserved. */ + for (i = 0; i < 8; i++) + i3c_bus_set_addr_slot_status(bus, i, I3C_ADDR_SLOT_RSVD); + + /* + * Reserve broadcast address and all addresses that might collide + * with the broadcast address when facing a single bit error. + */ + i3c_bus_set_addr_slot_status(bus, I3C_BROADCAST_ADDR, + I3C_ADDR_SLOT_RSVD); + for (i = 0; i < 7; i++) + i3c_bus_set_addr_slot_status(bus, I3C_BROADCAST_ADDR ^ BIT(i), + I3C_ADDR_SLOT_RSVD); +} + +static void i3c_bus_cleanup(struct i3c_bus *i3cbus) +{ + mutex_lock(&i3c_core_lock); + idr_remove(&i3c_bus_idr, i3cbus->id); + mutex_unlock(&i3c_core_lock); +} + +static int i3c_bus_init(struct i3c_bus *i3cbus) +{ + int ret; + + init_rwsem(&i3cbus->lock); + INIT_LIST_HEAD(&i3cbus->devs.i2c); + INIT_LIST_HEAD(&i3cbus->devs.i3c); + i3c_bus_init_addrslots(i3cbus); + i3cbus->mode = I3C_BUS_MODE_PURE; + + mutex_lock(&i3c_core_lock); + ret = idr_alloc(&i3c_bus_idr, i3cbus, 0, 0, GFP_KERNEL); + mutex_unlock(&i3c_core_lock); + + if (ret < 0) + return ret; + + i3cbus->id = ret; + + return 0; +} + +static const char * const i3c_bus_mode_strings[] = { + [I3C_BUS_MODE_PURE] = "pure", + [I3C_BUS_MODE_MIXED_FAST] = "mixed-fast", + [I3C_BUS_MODE_MIXED_SLOW] = "mixed-slow", +}; + +/* + * sysfs "mode" attribute: print the name of the current bus mode, or + * "unknown" when the mode has no entry in i3c_bus_mode_strings[]. + */ +static ssize_t mode_show(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct i3c_bus *i3cbus = dev_to_i3cbus(dev); + ssize_t ret; + + i3c_bus_normaluse_lock(i3cbus); + /* + * Valid indexes are 0 to ARRAY_SIZE() - 1: mode == ARRAY_SIZE() must be + * rejected too, otherwise the lookup reads one entry past the table. + */ + if (i3cbus->mode < 0 || + i3cbus->mode >= ARRAY_SIZE(i3c_bus_mode_strings) || + !i3c_bus_mode_strings[i3cbus->mode]) + ret = sprintf(buf,
"unknown\n"); + else + ret = sprintf(buf, "%s\n", i3c_bus_mode_strings[i3cbus->mode]); + i3c_bus_normaluse_unlock(i3cbus); + + return ret; +} +static DEVICE_ATTR_RO(mode); + +static ssize_t current_master_show(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct i3c_bus *i3cbus = dev_to_i3cbus(dev); + ssize_t ret; + + i3c_bus_normaluse_lock(i3cbus); + ret = sprintf(buf, "%d-%llx\n", i3cbus->id, + i3cbus->cur_master->info.pid); + i3c_bus_normaluse_unlock(i3cbus); + + return ret; +} +static DEVICE_ATTR_RO(current_master); + +static ssize_t i3c_scl_frequency_show(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct i3c_bus *i3cbus = dev_to_i3cbus(dev); + ssize_t ret; + + i3c_bus_normaluse_lock(i3cbus); + ret = sprintf(buf, "%ld\n", i3cbus->scl_rate.i3c); + i3c_bus_normaluse_unlock(i3cbus); + + return ret; +} +static DEVICE_ATTR_RO(i3c_scl_frequency); + +static ssize_t i2c_scl_frequency_show(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct i3c_bus *i3cbus = dev_to_i3cbus(dev); + ssize_t ret; + + i3c_bus_normaluse_lock(i3cbus); + ret = sprintf(buf, "%ld\n", i3cbus->scl_rate.i2c); + i3c_bus_normaluse_unlock(i3cbus); + + return ret; +} +static DEVICE_ATTR_RO(i2c_scl_frequency); + +static struct attribute *i3c_masterdev_attrs[] = { + &dev_attr_mode.attr, + &dev_attr_current_master.attr, + &dev_attr_i3c_scl_frequency.attr, + &dev_attr_i2c_scl_frequency.attr, + &dev_attr_bcr.attr, + &dev_attr_dcr.attr, + &dev_attr_pid.attr, + &dev_attr_dynamic_address.attr, + &dev_attr_hdrcap.attr, + NULL, +}; +ATTRIBUTE_GROUPS(i3c_masterdev); + +static void i3c_masterdev_release(struct device *dev) +{ + struct i3c_master_controller *master = dev_to_i3cmaster(dev); + struct i3c_bus *bus = dev_to_i3cbus(dev); + + if (master->wq) + destroy_workqueue(master->wq); + + WARN_ON(!list_empty(&bus->devs.i2c) || !list_empty(&bus->devs.i3c)); + i3c_bus_cleanup(bus); + + of_node_put(dev->of_node); +} + +static const struct 
device_type i3c_masterdev_type = { + .groups = i3c_masterdev_groups, +}; + +int i3c_bus_set_mode(struct i3c_bus *i3cbus, enum i3c_bus_mode mode) +{ + i3cbus->mode = mode; + + if (!i3cbus->scl_rate.i3c) + i3cbus->scl_rate.i3c = I3C_BUS_TYP_I3C_SCL_RATE; + + if (!i3cbus->scl_rate.i2c) { + if (i3cbus->mode == I3C_BUS_MODE_MIXED_SLOW) + i3cbus->scl_rate.i2c = I3C_BUS_I2C_FM_SCL_RATE; + else + i3cbus->scl_rate.i2c = I3C_BUS_I2C_FM_PLUS_SCL_RATE; + } + + /* + * I3C/I2C frequency may have been overridden, check that user-provided + * values are not exceeding max possible frequency. + */ + if (i3cbus->scl_rate.i3c > I3C_BUS_MAX_I3C_SCL_RATE || + i3cbus->scl_rate.i2c > I3C_BUS_I2C_FM_PLUS_SCL_RATE) + return -EINVAL; + + return 0; +} + +static struct i3c_master_controller * +i2c_adapter_to_i3c_master(struct i2c_adapter *adap) +{ + return container_of(adap, struct i3c_master_controller, i2c); +} + +static struct i2c_adapter * +i3c_master_to_i2c_adapter(struct i3c_master_controller *master) +{ + return &master->i2c; +} + +static void i3c_master_free_i2c_dev(struct i2c_dev_desc *dev) +{ + kfree(dev); +} + +static struct i2c_dev_desc * +i3c_master_alloc_i2c_dev(struct i3c_master_controller *master, + const struct i2c_dev_boardinfo *boardinfo) +{ + struct i2c_dev_desc *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + dev->common.master = master; + dev->boardinfo = boardinfo; + + return dev; +} + +static void *i3c_ccc_cmd_dest_init(struct i3c_ccc_cmd_dest *dest, u8 addr, + u16 payloadlen) +{ + dest->addr = addr; + dest->payload.len = payloadlen; + if (payloadlen) + dest->payload.data = kzalloc(payloadlen, GFP_KERNEL); + else + dest->payload.data = NULL; + + return dest->payload.data; +} + +static void i3c_ccc_cmd_dest_cleanup(struct i3c_ccc_cmd_dest *dest) +{ + kfree(dest->payload.data); +} + +static void i3c_ccc_cmd_init(struct i3c_ccc_cmd *cmd, bool rnw, u8 id, + struct i3c_ccc_cmd_dest *dests, + unsigned int ndests) +{ + cmd->rnw = 
rnw ? 1 : 0; + cmd->id = id; + cmd->dests = dests; + cmd->ndests = ndests; + cmd->err = I3C_ERROR_UNKNOWN; +} + +static int i3c_master_send_ccc_cmd_locked(struct i3c_master_controller *master, + struct i3c_ccc_cmd *cmd) +{ + int ret; + + if (!cmd || !master) + return -EINVAL; + + if (WARN_ON(master->init_done && + !rwsem_is_locked(&master->bus.lock))) + return -EINVAL; + + if (!master->ops->send_ccc_cmd) + return -ENOTSUPP; + + if ((cmd->id & I3C_CCC_DIRECT) && (!cmd->dests || !cmd->ndests)) + return -EINVAL; + + if (master->ops->supports_ccc_cmd && + !master->ops->supports_ccc_cmd(master, cmd)) + return -ENOTSUPP; + + ret = master->ops->send_ccc_cmd(master, cmd); + if (ret) { + if (cmd->err != I3C_ERROR_UNKNOWN) + return cmd->err; + + return ret; + } + + return 0; +} + +static struct i2c_dev_desc * +i3c_master_find_i2c_dev_by_addr(const struct i3c_master_controller *master, + u16 addr) +{ + struct i2c_dev_desc *dev; + + i3c_bus_for_each_i2cdev(&master->bus, dev) { + if (dev->boardinfo->base.addr == addr) + return dev; + } + + return NULL; +} + +/** + * i3c_master_get_free_addr() - get a free address on the bus + * @master: I3C master object + * @start_addr: where to start searching + * + * This function must be called with the bus lock held in write mode. + * + * Return: the first free address starting at @start_addr (included) or -ENOMEM + * if there's no more address available. 
+ */ +int i3c_master_get_free_addr(struct i3c_master_controller *master, + u8 start_addr) +{ + return i3c_bus_get_free_addr(&master->bus, start_addr); +} +EXPORT_SYMBOL_GPL(i3c_master_get_free_addr); + +static void i3c_device_release(struct device *dev) +{ + struct i3c_device *i3cdev = dev_to_i3cdev(dev); + + WARN_ON(i3cdev->desc); + + of_node_put(i3cdev->dev.of_node); + kfree(i3cdev); +} + +static void i3c_master_free_i3c_dev(struct i3c_dev_desc *dev) +{ + kfree(dev); +} + +static struct i3c_dev_desc * +i3c_master_alloc_i3c_dev(struct i3c_master_controller *master, + const struct i3c_device_info *info) +{ + struct i3c_dev_desc *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + dev->common.master = master; + dev->info = *info; + mutex_init(&dev->ibi_lock); + + return dev; +} + +static int i3c_master_rstdaa_locked(struct i3c_master_controller *master, + u8 addr) +{ + enum i3c_addr_slot_status addrstat; + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_cmd cmd; + int ret; + + if (!master) + return -EINVAL; + + addrstat = i3c_bus_get_addr_slot_status(&master->bus, addr); + if (addr != I3C_BROADCAST_ADDR && addrstat != I3C_ADDR_SLOT_I3C_DEV) + return -EINVAL; + + i3c_ccc_cmd_dest_init(&dest, addr, 0); + i3c_ccc_cmd_init(&cmd, false, + I3C_CCC_RSTDAA(addr == I3C_BROADCAST_ADDR), + &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + +/** + * i3c_master_entdaa_locked() - start a DAA (Dynamic Address Assignment) + * procedure + * @master: master used to send frames on the bus + * + * Send a ENTDAA CCC command to start a DAA procedure. + * + * Note that this function only sends the ENTDAA CCC command, all the logic + * behind dynamic address assignment has to be handled in the I3C master + * driver. + * + * This function must be called with the bus lock held in write mode. 
+ * + * Return: 0 in case of success, a positive I3C error code if the error is + * one of the official Mx error codes, and a negative error code otherwise. + */ +int i3c_master_entdaa_locked(struct i3c_master_controller *master) +{ + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_cmd cmd; + int ret; + + i3c_ccc_cmd_dest_init(&dest, I3C_BROADCAST_ADDR, 0); + i3c_ccc_cmd_init(&cmd, false, I3C_CCC_ENTDAA, &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_master_entdaa_locked); + +static int i3c_master_enec_disec_locked(struct i3c_master_controller *master, + u8 addr, bool enable, u8 evts) +{ + struct i3c_ccc_events *events; + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_cmd cmd; + int ret; + + events = i3c_ccc_cmd_dest_init(&dest, addr, sizeof(*events)); + if (!events) + return -ENOMEM; + + events->events = evts; + i3c_ccc_cmd_init(&cmd, false, + enable ? + I3C_CCC_ENEC(addr == I3C_BROADCAST_ADDR) : + I3C_CCC_DISEC(addr == I3C_BROADCAST_ADDR), + &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + +/** + * i3c_master_disec_locked() - send a DISEC CCC command + * @master: master used to send frames on the bus + * @addr: a valid I3C slave address or %I3C_BROADCAST_ADDR + * @evts: events to disable + * + * Send a DISEC CCC command to disable some or all events coming from a + * specific slave, or all devices if @addr is %I3C_BROADCAST_ADDR. + * + * This function must be called with the bus lock held in write mode. + * + * Return: 0 in case of success, a positive I3C error code if the error is + * one of the official Mx error codes, and a negative error code otherwise. 
+ */ +int i3c_master_disec_locked(struct i3c_master_controller *master, u8 addr, + u8 evts) +{ + return i3c_master_enec_disec_locked(master, addr, false, evts); +} +EXPORT_SYMBOL_GPL(i3c_master_disec_locked); + +/** + * i3c_master_enec_locked() - send an ENEC CCC command + * @master: master used to send frames on the bus + * @addr: a valid I3C slave address or %I3C_BROADCAST_ADDR + * @evts: events to enable + * + * Sends an ENEC CCC command to enable some or all events coming from a + * specific slave, or all devices if @addr is %I3C_BROADCAST_ADDR. + * + * This function must be called with the bus lock held in write mode. + * + * Return: 0 in case of success, a positive I3C error code if the error is + * one of the official Mx error codes, and a negative error code otherwise. + */ +int i3c_master_enec_locked(struct i3c_master_controller *master, u8 addr, + u8 evts) +{ + return i3c_master_enec_disec_locked(master, addr, true, evts); +} +EXPORT_SYMBOL_GPL(i3c_master_enec_locked); + +/** + * i3c_master_defslvs_locked() - send a DEFSLVS CCC command + * @master: master used to send frames on the bus + * + * Send a DEFSLVS CCC command containing all the devices known to the @master. + * This is useful when you have secondary masters on the bus to propagate + * device information. + * + * This should be called after all I3C devices have been discovered (in other + * words, after the DAA procedure has finished) and instantiated in + * &i3c_master_controller_ops->bus_init(). + * It should also be called if a master ACKed a Hot-Join request and assigned + * a dynamic address to the device joining the bus. + * + * This function must be called with the bus lock held in write mode. + * + * Return: 0 in case of success, a positive I3C error code if the error is + * one of the official Mx error codes, and a negative error code otherwise.
+ */ +int i3c_master_defslvs_locked(struct i3c_master_controller *master) +{ + struct i3c_ccc_defslvs *defslvs; + struct i3c_ccc_dev_desc *desc; + struct i3c_ccc_cmd_dest dest; + struct i3c_dev_desc *i3cdev; + struct i2c_dev_desc *i2cdev; + struct i3c_ccc_cmd cmd; + struct i3c_bus *bus; + bool send = false; + int ndevs = 0, ret; + + if (!master) + return -EINVAL; + + bus = i3c_master_get_bus(master); + i3c_bus_for_each_i3cdev(bus, i3cdev) { + ndevs++; + + if (i3cdev == master->this) + continue; + + if (I3C_BCR_DEVICE_ROLE(i3cdev->info.bcr) == + I3C_BCR_I3C_MASTER) + send = true; + } + + /* No other master on the bus, skip DEFSLVS. */ + if (!send) + return 0; + + i3c_bus_for_each_i2cdev(bus, i2cdev) + ndevs++; + + defslvs = i3c_ccc_cmd_dest_init(&dest, I3C_BROADCAST_ADDR, + sizeof(*defslvs) + + ((ndevs - 1) * + sizeof(struct i3c_ccc_dev_desc))); + if (!defslvs) + return -ENOMEM; + + defslvs->count = ndevs; + defslvs->master.bcr = master->this->info.bcr; + defslvs->master.dcr = master->this->info.dcr; + defslvs->master.dyn_addr = master->this->info.dyn_addr << 1; + defslvs->master.static_addr = I3C_BROADCAST_ADDR << 1; + + desc = defslvs->slaves; + i3c_bus_for_each_i2cdev(bus, i2cdev) { + desc->lvr = i2cdev->boardinfo->lvr; + desc->static_addr = i2cdev->boardinfo->base.addr << 1; + desc++; + } + + i3c_bus_for_each_i3cdev(bus, i3cdev) { + /* Skip the I3C dev representing this master. 
*/ + if (i3cdev == master->this) + continue; + + desc->bcr = i3cdev->info.bcr; + desc->dcr = i3cdev->info.dcr; + desc->dyn_addr = i3cdev->info.dyn_addr << 1; + desc->static_addr = i3cdev->info.static_addr << 1; + desc++; + } + + i3c_ccc_cmd_init(&cmd, false, I3C_CCC_DEFSLVS, &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_master_defslvs_locked); + +static int i3c_master_setda_locked(struct i3c_master_controller *master, + u8 oldaddr, u8 newaddr, bool setdasa) +{ + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_setda *setda; + struct i3c_ccc_cmd cmd; + int ret; + + if (!oldaddr || !newaddr) + return -EINVAL; + + setda = i3c_ccc_cmd_dest_init(&dest, oldaddr, sizeof(*setda)); + if (!setda) + return -ENOMEM; + + setda->addr = newaddr << 1; + i3c_ccc_cmd_init(&cmd, false, + setdasa ? I3C_CCC_SETDASA : I3C_CCC_SETNEWDA, + &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + +static int i3c_master_setdasa_locked(struct i3c_master_controller *master, + u8 static_addr, u8 dyn_addr) +{ + return i3c_master_setda_locked(master, static_addr, dyn_addr, true); +} + +static int i3c_master_setnewda_locked(struct i3c_master_controller *master, + u8 oldaddr, u8 newaddr) +{ + return i3c_master_setda_locked(master, oldaddr, newaddr, false); +} + +static int i3c_master_getmrl_locked(struct i3c_master_controller *master, + struct i3c_device_info *info) +{ + struct i3c_ccc_cmd_dest dest; + unsigned int expected_len; + struct i3c_ccc_mrl *mrl; + struct i3c_ccc_cmd cmd; + int ret; + + mrl = i3c_ccc_cmd_dest_init(&dest, info->dyn_addr, sizeof(*mrl)); + if (!mrl) + return -ENOMEM; + + /* + * When the device does not have IBI payload GETMRL only returns 2 + * bytes of data. 
+ */ + if (!(info->bcr & I3C_BCR_IBI_PAYLOAD)) + dest.payload.len -= 1; + + expected_len = dest.payload.len; + i3c_ccc_cmd_init(&cmd, true, I3C_CCC_GETMRL, &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + if (ret) + goto out; + + if (dest.payload.len != expected_len) { + ret = -EIO; + goto out; + } + + info->max_read_len = be16_to_cpu(mrl->read_len); + + if (info->bcr & I3C_BCR_IBI_PAYLOAD) + info->max_ibi_len = mrl->ibi_len; + +out: + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + +/* + * Send a GETMWL CCC command to the device at @info->dyn_addr and store the + * returned length in @info->max_write_len. Returns 0 on success, -ENOMEM if + * the payload cannot be allocated, -EIO if the reply has an unexpected + * length, or the error reported by i3c_master_send_ccc_cmd_locked(). + */ +static int i3c_master_getmwl_locked(struct i3c_master_controller *master, + struct i3c_device_info *info) +{ + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_mwl *mwl; + struct i3c_ccc_cmd cmd; + int ret; + + mwl = i3c_ccc_cmd_dest_init(&dest, info->dyn_addr, sizeof(*mwl)); + if (!mwl) + return -ENOMEM; + + i3c_ccc_cmd_init(&cmd, true, I3C_CCC_GETMWL, &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + if (ret) + goto out; + + /* Go through 'out' so the payload buffer is freed on this path too. */ + if (dest.payload.len != sizeof(*mwl)) { + ret = -EIO; + goto out; + } + + info->max_write_len = be16_to_cpu(mwl->len); + +out: + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + +static int i3c_master_getmxds_locked(struct i3c_master_controller *master, + struct i3c_device_info *info) +{ + struct i3c_ccc_getmxds *getmaxds; + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_cmd cmd; + int ret; + + getmaxds = i3c_ccc_cmd_dest_init(&dest, info->dyn_addr, + sizeof(*getmaxds)); + if (!getmaxds) + return -ENOMEM; + + i3c_ccc_cmd_init(&cmd, true, I3C_CCC_GETMXDS, &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + if (ret) + goto out; + + if (dest.payload.len != 2 && dest.payload.len != 5) { + ret = -EIO; + goto out; + } + + info->max_read_ds = getmaxds->maxrd; + info->max_write_ds = getmaxds->maxwr; + if (dest.payload.len == 5) + info->max_read_turnaround = getmaxds->maxrdturn[0] | + ((u32)getmaxds->maxrdturn[1] << 8) | + ((u32)getmaxds->maxrdturn[2] << 16); + +out: + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + +static
int i3c_master_gethdrcap_locked(struct i3c_master_controller *master, + struct i3c_device_info *info) +{ + struct i3c_ccc_gethdrcap *gethdrcap; + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_cmd cmd; + int ret; + + gethdrcap = i3c_ccc_cmd_dest_init(&dest, info->dyn_addr, + sizeof(*gethdrcap)); + if (!gethdrcap) + return -ENOMEM; + + i3c_ccc_cmd_init(&cmd, true, I3C_CCC_GETHDRCAP, &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + if (ret) + goto out; + + if (dest.payload.len != 1) { + ret = -EIO; + goto out; + } + + info->hdr_cap = gethdrcap->modes; + +out: + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + +static int i3c_master_getpid_locked(struct i3c_master_controller *master, + struct i3c_device_info *info) +{ + struct i3c_ccc_getpid *getpid; + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_cmd cmd; + int ret, i; + + getpid = i3c_ccc_cmd_dest_init(&dest, info->dyn_addr, sizeof(*getpid)); + if (!getpid) + return -ENOMEM; + + i3c_ccc_cmd_init(&cmd, true, I3C_CCC_GETPID, &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + if (ret) + goto out; + + info->pid = 0; + for (i = 0; i < sizeof(getpid->pid); i++) { + int sft = (sizeof(getpid->pid) - i - 1) * 8; + + info->pid |= (u64)getpid->pid[i] << sft; + } + +out: + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + +static int i3c_master_getbcr_locked(struct i3c_master_controller *master, + struct i3c_device_info *info) +{ + struct i3c_ccc_getbcr *getbcr; + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_cmd cmd; + int ret; + + getbcr = i3c_ccc_cmd_dest_init(&dest, info->dyn_addr, sizeof(*getbcr)); + if (!getbcr) + return -ENOMEM; + + i3c_ccc_cmd_init(&cmd, true, I3C_CCC_GETBCR, &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + if (ret) + goto out; + + info->bcr = getbcr->bcr; + +out: + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + +static int i3c_master_getdcr_locked(struct i3c_master_controller *master, + struct i3c_device_info *info) +{ + struct 
i3c_ccc_getdcr *getdcr; + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_cmd cmd; + int ret; + + getdcr = i3c_ccc_cmd_dest_init(&dest, info->dyn_addr, sizeof(*getdcr)); + if (!getdcr) + return -ENOMEM; + + i3c_ccc_cmd_init(&cmd, true, I3C_CCC_GETDCR, &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + if (ret) + goto out; + + info->dcr = getdcr->dcr; + +out: + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + +static int i3c_master_retrieve_dev_info(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *master = i3c_dev_get_master(dev); + enum i3c_addr_slot_status slot_status; + int ret; + + if (!dev->info.dyn_addr) + return -EINVAL; + + slot_status = i3c_bus_get_addr_slot_status(&master->bus, + dev->info.dyn_addr); + if (slot_status == I3C_ADDR_SLOT_RSVD || + slot_status == I3C_ADDR_SLOT_I2C_DEV) + return -EINVAL; + + ret = i3c_master_getpid_locked(master, &dev->info); + if (ret) + return ret; + + ret = i3c_master_getbcr_locked(master, &dev->info); + if (ret) + return ret; + + ret = i3c_master_getdcr_locked(master, &dev->info); + if (ret) + return ret; + + if (dev->info.bcr & I3C_BCR_MAX_DATA_SPEED_LIM) { + ret = i3c_master_getmxds_locked(master, &dev->info); + if (ret) + return ret; + } + + if (dev->info.bcr & I3C_BCR_IBI_PAYLOAD) + dev->info.max_ibi_len = 1; + + i3c_master_getmrl_locked(master, &dev->info); + i3c_master_getmwl_locked(master, &dev->info); + + if (dev->info.bcr & I3C_BCR_HDR_CAP) { + ret = i3c_master_gethdrcap_locked(master, &dev->info); + if (ret) + return ret; + } + + return 0; +} + +static void i3c_master_put_i3c_addrs(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *master = i3c_dev_get_master(dev); + + if (dev->info.static_addr) + i3c_bus_set_addr_slot_status(&master->bus, + dev->info.static_addr, + I3C_ADDR_SLOT_FREE); + + if (dev->info.dyn_addr) + i3c_bus_set_addr_slot_status(&master->bus, dev->info.dyn_addr, + I3C_ADDR_SLOT_FREE); + + if (dev->boardinfo && dev->boardinfo->init_dyn_addr) + 
i3c_bus_set_addr_slot_status(&master->bus, dev->info.dyn_addr, + I3C_ADDR_SLOT_FREE); +} + +static int i3c_master_get_i3c_addrs(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *master = i3c_dev_get_master(dev); + enum i3c_addr_slot_status status; + + if (!dev->info.static_addr && !dev->info.dyn_addr) + return 0; + + if (dev->info.static_addr) { + status = i3c_bus_get_addr_slot_status(&master->bus, + dev->info.static_addr); + if (status != I3C_ADDR_SLOT_FREE) + return -EBUSY; + + i3c_bus_set_addr_slot_status(&master->bus, + dev->info.static_addr, + I3C_ADDR_SLOT_I3C_DEV); + } + + /* + * ->init_dyn_addr should have been reserved before that, so, if we're + * trying to apply a pre-reserved dynamic address, we should not try + * to reserve the address slot a second time. + */ + if (dev->info.dyn_addr && + (!dev->boardinfo || + dev->boardinfo->init_dyn_addr != dev->info.dyn_addr)) { + status = i3c_bus_get_addr_slot_status(&master->bus, + dev->info.dyn_addr); + if (status != I3C_ADDR_SLOT_FREE) + goto err_release_static_addr; + + i3c_bus_set_addr_slot_status(&master->bus, dev->info.dyn_addr, + I3C_ADDR_SLOT_I3C_DEV); + } + + return 0; + +err_release_static_addr: + if (dev->info.static_addr) + i3c_bus_set_addr_slot_status(&master->bus, + dev->info.static_addr, + I3C_ADDR_SLOT_FREE); + + return -EBUSY; +} + +static int i3c_master_attach_i3c_dev(struct i3c_master_controller *master, + struct i3c_dev_desc *dev) +{ + int ret; + + /* + * We don't attach devices to the controller until they are + * addressable on the bus. + */ + if (!dev->info.static_addr && !dev->info.dyn_addr) + return 0; + + ret = i3c_master_get_i3c_addrs(dev); + if (ret) + return ret; + + /* Do not attach the master device itself. 
*/ + if (master->this != dev && master->ops->attach_i3c_dev) { + ret = master->ops->attach_i3c_dev(dev); + if (ret) { + i3c_master_put_i3c_addrs(dev); + return ret; + } + } + + list_add_tail(&dev->common.node, &master->bus.devs.i3c); + + return 0; +} + +static int i3c_master_reattach_i3c_dev(struct i3c_dev_desc *dev, + u8 old_dyn_addr) +{ + struct i3c_master_controller *master = i3c_dev_get_master(dev); + enum i3c_addr_slot_status status; + int ret; + + if (dev->info.dyn_addr != old_dyn_addr) { + status = i3c_bus_get_addr_slot_status(&master->bus, + dev->info.dyn_addr); + if (status != I3C_ADDR_SLOT_FREE) + return -EBUSY; + i3c_bus_set_addr_slot_status(&master->bus, + dev->info.dyn_addr, + I3C_ADDR_SLOT_I3C_DEV); + } + + if (master->ops->reattach_i3c_dev) { + ret = master->ops->reattach_i3c_dev(dev, old_dyn_addr); + if (ret) { + i3c_master_put_i3c_addrs(dev); + return ret; + } + } + + return 0; +} + +static void i3c_master_detach_i3c_dev(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *master = i3c_dev_get_master(dev); + + /* Do not detach the master device itself. 
*/ + if (master->this != dev && master->ops->detach_i3c_dev) + master->ops->detach_i3c_dev(dev); + + i3c_master_put_i3c_addrs(dev); + list_del(&dev->common.node); +} + +static int i3c_master_attach_i2c_dev(struct i3c_master_controller *master, + struct i2c_dev_desc *dev) +{ + int ret; + + if (master->ops->attach_i2c_dev) { + ret = master->ops->attach_i2c_dev(dev); + if (ret) + return ret; + } + + list_add_tail(&dev->common.node, &master->bus.devs.i2c); + + return 0; +} + +static void i3c_master_detach_i2c_dev(struct i2c_dev_desc *dev) +{ + struct i3c_master_controller *master = i2c_dev_get_master(dev); + + list_del(&dev->common.node); + + if (master->ops->detach_i2c_dev) + master->ops->detach_i2c_dev(dev); +} + +static void i3c_master_pre_assign_dyn_addr(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *master = i3c_dev_get_master(dev); + int ret; + + if (!dev->boardinfo || !dev->boardinfo->init_dyn_addr || + !dev->boardinfo->static_addr) + return; + + ret = i3c_master_setdasa_locked(master, dev->info.static_addr, + dev->boardinfo->init_dyn_addr); + if (ret) + return; + + dev->info.dyn_addr = dev->boardinfo->init_dyn_addr; + ret = i3c_master_reattach_i3c_dev(dev, 0); + if (ret) + goto err_rstdaa; + + ret = i3c_master_retrieve_dev_info(dev); + if (ret) + goto err_rstdaa; + + return; + +err_rstdaa: + i3c_master_rstdaa_locked(master, dev->boardinfo->init_dyn_addr); +} + +static void +i3c_master_register_new_i3c_devs(struct i3c_master_controller *master) +{ + struct i3c_dev_desc *desc; + int ret; + + if (!master->init_done) + return; + + i3c_bus_for_each_i3cdev(&master->bus, desc) { + if (desc->dev || !desc->info.dyn_addr || desc == master->this) + continue; + + desc->dev = kzalloc(sizeof(*desc->dev), GFP_KERNEL); + if (!desc->dev) + continue; + + desc->dev->bus = &master->bus; + desc->dev->desc = desc; + desc->dev->dev.parent = &master->dev; + desc->dev->dev.type = &i3c_device_type; + desc->dev->dev.bus = &i3c_bus_type; + desc->dev->dev.release = 
i3c_device_release; + dev_set_name(&desc->dev->dev, "%d-%llx", master->bus.id, + desc->info.pid); + + if (desc->boardinfo) + desc->dev->dev.of_node = desc->boardinfo->of_node; + + ret = device_register(&desc->dev->dev); + if (ret) + dev_err(&master->dev, + "Failed to add I3C device (err = %d)\n", ret); + } +} + +/** + * i3c_master_do_daa() - do a DAA (Dynamic Address Assignment) + * @master: master doing the DAA + * + * This function is instantiating an I3C device object and adding it to the + * I3C device list. All device information are automatically retrieved using + * standard CCC commands. + * + * The I3C device object is returned in case the master wants to attach + * private data to it using i3c_dev_set_master_data(). + * + * This function must be called with the bus lock held in write mode. + * + * Return: a 0 in case of success, an negative error code otherwise. + */ +int i3c_master_do_daa(struct i3c_master_controller *master) +{ + int ret; + + i3c_bus_maintenance_lock(&master->bus); + ret = master->ops->do_daa(master); + i3c_bus_maintenance_unlock(&master->bus); + + if (ret) + return ret; + + i3c_bus_normaluse_lock(&master->bus); + i3c_master_register_new_i3c_devs(master); + i3c_bus_normaluse_unlock(&master->bus); + + return 0; +} +EXPORT_SYMBOL_GPL(i3c_master_do_daa); + +/** + * i3c_master_set_info() - set master device information + * @master: master used to send frames on the bus + * @info: I3C device information + * + * Set master device info. This should be called from + * &i3c_master_controller_ops->bus_init(). + * + * Not all &i3c_device_info fields are meaningful for a master device. + * Here is a list of fields that should be properly filled: + * + * - &i3c_device_info->dyn_addr + * - &i3c_device_info->bcr + * - &i3c_device_info->dcr + * - &i3c_device_info->pid + * - &i3c_device_info->hdr_cap if %I3C_BCR_HDR_CAP bit is set in + * &i3c_device_info->bcr + * + * This function must be called with the bus lock held in maintenance mode. 
+ * + * Return: 0 if @info contains valid information (not every piece of + * information can be checked, but we can at least make sure @info->dyn_addr + * and @info->bcr are correct), -EINVAL otherwise. + */ +int i3c_master_set_info(struct i3c_master_controller *master, + const struct i3c_device_info *info) +{ + struct i3c_dev_desc *i3cdev; + int ret; + + if (!i3c_bus_dev_addr_is_avail(&master->bus, info->dyn_addr)) + return -EINVAL; + + if (I3C_BCR_DEVICE_ROLE(info->bcr) == I3C_BCR_I3C_MASTER && + master->secondary) + return -EINVAL; + + if (master->this) + return -EINVAL; + + i3cdev = i3c_master_alloc_i3c_dev(master, info); + if (IS_ERR(i3cdev)) + return PTR_ERR(i3cdev); + + master->this = i3cdev; + master->bus.cur_master = master->this; + + ret = i3c_master_attach_i3c_dev(master, i3cdev); + if (ret) + goto err_free_dev; + + return 0; + +err_free_dev: + i3c_master_free_i3c_dev(i3cdev); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_master_set_info); + +static void i3c_master_detach_free_devs(struct i3c_master_controller *master) +{ + struct i3c_dev_desc *i3cdev, *i3ctmp; + struct i2c_dev_desc *i2cdev, *i2ctmp; + + list_for_each_entry_safe(i3cdev, i3ctmp, &master->bus.devs.i3c, + common.node) { + i3c_master_detach_i3c_dev(i3cdev); + + if (i3cdev->boardinfo && i3cdev->boardinfo->init_dyn_addr) + i3c_bus_set_addr_slot_status(&master->bus, + i3cdev->boardinfo->init_dyn_addr, + I3C_ADDR_SLOT_FREE); + + i3c_master_free_i3c_dev(i3cdev); + } + + list_for_each_entry_safe(i2cdev, i2ctmp, &master->bus.devs.i2c, + common.node) { + i3c_master_detach_i2c_dev(i2cdev); + i3c_bus_set_addr_slot_status(&master->bus, + i2cdev->boardinfo->base.addr, + I3C_ADDR_SLOT_FREE); + i3c_master_free_i2c_dev(i2cdev); + } +} + +/** + * i3c_master_bus_init() - initialize an I3C bus + * @master: main master initializing the bus + * + * This function is following all initialisation steps described in the I3C + * specification: + * + * 1. 
Attach I2C and statically defined I3C devs to the master so that the + * master can fill its internal device table appropriately + * + * 2. Call &i3c_master_controller_ops->bus_init() method to initialize + * the master controller. That's usually where the bus mode is selected + * (pure bus or mixed fast/slow bus) + * + * 3. Instruct all devices on the bus to drop their dynamic address. This is + * particularly important when the bus was previously configured by someone + * else (for example the bootloader) + * + * 4. Disable all slave events. + * + * 5. Pre-assign dynamic addresses requested by the FW with SETDASA for I3C + * devices that have a static address + * + * 6. Do a DAA (Dynamic Address Assignment) to assign dynamic addresses to all + * remaining I3C devices + * + * Once this is done, all I3C and I2C devices should be usable. + * + * Return: a 0 in case of success, an negative error code otherwise. + */ +static int i3c_master_bus_init(struct i3c_master_controller *master) +{ + enum i3c_addr_slot_status status; + struct i2c_dev_boardinfo *i2cboardinfo; + struct i3c_dev_boardinfo *i3cboardinfo; + struct i3c_dev_desc *i3cdev; + struct i2c_dev_desc *i2cdev; + int ret; + + /* + * First attach all devices with static definitions provided by the + * FW. 
+ */ + list_for_each_entry(i2cboardinfo, &master->boardinfo.i2c, node) { + status = i3c_bus_get_addr_slot_status(&master->bus, + i2cboardinfo->base.addr); + if (status != I3C_ADDR_SLOT_FREE) { + ret = -EBUSY; + goto err_detach_devs; + } + + i3c_bus_set_addr_slot_status(&master->bus, + i2cboardinfo->base.addr, + I3C_ADDR_SLOT_I2C_DEV); + + i2cdev = i3c_master_alloc_i2c_dev(master, i2cboardinfo); + if (IS_ERR(i2cdev)) { + ret = PTR_ERR(i2cdev); + goto err_detach_devs; + } + + ret = i3c_master_attach_i2c_dev(master, i2cdev); + if (ret) { + i3c_master_free_i2c_dev(i2cdev); + goto err_detach_devs; + } + } + list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) { + struct i3c_device_info info = { + .static_addr = i3cboardinfo->static_addr, + }; + + if (i3cboardinfo->init_dyn_addr) { + status = i3c_bus_get_addr_slot_status(&master->bus, + i3cboardinfo->init_dyn_addr); + if (status != I3C_ADDR_SLOT_FREE) { + ret = -EBUSY; + goto err_detach_devs; + } + } + + i3cdev = i3c_master_alloc_i3c_dev(master, &info); + if (IS_ERR(i3cdev)) { + ret = PTR_ERR(i3cdev); + goto err_detach_devs; + } + + i3cdev->boardinfo = i3cboardinfo; + + ret = i3c_master_attach_i3c_dev(master, i3cdev); + if (ret) { + i3c_master_free_i3c_dev(i3cdev); + goto err_detach_devs; + } + } + + /* + * Now execute the controller specific ->bus_init() routine, which + * might configure its internal logic to match the bus limitations. + */ + ret = master->ops->bus_init(master); + if (ret) + goto err_detach_devs; + + /* + * The master device should have been instantiated in ->bus_init(), + * complain if this was not the case. + */ + if (!master->this) { + dev_err(&master->dev, + "master_set_info() was not called in ->bus_init()\n"); + ret = -EINVAL; + goto err_bus_cleanup; + } + + /* + * Reset all dynamic address that may have been assigned before + * (assigned by the bootloader for example). 
+ */ + ret = i3c_master_rstdaa_locked(master, I3C_BROADCAST_ADDR); + if (ret && ret != I3C_ERROR_M2) + goto err_bus_cleanup; + + /* Disable all slave events before starting DAA. */ + ret = i3c_master_disec_locked(master, I3C_BROADCAST_ADDR, + I3C_CCC_EVENT_SIR | I3C_CCC_EVENT_MR | + I3C_CCC_EVENT_HJ); + if (ret && ret != I3C_ERROR_M2) + goto err_bus_cleanup; + + /* + * Pre-assign dynamic address and retrieve device information if + * needed. + */ + i3c_bus_for_each_i3cdev(&master->bus, i3cdev) + i3c_master_pre_assign_dyn_addr(i3cdev); + + ret = i3c_master_do_daa(master); + if (ret) + goto err_rstdaa; + + return 0; + +err_rstdaa: + i3c_master_rstdaa_locked(master, I3C_BROADCAST_ADDR); + +err_bus_cleanup: + if (master->ops->bus_cleanup) + master->ops->bus_cleanup(master); + +err_detach_devs: + i3c_master_detach_free_devs(master); + + return ret; +} + +static void i3c_master_bus_cleanup(struct i3c_master_controller *master) +{ + if (master->ops->bus_cleanup) + master->ops->bus_cleanup(master); + + i3c_master_detach_free_devs(master); +} + +static struct i3c_dev_desc * +i3c_master_search_i3c_dev_duplicate(struct i3c_dev_desc *refdev) +{ + struct i3c_master_controller *master = refdev->common.master; + struct i3c_dev_desc *i3cdev; + + i3c_bus_for_each_i3cdev(&master->bus, i3cdev) { + if (i3cdev != refdev && i3cdev->info.pid == refdev->info.pid) + return i3cdev; + } + + return NULL; +} + +/** + * i3c_master_add_i3c_dev_locked() - add an I3C slave to the bus + * @master: master used to send frames on the bus + * @addr: I3C slave dynamic address assigned to the device + * + * This function is instantiating an I3C device object and adding it to the + * I3C device list. All device information are automatically retrieved using + * standard CCC commands. + * + * The I3C device object is returned in case the master wants to attach + * private data to it using i3c_dev_set_master_data(). + * + * This function must be called with the bus lock held in write mode. 
+ * + * Return: 0 in case of success, a negative error code otherwise. + */ +int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master, + u8 addr) +{ + struct i3c_device_info info = { .dyn_addr = addr }; + struct i3c_dev_desc *newdev, *olddev; + u8 old_dyn_addr = addr, expected_dyn_addr; + struct i3c_ibi_setup ibireq = { }; + bool enable_ibi = false; + int ret; + + /* + * i3c_master_retrieve_dev_info() rejects a zero dynamic address with + * -EINVAL; refuse it up front so that every device reaching the + * error paths below has really been attached. + */ + if (!master || !addr) + return -EINVAL; + + newdev = i3c_master_alloc_i3c_dev(master, &info); + if (IS_ERR(newdev)) + return PTR_ERR(newdev); + + /* + * newdev is a valid pointer here, so report the error returned by + * the attach step rather than PTR_ERR(newdev). + */ + ret = i3c_master_attach_i3c_dev(master, newdev); + if (ret) + goto err_free_dev; + + /* + * From here on newdev is on the bus device list and holds its + * address slots, so failures must go through err_detach_dev. + */ + ret = i3c_master_retrieve_dev_info(newdev); + if (ret) + goto err_detach_dev; + + olddev = i3c_master_search_i3c_dev_duplicate(newdev); + if (olddev) { + newdev->boardinfo = olddev->boardinfo; + newdev->info.static_addr = olddev->info.static_addr; + newdev->dev = olddev->dev; + if (newdev->dev) + newdev->dev->desc = newdev; + + /* + * We need to restore the IBI state too, so let's save the + * IBI information and try to restore them after olddev has + * been detached+released and its IBI has been stopped and + * the associated resources have been freed. 
+ */ + mutex_lock(&olddev->ibi_lock); + if (olddev->ibi) { + ibireq.handler = olddev->ibi->handler; + ibireq.max_payload_len = olddev->ibi->max_payload_len; + ibireq.num_slots = olddev->ibi->num_slots; + + if (olddev->ibi->enabled) { + enable_ibi = true; + i3c_dev_disable_ibi_locked(olddev); + } + + i3c_dev_free_ibi_locked(olddev); + } + mutex_unlock(&olddev->ibi_lock); + + old_dyn_addr = olddev->info.dyn_addr; + + i3c_master_detach_i3c_dev(olddev); + i3c_master_free_i3c_dev(olddev); + } + + ret = i3c_master_reattach_i3c_dev(newdev, old_dyn_addr); + if (ret) + goto err_detach_dev; + + /* + * Depending on our previous state, the expected dynamic address might + * differ: + * - if the device already had a dynamic address assigned, let's try to + * re-apply this one + * - if the device did not have a dynamic address and the firmware + * requested a specific address, pick this one + * - in any other case, keep the address automatically assigned by the + * master + */ + if (old_dyn_addr && old_dyn_addr != newdev->info.dyn_addr) + expected_dyn_addr = old_dyn_addr; + else if (newdev->boardinfo && newdev->boardinfo->init_dyn_addr) + expected_dyn_addr = newdev->boardinfo->init_dyn_addr; + else + expected_dyn_addr = newdev->info.dyn_addr; + + if (newdev->info.dyn_addr != expected_dyn_addr) { + /* + * Try to apply the expected dynamic address. If it fails, keep + * the address assigned by the master. + */ + ret = i3c_master_setnewda_locked(master, + newdev->info.dyn_addr, + expected_dyn_addr); + if (!ret) { + old_dyn_addr = newdev->info.dyn_addr; + newdev->info.dyn_addr = expected_dyn_addr; + i3c_master_reattach_i3c_dev(newdev, old_dyn_addr); + } else { + dev_err(&master->dev, + "Failed to assign reserved/old address to device %d%llx", + master->bus.id, newdev->info.pid); + } + } + + /* + * Now is time to try to restore the IBI setup. If we're lucky, + * everything works as before, otherwise, all we can do is complain. 
+ * FIXME: maybe we should add callback to inform the driver that it + * should request the IBI again instead of trying to hide that from + * him. + */ + if (ibireq.handler) { + mutex_lock(&newdev->ibi_lock); + ret = i3c_dev_request_ibi_locked(newdev, &ibireq); + if (ret) { + dev_err(&master->dev, + "Failed to request IBI on device %d-%llx", + master->bus.id, newdev->info.pid); + } else if (enable_ibi) { + ret = i3c_dev_enable_ibi_locked(newdev); + if (ret) + dev_err(&master->dev, + "Failed to re-enable IBI on device %d-%llx", + master->bus.id, newdev->info.pid); + } + mutex_unlock(&newdev->ibi_lock); + } + + return 0; + +err_detach_dev: + if (newdev->dev && newdev->dev->desc) + newdev->dev->desc = NULL; + + i3c_master_detach_i3c_dev(newdev); + +err_free_dev: + i3c_master_free_i3c_dev(newdev); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_master_add_i3c_dev_locked); + +#define OF_I3C_REG1_IS_I2C_DEV BIT(31) + +static int +of_i3c_master_add_i2c_boardinfo(struct i3c_master_controller *master, + struct device_node *node, u32 *reg) +{ + struct i2c_dev_boardinfo *boardinfo; + struct device *dev = &master->dev; + int ret; + + boardinfo = devm_kzalloc(dev, sizeof(*boardinfo), GFP_KERNEL); + if (!boardinfo) + return -ENOMEM; + + ret = of_i2c_get_board_info(dev, node, &boardinfo->base); + if (ret) + return ret; + + /* LVR is encoded in reg[2]. 
*/ + boardinfo->lvr = reg[2]; + + if (boardinfo->lvr & I3C_LVR_I2C_FM_MODE) + master->bus.scl_rate.i2c = I3C_BUS_I2C_FM_SCL_RATE; + + list_add_tail(&boardinfo->node, &master->boardinfo.i2c); + of_node_get(node); + + return 0; +} + +/* + * Build the boardinfo entry of an I3C device described in the device tree. + * reg[0] is the static address (validated only when non-zero), reg[1] and + * reg[2] are combined into the 64-bit PID, and the optional + * "assigned-address" property is stored as the initial dynamic address. + * + * Return: 0 on success, -ENOMEM on allocation failure, -EINVAL if an address + * is out of range or not free, or if the PID has any of bits 63:48 set or + * makes I3C_PID_RND_LOWER_32BITS() non-zero. + */ +static int +of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, + struct device_node *node, u32 *reg) +{ + struct i3c_dev_boardinfo *boardinfo; + struct device *dev = &master->dev; + enum i3c_addr_slot_status addrstatus; + u32 init_dyn_addr = 0; + + boardinfo = devm_kzalloc(dev, sizeof(*boardinfo), GFP_KERNEL); + if (!boardinfo) + return -ENOMEM; + + if (reg[0]) { + if (reg[0] > I3C_MAX_ADDR) + return -EINVAL; + + addrstatus = i3c_bus_get_addr_slot_status(&master->bus, + reg[0]); + if (addrstatus != I3C_ADDR_SLOT_FREE) + return -EINVAL; + } + + boardinfo->static_addr = reg[0]; + + if (!of_property_read_u32(node, "assigned-address", &init_dyn_addr)) { + if (init_dyn_addr > I3C_MAX_ADDR) + return -EINVAL; + + addrstatus = i3c_bus_get_addr_slot_status(&master->bus, + init_dyn_addr); + if (addrstatus != I3C_ADDR_SLOT_FREE) + return -EINVAL; + } + + boardinfo->pid = ((u64)reg[1] << 32) | reg[2]; + + /* Validate the PID just decoded from "reg", not a zeroed local. */ + if ((boardinfo->pid & GENMASK_ULL(63, 48)) || + I3C_PID_RND_LOWER_32BITS(boardinfo->pid)) + return -EINVAL; + + boardinfo->init_dyn_addr = init_dyn_addr; + boardinfo->of_node = of_node_get(node); + list_add_tail(&boardinfo->node, &master->boardinfo.i3c); + + return 0; +} + +static int of_i3c_master_add_dev(struct i3c_master_controller *master, + struct device_node *node) +{ + u32 reg[3]; + int ret; + + if (!master || !node) + return -EINVAL; + + ret = of_property_read_u32_array(node, "reg", reg, ARRAY_SIZE(reg)); + if (ret) + return ret; + + /* + * The manufacturer ID can't be 0. If reg[1] == 0 that means we're + * dealing with an I2C device. 
+ */ + if (!reg[1]) + ret = of_i3c_master_add_i2c_boardinfo(master, node, reg); + else + ret = of_i3c_master_add_i3c_boardinfo(master, node, reg); + + return ret; +} + +static int of_populate_i3c_bus(struct i3c_master_controller *master) +{ + struct device *dev = &master->dev; + struct device_node *i3cbus_np = dev->of_node; + struct device_node *node; + int ret; + u32 val; + + if (!i3cbus_np) + return 0; + + for_each_available_child_of_node(i3cbus_np, node) { + ret = of_i3c_master_add_dev(master, node); + if (ret) + return ret; + } + + /* + * The user might want to limit I2C and I3C speed in case some devices + * on the bus are not supporting typical rates, or if the bus topology + * prevents it from using max possible rate. + */ + if (!of_property_read_u32(i3cbus_np, "i2c-scl-hz", &val)) + master->bus.scl_rate.i2c = val; + + if (!of_property_read_u32(i3cbus_np, "i3c-scl-hz", &val)) + master->bus.scl_rate.i3c = val; + + return 0; +} + +static int i3c_master_i2c_adapter_xfer(struct i2c_adapter *adap, + struct i2c_msg *xfers, int nxfers) +{ + struct i3c_master_controller *master = i2c_adapter_to_i3c_master(adap); + struct i2c_dev_desc *dev; + int i, ret; + u16 addr; + + if (!xfers || !master || nxfers <= 0) + return -EINVAL; + + if (!master->ops->i2c_xfers) + return -ENOTSUPP; + + /* Doing transfers to different devices is not supported. */ + addr = xfers[0].addr; + for (i = 1; i < nxfers; i++) { + if (addr != xfers[i].addr) + return -ENOTSUPP; + } + + i3c_bus_normaluse_lock(&master->bus); + dev = i3c_master_find_i2c_dev_by_addr(master, addr); + if (!dev) + ret = -ENOENT; + else + ret = master->ops->i2c_xfers(dev, xfers, nxfers); + i3c_bus_normaluse_unlock(&master->bus); + + return ret ? 
ret : nxfers; +} + +static u32 i3c_master_i2c_functionalities(struct i2c_adapter *adap) +{ + struct i3c_master_controller *master = i2c_adapter_to_i3c_master(adap); + + return master->ops->i2c_funcs(master); +} + +static const struct i2c_algorithm i3c_master_i2c_algo = { + .master_xfer = i3c_master_i2c_adapter_xfer, + .functionality = i3c_master_i2c_functionalities, +}; + +/* + * Register the I2C adapter backing this I3C master, then create an I2C + * device for every I2C descriptor on the bus. + * + * Return: 0 on success, or the error returned by i2c_add_adapter(). + */ +static int i3c_master_i2c_adapter_init(struct i3c_master_controller *master) +{ + struct i2c_adapter *adap = i3c_master_to_i2c_adapter(master); + struct i2c_dev_desc *i2cdev; + int ret; + + adap->dev.parent = master->dev.parent; + adap->owner = master->dev.parent->driver->owner; + adap->algo = &i3c_master_i2c_algo; + /* strscpy() always NUL-terminates, unlike strncpy() on truncation. */ + strscpy(adap->name, dev_name(master->dev.parent), sizeof(adap->name)); + + /* FIXME: Should we allow i3c masters to override these values? */ + adap->timeout = 1000; + adap->retries = 3; + + ret = i2c_add_adapter(adap); + if (ret) + return ret; + + /* + * We silently ignore failures here. The bus should keep working + * correctly even if one or more i2c devices are not registered. 
+ */ + i3c_bus_for_each_i2cdev(&master->bus, i2cdev) + i2cdev->dev = i2c_new_device(adap, &i2cdev->boardinfo->base); + + return 0; +} + +static void i3c_master_i2c_adapter_cleanup(struct i3c_master_controller *master) +{ + struct i2c_dev_desc *i2cdev; + + i2c_del_adapter(&master->i2c); + + i3c_bus_for_each_i2cdev(&master->bus, i2cdev) + i2cdev->dev = NULL; +} + +static void i3c_master_unregister_i3c_devs(struct i3c_master_controller *master) +{ + struct i3c_dev_desc *i3cdev; + + i3c_bus_for_each_i3cdev(&master->bus, i3cdev) { + if (!i3cdev->dev) + continue; + + i3cdev->dev->desc = NULL; + if (device_is_registered(&i3cdev->dev->dev)) + device_unregister(&i3cdev->dev->dev); + else + put_device(&i3cdev->dev->dev); + i3cdev->dev = NULL; + } +} + +/** + * i3c_master_queue_ibi() - Queue an IBI + * @dev: the device this IBI is coming from + * @slot: the IBI slot used to store the payload + * + * Queue an IBI to the controller workqueue. The IBI handler attached to + * the dev will be called from a workqueue context. 
+ */ +void i3c_master_queue_ibi(struct i3c_dev_desc *dev, struct i3c_ibi_slot *slot) +{ + atomic_inc(&dev->ibi->pending_ibis); + queue_work(dev->common.master->wq, &slot->work); +} +EXPORT_SYMBOL_GPL(i3c_master_queue_ibi); + +static void i3c_master_handle_ibi(struct work_struct *work) +{ + struct i3c_ibi_slot *slot = container_of(work, struct i3c_ibi_slot, + work); + struct i3c_dev_desc *dev = slot->dev; + struct i3c_master_controller *master = i3c_dev_get_master(dev); + struct i3c_ibi_payload payload; + + payload.data = slot->data; + payload.len = slot->len; + + if (dev->dev) + dev->ibi->handler(dev->dev, &payload); + + master->ops->recycle_ibi_slot(dev, slot); + if (atomic_dec_and_test(&dev->ibi->pending_ibis)) + complete(&dev->ibi->all_ibis_handled); +} + +static void i3c_master_init_ibi_slot(struct i3c_dev_desc *dev, + struct i3c_ibi_slot *slot) +{ + slot->dev = dev; + INIT_WORK(&slot->work, i3c_master_handle_ibi); +} + +struct i3c_generic_ibi_slot { + struct list_head node; + struct i3c_ibi_slot base; +}; + +struct i3c_generic_ibi_pool { + spinlock_t lock; + unsigned int num_slots; + struct i3c_generic_ibi_slot *slots; + void *payload_buf; + struct list_head free_slots; + struct list_head pending; +}; + +/** + * i3c_generic_ibi_free_pool() - Free a generic IBI pool + * @pool: the IBI pool to free + * + * Free all IBI slots allated by a generic IBI pool. + */ +void i3c_generic_ibi_free_pool(struct i3c_generic_ibi_pool *pool) +{ + struct i3c_generic_ibi_slot *slot; + unsigned int nslots = 0; + + while (!list_empty(&pool->free_slots)) { + slot = list_first_entry(&pool->free_slots, + struct i3c_generic_ibi_slot, node); + list_del(&slot->node); + nslots++; + } + + /* + * If the number of freed slots is not equal to the number of allocated + * slots we have a leak somewhere. 
+ */ + WARN_ON(nslots != pool->num_slots); + + kfree(pool->payload_buf); + kfree(pool->slots); + kfree(pool); +} +EXPORT_SYMBOL_GPL(i3c_generic_ibi_free_pool); + +/** + * i3c_generic_ibi_alloc_pool() - Create a generic IBI pool + * @dev: the device this pool will be used for + * @req: IBI setup request describing what the device driver expects + * + * Create a generic IBI pool based on the information provided in @req. + * + * Return: a valid IBI pool in case of success, an ERR_PTR() otherwise. + */ +struct i3c_generic_ibi_pool * +i3c_generic_ibi_alloc_pool(struct i3c_dev_desc *dev, + const struct i3c_ibi_setup *req) +{ + struct i3c_generic_ibi_pool *pool; + struct i3c_generic_ibi_slot *slot; + unsigned int i; + int ret; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&pool->lock); + INIT_LIST_HEAD(&pool->free_slots); + INIT_LIST_HEAD(&pool->pending); + + pool->slots = kcalloc(req->num_slots, sizeof(*slot), GFP_KERNEL); + if (!pool->slots) { + ret = -ENOMEM; + goto err_free_pool; + } + + if (req->max_payload_len) { + pool->payload_buf = kcalloc(req->num_slots, + req->max_payload_len, GFP_KERNEL); + if (!pool->payload_buf) { + ret = -ENOMEM; + goto err_free_pool; + } + } + + for (i = 0; i < req->num_slots; i++) { + slot = &pool->slots[i]; + i3c_master_init_ibi_slot(dev, &slot->base); + + if (req->max_payload_len) + slot->base.data = pool->payload_buf + + (i * req->max_payload_len); + + list_add_tail(&slot->node, &pool->free_slots); + pool->num_slots++; + } + + return pool; + +err_free_pool: + i3c_generic_ibi_free_pool(pool); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(i3c_generic_ibi_alloc_pool); + +/** + * i3c_generic_ibi_get_free_slot() - Get a free slot from a generic IBI pool + * @pool: the pool to query an IBI slot on + * + * Search for a free slot in a generic IBI pool. + * The slot should be returned to the pool using i3c_generic_ibi_recycle_slot() + * when it's no longer needed. 
+ * + * Return: a pointer to a free slot, or NULL if there's no free slot available. + */ +struct i3c_ibi_slot * +i3c_generic_ibi_get_free_slot(struct i3c_generic_ibi_pool *pool) +{ + struct i3c_generic_ibi_slot *slot; + unsigned long flags; + + spin_lock_irqsave(&pool->lock, flags); + slot = list_first_entry_or_null(&pool->free_slots, + struct i3c_generic_ibi_slot, node); + if (slot) + list_del(&slot->node); + spin_unlock_irqrestore(&pool->lock, flags); + + return slot ? &slot->base : NULL; +} +EXPORT_SYMBOL_GPL(i3c_generic_ibi_get_free_slot); + +/** + * i3c_generic_ibi_recycle_slot() - Return a slot to a generic IBI pool + * @pool: the pool to return the IBI slot to + * @s: IBI slot to recycle + * + * Add an IBI slot back to its generic IBI pool. Should be called from the + * master driver struct_master_controller_ops->recycle_ibi() method. + */ +void i3c_generic_ibi_recycle_slot(struct i3c_generic_ibi_pool *pool, + struct i3c_ibi_slot *s) +{ + struct i3c_generic_ibi_slot *slot; + unsigned long flags; + + if (!s) + return; + + slot = container_of(s, struct i3c_generic_ibi_slot, base); + spin_lock_irqsave(&pool->lock, flags); + list_add_tail(&slot->node, &pool->free_slots); + spin_unlock_irqrestore(&pool->lock, flags); +} +EXPORT_SYMBOL_GPL(i3c_generic_ibi_recycle_slot); + +static int i3c_master_check_ops(const struct i3c_master_controller_ops *ops) +{ + if (!ops || !ops->bus_init || !ops->priv_xfers || + !ops->send_ccc_cmd || !ops->do_daa || !ops->i2c_xfers || + !ops->i2c_funcs) + return -EINVAL; + + if (ops->request_ibi && + (!ops->enable_ibi || !ops->disable_ibi || !ops->free_ibi || + !ops->recycle_ibi_slot)) + return -EINVAL; + + return 0; +} + +/** + * i3c_master_register() - register an I3C master + * @master: master used to send frames on the bus + * @parent: the parent device (the one that provides this I3C master + * controller) + * @ops: the master controller operations + * @secondary: true if you are registering a secondary master. 
Will return + * -ENOTSUPP if set to true since secondary masters are not yet + * supported + * + * This function takes care of everything for you: + * + * - creates and initializes the I3C bus + * - populates the bus with static I2C devs if @parent->of_node is not + * NULL + * - registers all I3C devices added by the controller during bus + * initialization + * - registers the I2C adapter and all I2C devices + * + * Return: 0 in case of success, a negative error code otherwise. + */ +int i3c_master_register(struct i3c_master_controller *master, + struct device *parent, + const struct i3c_master_controller_ops *ops, + bool secondary) +{ + struct i3c_bus *i3cbus = i3c_master_get_bus(master); + enum i3c_bus_mode mode = I3C_BUS_MODE_PURE; + struct i2c_dev_boardinfo *i2cbi; + int ret; + + /* We do not support secondary masters yet. */ + if (secondary) + return -ENOTSUPP; + + ret = i3c_master_check_ops(ops); + if (ret) + return ret; + + master->dev.parent = parent; + master->dev.of_node = of_node_get(parent->of_node); + master->dev.bus = &i3c_bus_type; + master->dev.type = &i3c_masterdev_type; + master->dev.release = i3c_masterdev_release; + master->ops = ops; + master->secondary = secondary; + INIT_LIST_HEAD(&master->boardinfo.i2c); + INIT_LIST_HEAD(&master->boardinfo.i3c); + + ret = i3c_bus_init(i3cbus); + if (ret) + return ret; + + device_initialize(&master->dev); + dev_set_name(&master->dev, "i3c-%d", i3cbus->id); + + ret = of_populate_i3c_bus(master); + if (ret) + goto err_put_dev; + + list_for_each_entry(i2cbi, &master->boardinfo.i2c, node) { + switch (i2cbi->lvr & I3C_LVR_I2C_INDEX_MASK) { + case I3C_LVR_I2C_INDEX(0): + if (mode < I3C_BUS_MODE_MIXED_FAST) + mode = I3C_BUS_MODE_MIXED_FAST; + break; + case I3C_LVR_I2C_INDEX(1): + case I3C_LVR_I2C_INDEX(2): + if (mode < I3C_BUS_MODE_MIXED_SLOW) + mode = I3C_BUS_MODE_MIXED_SLOW; + break; + default: + ret = -EINVAL; + goto err_put_dev; + } + } + + ret = i3c_bus_set_mode(i3cbus, mode); + if (ret) + goto err_put_dev; + + 
master->wq = alloc_workqueue("%s", 0, 0, dev_name(parent)); + if (!master->wq) { + ret = -ENOMEM; + goto err_put_dev; + } + + ret = i3c_master_bus_init(master); + if (ret) + goto err_put_dev; + + ret = device_add(&master->dev); + if (ret) + goto err_cleanup_bus; + + /* + * Expose our I3C bus as an I2C adapter so that I2C devices are exposed + * through the I2C subsystem. + */ + ret = i3c_master_i2c_adapter_init(master); + if (ret) + goto err_del_dev; + + /* + * We're done initializing the bus and the controller, we can now + * register I3C devices discovered during the initial DAA. + */ + master->init_done = true; + i3c_bus_normaluse_lock(&master->bus); + i3c_master_register_new_i3c_devs(master); + i3c_bus_normaluse_unlock(&master->bus); + + return 0; + +err_del_dev: + device_del(&master->dev); + +err_cleanup_bus: + i3c_master_bus_cleanup(master); + +err_put_dev: + put_device(&master->dev); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_master_register); + +/** + * i3c_master_unregister() - unregister an I3C master + * @master: master used to send frames on the bus + * + * Basically undo everything done in i3c_master_register(). + * + * Return: 0 in case of success, a negative error code otherwise.
+ */ +int i3c_master_unregister(struct i3c_master_controller *master) +{ + i3c_master_i2c_adapter_cleanup(master); + i3c_master_unregister_i3c_devs(master); + i3c_master_bus_cleanup(master); + device_unregister(&master->dev); + + return 0; +} +EXPORT_SYMBOL_GPL(i3c_master_unregister); + +int i3c_dev_do_priv_xfers_locked(struct i3c_dev_desc *dev, + struct i3c_priv_xfer *xfers, + int nxfers) +{ + struct i3c_master_controller *master; + + if (!dev) + return -ENOENT; + + master = i3c_dev_get_master(dev); + if (!master || !xfers) + return -EINVAL; + + if (!master->ops->priv_xfers) + return -ENOTSUPP; + + return master->ops->priv_xfers(dev, xfers, nxfers); +} + +int i3c_dev_disable_ibi_locked(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *master; + int ret; + + if (!dev->ibi) + return -EINVAL; + + master = i3c_dev_get_master(dev); + ret = master->ops->disable_ibi(dev); + if (ret) + return ret; + + reinit_completion(&dev->ibi->all_ibis_handled); + if (atomic_read(&dev->ibi->pending_ibis)) + wait_for_completion(&dev->ibi->all_ibis_handled); + + dev->ibi->enabled = false; + + return 0; +} + +int i3c_dev_enable_ibi_locked(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *master = i3c_dev_get_master(dev); + int ret; + + if (!dev->ibi) + return -EINVAL; + + ret = master->ops->enable_ibi(dev); + if (!ret) + dev->ibi->enabled = true; + + return ret; +} + +int i3c_dev_request_ibi_locked(struct i3c_dev_desc *dev, + const struct i3c_ibi_setup *req) +{ + struct i3c_master_controller *master = i3c_dev_get_master(dev); + struct i3c_device_ibi_info *ibi; + int ret; + + if (!master->ops->request_ibi) + return -ENOTSUPP; + + if (dev->ibi) + return -EBUSY; + + ibi = kzalloc(sizeof(*ibi), GFP_KERNEL); + if (!ibi) + return -ENOMEM; + + atomic_set(&ibi->pending_ibis, 0); + init_completion(&ibi->all_ibis_handled); + ibi->handler = req->handler; + ibi->max_payload_len = req->max_payload_len; + ibi->num_slots = req->num_slots; + + dev->ibi = ibi; + ret = 
master->ops->request_ibi(dev, req); + if (ret) { + kfree(ibi); + dev->ibi = NULL; + } + + return ret; +} + +void i3c_dev_free_ibi_locked(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *master = i3c_dev_get_master(dev); + + if (!dev->ibi) + return; + + if (WARN_ON(dev->ibi->enabled)) + WARN_ON(i3c_dev_disable_ibi_locked(dev)); + + master->ops->free_ibi(dev); + kfree(dev->ibi); + dev->ibi = NULL; +} + +static int __init i3c_init(void) +{ + return bus_register(&i3c_bus_type); +} +subsys_initcall(i3c_init); + +static void __exit i3c_exit(void) +{ + idr_destroy(&i3c_bus_idr); + bus_unregister(&i3c_bus_type); +} +module_exit(i3c_exit); + +MODULE_AUTHOR("Boris Brezillon "); +MODULE_DESCRIPTION("I3C core"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/i3c/master/Kconfig b/drivers/i3c/master/Kconfig new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/drivers/i3c/master/Makefile b/drivers/i3c/master/Makefile new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/include/linux/i3c/ccc.h b/include/linux/i3c/ccc.h new file mode 100644 index 000000000000..73b0982cc519 --- /dev/null +++ b/include/linux/i3c/ccc.h @@ -0,0 +1,385 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Cadence Design Systems Inc. + * + * Author: Boris Brezillon + */ + +#ifndef I3C_CCC_H +#define I3C_CCC_H + +#include +#include + +/* I3C CCC (Common Command Codes) related definitions */ +#define I3C_CCC_DIRECT BIT(7) + +#define I3C_CCC_ID(id, broadcast) \ + ((id) | ((broadcast) ? 
0 : I3C_CCC_DIRECT)) + +/* Commands valid in both broadcast and unicast modes */ +#define I3C_CCC_ENEC(broadcast) I3C_CCC_ID(0x0, broadcast) +#define I3C_CCC_DISEC(broadcast) I3C_CCC_ID(0x1, broadcast) +#define I3C_CCC_ENTAS(as, broadcast) I3C_CCC_ID(0x2 + (as), broadcast) +#define I3C_CCC_RSTDAA(broadcast) I3C_CCC_ID(0x6, broadcast) +#define I3C_CCC_SETMWL(broadcast) I3C_CCC_ID(0x9, broadcast) +#define I3C_CCC_SETMRL(broadcast) I3C_CCC_ID(0xa, broadcast) +#define I3C_CCC_SETXTIME(broadcast) ((broadcast) ? 0x28 : 0x98) +#define I3C_CCC_VENDOR(id, broadcast) ((id) + ((broadcast) ? 0x61 : 0xe0)) + +/* Broadcast-only commands */ +#define I3C_CCC_ENTDAA I3C_CCC_ID(0x7, true) +#define I3C_CCC_DEFSLVS I3C_CCC_ID(0x8, true) +#define I3C_CCC_ENTTM I3C_CCC_ID(0xb, true) +#define I3C_CCC_ENTHDR(x) I3C_CCC_ID(0x20 + (x), true) + +/* Unicast-only commands */ +#define I3C_CCC_SETDASA I3C_CCC_ID(0x7, false) +#define I3C_CCC_SETNEWDA I3C_CCC_ID(0x8, false) +#define I3C_CCC_GETMWL I3C_CCC_ID(0xb, false) +#define I3C_CCC_GETMRL I3C_CCC_ID(0xc, false) +#define I3C_CCC_GETPID I3C_CCC_ID(0xd, false) +#define I3C_CCC_GETBCR I3C_CCC_ID(0xe, false) +#define I3C_CCC_GETDCR I3C_CCC_ID(0xf, false) +#define I3C_CCC_GETSTATUS I3C_CCC_ID(0x10, false) +#define I3C_CCC_GETACCMST I3C_CCC_ID(0x11, false) +#define I3C_CCC_SETBRGTGT I3C_CCC_ID(0x13, false) +#define I3C_CCC_GETMXDS I3C_CCC_ID(0x14, false) +#define I3C_CCC_GETHDRCAP I3C_CCC_ID(0x15, false) +#define I3C_CCC_GETXTIME I3C_CCC_ID(0x19, false) + +#define I3C_CCC_EVENT_SIR BIT(0) +#define I3C_CCC_EVENT_MR BIT(1) +#define I3C_CCC_EVENT_HJ BIT(3) + +/** + * struct i3c_ccc_events - payload passed to ENEC/DISEC CCC + * + * @events: bitmask of I3C_CCC_EVENT_xxx events. + * + * Depending on the CCC command, the specific events coming from all devices + * (broadcast version) or a specific device (unicast version) will be + * enabled (ENEC) or disabled (DISEC). 
+ */ +struct i3c_ccc_events { + u8 events; +}; + +/** + * struct i3c_ccc_mwl - payload passed to SETMWL/GETMWL CCC + * + * @len: maximum write length in bytes + * + * The maximum write length is only applicable to SDR private messages or + * extended Write CCCs (like SETXTIME). + */ +struct i3c_ccc_mwl { + __be16 len; +}; + +/** + * struct i3c_ccc_mrl - payload passed to SETMRL/GETMRL CCC + * + * @read_len: maximum read length in bytes + * @ibi_len: maximum IBI payload length + * + * The maximum read length is only applicable to SDR private messages or + * extended Read CCCs (like GETXTIME). + * The IBI length is only valid if the I3C slave is IBI capable + * (%I3C_BCR_IBI_REQ_CAP is set). + */ +struct i3c_ccc_mrl { + __be16 read_len; + u8 ibi_len; +} __packed; + +/** + * struct i3c_ccc_dev_desc - I3C/I2C device descriptor used for DEFSLVS + * + * @dyn_addr: dynamic address assigned to the I3C slave or 0 if the entry is + * describing an I2C slave. + * @dcr: DCR value (not applicable to entries describing I2C devices) + * @lvr: LVR value (not applicable to entries describing I3C devices) + * @bcr: BCR value or 0 if this entry is describing an I2C slave + * @static_addr: static address or 0 if the device does not have a static + * address + * + * The DEFSLVS command should be passed an array of i3c_ccc_dev_desc + * descriptors (one entry per I3C/I2C dev controlled by the master). + */ +struct i3c_ccc_dev_desc { + u8 dyn_addr; + union { + u8 dcr; + u8 lvr; + }; + u8 bcr; + u8 static_addr; +}; + +/** + * struct i3c_ccc_defslvs - payload passed to DEFSLVS CCC + * + * @count: number of dev descriptors + * @master: descriptor describing the current master + * @slaves: array of descriptors describing slaves controlled by the + * current master + * + * Information passed to the broadcast DEFSLVS to propagate device + * information to all masters currently acting as slaves on the bus. + * This is only meaningful if you have more than one master.
+ */ +struct i3c_ccc_defslvs { + u8 count; + struct i3c_ccc_dev_desc master; + struct i3c_ccc_dev_desc slaves[0]; +} __packed; + +/** + * enum i3c_ccc_test_mode - enum listing all available test modes + * + * @I3C_CCC_EXIT_TEST_MODE: exit test mode + * @I3C_CCC_VENDOR_TEST_MODE: enter vendor test mode + */ +enum i3c_ccc_test_mode { + I3C_CCC_EXIT_TEST_MODE, + I3C_CCC_VENDOR_TEST_MODE, +}; + +/** + * struct i3c_ccc_enttm - payload passed to ENTTM CCC + * + * @mode: one of the &enum i3c_ccc_test_mode modes + * + * Information passed to the ENTTM CCC to instruct an I3C device to enter a + * specific test mode. + */ +struct i3c_ccc_enttm { + u8 mode; +}; + +/** + * struct i3c_ccc_setda - payload passed to SETNEWDA and SETDASA CCCs + * + * @addr: dynamic address to assign to an I3C device + * + * Information passed to the SETNEWDA and SETDASA CCCs to assign/change the + * dynamic address of an I3C device. + */ +struct i3c_ccc_setda { + u8 addr; +}; + +/** + * struct i3c_ccc_getpid - payload passed to GETPID CCC + * + * @pid: 48 bits PID in big endian + */ +struct i3c_ccc_getpid { + u8 pid[6]; +}; + +/** + * struct i3c_ccc_getbcr - payload passed to GETBCR CCC + * + * @bcr: BCR (Bus Characteristic Register) value + */ +struct i3c_ccc_getbcr { + u8 bcr; +}; + +/** + * struct i3c_ccc_getdcr - payload passed to GETDCR CCC + * + * @dcr: DCR (Device Characteristic Register) value + */ +struct i3c_ccc_getdcr { + u8 dcr; +}; + +#define I3C_CCC_STATUS_PENDING_INT(status) ((status) & GENMASK(3, 0)) +#define I3C_CCC_STATUS_PROTOCOL_ERROR BIT(5) +#define I3C_CCC_STATUS_ACTIVITY_MODE(status) \ + (((status) & GENMASK(7, 6)) >> 6) + +/** + * struct i3c_ccc_getstatus - payload passed to GETSTATUS CCC + * + * @status: status of the I3C slave (see I3C_CCC_STATUS_xxx macros for more + * information). 
+ */ +struct i3c_ccc_getstatus { + __be16 status; +}; + +/** + * struct i3c_ccc_getaccmst - payload passed to GETACCMST CCC + * + * @newmaster: address of the master taking bus ownership + */ +struct i3c_ccc_getaccmst { + u8 newmaster; +}; + +/** + * struct i3c_ccc_bridged_slave_desc - bridged slave descriptor + * + * @addr: dynamic address of the bridged device + * @id: ID of the slave device behind the bridge + */ +struct i3c_ccc_bridged_slave_desc { + u8 addr; + __be16 id; +} __packed; + +/** + * struct i3c_ccc_setbrgtgt - payload passed to SETBRGTGT CCC + * + * @count: number of bridged slaves + * @bslaves: bridged slave descriptors + */ +struct i3c_ccc_setbrgtgt { + u8 count; + struct i3c_ccc_bridged_slave_desc bslaves[0]; +} __packed; + +/** + * enum i3c_sdr_max_data_rate - max data rate values for private SDR transfers + */ +enum i3c_sdr_max_data_rate { + I3C_SDR0_FSCL_MAX, + I3C_SDR1_FSCL_8MHZ, + I3C_SDR2_FSCL_6MHZ, + I3C_SDR3_FSCL_4MHZ, + I3C_SDR4_FSCL_2MHZ, +}; + +/** + * enum i3c_tsco - clock to data turn-around + */ +enum i3c_tsco { + I3C_TSCO_8NS, + I3C_TSCO_9NS, + I3C_TSCO_10NS, + I3C_TSCO_11NS, + I3C_TSCO_12NS, +}; + +#define I3C_CCC_MAX_SDR_FSCL_MASK GENMASK(2, 0) +#define I3C_CCC_MAX_SDR_FSCL(x) ((x) & I3C_CCC_MAX_SDR_FSCL_MASK) + +/** + * struct i3c_ccc_getmxds - payload passed to GETMXDS CCC + * + * @maxwr: write limitations + * @maxrd: read limitations + * @maxrdturn: maximum read turn-around expressed micro-seconds and + * little-endian formatted + */ +struct i3c_ccc_getmxds { + u8 maxwr; + u8 maxrd; + u8 maxrdturn[3]; +} __packed; + +#define I3C_CCC_HDR_MODE(mode) BIT(mode) + +/** + * struct i3c_ccc_gethdrcap - payload passed to GETHDRCAP CCC + * + * @modes: bitmap of supported HDR modes + */ +struct i3c_ccc_gethdrcap { + u8 modes; +} __packed; + +/** + * enum i3c_ccc_setxtime_subcmd - SETXTIME sub-commands + */ +enum i3c_ccc_setxtime_subcmd { + I3C_CCC_SETXTIME_ST = 0x7f, + I3C_CCC_SETXTIME_DT = 0xbf, + I3C_CCC_SETXTIME_ENTER_ASYNC_MODE0 = 
0xdf, + I3C_CCC_SETXTIME_ENTER_ASYNC_MODE1 = 0xef, + I3C_CCC_SETXTIME_ENTER_ASYNC_MODE2 = 0xf7, + I3C_CCC_SETXTIME_ENTER_ASYNC_MODE3 = 0xfb, + I3C_CCC_SETXTIME_ASYNC_TRIGGER = 0xfd, + I3C_CCC_SETXTIME_TPH = 0x3f, + I3C_CCC_SETXTIME_TU = 0x9f, + I3C_CCC_SETXTIME_ODR = 0x8f, +}; + +/** + * struct i3c_ccc_setxtime - payload passed to SETXTIME CCC + * + * @subcmd: one of the sub-commands defined in &enum i3c_ccc_setxtime_subcmd + * @data: sub-command payload. Amount of data is determined by + * &i3c_ccc_setxtime->subcmd + */ +struct i3c_ccc_setxtime { + u8 subcmd; + u8 data[0]; +} __packed; + +#define I3C_CCC_GETXTIME_SYNC_MODE BIT(0) +#define I3C_CCC_GETXTIME_ASYNC_MODE(x) BIT((x) + 1) +#define I3C_CCC_GETXTIME_OVERFLOW BIT(7) + +/** + * struct i3c_ccc_getxtime - payload retrieved from GETXTIME CCC + * + * @supported_modes: bitmap describing supported XTIME modes + * @state: current status (enabled mode and overflow status) + * @frequency: slave's internal oscillator frequency in 500KHz steps + * @inaccuracy: slave's internal oscillator inaccuracy in 0.1% steps + */ +struct i3c_ccc_getxtime { + u8 supported_modes; + u8 state; + u8 frequency; + u8 inaccuracy; +} __packed; + +/** + * struct i3c_ccc_cmd_payload - CCC payload + * + * @len: payload length + * @data: payload data. This buffer must be DMA-able + */ +struct i3c_ccc_cmd_payload { + u16 len; + void *data; +}; + +/** + * struct i3c_ccc_cmd_dest - CCC command destination + * + * @addr: can be an I3C device address or the broadcast address if this is a + * broadcast CCC + * @payload: payload to be sent to this device or broadcasted + */ +struct i3c_ccc_cmd_dest { + u8 addr; + struct i3c_ccc_cmd_payload payload; +}; + +/** + * struct i3c_ccc_cmd - CCC command + * + * @rnw: true if the CCC should retrieve data from the device. Only valid for + * unicast commands + * @id: CCC command id + * @ndests: number of destinations.
Should always be one for broadcast commands + * @dests: array of destinations and associated payload for this CCC. Most of + * the time, only one destination is provided + * @err: I3C error code + */ +struct i3c_ccc_cmd { + u8 rnw; + u8 id; + unsigned int ndests; + struct i3c_ccc_cmd_dest *dests; + enum i3c_error_code err; +}; + +#endif /* I3C_CCC_H */ diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h new file mode 100644 index 000000000000..5ecb055fd375 --- /dev/null +++ b/include/linux/i3c/device.h @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Cadence Design Systems Inc. + * + * Author: Boris Brezillon + */ + +#ifndef I3C_DEV_H +#define I3C_DEV_H + +#include +#include +#include +#include +#include +#include + +/** + * enum i3c_error_code - I3C error codes + * + * These are the standard error codes as defined by the I3C specification. + * When -EIO is returned by the i3c_device_do_priv_xfers() or + * i3c_device_send_hdr_cmds() one can check the error code in + * &struct_i3c_priv_xfer.err or &struct i3c_hdr_cmd.err to get a better idea of + * what went wrong. + * + * @I3C_ERROR_UNKNOWN: unknown error, usually means the error is not I3C + * related + * @I3C_ERROR_M0: M0 error + * @I3C_ERROR_M1: M1 error + * @I3C_ERROR_M2: M2 error + */ +enum i3c_error_code { + I3C_ERROR_UNKNOWN = 0, + I3C_ERROR_M0 = 1, + I3C_ERROR_M1, + I3C_ERROR_M2, +}; + +/** + * enum i3c_hdr_mode - HDR mode ids + * @I3C_HDR_DDR: DDR mode + * @I3C_HDR_TSP: TSP mode + * @I3C_HDR_TSL: TSL mode + */ +enum i3c_hdr_mode { + I3C_HDR_DDR, + I3C_HDR_TSP, + I3C_HDR_TSL, +}; + +/** + * struct i3c_priv_xfer - I3C SDR private transfer + * @rnw: encodes the transfer direction. true for a read, false for a write + * @len: transfer length in bytes of the transfer + * @data: input/output buffer + * @data.in: input buffer. Must point to a DMA-able buffer + * @data.out: output buffer. 
Must point to a DMA-able buffer + * @err: I3C error code + */ +struct i3c_priv_xfer { + u8 rnw; + u16 len; + union { + void *in; + const void *out; + } data; + enum i3c_error_code err; +}; + +/** + * enum i3c_dcr - I3C DCR values + * @I3C_DCR_GENERIC_DEVICE: generic I3C device + */ +enum i3c_dcr { + I3C_DCR_GENERIC_DEVICE = 0, +}; + +#define I3C_PID_MANUF_ID(pid) (((pid) & GENMASK_ULL(47, 33)) >> 33) +#define I3C_PID_RND_LOWER_32BITS(pid) (!!((pid) & BIT_ULL(32))) +#define I3C_PID_RND_VAL(pid) ((pid) & GENMASK_ULL(31, 0)) +#define I3C_PID_PART_ID(pid) (((pid) & GENMASK_ULL(31, 16)) >> 16) +#define I3C_PID_INSTANCE_ID(pid) (((pid) & GENMASK_ULL(15, 12)) >> 12) +#define I3C_PID_EXTRA_INFO(pid) ((pid) & GENMASK_ULL(11, 0)) + +#define I3C_BCR_DEVICE_ROLE(bcr) ((bcr) & GENMASK(7, 6)) +#define I3C_BCR_I3C_SLAVE (0 << 6) +#define I3C_BCR_I3C_MASTER (1 << 6) +#define I3C_BCR_HDR_CAP BIT(5) +#define I3C_BCR_BRIDGE BIT(4) +#define I3C_BCR_OFFLINE_CAP BIT(3) +#define I3C_BCR_IBI_PAYLOAD BIT(2) +#define I3C_BCR_IBI_REQ_CAP BIT(1) +#define I3C_BCR_MAX_DATA_SPEED_LIM BIT(0) + +/** + * struct i3c_device_info - I3C device information + * @pid: Provisional ID + * @bcr: Bus Characteristic Register + * @dcr: Device Characteristic Register + * @static_addr: static/I2C address + * @dyn_addr: dynamic address + * @hdr_cap: supported HDR modes + * @max_read_ds: max read speed information + * @max_write_ds: max write speed information + * @max_ibi_len: max IBI payload length + * @max_read_turnaround: max read turn-around time in micro-seconds + * @max_read_len: max private SDR read length in bytes + * @max_write_len: max private SDR write length in bytes + * + * These are all basic information that should be advertised by an I3C device. + * Some of them are optional depending on the device type and device + * capabilities. + * For each I3C slave attached to a master with + * i3c_master_add_i3c_dev_locked(), the core will send the relevant CCC command + * to retrieve these data. 
+ */ +struct i3c_device_info { + u64 pid; + u8 bcr; + u8 dcr; + u8 static_addr; + u8 dyn_addr; + u8 hdr_cap; + u8 max_read_ds; + u8 max_write_ds; + u8 max_ibi_len; + u32 max_read_turnaround; + u16 max_read_len; + u16 max_write_len; +}; + +/* + * I3C device internals are kept hidden from I3C device users. It's just + * simpler to refactor things when everything goes through getter/setters, and + * I3C device drivers should not have to worry about internal representation + * anyway. + */ +struct i3c_device; + +/* These macros should be used to i3c_device_id entries. */ +#define I3C_MATCH_MANUF_AND_PART (I3C_MATCH_MANUF | I3C_MATCH_PART) + +#define I3C_DEVICE(_manufid, _partid, _drvdata) \ + { \ + .match_flags = I3C_MATCH_MANUF_AND_PART, \ + .manuf_id = _manufid, \ + .part_id = _partid, \ + .data = _drvdata, \ + } + +#define I3C_DEVICE_EXTRA_INFO(_manufid, _partid, _info, _drvdata) \ + { \ + .match_flags = I3C_MATCH_MANUF_AND_PART | \ + I3C_MATCH_EXTRA_INFO, \ + .manuf_id = _manufid, \ + .part_id = _partid, \ + .extra_info = _info, \ + .data = _drvdata, \ + } + +#define I3C_CLASS(_dcr, _drvdata) \ + { \ + .match_flags = I3C_MATCH_DCR, \ + .dcr = _dcr, \ + } + +/** + * struct i3c_driver - I3C device driver + * @driver: inherit from device_driver + * @probe: I3C device probe method + * @remove: I3C device remove method + * @id_table: I3C device match table. 
Will be used by the framework to decide + * which device to bind to this driver + */ +struct i3c_driver { + struct device_driver driver; + int (*probe)(struct i3c_device *dev); + int (*remove)(struct i3c_device *dev); + const struct i3c_device_id *id_table; +}; + +static inline struct i3c_driver *drv_to_i3cdrv(struct device_driver *drv) +{ + return container_of(drv, struct i3c_driver, driver); +} + +struct device *i3cdev_to_dev(struct i3c_device *i3cdev); +struct i3c_device *dev_to_i3cdev(struct device *dev); + +static inline void i3cdev_set_drvdata(struct i3c_device *i3cdev, + void *data) +{ + struct device *dev = i3cdev_to_dev(i3cdev); + + dev_set_drvdata(dev, data); +} + +static inline void *i3cdev_get_drvdata(struct i3c_device *i3cdev) +{ + struct device *dev = i3cdev_to_dev(i3cdev); + + return dev_get_drvdata(dev); +} + +int i3c_driver_register_with_owner(struct i3c_driver *drv, + struct module *owner); +void i3c_driver_unregister(struct i3c_driver *drv); + +#define i3c_driver_register(__drv) \ + i3c_driver_register_with_owner(__drv, THIS_MODULE) + +/** + * module_i3c_driver() - Register a module providing an I3C driver + * @__drv: the I3C driver to register + * + * Provide generic init/exit functions that simply register/unregister an I3C + * driver. + * Should be used by any driver that does not require extra init/cleanup steps. + */ +#define module_i3c_driver(__drv) \ + module_driver(__drv, i3c_driver_register, i3c_driver_unregister) + +/** + * i3c_i2c_driver_register() - Register an i2c and an i3c driver + * @i3cdrv: the I3C driver to register + * @i2cdrv: the I2C driver to register + * + * This function registers both @i2cdrv and @i3cdrv, and fails if one of these + * registrations fails. This is mainly useful for devices that support both I2C + * and I3C modes. + * Note that when CONFIG_I3C is not enabled, this function only registers the + * I2C driver. + * + * Return: 0 if both registrations succeed, a negative error code otherwise.
+ */ +static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, + struct i2c_driver *i2cdrv) +{ + int ret; + + ret = i2c_add_driver(i2cdrv); + if (ret || !IS_ENABLED(CONFIG_I3C)) + return ret; + + ret = i3c_driver_register(i3cdrv); + if (ret) + i2c_del_driver(i2cdrv); + + return ret; +} + +/** + * i3c_i2c_driver_unregister() - Unregister an i2c and an i3c driver + * @i3cdrv: the I3C driver to unregister + * @i2cdrv: the I2C driver to unregister + * + * This function unregisters both @i3cdrv and @i2cdrv. + * Note that when CONFIG_I3C is not enabled, this function only unregisters the + * @i2cdrv. + */ +static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, + struct i2c_driver *i2cdrv) +{ + if (IS_ENABLED(CONFIG_I3C)) + i3c_driver_unregister(i3cdrv); + + i2c_del_driver(i2cdrv); +} + +/** + * module_i3c_i2c_driver() - Register a module providing an I3C and an I2C + * driver + * @__i3cdrv: the I3C driver to register + * @__i2cdrv: the I2C driver to register + * + * Provide generic init/exit functions that simply register/unregister an I3C + * and an I2C driver. + * This macro can be used even if CONFIG_I3C is disabled, in this case, only + * the I2C driver will be registered. + * Should be used by any driver that does not require extra init/cleanup steps. + */ +#define module_i3c_i2c_driver(__i3cdrv, __i2cdrv) \ + module_driver(__i3cdrv, \ + i3c_i2c_driver_register, \ + i3c_i2c_driver_unregister) + +int i3c_device_do_priv_xfers(struct i3c_device *dev, + struct i3c_priv_xfer *xfers, + int nxfers); + +void i3c_device_get_info(struct i3c_device *dev, struct i3c_device_info *info); + +struct i3c_ibi_payload { + unsigned int len; + const void *data; +}; + +/** + * struct i3c_ibi_setup - IBI setup object + * @max_payload_len: maximum length of the payload associated to an IBI. If one + * IBI appears to have a payload that is bigger than this + * number, the IBI will be rejected. + * @num_slots: number of pre-allocated IBI slots.
This should be chosen so that + * the system never runs out of IBI slots, otherwise you'll lose + * IBIs. + * @handler: IBI handler, called every time an IBI is received. This handler is called + * in a workqueue context. It is allowed to sleep and send new + * messages on the bus, though it's recommended to keep the + * processing done there as fast as possible to avoid delaying + * processing of other IBIs queued on the same workqueue. + * + * Temporary structure used to pass information to i3c_device_request_ibi(). + * This object can be allocated on the stack since i3c_device_request_ibi() + * copies every bit of information and does not use it after + * i3c_device_request_ibi() has returned. + */ +struct i3c_ibi_setup { + unsigned int max_payload_len; + unsigned int num_slots; + void (*handler)(struct i3c_device *dev, + const struct i3c_ibi_payload *payload); +}; + +int i3c_device_request_ibi(struct i3c_device *dev, + const struct i3c_ibi_setup *setup); +void i3c_device_free_ibi(struct i3c_device *dev); +int i3c_device_enable_ibi(struct i3c_device *dev); +int i3c_device_disable_ibi(struct i3c_device *dev); + +#endif /* I3C_DEV_H */ diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h new file mode 100644 index 000000000000..f13fd8b1dd79 --- /dev/null +++ b/include/linux/i3c/master.h @@ -0,0 +1,648 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Cadence Design Systems Inc.
+ * + * Author: Boris Brezillon + */ + +#ifndef I3C_MASTER_H +#define I3C_MASTER_H + +#include + +#include +#include +#include +#include +#include +#include +#include + +#define I3C_HOT_JOIN_ADDR 0x2 +#define I3C_BROADCAST_ADDR 0x7e +#define I3C_MAX_ADDR GENMASK(6, 0) + +struct i3c_master_controller; +struct i3c_bus; +struct i2c_device; +struct i3c_device; + +/** + * struct i3c_i2c_dev_desc - Common part of the I3C/I2C device descriptor + * @node: node element used to insert the slot into the I2C or I3C device + * list + * @master: I3C master that instantiated this device. Will be used to do + * I2C/I3C transfers + * @master_priv: master private data assigned to the device. Can be used to + * add master specific information + * + * This structure is describing common I3C/I2C dev information. + */ +struct i3c_i2c_dev_desc { + struct list_head node; + struct i3c_master_controller *master; + void *master_priv; +}; + +#define I3C_LVR_I2C_INDEX_MASK GENMASK(7, 5) +#define I3C_LVR_I2C_INDEX(x) ((x) << 5) +#define I3C_LVR_I2C_FM_MODE BIT(4) + +#define I2C_MAX_ADDR GENMASK(9, 0) + +/** + * struct i2c_dev_boardinfo - I2C device board information + * @node: used to insert the boardinfo object in the I2C boardinfo list + * @base: regular I2C board information + * @lvr: LVR (Legacy Virtual Register) needed by the I3C core to know about + * the I2C device limitations + * + * This structure is used to attach board-level information to an I2C device. + * Each I2C device connected on the I3C bus should have one. + */ +struct i2c_dev_boardinfo { + struct list_head node; + struct i2c_board_info base; + u8 lvr; +}; + +/** + * struct i2c_dev_desc - I2C device descriptor + * @common: common part of the I2C device descriptor + * @boardinfo: pointer to the boardinfo attached to this I2C device + * @dev: I2C device object registered to the I2C framework + * + * Each I2C device connected on the bus will have an i2c_dev_desc. 
+ * This object is created by the core and later attached to the controller + * using &struct_i3c_master_controller->ops->attach_i2c_dev(). + * + * &struct_i2c_dev_desc is the internal representation of an I2C device + * connected on an I3C bus. This object is also passed to all + * &struct_i3c_master_controller_ops hooks. + */ +struct i2c_dev_desc { + struct i3c_i2c_dev_desc common; + const struct i2c_dev_boardinfo *boardinfo; + struct i2c_client *dev; +}; + +/** + * struct i3c_ibi_slot - I3C IBI (In-Band Interrupt) slot + * @work: work associated to this slot. The IBI handler will be called from + * there + * @dev: the I3C device that has generated this IBI + * @len: length of the payload associated to this IBI + * @data: payload buffer + * + * An IBI slot is an object pre-allocated by the controller and used when an + * IBI comes in. + * Every time an IBI comes in, the I3C master driver should find a free IBI + * slot in its IBI slot pool, retrieve the IBI payload and queue the IBI using + * i3c_master_queue_ibi(). + * + * How IBI slots are allocated is left to the I3C master driver, though, for + * simple kmalloc-based allocation, the generic IBI slot pool can be used. + */ +struct i3c_ibi_slot { + struct work_struct work; + struct i3c_dev_desc *dev; + unsigned int len; + void *data; +}; + +/** + * struct i3c_device_ibi_info - IBI information attached to a specific device + * @all_ibis_handled: used to be informed when no more IBIs are waiting to be + * processed. Used by i3c_device_disable_ibi() to wait for + * all IBIs to be dequeued + * @pending_ibis: count the number of pending IBIs. Each pending IBI has its + * work element queued to the controller workqueue + * @max_payload_len: maximum payload length for an IBI coming from this device. + * this value is specified when calling + * i3c_device_request_ibi() and should not change at run + * time. 
All messages IBIs exceeding this limit should be + * rejected by the master + * @num_slots: number of IBI slots reserved for this device + * @enabled: reflect the IBI status + * @handler: IBI handler specified at i3c_device_request_ibi() call time. This + * handler will be called from the controller workqueue, and as such + * is allowed to sleep (though it is recommended to process the IBI + * as fast as possible to not stall processing of other IBIs queued + * on the same workqueue). + * New I3C messages can be sent from the IBI handler + * + * The &struct_i3c_device_ibi_info object is allocated when + * i3c_device_request_ibi() is called and attached to a specific device. This + * object is here to manage IBIs coming from a specific I3C device. + * + * Note that this structure is the generic view of the IBI management + * infrastructure. I3C master drivers may have their own internal + * representation which they can associate to the device using + * controller-private data. + */ +struct i3c_device_ibi_info { + struct completion all_ibis_handled; + atomic_t pending_ibis; + unsigned int max_payload_len; + unsigned int num_slots; + unsigned int enabled; + void (*handler)(struct i3c_device *dev, + const struct i3c_ibi_payload *payload); +}; + +/** + * struct i3c_dev_boardinfo - I3C device board information + * @node: used to insert the boardinfo object in the I3C boardinfo list + * @init_dyn_addr: initial dynamic address requested by the FW. We provide no + * guarantee that the device will end up using this address, + * but try our best to assign this specific address to the + * device + * @static_addr: static address the I3C device listen on before it's been + * assigned a dynamic address by the master. Will be used during + * bus initialization to assign it a specific dynamic address + * before starting DAA (Dynamic Address Assignment) + * @pid: I3C Provisional ID exposed by the device. 
This is a unique identifier + * that may be used to attach boardinfo to i3c_dev_desc when the device + * does not have a static address + * @of_node: optional DT node in case the device has been described in the DT + * + * This structure is used to attach board-level information to an I3C device. + * Not all I3C devices connected on the bus will have a boardinfo. It's only + * needed if you want to attach extra resources to a device or assign it a + * specific dynamic address. + */ +struct i3c_dev_boardinfo { + struct list_head node; + u8 init_dyn_addr; + u8 static_addr; + u64 pid; + struct device_node *of_node; +}; + +/** + * struct i3c_dev_desc - I3C device descriptor + * @common: common part of the I3C device descriptor + * @info: I3C device information. Will be automatically filled when you create + * your device with i3c_master_add_i3c_dev_locked() + * @ibi_lock: lock used to protect the &struct_i3c_dev_desc->ibi + * @ibi: IBI info attached to a device. Should be NULL until + * i3c_device_request_ibi() is called + * @dev: pointer to the I3C device object exposed to I3C device drivers. This + * should never be accessed from I3C master controller drivers. Only core + * code should manipulate it when updating the dev <-> desc link or + * when propagating IBI events to the driver + * @boardinfo: pointer to the boardinfo attached to this I3C device + * + * Internal representation of an I3C device. This object is only used by the + * core and passed to I3C master controller drivers when they're requested to + * do some operations on the device. + * The core maintains the link between the internal I3C dev descriptor and the + * object exposed to the I3C device drivers (&struct_i3c_device). 
+ */ +struct i3c_dev_desc { + struct i3c_i2c_dev_desc common; + struct i3c_device_info info; + struct mutex ibi_lock; + struct i3c_device_ibi_info *ibi; + struct i3c_device *dev; + const struct i3c_dev_boardinfo *boardinfo; +}; + +/** + * struct i3c_device - I3C device object + * @dev: device object to register the I3C dev to the device model + * @desc: pointer to an i3c device descriptor object. This link is updated + * every time the I3C device is rediscovered with a different dynamic + * address assigned + * @bus: I3C bus this device is attached to + * + * I3C device object exposed to I3C device drivers. The core takes care of linking + * this object to the relevant &struct_i3c_dev_desc one. + * All I3C devs on the I3C bus are represented, including I3C masters. For each + * of them, we have an instance of &struct i3c_device. + */ +struct i3c_device { + struct device dev; + struct i3c_dev_desc *desc; + struct i3c_bus *bus; +}; + +/* + * The I3C specification says the maximum number of devices connected on the + * bus is 11, but this number depends on external parameters like trace length, + * capacitive load per Device, and the types of Devices present on the Bus. + * I3C master can also have limitations, so this number is just here as a + * reference and should be adjusted on a per-controller/per-board basis. + */ +#define I3C_BUS_MAX_DEVS 11 + +#define I3C_BUS_MAX_I3C_SCL_RATE 12900000 +#define I3C_BUS_TYP_I3C_SCL_RATE 12500000 +#define I3C_BUS_I2C_FM_PLUS_SCL_RATE 1000000 +#define I3C_BUS_I2C_FM_SCL_RATE 400000 +#define I3C_BUS_TLOW_OD_MIN_NS 200 + +/** + * enum i3c_bus_mode - I3C bus mode + * @I3C_BUS_MODE_PURE: only I3C devices are connected to the bus. No limitation + * expected + * @I3C_BUS_MODE_MIXED_FAST: I2C devices with 50ns spike filter are present on + * the bus. 
The only impact in this mode is that the + * high SCL pulse has to stay below 50ns to trick I2C + * devices when transmitting I3C frames + * @I3C_BUS_MODE_MIXED_SLOW: I2C devices without 50ns spike filter are present + * on the bus + */ +enum i3c_bus_mode { + I3C_BUS_MODE_PURE, + I3C_BUS_MODE_MIXED_FAST, + I3C_BUS_MODE_MIXED_SLOW, +}; + +/** + * enum i3c_addr_slot_status - I3C address slot status + * @I3C_ADDR_SLOT_FREE: address is free + * @I3C_ADDR_SLOT_RSVD: address is reserved + * @I3C_ADDR_SLOT_I2C_DEV: address is assigned to an I2C device + * @I3C_ADDR_SLOT_I3C_DEV: address is assigned to an I3C device + * @I3C_ADDR_SLOT_STATUS_MASK: address slot mask + * + * On an I3C bus, addresses are assigned dynamically, and we need to know which + * addresses are free to use and which ones are already assigned. + * + * Addresses marked as reserved are those reserved by the I3C protocol + * (broadcast address, ...). + */ +enum i3c_addr_slot_status { + I3C_ADDR_SLOT_FREE, + I3C_ADDR_SLOT_RSVD, + I3C_ADDR_SLOT_I2C_DEV, + I3C_ADDR_SLOT_I3C_DEV, + I3C_ADDR_SLOT_STATUS_MASK = 3, +}; + +/** + * struct i3c_bus - I3C bus object + * @cur_master: I3C master currently driving the bus. Since I3C is multi-master + * this can change over the time. Will be used to let a master + * know whether it needs to request bus ownership before sending + * a frame or not + * @id: bus ID. 
Assigned by the framework when registering the bus + * @addrslots: a bitmap with 2-bits per-slot to encode the address status and + * ease the DAA (Dynamic Address Assignment) procedure (see + * &enum i3c_addr_slot_status) + * @mode: bus mode (see &enum i3c_bus_mode) + * @scl_rate.i3c: maximum rate for the clock signal when doing I3C SDR/priv + * transfers + * @scl_rate.i2c: maximum rate for the clock signal when doing I2C transfers + * @scl_rate: SCL signal rate for I3C and I2C mode + * @devs.i3c: contains a list of I3C device descriptors representing I3C + * devices connected on the bus and successfully attached to the + * I3C master + * @devs.i2c: contains a list of I2C device descriptors representing I2C + * devices connected on the bus and successfully attached to the + * I3C master + * @devs: 2 lists containing all I3C/I2C devices connected to the bus + * @lock: read/write lock on the bus. This is needed to protect against + * operations that have an impact on the whole bus and the devices + * connected to it. For example, when asking slaves to drop their + * dynamic address (RSTDAA CCC), we need to make sure no one is trying + * to send I3C frames to these devices. + * Note that this lock does not protect against concurrency between + * devices: several drivers can send different I3C/I2C frames through + * the same master in parallel. It is the responsibility of the + * master to guarantee that frames are actually sent sequentially and + * not interlaced + * + * The I3C bus is represented with its own object and not implicitly described + * by the I3C master to cope with the multi-master functionality, where one bus + * can be shared amongst several masters, each of them requesting bus ownership + * when they need to. 
+ */ +struct i3c_bus { + struct i3c_dev_desc *cur_master; + int id; + unsigned long addrslots[((I2C_MAX_ADDR + 1) * 2) / BITS_PER_LONG]; + enum i3c_bus_mode mode; + struct { + unsigned long i3c; + unsigned long i2c; + } scl_rate; + struct { + struct list_head i3c; + struct list_head i2c; + } devs; + struct rw_semaphore lock; +}; + +/** + * struct i3c_master_controller_ops - I3C master methods + * @bus_init: hook responsible for the I3C bus initialization. You should at + * least call i3c_master_set_info() from there and set the bus mode. + * You can also put controller specific initialization in there. + * This method is mandatory. + * @bus_cleanup: cleanup everything done in + * &i3c_master_controller_ops->bus_init(). + * This method is optional. + * @attach_i3c_dev: called every time an I3C device is attached to the bus. It + * can be after a DAA or when a device is statically declared + * by the FW, in which case it will only have a static address + * and the dynamic address will be 0. + * When this function is called, device information has not + * been retrieved yet. + * This is a good place to attach master controller specific + * data to I3C devices. + * This method is optional. + * @reattach_i3c_dev: called every time an I3C device has its address + * changed. It can be because the device has been powered + * down and has lost its address, or it can happen when a + * device had a static address and has been assigned a + * dynamic address with SETDASA. + * This method is optional. + * @detach_i3c_dev: called when an I3C device is detached from the bus. Usually + * happens when the master device is unregistered. + * This method is optional. + * @do_daa: do a DAA (Dynamic Address Assignment) procedure. This procedure + * should send an ENTDAA CCC command and then add all devices + * discovered during the DAA using i3c_master_add_i3c_dev_locked(). 
+ * All devices added with i3c_master_add_i3c_dev_locked() will then be + * attached or re-attached to the controller. + * This method is mandatory. + * @supports_ccc_cmd: should return true if the CCC command is supported, false + * otherwise. + * This method is optional, if not provided the core assumes + * all CCC commands are supported. + * @send_ccc_cmd: send a CCC command + * This method is mandatory. + * @priv_xfers: do one or several private I3C SDR transfers + * This method is mandatory. + * @attach_i2c_dev: called every time an I2C device is attached to the bus. + * This is a good place to attach master controller specific + * data to I2C devices. + * This method is optional. + * @detach_i2c_dev: called when an I2C device is detached from the bus. Usually + * happens when the master device is unregistered. + * This method is optional. + * @i2c_xfers: do one or several I2C transfers. Note that, unlike i3c + * transfers, the core does not guarantee that buffers attached to + * the transfers are DMA-safe. If drivers want to have DMA-safe + * buffers, they should use the i2c_get_dma_safe_msg_buf() + * and i2c_put_dma_safe_msg_buf() helpers provided by the I2C + * framework. + * This method is mandatory. + * @i2c_funcs: expose the supported I2C functionalities. + * This method is mandatory. + * @request_ibi: attach an IBI handler to an I3C device. This implies defining + * an IBI handler and the constraints of the IBI (maximum payload + * length and number of pre-allocated slots). + * Some controllers support fewer IBI-capable devices than regular + * devices, so this method might return -%EBUSY if there's no + * more space for an extra IBI registration + * This method is optional. + * @free_ibi: free an IBI previously requested with ->request_ibi(). The IBI + * should have been disabled with ->disable_ibi() prior to that + * This method is mandatory only if ->request_ibi is not NULL. + * @enable_ibi: enable the IBI. 
Only valid if ->request_ibi() has been called + * prior to ->enable_ibi(). The controller should first enable + * the IBI on the controller end (for example, unmask the hardware + * IRQ) and then send the ENEC CCC command (with the IBI flag set) + * to the I3C device. + * This method is mandatory only if ->request_ibi is not NULL. + * @disable_ibi: disable an IBI. First send the DISEC CCC command with the IBI + * flag set and then deactivate the hardware IRQ on the + * controller end. + * This method is mandatory only if ->request_ibi is not NULL. + * @recycle_ibi_slot: recycle an IBI slot. Called every time an IBI has been + * processed by its handler. The IBI slot should be put back + * in the IBI slot pool so that the controller can re-use it + * for a future IBI + * This method is mandatory only if ->request_ibi is not + * NULL. + */ +struct i3c_master_controller_ops { + int (*bus_init)(struct i3c_master_controller *master); + void (*bus_cleanup)(struct i3c_master_controller *master); + int (*attach_i3c_dev)(struct i3c_dev_desc *dev); + int (*reattach_i3c_dev)(struct i3c_dev_desc *dev, u8 old_dyn_addr); + void (*detach_i3c_dev)(struct i3c_dev_desc *dev); + int (*do_daa)(struct i3c_master_controller *master); + bool (*supports_ccc_cmd)(struct i3c_master_controller *master, + const struct i3c_ccc_cmd *cmd); + int (*send_ccc_cmd)(struct i3c_master_controller *master, + struct i3c_ccc_cmd *cmd); + int (*priv_xfers)(struct i3c_dev_desc *dev, + struct i3c_priv_xfer *xfers, + int nxfers); + int (*attach_i2c_dev)(struct i2c_dev_desc *dev); + void (*detach_i2c_dev)(struct i2c_dev_desc *dev); + int (*i2c_xfers)(struct i2c_dev_desc *dev, + const struct i2c_msg *xfers, int nxfers); + u32 (*i2c_funcs)(struct i3c_master_controller *master); + int (*request_ibi)(struct i3c_dev_desc *dev, + const struct i3c_ibi_setup *req); + void (*free_ibi)(struct i3c_dev_desc *dev); + int (*enable_ibi)(struct i3c_dev_desc *dev); + int (*disable_ibi)(struct i3c_dev_desc *dev); + void 
(*recycle_ibi_slot)(struct i3c_dev_desc *dev, + struct i3c_ibi_slot *slot); +}; + +/** + * struct i3c_master_controller - I3C master controller object + * @dev: device to be registered to the device-model + * @this: an I3C device object representing this master. This device will be + * added to the list of I3C devs available on the bus + * @i2c: I2C adapter used for backward compatibility. This adapter is + * registered to the I2C subsystem to be as transparent as possible to + * existing I2C drivers + * @ops: master operations. See &struct i3c_master_controller_ops + * @secondary: true if the master is a secondary master + * @init_done: true when the bus initialization is done + * @boardinfo.i3c: list of I3C boardinfo objects + * @boardinfo.i2c: list of I2C boardinfo objects + * @boardinfo: board-level information attached to devices connected on the bus + * @bus: I3C bus exposed by this master + * @wq: workqueue used to execute IBI handlers. Can also be used by master + * drivers if they need to postpone operations that need to take place + * in a thread context. Typical examples are Hot Join processing which + * requires taking the bus lock in maintenance, which in turn, can only + * be done from a sleep-able context + * + * A &struct i3c_master_controller has to be registered to the I3C subsystem + * through i3c_master_register(). None of &struct i3c_master_controller fields + * should be set manually, just pass appropriate values to + * i3c_master_register(). 
+ */ +struct i3c_master_controller { + struct device dev; + struct i3c_dev_desc *this; + struct i2c_adapter i2c; + const struct i3c_master_controller_ops *ops; + unsigned int secondary : 1; + unsigned int init_done : 1; + struct { + struct list_head i3c; + struct list_head i2c; + } boardinfo; + struct i3c_bus bus; + struct workqueue_struct *wq; +}; + +/** + * i3c_bus_for_each_i2cdev() - iterate over all I2C devices present on the bus + * @bus: the I3C bus + * @dev: an I2C device descriptor pointer updated to point to the current slot + * at each iteration of the loop + * + * Iterate over all I2C devs present on the bus. + */ +#define i3c_bus_for_each_i2cdev(bus, dev) \ + list_for_each_entry(dev, &(bus)->devs.i2c, common.node) + +/** + * i3c_bus_for_each_i3cdev() - iterate over all I3C devices present on the bus + * @bus: the I3C bus + * @dev: an I3C device descriptor pointer updated to point to the current slot + * at each iteration of the loop + * + * Iterate over all I3C devs present on the bus. 
+ */ +#define i3c_bus_for_each_i3cdev(bus, dev) \ + list_for_each_entry(dev, &(bus)->devs.i3c, common.node) + +int i3c_master_do_i2c_xfers(struct i3c_master_controller *master, + const struct i2c_msg *xfers, + int nxfers); + +int i3c_master_disec_locked(struct i3c_master_controller *master, u8 addr, + u8 evts); +int i3c_master_enec_locked(struct i3c_master_controller *master, u8 addr, + u8 evts); +int i3c_master_entdaa_locked(struct i3c_master_controller *master); +int i3c_master_defslvs_locked(struct i3c_master_controller *master); + +int i3c_master_get_free_addr(struct i3c_master_controller *master, + u8 start_addr); + +int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master, + u8 addr); +int i3c_master_do_daa(struct i3c_master_controller *master); + +int i3c_master_set_info(struct i3c_master_controller *master, + const struct i3c_device_info *info); + +int i3c_master_register(struct i3c_master_controller *master, + struct device *parent, + const struct i3c_master_controller_ops *ops, + bool secondary); +int i3c_master_unregister(struct i3c_master_controller *master); + +/** + * i3c_dev_get_master_data() - get master private data attached to an I3C + * device descriptor + * @dev: the I3C device descriptor to get private data from + * + * Return: the private data previously attached with i3c_dev_set_master_data() + * or NULL if no data has been attached to the device. + */ +static inline void *i3c_dev_get_master_data(const struct i3c_dev_desc *dev) +{ + return dev->common.master_priv; +} + +/** + * i3c_dev_set_master_data() - attach master private data to an I3C device + * descriptor + * @dev: the I3C device descriptor to attach private data to + * @data: private data + * + * This function allows a master controller to attach per-device private data + * which can then be retrieved with i3c_dev_get_master_data(). 
+ */ +static inline void i3c_dev_set_master_data(struct i3c_dev_desc *dev, + void *data) +{ + dev->common.master_priv = data; +} + +/** + * i2c_dev_get_master_data() - get master private data attached to an I2C + * device descriptor + * @dev: the I2C device descriptor to get private data from + * + * Return: the private data previously attached with i2c_dev_set_master_data() + * or NULL if no data has been attached to the device. + */ +static inline void *i2c_dev_get_master_data(const struct i2c_dev_desc *dev) +{ + return dev->common.master_priv; +} + +/** + * i2c_dev_set_master_data() - attach master private data to an I2C device + * descriptor + * @dev: the I2C device descriptor to attach private data to + * @data: private data + * + * This function allows a master controller to attach per-device private data + * which can then be retrieved with i2c_dev_get_master_data(). + */ +static inline void i2c_dev_set_master_data(struct i2c_dev_desc *dev, + void *data) +{ + dev->common.master_priv = data; +} + +/** + * i3c_dev_get_master() - get master used to communicate with a device + * @dev: I3C dev + * + * Return: the master controller driving @dev + */ +static inline struct i3c_master_controller * +i3c_dev_get_master(struct i3c_dev_desc *dev) +{ + return dev->common.master; +} + +/** + * i2c_dev_get_master() - get master used to communicate with a device + * @dev: I2C dev + * + * Return: the master controller driving @dev + */ +static inline struct i3c_master_controller * +i2c_dev_get_master(struct i2c_dev_desc *dev) +{ + return dev->common.master; +} + +/** + * i3c_master_get_bus() - get the bus attached to a master + * @master: master object + * + * Return: the I3C bus @master is connected to + */ +static inline struct i3c_bus * +i3c_master_get_bus(struct i3c_master_controller *master) +{ + return &master->bus; +} + +struct i3c_generic_ibi_pool; + +struct i3c_generic_ibi_pool * +i3c_generic_ibi_alloc_pool(struct i3c_dev_desc *dev, + const struct i3c_ibi_setup 
*req); +void i3c_generic_ibi_free_pool(struct i3c_generic_ibi_pool *pool); + +struct i3c_ibi_slot * +i3c_generic_ibi_get_free_slot(struct i3c_generic_ibi_pool *pool); +void i3c_generic_ibi_recycle_slot(struct i3c_generic_ibi_pool *pool, + struct i3c_ibi_slot *slot); + +void i3c_master_queue_ibi(struct i3c_dev_desc *dev, struct i3c_ibi_slot *slot); + +struct i3c_ibi_slot *i3c_master_get_free_ibi_slot(struct i3c_dev_desc *dev); + +#endif /* I3C_MASTER_H */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 01797cb4587e..cbd94df31743 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -448,6 +448,23 @@ struct pci_epf_device_id { kernel_ulong_t driver_data; }; +/* i3c */ + +#define I3C_MATCH_DCR 0x1 +#define I3C_MATCH_MANUF 0x2 +#define I3C_MATCH_PART 0x4 +#define I3C_MATCH_EXTRA_INFO 0x8 + +struct i3c_device_id { + __u8 match_flags; + __u8 dcr; + __u16 manuf_id; + __u16 part_id; + __u16 extra_info; + + const void *data; +}; + /* spi */ #define SPI_NAME_SIZE 32 -- cgit v1.2.3 From daedaa33d9c578220b311fbad3748d3ecd5a8f66 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Nov 2018 14:40:08 +0800 Subject: iommu/vtd: Cleanup dma_remapping.h header Commit e61d98d8dad00 ("x64, x2apic/intr-remap: Intel vt-d, IOMMU code reorganization") moved dma_remapping.h from drivers/pci/ to current place. It is entirely VT-d specific, but uses a generic name. This merges dma_remapping.h with include/linux/intel-iommu.h and removes dma_remapping.h as the result. 
Cc: Ashok Raj Cc: Jacob Pan Cc: Sohil Mehta Suggested-by: Christoph Hellwig Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Liu, Yi L Signed-off-by: Joerg Roedel --- arch/x86/kernel/tboot.c | 2 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- drivers/misc/mic/scif/scif_rma.c | 2 +- drivers/misc/mic/scif/scif_rma.h | 2 +- include/linux/dma_remapping.h | 58 ------------------------------ include/linux/intel-iommu.h | 49 +++++++++++++++++++++++-- 8 files changed, 53 insertions(+), 66 deletions(-) delete mode 100644 include/linux/dma_remapping.h (limited to 'include/linux') diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index a2486f444073..6e5ef8fb8a02 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -19,7 +19,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1aaccbe7e1de..1c5d04f002bc 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -26,7 +26,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 23d8008a93bb..389d6618c2d5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include /* Primary plane formats for gen <= 3 */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 61a84b958d67..c3e80a3b09fc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" #define VMWGFX_CHIP_SVGAII 0 diff --git a/drivers/misc/mic/scif/scif_rma.c 
b/drivers/misc/mic/scif/scif_rma.c index c824329f7012..b441f6b0c743 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -15,7 +15,7 @@ * Intel SCIF driver. * */ -#include +#include #include #include #include diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h index fa6722279196..d90a06d4e93b 100644 --- a/drivers/misc/mic/scif/scif_rma.h +++ b/drivers/misc/mic/scif/scif_rma.h @@ -53,7 +53,7 @@ #ifndef SCIF_RMA_H #define SCIF_RMA_H -#include +#include #include #include "../bus/scif_bus.h" diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h deleted file mode 100644 index 21b3e7d33d68..000000000000 --- a/include/linux/dma_remapping.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _DMA_REMAPPING_H -#define _DMA_REMAPPING_H - -/* - * VT-d hardware uses 4KiB page size regardless of host page size. - */ -#define VTD_PAGE_SHIFT (12) -#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) -#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) -#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) - -#define VTD_STRIDE_SHIFT (9) -#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) - -#define DMA_PTE_READ (1) -#define DMA_PTE_WRITE (2) -#define DMA_PTE_LARGE_PAGE (1 << 7) -#define DMA_PTE_SNP (1 << 11) - -#define CONTEXT_TT_MULTI_LEVEL 0 -#define CONTEXT_TT_DEV_IOTLB 1 -#define CONTEXT_TT_PASS_THROUGH 2 -/* Extended context entry types */ -#define CONTEXT_TT_PT_PASID 4 -#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 -#define CONTEXT_TT_MASK (7ULL << 2) - -#define CONTEXT_DINVE (1ULL << 8) -#define CONTEXT_PRS (1ULL << 9) -#define CONTEXT_PASIDE (1ULL << 11) - -struct intel_iommu; -struct dmar_domain; -struct root_entry; - - -#ifdef CONFIG_INTEL_IOMMU -extern int iommu_calculate_agaw(struct intel_iommu *iommu); -extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); -extern int dmar_disabled; -extern int intel_iommu_enabled; -extern int 
intel_iommu_tboot_noforce; -#else -static inline int iommu_calculate_agaw(struct intel_iommu *iommu) -{ - return 0; -} -static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) -{ - return 0; -} -#define dmar_disabled (1) -#define intel_iommu_enabled (0) -#endif - - -#endif diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b0ae25837361..a58bc05d6798 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -37,9 +36,36 @@ #include /* - * Intel IOMMU register specification per version 1.0 public spec. + * VT-d hardware uses 4KiB page size regardless of host page size. */ +#define VTD_PAGE_SHIFT (12) +#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) +#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) +#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) + +#define VTD_STRIDE_SHIFT (9) +#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) +#define DMA_PTE_LARGE_PAGE (1 << 7) +#define DMA_PTE_SNP (1 << 11) + +#define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_DEV_IOTLB 1 +#define CONTEXT_TT_PASS_THROUGH 2 +/* Extended context entry types */ +#define CONTEXT_TT_PT_PASID 4 +#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 +#define CONTEXT_TT_MASK (7ULL << 2) + +#define CONTEXT_DINVE (1ULL << 8) +#define CONTEXT_PRS (1ULL << 9) +#define CONTEXT_PASIDE (1ULL << 11) +/* + * Intel IOMMU register specification per version 1.0 public spec. 
+ */ #define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ #define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ #define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ @@ -632,4 +658,23 @@ bool context_present(struct context_entry *context); struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u8 devfn, int alloc); +#ifdef CONFIG_INTEL_IOMMU +extern int iommu_calculate_agaw(struct intel_iommu *iommu); +extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); +extern int dmar_disabled; +extern int intel_iommu_enabled; +extern int intel_iommu_tboot_noforce; +#else +static inline int iommu_calculate_agaw(struct intel_iommu *iommu) +{ + return 0; +} +static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return 0; +} +#define dmar_disabled (1) +#define intel_iommu_enabled (0) +#endif + #endif -- cgit v1.2.3 From 05f415715ce45da07a0b1a5eac842765b733157f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 16 Oct 2018 04:12:58 -0700 Subject: rcu: Speed up expedited GPs when interrupting RCU reader In PREEMPT kernels, an expedited grace period might send an IPI to a CPU that is executing an RCU read-side critical section. In that case, it would be nice if the rcu_read_unlock() directly interacted with the RCU core code to immediately report the quiescent state. And this does happen in the case where the reader has been preempted. But it would also be a nice performance optimization if immediate reporting also happened in the preemption-free case. This commit therefore adds an ->exp_hint field to the task_struct structure's ->rcu_read_unlock_special field. The IPI handler sets this hint when it has interrupted an RCU read-side critical section, and this causes the outermost rcu_read_unlock() call to invoke rcu_read_unlock_special(), which, if preemption is enabled, reports the quiescent state immediately. 
If preemption is disabled, then the report is required to be deferred until preemption (or bottom halves or interrupts or whatever) is re-enabled. Because this is a hint, it does nothing for more complicated cases. For example, if the IPI interrupts an RCU reader, but interrupts are disabled across the rcu_read_unlock(), but another rcu_read_lock() is executed before interrupts are re-enabled, the hint will already have been cleared. If you do crazy things like this, reporting will be deferred until some later RCU_SOFTIRQ handler, context switch, cond_resched(), or similar. Reported-by: Joel Fernandes Signed-off-by: Paul E. McKenney Acked-by: Joel Fernandes (Google) --- include/linux/sched.h | 4 +++- kernel/rcu/tree_exp.h | 4 +++- kernel/rcu/tree_plugin.h | 14 +++++++++++--- 3 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a51c13c2b1a0..e4c7b6241088 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -572,8 +572,10 @@ union rcu_special { struct { u8 blocked; u8 need_qs; + u8 exp_hint; /* Hint for performance. */ + u8 pad; /* No garbage from compiler! */ } b; /* Bits. */ - u16 s; /* Set of bits. */ + u32 s; /* Set of bits. 
*/ }; enum perf_event_task_context { diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index e669ccf3751b..928fe5893a57 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -692,8 +692,10 @@ static void sync_rcu_exp_handler(void *unused) */ if (t->rcu_read_lock_nesting > 0) { raw_spin_lock_irqsave_rcu_node(rnp, flags); - if (rnp->expmask & rdp->grpmask) + if (rnp->expmask & rdp->grpmask) { rdp->deferred_qs = true; + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, true); + } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 05915e536336..618956cc7a55 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -642,13 +642,21 @@ static void rcu_read_unlock_special(struct task_struct *t) local_irq_save(flags); irqs_were_disabled = irqs_disabled_flags(flags); - if ((preempt_bh_were_disabled || irqs_were_disabled) && - t->rcu_read_unlock_special.b.blocked) { + if (preempt_bh_were_disabled || irqs_were_disabled) { + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); /* Need to defer quiescent state until everything is enabled. */ - raise_softirq_irqoff(RCU_SOFTIRQ); + if (irqs_were_disabled) { + /* Enabling irqs does not reschedule, so... */ + raise_softirq_irqoff(RCU_SOFTIRQ); + } else { + /* Enabling BH or preempt does reschedule, so... */ + set_tsk_need_resched(current); + set_preempt_need_resched(); + } local_irq_restore(flags); return; } + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); rcu_preempt_deferred_qs_irqrestore(t, flags); } -- cgit v1.2.3 From 27e95603f4dfec470c6d26bea5174aa71b30e971 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 8 Nov 2018 21:10:10 +0200 Subject: net/mlx5: Add interface to hold and release core resources Sometimes upper layers may want to prevent the destruction of a core resource for a period of time while work on that resource is in progress. Add API to support this. 
Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 16 ++++++++++++++++ include/linux/mlx5/qp.h | 5 +++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 690dc1dd9391..cba4a435043a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -670,3 +670,19 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); + +struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, + int res_num, + enum mlx5_res_type res_type) +{ + u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN); + + return mlx5_get_rsc(dev, rsn); +} +EXPORT_SYMBOL_GPL(mlx5_core_res_hold); + +void mlx5_core_res_put(struct mlx5_core_rsc_common *res) +{ + mlx5_core_put_rsc(res); +} +EXPORT_SYMBOL_GPL(mlx5_core_res_put); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index fbe322c966bc..b26ea9077384 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -596,6 +596,11 @@ int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id); int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, int reset, void *out, int out_size); +struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, + int res_num, + enum mlx5_res_type res_type); +void mlx5_core_res_put(struct mlx5_core_rsc_common *res); + static inline const char *mlx5_qp_type_str(int type) { switch (type) { -- cgit v1.2.3 From c99fefea2cc907c98e7f39b3571bb697c8d42106 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 8 Nov 2018 21:10:11 +0200 Subject: net/mlx5: Enumerate page fault types Give meaningful names to type of WQE page faults. 
Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b4c0457fbebd..e326524bafcc 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -212,6 +212,13 @@ enum { MLX5_PFAULT_SUBTYPE_RDMA = 1, }; +enum wqe_page_fault_type { + MLX5_WQE_PF_TYPE_RMP = 0, + MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE = 1, + MLX5_WQE_PF_TYPE_RESP = 2, + MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC = 3, +}; + enum { MLX5_PERM_LOCAL_READ = 1 << 2, MLX5_PERM_LOCAL_WRITE = 1 << 3, -- cgit v1.2.3 From 03f39f47dc86fc4defbf9b97f8417f192d1ccba6 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sat, 10 Nov 2018 21:25:44 +0100 Subject: rtc: class: remove devm_rtc_device_unregister devm_rtc_device_unregister is not used by any driver and should not be used by any new driver. Signed-off-by: Alexandre Belloni --- drivers/rtc/class.c | 26 -------------------------- include/linux/rtc.h | 2 -- 2 files changed, 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 3b43787f154b..6d364085bd86 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -380,13 +380,6 @@ static void devm_rtc_device_release(struct device *dev, void *res) rtc_device_unregister(rtc); } -static int devm_rtc_device_match(struct device *dev, void *res, void *data) -{ - struct rtc **r = res; - - return *r == data; -} - /** * devm_rtc_device_register - resource managed rtc_device_register() * @dev: the device to register @@ -424,25 +417,6 @@ struct rtc_device *devm_rtc_device_register(struct device *dev, } EXPORT_SYMBOL_GPL(devm_rtc_device_register); -/** - * devm_rtc_device_unregister - resource managed devm_rtc_device_unregister() - * @dev: the device to unregister - * @rtc: the RTC class device to unregister - * - * Deallocated a rtc allocated with devm_rtc_device_register(). 
Normally this - * function will not need to be called and the resource management code will - * ensure that the resource is freed. - */ -void devm_rtc_device_unregister(struct device *dev, struct rtc_device *rtc) -{ - int rc; - - rc = devres_release(dev, devm_rtc_device_release, - devm_rtc_device_match, rtc); - WARN_ON(rc); -} -EXPORT_SYMBOL_GPL(devm_rtc_device_unregister); - static void devm_rtc_release_device(struct device *dev, void *res) { struct rtc_device *rtc = *(struct rtc_device **)res; diff --git a/include/linux/rtc.h b/include/linux/rtc.h index c8bb4a2b48c3..311375dbb673 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -173,8 +173,6 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev, struct module *owner); struct rtc_device *devm_rtc_allocate_device(struct device *dev); int __rtc_register_device(struct module *owner, struct rtc_device *rtc); -extern void devm_rtc_device_unregister(struct device *dev, - struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); -- cgit v1.2.3 From cfd74017191036871af68368559330507209777c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 8 Nov 2018 06:39:20 +0000 Subject: mtd: rawnand: sh_flctl: convert to SPDX identifiers This patch updates license to use SPDX-License-Identifier instead of verbose license text. As original license mentioned, it is GPL-2.0 in SPDX. Then, MODULE_LICENSE() should be "GPL v2" instead of "GPL". 
See ${LINUX}/include/linux/module.h "GPL" [GNU Public License v2 or later] "GPL v2" [GNU Public License v2] Signed-off-by: Kuninori Morimoto Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/sh_flctl.c | 17 ++--------------- include/linux/mtd/sh_flctl.h | 16 ++-------------- 2 files changed, 4 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c index 4d20d033de7b..30edcc77b111 100644 --- a/drivers/mtd/nand/raw/sh_flctl.c +++ b/drivers/mtd/nand/raw/sh_flctl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * SuperH FLCTL nand controller * @@ -5,20 +6,6 @@ * Copyright (c) 2008 Atom Create Engineering Co., Ltd. * * Based on fsl_elbc_nand.c, Copyright (c) 2006-2007 Freescale Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #include @@ -1236,7 +1223,7 @@ static struct platform_driver flctl_driver = { module_platform_driver_probe(flctl_driver, flctl_probe); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Yoshihiro Shimoda"); MODULE_DESCRIPTION("SuperH FLCTL driver"); MODULE_ALIAS("platform:sh_flctl"); diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h index c759d403cbc0..78fc2d4218c8 100644 --- a/include/linux/mtd/sh_flctl.h +++ b/include/linux/mtd/sh_flctl.h @@ -1,20 +1,8 @@ -/* +/* SPDX-License-Identifier: GPL-2.0 + * * SuperH FLCTL nand controller * * Copyright © 2008 Renesas Solutions Corp. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __SH_FLCTL_H__ -- cgit v1.2.3 From 95adc6b410b7aa895dcf5ed9cb7dc4a20a3d5c5a Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Fri, 19 Oct 2018 21:23:07 +0300 Subject: tpm: use u32 instead of int for PCR index The TPM specs defines PCR index as a positive number, and there is no reason to use a signed number. It is also a possible security issue as currently no functions check for a negative index, which may become a large number when converted to u32. 
Adjust the API to use u32 instead of int in all PCR related functions. Signed-off-by: Tomas Winkler Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm-interface.c | 6 +++--- drivers/char/tpm/tpm-sysfs.c | 2 +- drivers/char/tpm/tpm.h | 10 +++++----- drivers/char/tpm/tpm1-cmd.c | 6 +++--- drivers/char/tpm/tpm2-cmd.c | 5 ++--- include/linux/tpm.h | 11 +++++++---- security/integrity/ima/ima_crypto.c | 5 +++-- 7 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 014354e3dd1e..1ba033b13ab2 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -38,7 +38,7 @@ * recently changed pcr on suspend, so force the flush * with an extend to the selected _unused_ non-volatile pcr. */ -static int tpm_suspend_pcr; +static u32 tpm_suspend_pcr; module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644); MODULE_PARM_DESC(suspend_pcr, "PCR to use for dummy writes to facilitate flush on suspend."); @@ -454,7 +454,7 @@ EXPORT_SYMBOL_GPL(tpm_is_tpm2); * * Return: same as with tpm_transmit_cmd() */ -int tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf) +int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf) { int rc; @@ -484,7 +484,7 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read); * * Return: same as with tpm_transmit_cmd() */ -int tpm_pcr_extend(struct tpm_chip *chip, int pcr_idx, const u8 *hash) +int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, const u8 *hash) { int rc; struct tpm2_digest digest_list[ARRAY_SIZE(chip->active_banks)]; diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c index 96fc7433c57d..b88e08ec2c59 100644 --- a/drivers/char/tpm/tpm-sysfs.c +++ b/drivers/char/tpm/tpm-sysfs.c @@ -102,7 +102,7 @@ static ssize_t pcrs_show(struct device *dev, struct device_attribute *attr, cap_t cap; u8 digest[TPM_DIGEST_SIZE]; ssize_t rc; - int i, 
j, num_pcrs; + u32 i, j, num_pcrs; char *str = buf; struct tpm_chip *chip = to_tpm_chip(dev); diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index e0778d19da98..f27d1f38a93d 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -509,14 +509,14 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, struct tpm_space *space, int tpm_get_timeouts(struct tpm_chip *); int tpm_auto_startup(struct tpm_chip *chip); -int tpm1_pm_suspend(struct tpm_chip *chip, int tpm_suspend_pcr); +int tpm1_pm_suspend(struct tpm_chip *chip, u32 tpm_suspend_pcr); int tpm1_auto_startup(struct tpm_chip *chip); int tpm1_do_selftest(struct tpm_chip *chip); int tpm1_get_timeouts(struct tpm_chip *chip); unsigned long tpm1_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal); -int tpm1_pcr_extend(struct tpm_chip *chip, int pcr_idx, const u8 *hash, +int tpm1_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, const u8 *hash, const char *log_msg); -int tpm1_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf); +int tpm1_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf); ssize_t tpm1_getcap(struct tpm_chip *chip, u32 subcap_id, cap_t *cap, const char *desc, size_t min_cap_length); int tpm1_get_random(struct tpm_chip *chip, u8 *out, size_t max); @@ -558,8 +558,8 @@ static inline u32 tpm2_rc_value(u32 rc) } int tpm2_get_timeouts(struct tpm_chip *chip); -int tpm2_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf); -int tpm2_pcr_extend(struct tpm_chip *chip, int pcr_idx, u32 count, +int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf); +int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count, struct tpm2_digest *digests); int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max); void tpm2_flush_context_cmd(struct tpm_chip *chip, u32 handle, diff --git a/drivers/char/tpm/tpm1-cmd.c b/drivers/char/tpm/tpm1-cmd.c index 6b04648f8184..6f306338953b 100644 --- a/drivers/char/tpm/tpm1-cmd.c +++ b/drivers/char/tpm/tpm1-cmd.c @@ -449,7 
+449,7 @@ int tpm1_get_timeouts(struct tpm_chip *chip) } #define TPM_ORD_PCR_EXTEND 20 -int tpm1_pcr_extend(struct tpm_chip *chip, int pcr_idx, const u8 *hash, +int tpm1_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, const u8 *hash, const char *log_msg) { struct tpm_buf buf; @@ -572,7 +572,7 @@ out: } #define TPM_ORD_PCRREAD 21 -int tpm1_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf) +int tpm1_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf) { struct tpm_buf buf; int rc; @@ -729,7 +729,7 @@ out: * * 0 on success, * * < 0 on error. */ -int tpm1_pm_suspend(struct tpm_chip *chip, int tpm_suspend_pcr) +int tpm1_pm_suspend(struct tpm_chip *chip, u32 tpm_suspend_pcr) { u8 dummy_hash[TPM_DIGEST_SIZE] = { 0 }; struct tpm_buf buf; diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 6ca4fc0a0d6f..ae86fb0218ab 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -175,7 +175,7 @@ struct tpm2_pcr_read_out { * * Return: Same as with tpm_transmit_cmd. */ -int tpm2_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf) +int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf) { int rc; struct tpm_buf buf; @@ -225,7 +225,7 @@ struct tpm2_null_auth_area { * * Return: Same as with tpm_transmit_cmd. 
*/ -int tpm2_pcr_extend(struct tpm_chip *chip, int pcr_idx, u32 count, +int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count, struct tpm2_digest *digests) { struct tpm_buf buf; @@ -272,7 +272,6 @@ int tpm2_pcr_extend(struct tpm_chip *chip, int pcr_idx, u32 count, return rc; } - struct tpm2_get_random_out { __be16 size; u8 buffer[TPM_MAX_RNG_DATA]; diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 4609b94142d4..b49a55cf775f 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -53,8 +53,8 @@ struct tpm_class_ops { #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); -extern int tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf); -extern int tpm_pcr_extend(struct tpm_chip *chip, int pcr_idx, const u8 *hash); +extern int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf); +extern int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, const u8 *hash); extern int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen); extern int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max); extern int tpm_seal_trusted(struct tpm_chip *chip, @@ -69,15 +69,18 @@ static inline int tpm_is_tpm2(struct tpm_chip *chip) { return -ENODEV; } -static inline int tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf) + +static inline int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf) { return -ENODEV; } -static inline int tpm_pcr_extend(struct tpm_chip *chip, int pcr_idx, + +static inline int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, const u8 *hash) { return -ENODEV; } + static inline int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen) { return -ENODEV; diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index d9e7728027c6..acf2c7df7145 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -643,7 +643,7 @@ int ima_calc_buffer_hash(const void *buf, loff_t len, 
return calc_buffer_shash(buf, len, hash); } -static void __init ima_pcrread(int idx, u8 *pcr) +static void __init ima_pcrread(u32 idx, u8 *pcr) { if (!ima_tpm_chip) return; @@ -659,7 +659,8 @@ static int __init ima_calc_boot_aggregate_tfm(char *digest, struct crypto_shash *tfm) { u8 pcr_i[TPM_DIGEST_SIZE]; - int rc, i; + int rc; + u32 i; SHASH_DESC_ON_STACK(shash, tfm); shash->tfm = tfm; -- cgit v1.2.3 From 0914ade209c452cff6a29b1c0ae6fff3167fa1d0 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Tue, 9 Oct 2018 23:00:33 +0530 Subject: x86/ima: define arch_ima_get_secureboot Distros are concerned about totally disabling the kexec_load syscall. As a compromise, the kexec_load syscall will only be disabled when CONFIG_KEXEC_VERIFY_SIG is configured and the system is booted with secureboot enabled. This patch defines the new arch specific function called arch_ima_get_secureboot() to retrieve the secureboot state of the system. Signed-off-by: Nayna Jain Suggested-by: Seth Forshee Cc: David Howells Cc: Eric Biederman Cc: Peter Jones Cc: Vivek Goyal Cc: Dave Young Signed-off-by: Mimi Zohar --- arch/x86/kernel/Makefile | 2 ++ arch/x86/kernel/ima_arch.c | 17 +++++++++++++++++ include/linux/ima.h | 9 +++++++++ 3 files changed, 28 insertions(+) create mode 100644 arch/x86/kernel/ima_arch.c (limited to 'include/linux') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8824d01c0c35..f0910a1e1db7 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -150,3 +150,5 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o obj-y += vsmp_64.o endif + +obj-$(CONFIG_IMA) += ima_arch.o diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c new file mode 100644 index 000000000000..bb5a88d2b271 --- /dev/null +++ b/arch/x86/kernel/ima_arch.c @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2018 IBM Corporation + */ +#include +#include + +extern struct boot_params boot_params; + +bool 
arch_ima_get_secureboot(void) +{ + if (efi_enabled(EFI_BOOT) && + (boot_params.secure_boot == efi_secureboot_mode_enabled)) + return true; + else + return false; +} diff --git a/include/linux/ima.h b/include/linux/ima.h index 97914a2833d1..948135fb60f1 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -30,6 +30,15 @@ extern void ima_post_path_mknod(struct dentry *dentry); extern void ima_add_kexec_buffer(struct kimage *image); #endif +#ifdef CONFIG_X86 +extern bool arch_ima_get_secureboot(void); +#else +static inline bool arch_ima_get_secureboot(void) +{ + return false; +} +#endif + #else static inline int ima_bprm_check(struct linux_binprm *bprm) { -- cgit v1.2.3 From 9b076f1c0f4869b838a1b7aa0edb5664d47ec8aa Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Thu, 8 Nov 2018 14:07:14 +1100 Subject: fanotify: introduce new event mask FAN_OPEN_EXEC A new event mask FAN_OPEN_EXEC has been defined so that users have the ability to receive events specifically when a file has been opened with the intent to be executed. Events of FAN_OPEN_EXEC type will be generated when a file has been opened using either execve(), execveat() or uselib() system calls. The feature is implemented within fsnotify_open() by generating the FAN_OPEN_EXEC event type if __FMODE_EXEC is set within file->f_flags. 
Signed-off-by: Matthew Bobrowski Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 3 ++- fs/notify/fsnotify.c | 2 +- include/linux/fanotify.h | 2 +- include/linux/fsnotify.h | 2 ++ include/linux/fsnotify_backend.h | 7 +++++-- include/uapi/linux/fanotify.h | 1 + 6 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index f4f8359bc597..5a1a15f646ba 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -210,8 +210,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); + BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 10); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 11); mask = fanotify_group_event_mask(iter_info, mask, data, data_type); if (!mask) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index d2c34900ae05..b3f58f36a0ab 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -401,7 +401,7 @@ static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index a5a60691e48b..c521e4264f2b 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -37,7 +37,7 @@ /* Events that user can request to be notified on */ #define FANOTIFY_EVENTS (FAN_ACCESS | FAN_MODIFY | \ - FAN_CLOSE | FAN_OPEN) + FAN_CLOSE | FAN_OPEN | FAN_OPEN_EXEC) /* Events that require a permission response from user */ #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index fd1ce10553bf..1fe5ac93b252 100644 --- 
a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -215,6 +215,8 @@ static inline void fsnotify_open(struct file *file) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; + if (file->f_flags & __FMODE_EXEC) + mask |= FS_OPEN_EXEC; fsnotify_parent(path, NULL, mask); fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 135b973e44d1..39d94e62a836 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -38,6 +38,7 @@ #define FS_DELETE 0x00000200 /* Subfile was deleted */ #define FS_DELETE_SELF 0x00000400 /* Self was deleted */ #define FS_MOVE_SELF 0x00000800 /* Self was moved */ +#define FS_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FS_UNMOUNT 0x00002000 /* inode on umount fs */ #define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ @@ -62,7 +63,8 @@ #define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM) + FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM | \ + FS_OPEN_EXEC) #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) @@ -74,7 +76,8 @@ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME) + FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME | \ + FS_OPEN_EXEC) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index b86740d1c50a..d9664fbc905b 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -10,6 +10,7 @@ #define FAN_CLOSE_WRITE 0x00000008 /* Writtable file closed */ #define FAN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ #define 
FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ -- cgit v1.2.3 From a704bba5e3ec3eedddad3c2baa9b7cfa0e2b3388 Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Thu, 8 Nov 2018 14:10:03 +1100 Subject: fsnotify: refactor fsnotify_parent()/fsnotify() paired calls when event is on path A wrapper function fsnotify_path() has been defined to simplify the paired calls to fsnotify_parent()/fsnotify(). All hooks that made use of these paired calls and passed FSNOTIFY_EVENT_PATH have been updated accordingly. Signed-off-by: Matthew Bobrowski Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 1fe5ac93b252..c29f2f072c2c 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -26,13 +26,26 @@ static inline int fsnotify_parent(const struct path *path, struct dentry *dentry return __fsnotify_parent(path, dentry, mask); } +/* + * Simple wrapper to consolidate calls fsnotify_parent()/fsnotify() when + * an event is on a path.
+ */ +static inline int fsnotify_path(struct inode *inode, const struct path *path, + __u32 mask) +{ + int ret = fsnotify_parent(path, NULL, mask); + + if (ret) + return ret; + return fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); +} + /* simple call site for access decisions */ static inline int fsnotify_perm(struct file *file, int mask) { const struct path *path = &file->f_path; struct inode *inode = file_inode(file); __u32 fsnotify_mask = 0; - int ret; if (file->f_mode & FMODE_NONOTIFY) return 0; @@ -45,11 +58,7 @@ static inline int fsnotify_perm(struct file *file, int mask) else BUG(); - ret = fsnotify_parent(path, NULL, fsnotify_mask); - if (ret) - return ret; - - return fsnotify(inode, fsnotify_mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); + return fsnotify_path(inode, path, fsnotify_mask); } /* @@ -180,10 +189,8 @@ static inline void fsnotify_access(struct file *file) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - if (!(file->f_mode & FMODE_NONOTIFY)) { - fsnotify_parent(path, NULL, mask); - fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); - } + if (!(file->f_mode & FMODE_NONOTIFY)) + fsnotify_path(inode, path, mask); } /* @@ -198,10 +205,8 @@ static inline void fsnotify_modify(struct file *file) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - if (!(file->f_mode & FMODE_NONOTIFY)) { - fsnotify_parent(path, NULL, mask); - fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); - } + if (!(file->f_mode & FMODE_NONOTIFY)) + fsnotify_path(inode, path, mask); } /* @@ -218,8 +223,7 @@ static inline void fsnotify_open(struct file *file) if (file->f_flags & __FMODE_EXEC) mask |= FS_OPEN_EXEC; - fsnotify_parent(path, NULL, mask); - fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); + fsnotify_path(inode, path, mask); } /* @@ -235,10 +239,8 @@ static inline void fsnotify_close(struct file *file) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - if (!(file->f_mode & FMODE_NONOTIFY)) { - fsnotify_parent(path, NULL, mask); - fsnotify(inode, 
mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); - } + if (!(file->f_mode & FMODE_NONOTIFY)) + fsnotify_path(inode, path, mask); } /* -- cgit v1.2.3 From 66917a3130f218dcef9eeab4fd11a71cd00cd7c9 Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Thu, 8 Nov 2018 14:12:44 +1100 Subject: fanotify: introduce new event mask FAN_OPEN_EXEC_PERM A new event mask FAN_OPEN_EXEC_PERM has been defined. This allows users to receive events and grant access to files that are intending to be opened for execution. Events of FAN_OPEN_EXEC_PERM type will be generated when a file has been opened by using either execve(), execveat() or uselib() system calls. This acts in the same manner as previous permission event mask, meaning that an access response is required from the user application in order to permit any further operations on the file. Signed-off-by: Matthew Bobrowski Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 3 ++- fs/notify/fsnotify.c | 2 +- include/linux/fanotify.h | 3 ++- include/linux/fsnotify.h | 17 ++++++++++++----- include/linux/fsnotify_backend.h | 8 +++++--- include/uapi/linux/fanotify.h | 1 + 6 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 5a1a15f646ba..3723f3d18d20 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -211,8 +211,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); + BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 11); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 12); mask = fanotify_group_event_mask(iter_info, mask, data, data_type); if (!mask) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index b3f58f36a0ab..ecf09b6243d9 100644 --- 
a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -401,7 +401,7 @@ static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index c521e4264f2b..9e2142795335 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -40,7 +40,8 @@ FAN_CLOSE | FAN_OPEN | FAN_OPEN_EXEC) /* Events that require a permission response from user */ -#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM) +#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \ + FAN_OPEN_EXEC_PERM) /* Extra flags that may be reported with event or control handling of events */ #define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index c29f2f072c2c..2ccb08cb5d6a 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -40,9 +40,10 @@ static inline int fsnotify_path(struct inode *inode, const struct path *path, return fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } -/* simple call site for access decisions */ +/* Simple call site for access decisions */ static inline int fsnotify_perm(struct file *file, int mask) { + int ret; const struct path *path = &file->f_path; struct inode *inode = file_inode(file); __u32 fsnotify_mask = 0; @@ -51,12 +52,18 @@ static inline int fsnotify_perm(struct file *file, int mask) return 0; if (!(mask & (MAY_READ | MAY_OPEN))) return 0; - if (mask & MAY_OPEN) + if (mask & MAY_OPEN) { fsnotify_mask = FS_OPEN_PERM; - else if (mask & MAY_READ) + + if (file->f_flags & __FMODE_EXEC) { + ret = fsnotify_path(inode, path, FS_OPEN_EXEC_PERM); + + if (ret) + return ret; + } + } else if (mask & MAY_READ) { fsnotify_mask = FS_ACCESS_PERM; - else - BUG(); + } return fsnotify_path(inode, path, fsnotify_mask); } diff --git 
a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 39d94e62a836..7639774e7475 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -46,6 +46,7 @@ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ +#define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ #define FS_ISDIR 0x40000000 /* event occurred against dir */ @@ -64,11 +65,12 @@ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM | \ - FS_OPEN_EXEC) + FS_OPEN_EXEC | FS_OPEN_EXEC_PERM) #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) -#define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM) +#define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ + FS_OPEN_EXEC_PERM) /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ @@ -77,7 +79,7 @@ FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME | \ - FS_OPEN_EXEC) + FS_OPEN_EXEC | FS_OPEN_EXEC_PERM) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index d9664fbc905b..909c98fcace2 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -16,6 +16,7 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ #define FAN_ONDIR 0x40000000 /* event occurred against dir */ -- cgit v1.2.3 From ec93cb6f827b3e1a81b0721b8c893d2a5e37e7d6 
Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Tue, 13 Nov 2018 11:22:25 +0100 Subject: spi: pxa2xx: Add slave mode support Tested on an OLPC XO-1.75 machine, where the Embedded Controller happens to be a SPI master. Signed-off-by: Lubomir Rintel Acked-by: Pavel Machek Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 81 ++++++++++++++++++++++++++++++++++++++---- include/linux/spi/pxa2xx_spi.h | 1 + 2 files changed, 75 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index d46af116d630..a057c3be7e3b 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -626,6 +626,11 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data) return IRQ_HANDLED; } + if (irq_status & SSSR_TUR) { + int_error_stop(drv_data, "interrupt_transfer: fifo underrun"); + return IRQ_HANDLED; + } + if (irq_status & SSSR_TINT) { pxa2xx_spi_write(drv_data, SSSR, SSSR_TINT); if (drv_data->read(drv_data)) { @@ -1073,6 +1078,11 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master, pxa2xx_spi_write(drv_data, SSTO, chip->timeout); } + if (spi_controller_is_slave(master)) { + while (drv_data->write(drv_data)) + ; + } + /* * Release the data by enabling service requests and interrupts, * without changing any mode bits @@ -1082,6 +1092,27 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master, return 1; } +static int pxa2xx_spi_slave_abort(struct spi_master *master) +{ + struct driver_data *drv_data = spi_controller_get_devdata(master); + + /* Stop and reset SSP */ + write_SSSR_CS(drv_data, drv_data->clear_sr); + reset_sccr1(drv_data); + if (!pxa25x_ssp_comp(drv_data)) + pxa2xx_spi_write(drv_data, SSTO, 0); + pxa2xx_spi_flush(drv_data); + pxa2xx_spi_write(drv_data, SSCR0, + pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE); + + dev_dbg(&drv_data->pdev->dev, "transfer aborted\n"); + + drv_data->master->cur_msg->status = -EINTR; + 
spi_finalize_current_transfer(drv_data->master); + + return 0; +} + static void pxa2xx_spi_handle_err(struct spi_controller *master, struct spi_message *msg) { @@ -1209,9 +1240,14 @@ static int setup(struct spi_device *spi) rx_thres = config->rx_threshold; break; default: - tx_thres = TX_THRESH_DFLT; tx_hi_thres = 0; - rx_thres = RX_THRESH_DFLT; + if (spi_controller_is_slave(drv_data->master)) { + tx_thres = 1; + rx_thres = 2; + } else { + tx_thres = TX_THRESH_DFLT; + rx_thres = RX_THRESH_DFLT; + } break; } @@ -1255,6 +1291,12 @@ static int setup(struct spi_device *spi) if (chip_info->enable_loopback) chip->cr1 = SSCR1_LBM; } + if (spi_controller_is_slave(drv_data->master)) { + chip->cr1 |= SSCR1_SCFR; + chip->cr1 |= SSCR1_SCLKDIR; + chip->cr1 |= SSCR1_SFRMDIR; + chip->cr1 |= SSCR1_SPH; + } chip->lpss_rx_threshold = SSIRF_RxThresh(rx_thres); chip->lpss_tx_threshold = SSITF_TxLoThresh(tx_thres) @@ -1494,6 +1536,13 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev) } #endif +#if CONFIG_OF + if (of_id) { + pdata->is_slave = of_property_read_bool(pdev->dev.of_node, + "spi-slave"); + } +#endif + ssp->clk = devm_clk_get(&pdev->dev, NULL); ssp->irq = platform_get_irq(pdev, 0); ssp->type = type; @@ -1559,7 +1608,11 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) return -ENODEV; } - master = spi_alloc_master(dev, sizeof(struct driver_data)); + if (platform_info->is_slave) + master = spi_alloc_slave(dev, sizeof(struct driver_data)); + else + master = spi_alloc_master(dev, sizeof(struct driver_data)); + if (!master) { dev_err(&pdev->dev, "cannot alloc spi_master\n"); pxa_ssp_free(ssp); @@ -1581,6 +1634,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) master->setup = setup; master->set_cs = pxa2xx_spi_set_cs; master->transfer_one = pxa2xx_spi_transfer_one; + master->slave_abort = pxa2xx_spi_slave_abort; master->handle_err = pxa2xx_spi_handle_err; master->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer; master->fw_translate_cs = 
pxa2xx_spi_fw_translate_cs; @@ -1610,7 +1664,8 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) drv_data->int_cr1 = SSCR1_TIE | SSCR1_RIE | SSCR1_TINTE; drv_data->dma_cr1 = DEFAULT_DMA_CR1; drv_data->clear_sr = SSSR_ROR | SSSR_TINT; - drv_data->mask_sr = SSSR_TINT | SSSR_RFS | SSSR_TFS | SSSR_ROR; + drv_data->mask_sr = SSSR_TINT | SSSR_RFS | SSSR_TFS + | SSSR_ROR | SSSR_TUR; } status = request_irq(ssp->irq, ssp_int, IRQF_SHARED, dev_name(dev), @@ -1658,10 +1713,22 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) pxa2xx_spi_write(drv_data, SSCR0, tmp); break; default: - tmp = SSCR1_RxTresh(RX_THRESH_DFLT) | - SSCR1_TxTresh(TX_THRESH_DFLT); + + if (spi_controller_is_slave(master)) { + tmp = SSCR1_SCFR | + SSCR1_SCLKDIR | + SSCR1_SFRMDIR | + SSCR1_RxTresh(2) | + SSCR1_TxTresh(1) | + SSCR1_SPH; + } else { + tmp = SSCR1_RxTresh(RX_THRESH_DFLT) | + SSCR1_TxTresh(TX_THRESH_DFLT); + } pxa2xx_spi_write(drv_data, SSCR1, tmp); - tmp = SSCR0_SCR(2) | SSCR0_Motorola | SSCR0_DataSize(8); + tmp = SSCR0_Motorola | SSCR0_DataSize(8); + if (!spi_controller_is_slave(master)) + tmp |= SSCR0_SCR(2); pxa2xx_spi_write(drv_data, SSCR0, tmp); break; } diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index 9ec4c147abbc..b0674e330ef6 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -25,6 +25,7 @@ struct dma_chan; struct pxa2xx_spi_master { u16 num_chipselect; u8 enable_dma; + bool is_slave; /* DMA engine specific config */ bool (*dma_filter)(struct dma_chan *chan, void *param); -- cgit v1.2.3 From 861e6ed667c83d64a42b0db41a22d6b4de4e913f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 6 Nov 2018 12:35:21 +0100 Subject: EDAC: Drop per-memory controller buses ... and use the single edac_subsys object returned from subsys_system_register(). The idea is to have a single bus and multiple devices on it. 
Signed-off-by: Borislav Petkov Acked-by: Mauro Carvalho Chehab CC: Aristeu Rozanski Filho CC: Greg KH CC: Justin Ernst CC: linux-edac CC: Mauro Carvalho Chehab CC: Russ Anderson Cc: Tony Luck Link: https://lkml.kernel.org/r/20180926152752.GG5584@zn.tnic --- drivers/edac/edac_mc.c | 9 +-------- drivers/edac/edac_mc_sysfs.c | 30 ++---------------------------- include/linux/edac.h | 6 ------ 3 files changed, 3 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 7d3edd713932..13594ffadcb3 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -55,8 +55,6 @@ static LIST_HEAD(mc_devices); */ static const char *edac_mc_owner; -static struct bus_type mc_bus[EDAC_MAX_MCS]; - int edac_get_report_status(void) { return edac_report; @@ -716,11 +714,6 @@ int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci, int ret = -EINVAL; edac_dbg(0, "\n"); - if (mci->mc_idx >= EDAC_MAX_MCS) { - pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx); - return -ENODEV; - } - #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) edac_mc_dump_mci(mci); @@ -760,7 +753,7 @@ int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci, /* set load time so that error rate can be tracked */ mci->start_time = jiffies; - mci->bus = &mc_bus[mci->mc_idx]; + mci->bus = edac_get_sysfs_subsys(); if (edac_create_sysfs_mci_device(mci, groups)) { edac_mc_printk(mci, KERN_WARNING, diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 4c1bee59c2e6..464174685589 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -912,27 +912,8 @@ static const struct device_type mci_attr_type = { int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, const struct attribute_group **groups) { - char *name; int i, err; - /* - * The memory controller needs its own bus, in order to avoid - * namespace conflicts at /sys/bus/edac. 
- */ - name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!name) - return -ENOMEM; - - mci->bus->name = name; - - edac_dbg(0, "creating bus %s\n", mci->bus->name); - - err = bus_register(mci->bus); - if (err < 0) { - kfree(name); - return err; - } - /* get the /sys/devices/system/edac subsys reference */ mci->dev.type = &mci_attr_type; device_initialize(&mci->dev); @@ -947,7 +928,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, err = device_add(&mci->dev); if (err < 0) { edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); - goto fail_unregister_bus; + goto out; } /* @@ -995,10 +976,8 @@ fail_unregister_dimm: device_unregister(&dimm->dev); } device_unregister(&mci->dev); -fail_unregister_bus: - bus_unregister(mci->bus); - kfree(name); +out: return err; } @@ -1029,13 +1008,8 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) void edac_unregister_sysfs(struct mem_ctl_info *mci) { - struct bus_type *bus = mci->bus; - const char *name = mci->bus->name; - edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); - bus_unregister(bus); - kfree(name); } static void mc_attr_release(struct device *dev) diff --git a/include/linux/edac.h b/include/linux/edac.h index 1d0c9ea8825d..342dabda9c7e 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -669,10 +669,4 @@ struct mem_ctl_info { bool fake_inject_ue; u16 fake_inject_count; }; - -/* - * Maximum number of memory controllers in the coherent fabric. - */ -#define EDAC_MAX_MCS 2 * MAX_NUMNODES - #endif -- cgit v1.2.3 From 3501ce96bf5d9dd8563dd94595436d3757ec817e Mon Sep 17 00:00:00 2001 From: "A.s. Dong" Date: Thu, 1 Nov 2018 15:19:58 +0000 Subject: firmware: imx: remove resource id enums We already export resource id in dt-bindings headfile which can also be used by drivers. So no need keep the same definitions in regular headfile anymore. 
Cc: Shawn Guo Cc: Sascha Hauer Cc: Fabio Estevam Reviewed-by: Ulf Hansson Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- include/linux/firmware/imx/types.h | 552 ------------------------------------- 1 file changed, 552 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/imx/types.h b/include/linux/firmware/imx/types.h index 9cbf0c4a6069..80821100e85f 100644 --- a/include/linux/firmware/imx/types.h +++ b/include/linux/firmware/imx/types.h @@ -9,558 +9,6 @@ #ifndef _SC_TYPES_H #define _SC_TYPES_H -/* - * This type is used to indicate a resource. Resources include peripherals - * and bus masters (but not memory regions). Note items from list should - * never be changed or removed (only added to at the end of the list). - */ -enum imx_sc_rsrc { - IMX_SC_R_A53 = 0, - IMX_SC_R_A53_0 = 1, - IMX_SC_R_A53_1 = 2, - IMX_SC_R_A53_2 = 3, - IMX_SC_R_A53_3 = 4, - IMX_SC_R_A72 = 5, - IMX_SC_R_A72_0 = 6, - IMX_SC_R_A72_1 = 7, - IMX_SC_R_A72_2 = 8, - IMX_SC_R_A72_3 = 9, - IMX_SC_R_CCI = 10, - IMX_SC_R_DB = 11, - IMX_SC_R_DRC_0 = 12, - IMX_SC_R_DRC_1 = 13, - IMX_SC_R_GIC_SMMU = 14, - IMX_SC_R_IRQSTR_M4_0 = 15, - IMX_SC_R_IRQSTR_M4_1 = 16, - IMX_SC_R_SMMU = 17, - IMX_SC_R_GIC = 18, - IMX_SC_R_DC_0_BLIT0 = 19, - IMX_SC_R_DC_0_BLIT1 = 20, - IMX_SC_R_DC_0_BLIT2 = 21, - IMX_SC_R_DC_0_BLIT_OUT = 22, - IMX_SC_R_DC_0_CAPTURE0 = 23, - IMX_SC_R_DC_0_CAPTURE1 = 24, - IMX_SC_R_DC_0_WARP = 25, - IMX_SC_R_DC_0_INTEGRAL0 = 26, - IMX_SC_R_DC_0_INTEGRAL1 = 27, - IMX_SC_R_DC_0_VIDEO0 = 28, - IMX_SC_R_DC_0_VIDEO1 = 29, - IMX_SC_R_DC_0_FRAC0 = 30, - IMX_SC_R_DC_0_FRAC1 = 31, - IMX_SC_R_DC_0 = 32, - IMX_SC_R_GPU_2_PID0 = 33, - IMX_SC_R_DC_0_PLL_0 = 34, - IMX_SC_R_DC_0_PLL_1 = 35, - IMX_SC_R_DC_1_BLIT0 = 36, - IMX_SC_R_DC_1_BLIT1 = 37, - IMX_SC_R_DC_1_BLIT2 = 38, - IMX_SC_R_DC_1_BLIT_OUT = 39, - IMX_SC_R_DC_1_CAPTURE0 = 40, - IMX_SC_R_DC_1_CAPTURE1 = 41, - IMX_SC_R_DC_1_WARP = 42, - IMX_SC_R_DC_1_INTEGRAL0 = 43, - IMX_SC_R_DC_1_INTEGRAL1 = 44, - IMX_SC_R_DC_1_VIDEO0 = 
45, - IMX_SC_R_DC_1_VIDEO1 = 46, - IMX_SC_R_DC_1_FRAC0 = 47, - IMX_SC_R_DC_1_FRAC1 = 48, - IMX_SC_R_DC_1 = 49, - IMX_SC_R_GPU_3_PID0 = 50, - IMX_SC_R_DC_1_PLL_0 = 51, - IMX_SC_R_DC_1_PLL_1 = 52, - IMX_SC_R_SPI_0 = 53, - IMX_SC_R_SPI_1 = 54, - IMX_SC_R_SPI_2 = 55, - IMX_SC_R_SPI_3 = 56, - IMX_SC_R_UART_0 = 57, - IMX_SC_R_UART_1 = 58, - IMX_SC_R_UART_2 = 59, - IMX_SC_R_UART_3 = 60, - IMX_SC_R_UART_4 = 61, - IMX_SC_R_EMVSIM_0 = 62, - IMX_SC_R_EMVSIM_1 = 63, - IMX_SC_R_DMA_0_CH0 = 64, - IMX_SC_R_DMA_0_CH1 = 65, - IMX_SC_R_DMA_0_CH2 = 66, - IMX_SC_R_DMA_0_CH3 = 67, - IMX_SC_R_DMA_0_CH4 = 68, - IMX_SC_R_DMA_0_CH5 = 69, - IMX_SC_R_DMA_0_CH6 = 70, - IMX_SC_R_DMA_0_CH7 = 71, - IMX_SC_R_DMA_0_CH8 = 72, - IMX_SC_R_DMA_0_CH9 = 73, - IMX_SC_R_DMA_0_CH10 = 74, - IMX_SC_R_DMA_0_CH11 = 75, - IMX_SC_R_DMA_0_CH12 = 76, - IMX_SC_R_DMA_0_CH13 = 77, - IMX_SC_R_DMA_0_CH14 = 78, - IMX_SC_R_DMA_0_CH15 = 79, - IMX_SC_R_DMA_0_CH16 = 80, - IMX_SC_R_DMA_0_CH17 = 81, - IMX_SC_R_DMA_0_CH18 = 82, - IMX_SC_R_DMA_0_CH19 = 83, - IMX_SC_R_DMA_0_CH20 = 84, - IMX_SC_R_DMA_0_CH21 = 85, - IMX_SC_R_DMA_0_CH22 = 86, - IMX_SC_R_DMA_0_CH23 = 87, - IMX_SC_R_DMA_0_CH24 = 88, - IMX_SC_R_DMA_0_CH25 = 89, - IMX_SC_R_DMA_0_CH26 = 90, - IMX_SC_R_DMA_0_CH27 = 91, - IMX_SC_R_DMA_0_CH28 = 92, - IMX_SC_R_DMA_0_CH29 = 93, - IMX_SC_R_DMA_0_CH30 = 94, - IMX_SC_R_DMA_0_CH31 = 95, - IMX_SC_R_I2C_0 = 96, - IMX_SC_R_I2C_1 = 97, - IMX_SC_R_I2C_2 = 98, - IMX_SC_R_I2C_3 = 99, - IMX_SC_R_I2C_4 = 100, - IMX_SC_R_ADC_0 = 101, - IMX_SC_R_ADC_1 = 102, - IMX_SC_R_FTM_0 = 103, - IMX_SC_R_FTM_1 = 104, - IMX_SC_R_CAN_0 = 105, - IMX_SC_R_CAN_1 = 106, - IMX_SC_R_CAN_2 = 107, - IMX_SC_R_DMA_1_CH0 = 108, - IMX_SC_R_DMA_1_CH1 = 109, - IMX_SC_R_DMA_1_CH2 = 110, - IMX_SC_R_DMA_1_CH3 = 111, - IMX_SC_R_DMA_1_CH4 = 112, - IMX_SC_R_DMA_1_CH5 = 113, - IMX_SC_R_DMA_1_CH6 = 114, - IMX_SC_R_DMA_1_CH7 = 115, - IMX_SC_R_DMA_1_CH8 = 116, - IMX_SC_R_DMA_1_CH9 = 117, - IMX_SC_R_DMA_1_CH10 = 118, - IMX_SC_R_DMA_1_CH11 = 119, - IMX_SC_R_DMA_1_CH12 = 120, - 
IMX_SC_R_DMA_1_CH13 = 121, - IMX_SC_R_DMA_1_CH14 = 122, - IMX_SC_R_DMA_1_CH15 = 123, - IMX_SC_R_DMA_1_CH16 = 124, - IMX_SC_R_DMA_1_CH17 = 125, - IMX_SC_R_DMA_1_CH18 = 126, - IMX_SC_R_DMA_1_CH19 = 127, - IMX_SC_R_DMA_1_CH20 = 128, - IMX_SC_R_DMA_1_CH21 = 129, - IMX_SC_R_DMA_1_CH22 = 130, - IMX_SC_R_DMA_1_CH23 = 131, - IMX_SC_R_DMA_1_CH24 = 132, - IMX_SC_R_DMA_1_CH25 = 133, - IMX_SC_R_DMA_1_CH26 = 134, - IMX_SC_R_DMA_1_CH27 = 135, - IMX_SC_R_DMA_1_CH28 = 136, - IMX_SC_R_DMA_1_CH29 = 137, - IMX_SC_R_DMA_1_CH30 = 138, - IMX_SC_R_DMA_1_CH31 = 139, - IMX_SC_R_UNUSED1 = 140, - IMX_SC_R_UNUSED2 = 141, - IMX_SC_R_UNUSED3 = 142, - IMX_SC_R_UNUSED4 = 143, - IMX_SC_R_GPU_0_PID0 = 144, - IMX_SC_R_GPU_0_PID1 = 145, - IMX_SC_R_GPU_0_PID2 = 146, - IMX_SC_R_GPU_0_PID3 = 147, - IMX_SC_R_GPU_1_PID0 = 148, - IMX_SC_R_GPU_1_PID1 = 149, - IMX_SC_R_GPU_1_PID2 = 150, - IMX_SC_R_GPU_1_PID3 = 151, - IMX_SC_R_PCIE_A = 152, - IMX_SC_R_SERDES_0 = 153, - IMX_SC_R_MATCH_0 = 154, - IMX_SC_R_MATCH_1 = 155, - IMX_SC_R_MATCH_2 = 156, - IMX_SC_R_MATCH_3 = 157, - IMX_SC_R_MATCH_4 = 158, - IMX_SC_R_MATCH_5 = 159, - IMX_SC_R_MATCH_6 = 160, - IMX_SC_R_MATCH_7 = 161, - IMX_SC_R_MATCH_8 = 162, - IMX_SC_R_MATCH_9 = 163, - IMX_SC_R_MATCH_10 = 164, - IMX_SC_R_MATCH_11 = 165, - IMX_SC_R_MATCH_12 = 166, - IMX_SC_R_MATCH_13 = 167, - IMX_SC_R_MATCH_14 = 168, - IMX_SC_R_PCIE_B = 169, - IMX_SC_R_SATA_0 = 170, - IMX_SC_R_SERDES_1 = 171, - IMX_SC_R_HSIO_GPIO = 172, - IMX_SC_R_MATCH_15 = 173, - IMX_SC_R_MATCH_16 = 174, - IMX_SC_R_MATCH_17 = 175, - IMX_SC_R_MATCH_18 = 176, - IMX_SC_R_MATCH_19 = 177, - IMX_SC_R_MATCH_20 = 178, - IMX_SC_R_MATCH_21 = 179, - IMX_SC_R_MATCH_22 = 180, - IMX_SC_R_MATCH_23 = 181, - IMX_SC_R_MATCH_24 = 182, - IMX_SC_R_MATCH_25 = 183, - IMX_SC_R_MATCH_26 = 184, - IMX_SC_R_MATCH_27 = 185, - IMX_SC_R_MATCH_28 = 186, - IMX_SC_R_LCD_0 = 187, - IMX_SC_R_LCD_0_PWM_0 = 188, - IMX_SC_R_LCD_0_I2C_0 = 189, - IMX_SC_R_LCD_0_I2C_1 = 190, - IMX_SC_R_PWM_0 = 191, - IMX_SC_R_PWM_1 = 192, - IMX_SC_R_PWM_2 = 
193, - IMX_SC_R_PWM_3 = 194, - IMX_SC_R_PWM_4 = 195, - IMX_SC_R_PWM_5 = 196, - IMX_SC_R_PWM_6 = 197, - IMX_SC_R_PWM_7 = 198, - IMX_SC_R_GPIO_0 = 199, - IMX_SC_R_GPIO_1 = 200, - IMX_SC_R_GPIO_2 = 201, - IMX_SC_R_GPIO_3 = 202, - IMX_SC_R_GPIO_4 = 203, - IMX_SC_R_GPIO_5 = 204, - IMX_SC_R_GPIO_6 = 205, - IMX_SC_R_GPIO_7 = 206, - IMX_SC_R_GPT_0 = 207, - IMX_SC_R_GPT_1 = 208, - IMX_SC_R_GPT_2 = 209, - IMX_SC_R_GPT_3 = 210, - IMX_SC_R_GPT_4 = 211, - IMX_SC_R_KPP = 212, - IMX_SC_R_MU_0A = 213, - IMX_SC_R_MU_1A = 214, - IMX_SC_R_MU_2A = 215, - IMX_SC_R_MU_3A = 216, - IMX_SC_R_MU_4A = 217, - IMX_SC_R_MU_5A = 218, - IMX_SC_R_MU_6A = 219, - IMX_SC_R_MU_7A = 220, - IMX_SC_R_MU_8A = 221, - IMX_SC_R_MU_9A = 222, - IMX_SC_R_MU_10A = 223, - IMX_SC_R_MU_11A = 224, - IMX_SC_R_MU_12A = 225, - IMX_SC_R_MU_13A = 226, - IMX_SC_R_MU_5B = 227, - IMX_SC_R_MU_6B = 228, - IMX_SC_R_MU_7B = 229, - IMX_SC_R_MU_8B = 230, - IMX_SC_R_MU_9B = 231, - IMX_SC_R_MU_10B = 232, - IMX_SC_R_MU_11B = 233, - IMX_SC_R_MU_12B = 234, - IMX_SC_R_MU_13B = 235, - IMX_SC_R_ROM_0 = 236, - IMX_SC_R_FSPI_0 = 237, - IMX_SC_R_FSPI_1 = 238, - IMX_SC_R_IEE = 239, - IMX_SC_R_IEE_R0 = 240, - IMX_SC_R_IEE_R1 = 241, - IMX_SC_R_IEE_R2 = 242, - IMX_SC_R_IEE_R3 = 243, - IMX_SC_R_IEE_R4 = 244, - IMX_SC_R_IEE_R5 = 245, - IMX_SC_R_IEE_R6 = 246, - IMX_SC_R_IEE_R7 = 247, - IMX_SC_R_SDHC_0 = 248, - IMX_SC_R_SDHC_1 = 249, - IMX_SC_R_SDHC_2 = 250, - IMX_SC_R_ENET_0 = 251, - IMX_SC_R_ENET_1 = 252, - IMX_SC_R_MLB_0 = 253, - IMX_SC_R_DMA_2_CH0 = 254, - IMX_SC_R_DMA_2_CH1 = 255, - IMX_SC_R_DMA_2_CH2 = 256, - IMX_SC_R_DMA_2_CH3 = 257, - IMX_SC_R_DMA_2_CH4 = 258, - IMX_SC_R_USB_0 = 259, - IMX_SC_R_USB_1 = 260, - IMX_SC_R_USB_0_PHY = 261, - IMX_SC_R_USB_2 = 262, - IMX_SC_R_USB_2_PHY = 263, - IMX_SC_R_DTCP = 264, - IMX_SC_R_NAND = 265, - IMX_SC_R_LVDS_0 = 266, - IMX_SC_R_LVDS_0_PWM_0 = 267, - IMX_SC_R_LVDS_0_I2C_0 = 268, - IMX_SC_R_LVDS_0_I2C_1 = 269, - IMX_SC_R_LVDS_1 = 270, - IMX_SC_R_LVDS_1_PWM_0 = 271, - IMX_SC_R_LVDS_1_I2C_0 = 272, - 
IMX_SC_R_LVDS_1_I2C_1 = 273, - IMX_SC_R_LVDS_2 = 274, - IMX_SC_R_LVDS_2_PWM_0 = 275, - IMX_SC_R_LVDS_2_I2C_0 = 276, - IMX_SC_R_LVDS_2_I2C_1 = 277, - IMX_SC_R_M4_0_PID0 = 278, - IMX_SC_R_M4_0_PID1 = 279, - IMX_SC_R_M4_0_PID2 = 280, - IMX_SC_R_M4_0_PID3 = 281, - IMX_SC_R_M4_0_PID4 = 282, - IMX_SC_R_M4_0_RGPIO = 283, - IMX_SC_R_M4_0_SEMA42 = 284, - IMX_SC_R_M4_0_TPM = 285, - IMX_SC_R_M4_0_PIT = 286, - IMX_SC_R_M4_0_UART = 287, - IMX_SC_R_M4_0_I2C = 288, - IMX_SC_R_M4_0_INTMUX = 289, - IMX_SC_R_M4_0_SIM = 290, - IMX_SC_R_M4_0_WDOG = 291, - IMX_SC_R_M4_0_MU_0B = 292, - IMX_SC_R_M4_0_MU_0A0 = 293, - IMX_SC_R_M4_0_MU_0A1 = 294, - IMX_SC_R_M4_0_MU_0A2 = 295, - IMX_SC_R_M4_0_MU_0A3 = 296, - IMX_SC_R_M4_0_MU_1A = 297, - IMX_SC_R_M4_1_PID0 = 298, - IMX_SC_R_M4_1_PID1 = 299, - IMX_SC_R_M4_1_PID2 = 300, - IMX_SC_R_M4_1_PID3 = 301, - IMX_SC_R_M4_1_PID4 = 302, - IMX_SC_R_M4_1_RGPIO = 303, - IMX_SC_R_M4_1_SEMA42 = 304, - IMX_SC_R_M4_1_TPM = 305, - IMX_SC_R_M4_1_PIT = 306, - IMX_SC_R_M4_1_UART = 307, - IMX_SC_R_M4_1_I2C = 308, - IMX_SC_R_M4_1_INTMUX = 309, - IMX_SC_R_M4_1_SIM = 310, - IMX_SC_R_M4_1_WDOG = 311, - IMX_SC_R_M4_1_MU_0B = 312, - IMX_SC_R_M4_1_MU_0A0 = 313, - IMX_SC_R_M4_1_MU_0A1 = 314, - IMX_SC_R_M4_1_MU_0A2 = 315, - IMX_SC_R_M4_1_MU_0A3 = 316, - IMX_SC_R_M4_1_MU_1A = 317, - IMX_SC_R_SAI_0 = 318, - IMX_SC_R_SAI_1 = 319, - IMX_SC_R_SAI_2 = 320, - IMX_SC_R_IRQSTR_SCU2 = 321, - IMX_SC_R_IRQSTR_DSP = 322, - IMX_SC_R_UNUSED5 = 323, - IMX_SC_R_UNUSED6 = 324, - IMX_SC_R_AUDIO_PLL_0 = 325, - IMX_SC_R_PI_0 = 326, - IMX_SC_R_PI_0_PWM_0 = 327, - IMX_SC_R_PI_0_PWM_1 = 328, - IMX_SC_R_PI_0_I2C_0 = 329, - IMX_SC_R_PI_0_PLL = 330, - IMX_SC_R_PI_1 = 331, - IMX_SC_R_PI_1_PWM_0 = 332, - IMX_SC_R_PI_1_PWM_1 = 333, - IMX_SC_R_PI_1_I2C_0 = 334, - IMX_SC_R_PI_1_PLL = 335, - IMX_SC_R_SC_PID0 = 336, - IMX_SC_R_SC_PID1 = 337, - IMX_SC_R_SC_PID2 = 338, - IMX_SC_R_SC_PID3 = 339, - IMX_SC_R_SC_PID4 = 340, - IMX_SC_R_SC_SEMA42 = 341, - IMX_SC_R_SC_TPM = 342, - IMX_SC_R_SC_PIT = 343, - 
IMX_SC_R_SC_UART = 344, - IMX_SC_R_SC_I2C = 345, - IMX_SC_R_SC_MU_0B = 346, - IMX_SC_R_SC_MU_0A0 = 347, - IMX_SC_R_SC_MU_0A1 = 348, - IMX_SC_R_SC_MU_0A2 = 349, - IMX_SC_R_SC_MU_0A3 = 350, - IMX_SC_R_SC_MU_1A = 351, - IMX_SC_R_SYSCNT_RD = 352, - IMX_SC_R_SYSCNT_CMP = 353, - IMX_SC_R_DEBUG = 354, - IMX_SC_R_SYSTEM = 355, - IMX_SC_R_SNVS = 356, - IMX_SC_R_OTP = 357, - IMX_SC_R_VPU_PID0 = 358, - IMX_SC_R_VPU_PID1 = 359, - IMX_SC_R_VPU_PID2 = 360, - IMX_SC_R_VPU_PID3 = 361, - IMX_SC_R_VPU_PID4 = 362, - IMX_SC_R_VPU_PID5 = 363, - IMX_SC_R_VPU_PID6 = 364, - IMX_SC_R_VPU_PID7 = 365, - IMX_SC_R_VPU_UART = 366, - IMX_SC_R_VPUCORE = 367, - IMX_SC_R_VPUCORE_0 = 368, - IMX_SC_R_VPUCORE_1 = 369, - IMX_SC_R_VPUCORE_2 = 370, - IMX_SC_R_VPUCORE_3 = 371, - IMX_SC_R_DMA_4_CH0 = 372, - IMX_SC_R_DMA_4_CH1 = 373, - IMX_SC_R_DMA_4_CH2 = 374, - IMX_SC_R_DMA_4_CH3 = 375, - IMX_SC_R_DMA_4_CH4 = 376, - IMX_SC_R_ISI_CH0 = 377, - IMX_SC_R_ISI_CH1 = 378, - IMX_SC_R_ISI_CH2 = 379, - IMX_SC_R_ISI_CH3 = 380, - IMX_SC_R_ISI_CH4 = 381, - IMX_SC_R_ISI_CH5 = 382, - IMX_SC_R_ISI_CH6 = 383, - IMX_SC_R_ISI_CH7 = 384, - IMX_SC_R_MJPEG_DEC_S0 = 385, - IMX_SC_R_MJPEG_DEC_S1 = 386, - IMX_SC_R_MJPEG_DEC_S2 = 387, - IMX_SC_R_MJPEG_DEC_S3 = 388, - IMX_SC_R_MJPEG_ENC_S0 = 389, - IMX_SC_R_MJPEG_ENC_S1 = 390, - IMX_SC_R_MJPEG_ENC_S2 = 391, - IMX_SC_R_MJPEG_ENC_S3 = 392, - IMX_SC_R_MIPI_0 = 393, - IMX_SC_R_MIPI_0_PWM_0 = 394, - IMX_SC_R_MIPI_0_I2C_0 = 395, - IMX_SC_R_MIPI_0_I2C_1 = 396, - IMX_SC_R_MIPI_1 = 397, - IMX_SC_R_MIPI_1_PWM_0 = 398, - IMX_SC_R_MIPI_1_I2C_0 = 399, - IMX_SC_R_MIPI_1_I2C_1 = 400, - IMX_SC_R_CSI_0 = 401, - IMX_SC_R_CSI_0_PWM_0 = 402, - IMX_SC_R_CSI_0_I2C_0 = 403, - IMX_SC_R_CSI_1 = 404, - IMX_SC_R_CSI_1_PWM_0 = 405, - IMX_SC_R_CSI_1_I2C_0 = 406, - IMX_SC_R_HDMI = 407, - IMX_SC_R_HDMI_I2S = 408, - IMX_SC_R_HDMI_I2C_0 = 409, - IMX_SC_R_HDMI_PLL_0 = 410, - IMX_SC_R_HDMI_RX = 411, - IMX_SC_R_HDMI_RX_BYPASS = 412, - IMX_SC_R_HDMI_RX_I2C_0 = 413, - IMX_SC_R_ASRC_0 = 414, - IMX_SC_R_ESAI_0 = 415, - 
IMX_SC_R_SPDIF_0 = 416, - IMX_SC_R_SPDIF_1 = 417, - IMX_SC_R_SAI_3 = 418, - IMX_SC_R_SAI_4 = 419, - IMX_SC_R_SAI_5 = 420, - IMX_SC_R_GPT_5 = 421, - IMX_SC_R_GPT_6 = 422, - IMX_SC_R_GPT_7 = 423, - IMX_SC_R_GPT_8 = 424, - IMX_SC_R_GPT_9 = 425, - IMX_SC_R_GPT_10 = 426, - IMX_SC_R_DMA_2_CH5 = 427, - IMX_SC_R_DMA_2_CH6 = 428, - IMX_SC_R_DMA_2_CH7 = 429, - IMX_SC_R_DMA_2_CH8 = 430, - IMX_SC_R_DMA_2_CH9 = 431, - IMX_SC_R_DMA_2_CH10 = 432, - IMX_SC_R_DMA_2_CH11 = 433, - IMX_SC_R_DMA_2_CH12 = 434, - IMX_SC_R_DMA_2_CH13 = 435, - IMX_SC_R_DMA_2_CH14 = 436, - IMX_SC_R_DMA_2_CH15 = 437, - IMX_SC_R_DMA_2_CH16 = 438, - IMX_SC_R_DMA_2_CH17 = 439, - IMX_SC_R_DMA_2_CH18 = 440, - IMX_SC_R_DMA_2_CH19 = 441, - IMX_SC_R_DMA_2_CH20 = 442, - IMX_SC_R_DMA_2_CH21 = 443, - IMX_SC_R_DMA_2_CH22 = 444, - IMX_SC_R_DMA_2_CH23 = 445, - IMX_SC_R_DMA_2_CH24 = 446, - IMX_SC_R_DMA_2_CH25 = 447, - IMX_SC_R_DMA_2_CH26 = 448, - IMX_SC_R_DMA_2_CH27 = 449, - IMX_SC_R_DMA_2_CH28 = 450, - IMX_SC_R_DMA_2_CH29 = 451, - IMX_SC_R_DMA_2_CH30 = 452, - IMX_SC_R_DMA_2_CH31 = 453, - IMX_SC_R_ASRC_1 = 454, - IMX_SC_R_ESAI_1 = 455, - IMX_SC_R_SAI_6 = 456, - IMX_SC_R_SAI_7 = 457, - IMX_SC_R_AMIX = 458, - IMX_SC_R_MQS_0 = 459, - IMX_SC_R_DMA_3_CH0 = 460, - IMX_SC_R_DMA_3_CH1 = 461, - IMX_SC_R_DMA_3_CH2 = 462, - IMX_SC_R_DMA_3_CH3 = 463, - IMX_SC_R_DMA_3_CH4 = 464, - IMX_SC_R_DMA_3_CH5 = 465, - IMX_SC_R_DMA_3_CH6 = 466, - IMX_SC_R_DMA_3_CH7 = 467, - IMX_SC_R_DMA_3_CH8 = 468, - IMX_SC_R_DMA_3_CH9 = 469, - IMX_SC_R_DMA_3_CH10 = 470, - IMX_SC_R_DMA_3_CH11 = 471, - IMX_SC_R_DMA_3_CH12 = 472, - IMX_SC_R_DMA_3_CH13 = 473, - IMX_SC_R_DMA_3_CH14 = 474, - IMX_SC_R_DMA_3_CH15 = 475, - IMX_SC_R_DMA_3_CH16 = 476, - IMX_SC_R_DMA_3_CH17 = 477, - IMX_SC_R_DMA_3_CH18 = 478, - IMX_SC_R_DMA_3_CH19 = 479, - IMX_SC_R_DMA_3_CH20 = 480, - IMX_SC_R_DMA_3_CH21 = 481, - IMX_SC_R_DMA_3_CH22 = 482, - IMX_SC_R_DMA_3_CH23 = 483, - IMX_SC_R_DMA_3_CH24 = 484, - IMX_SC_R_DMA_3_CH25 = 485, - IMX_SC_R_DMA_3_CH26 = 486, - IMX_SC_R_DMA_3_CH27 = 487, - 
IMX_SC_R_DMA_3_CH28 = 488, - IMX_SC_R_DMA_3_CH29 = 489, - IMX_SC_R_DMA_3_CH30 = 490, - IMX_SC_R_DMA_3_CH31 = 491, - IMX_SC_R_AUDIO_PLL_1 = 492, - IMX_SC_R_AUDIO_CLK_0 = 493, - IMX_SC_R_AUDIO_CLK_1 = 494, - IMX_SC_R_MCLK_OUT_0 = 495, - IMX_SC_R_MCLK_OUT_1 = 496, - IMX_SC_R_PMIC_0 = 497, - IMX_SC_R_PMIC_1 = 498, - IMX_SC_R_SECO = 499, - IMX_SC_R_CAAM_JR1 = 500, - IMX_SC_R_CAAM_JR2 = 501, - IMX_SC_R_CAAM_JR3 = 502, - IMX_SC_R_SECO_MU_2 = 503, - IMX_SC_R_SECO_MU_3 = 504, - IMX_SC_R_SECO_MU_4 = 505, - IMX_SC_R_HDMI_RX_PWM_0 = 506, - IMX_SC_R_A35 = 507, - IMX_SC_R_A35_0 = 508, - IMX_SC_R_A35_1 = 509, - IMX_SC_R_A35_2 = 510, - IMX_SC_R_A35_3 = 511, - IMX_SC_R_DSP = 512, - IMX_SC_R_DSP_RAM = 513, - IMX_SC_R_CAAM_JR1_OUT = 514, - IMX_SC_R_CAAM_JR2_OUT = 515, - IMX_SC_R_CAAM_JR3_OUT = 516, - IMX_SC_R_VPU_DEC_0 = 517, - IMX_SC_R_VPU_ENC_0 = 518, - IMX_SC_R_CAAM_JR0 = 519, - IMX_SC_R_CAAM_JR0_OUT = 520, - IMX_SC_R_PMIC_2 = 521, - IMX_SC_R_DBLOGIC = 522, - IMX_SC_R_HDMI_PLL_1 = 523, - IMX_SC_R_BOARD_R0 = 524, - IMX_SC_R_BOARD_R1 = 525, - IMX_SC_R_BOARD_R2 = 526, - IMX_SC_R_BOARD_R3 = 527, - IMX_SC_R_BOARD_R4 = 528, - IMX_SC_R_BOARD_R5 = 529, - IMX_SC_R_BOARD_R6 = 530, - IMX_SC_R_BOARD_R7 = 531, - IMX_SC_R_MJPEG_DEC_MP = 532, - IMX_SC_R_MJPEG_ENC_MP = 533, - IMX_SC_R_VPU_TS_0 = 534, - IMX_SC_R_VPU_MU_0 = 535, - IMX_SC_R_VPU_MU_1 = 536, - IMX_SC_R_VPU_MU_2 = 537, - IMX_SC_R_VPU_MU_3 = 538, - IMX_SC_R_VPU_ENC_1 = 539, - IMX_SC_R_VPU = 540, - IMX_SC_R_LAST -}; - -/* NOTE - please add by replacing some of the UNUSED from above! */ - /* * This type is used to indicate a control. */ -- cgit v1.2.3 From 0a914a4948d4604c08750ae67dc33f8b5702402f Mon Sep 17 00:00:00 2001 From: "A.s. Dong" Date: Thu, 1 Nov 2018 15:20:08 +0000 Subject: firmware: imx: add pm svc headfile Add SCU PM SVC related protocol definitions which will be used by a number of PM functions like Power Domain, Clock, Reset and etc. The detailed implementation of each function will put in the individual function drivers. 
Cc: Shawn Guo Cc: Sascha Hauer Reviewed-by: Ulf Hansson Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- include/linux/firmware/imx/sci.h | 1 + include/linux/firmware/imx/svc/pm.h | 85 +++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 include/linux/firmware/imx/svc/pm.h (limited to 'include/linux') diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h index 29ada609de03..ebc55098faee 100644 --- a/include/linux/firmware/imx/sci.h +++ b/include/linux/firmware/imx/sci.h @@ -14,4 +14,5 @@ #include #include +#include #endif /* _SC_SCI_H */ diff --git a/include/linux/firmware/imx/svc/pm.h b/include/linux/firmware/imx/svc/pm.h new file mode 100644 index 000000000000..1f6975dd37b0 --- /dev/null +++ b/include/linux/firmware/imx/svc/pm.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2016 Freescale Semiconductor, Inc. + * Copyright 2017-2018 NXP + * + * Header file containing the public API for the System Controller (SC) + * Power Management (PM) function. This includes functions for power state + * control, clock control, reset control, and wake-up event control. + * + * PM_SVC (SVC) Power Management Service + * + * Module for the Power Management (PM) service. + */ + +#ifndef _SC_PM_API_H +#define _SC_PM_API_H + +#include + +/* + * This type is used to indicate RPC PM function calls. 
+ */ +enum imx_sc_pm_func { + IMX_SC_PM_FUNC_UNKNOWN = 0, + IMX_SC_PM_FUNC_SET_SYS_POWER_MODE = 19, + IMX_SC_PM_FUNC_SET_PARTITION_POWER_MODE = 1, + IMX_SC_PM_FUNC_GET_SYS_POWER_MODE = 2, + IMX_SC_PM_FUNC_SET_RESOURCE_POWER_MODE = 3, + IMX_SC_PM_FUNC_GET_RESOURCE_POWER_MODE = 4, + IMX_SC_PM_FUNC_REQ_LOW_POWER_MODE = 16, + IMX_SC_PM_FUNC_SET_CPU_RESUME_ADDR = 17, + IMX_SC_PM_FUNC_REQ_SYS_IF_POWER_MODE = 18, + IMX_SC_PM_FUNC_SET_CLOCK_RATE = 5, + IMX_SC_PM_FUNC_GET_CLOCK_RATE = 6, + IMX_SC_PM_FUNC_CLOCK_ENABLE = 7, + IMX_SC_PM_FUNC_SET_CLOCK_PARENT = 14, + IMX_SC_PM_FUNC_GET_CLOCK_PARENT = 15, + IMX_SC_PM_FUNC_RESET = 13, + IMX_SC_PM_FUNC_RESET_REASON = 10, + IMX_SC_PM_FUNC_BOOT = 8, + IMX_SC_PM_FUNC_REBOOT = 9, + IMX_SC_PM_FUNC_REBOOT_PARTITION = 12, + IMX_SC_PM_FUNC_CPU_START = 11, +}; + +/* + * Defines for ALL parameters + */ +#define IMX_SC_PM_CLK_ALL UINT8_MAX /* All clocks */ + +/* + * Defines for SC PM Power Mode + */ +#define IMX_SC_PM_PW_MODE_OFF 0 /* Power off */ +#define IMX_SC_PM_PW_MODE_STBY 1 /* Power in standby */ +#define IMX_SC_PM_PW_MODE_LP 2 /* Power in low-power */ +#define IMX_SC_PM_PW_MODE_ON 3 /* Power on */ + +/* + * Defines for SC PM CLK + */ +#define IMX_SC_PM_CLK_SLV_BUS 0 /* Slave bus clock */ +#define IMX_SC_PM_CLK_MST_BUS 1 /* Master bus clock */ +#define IMX_SC_PM_CLK_PER 2 /* Peripheral clock */ +#define IMX_SC_PM_CLK_PHY 3 /* Phy clock */ +#define IMX_SC_PM_CLK_MISC 4 /* Misc clock */ +#define IMX_SC_PM_CLK_MISC0 0 /* Misc 0 clock */ +#define IMX_SC_PM_CLK_MISC1 1 /* Misc 1 clock */ +#define IMX_SC_PM_CLK_MISC2 2 /* Misc 2 clock */ +#define IMX_SC_PM_CLK_MISC3 3 /* Misc 3 clock */ +#define IMX_SC_PM_CLK_MISC4 4 /* Misc 4 clock */ +#define IMX_SC_PM_CLK_CPU 2 /* CPU clock */ +#define IMX_SC_PM_CLK_PLL 4 /* PLL */ +#define IMX_SC_PM_CLK_BYPASS 4 /* Bypass clock */ + +/* + * Defines for SC PM CLK Parent + */ +#define IMX_SC_PM_PARENT_XTAL 0 /* Parent is XTAL. 
*/ +#define IMX_SC_PM_PARENT_PLL0 1 /* Parent is PLL0 */ +#define IMX_SC_PM_PARENT_PLL1 2 /* Parent is PLL1 or PLL0/2 */ +#define IMX_SC_PM_PARENT_PLL2 3 /* Parent in PLL2 or PLL0/4 */ +#define IMX_SC_PM_PARENT_BYPS 4 /* Parent is a bypass clock. */ + +#endif /* _SC_PM_API_H */ -- cgit v1.2.3 From 475b08734edb3695b9396950c87e75d7c72278a8 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 7 Nov 2018 18:49:38 -0800 Subject: platform/chrome: straighten out cros_ec_get_{next,host}_event() error codes cros_ec_get_next_event() is documented to return 0 for success and negative for errors. It currently returns negative for some errors, and non-negative (number of bytes received) for success (including some "no data available" responses as zero). This mostly works out OK, because the callers were more or less ignoring the documentation, and only treating positive values as success (and independently checking the modification of 'wakeup'). Let's button this up by avoiding pretending to handle event/wakeup distinctions when no event info was retrieved (i.e., returned 0 bytes). And fix the documentation of cros_ec_get_host_event() and cros_ec_get_next_event() to accurately describe their behavior. 
Signed-off-by: Brian Norris Acked-by: Lee Jones Signed-off-by: Benson Leung --- drivers/platform/chrome/cros_ec_proto.c | 4 ++-- include/linux/mfd/cros_ec.h | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index b6fd4838f60f..fff67b389c87 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -580,7 +580,7 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event) if (!ec_dev->mkbp_event_supported) { ret = get_keyboard_state_event(ec_dev); - if (ret < 0) + if (ret <= 0) return ret; if (wake_event) @@ -590,7 +590,7 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event) } ret = get_next_event(ec_dev); - if (ret < 0) + if (ret <= 0) return ret; if (wake_event) { diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index e44e3ec8a9c7..de8b588c8776 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -317,7 +317,9 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev); * @wake_event: Pointer to a bool set to true upon return if the event might be * treated as a wake event. Ignored if null. * - * Return: 0 on success or negative error code. + * Return: negative error code on errors; 0 for no data; or else number of + * bytes received (i.e., an event was retrieved successfully). Event types are + * written out to @ec_dev->event_data.event_type on success. */ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event); @@ -329,7 +331,7 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event); * events raised and call the functions in the ec notifier. This function * is a helper to know which events are raised. * - * Return: 0 on success or negative error code. + * Return: 0 on error or non-zero bitmask of one or more EC_HOST_EVENT_*. 
*/ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); -- cgit v1.2.3 From 98b0e5f6842a9982a793f0837b1bd1495542a3d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 12 Nov 2018 14:58:10 -0800 Subject: net: sched: provide notification for graft on root Drivers are currently not notified when a Qdisc is grafted as root. This requires special casing Qdiscs added with parent = TC_H_ROOT in the driver. Also there is no notification sent to the driver when an existing Qdisc is grafted as root. Add this very simple notifications, drivers should now be able to track their Qdisc tree fully. Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/pkt_cls.h | 10 ++++++++++ net/sched/sch_api.c | 17 +++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 487fa5e0e165..97b4233120e4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -845,6 +845,7 @@ enum tc_setup_type { TC_SETUP_QDISC_PRIO, TC_SETUP_QDISC_MQ, TC_SETUP_QDISC_ETF, + TC_SETUP_ROOT_QDISC, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f6c0cd29dea4..fa31d034231d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -889,4 +889,14 @@ struct tc_prio_qopt_offload { }; }; +enum tc_root_command { + TC_ROOT_GRAFT, +}; + +struct tc_root_qopt_offload { + enum tc_root_command command; + u32 handle; + bool ingress; +}; + #endif diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f55bc50cd0a9..9c88cec7e8a2 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -860,6 +860,21 @@ void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, } EXPORT_SYMBOL(qdisc_offload_graft_helper); +static void qdisc_offload_graft_root(struct net_device *dev, + struct Qdisc *new, struct Qdisc *old, + 
struct netlink_ext_ack *extack) +{ + struct tc_root_qopt_offload graft_offload = { + .command = TC_ROOT_GRAFT, + .handle = new ? new->handle : 0, + .ingress = (new && new->flags & TCQ_F_INGRESS) || + (old && old->flags & TCQ_F_INGRESS), + }; + + qdisc_offload_graft_helper(dev, NULL, new, old, + TC_SETUP_ROOT_QDISC, &graft_offload, extack); +} + static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, u32 portid, u32 seq, u16 flags, int event) { @@ -1026,6 +1041,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (dev->flags & IFF_UP) dev_deactivate(dev); + qdisc_offload_graft_root(dev, new, old, extack); + if (new && new->ops->attach) goto skip; -- cgit v1.2.3 From c4fe17e0e3a346cc855b7b41c00ff7b04c56d32b Mon Sep 17 00:00:00 2001 From: Arun Kumar Neelakantam Date: Wed, 3 Oct 2018 11:10:02 +0530 Subject: soc: qcom: qmi_interface: Limit txn ids to U16_MAX Txn IDs created up to INT_MAX cause overflow while storing the IDs in u16 type supported by QMI header. Limit the txn IDs max value to U16_MAX to avoid overflow. 
Signed-off-by: Arun Kumar Neelakantam Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/soc/qcom/qmi_interface.c | 2 +- include/linux/soc/qcom/qmi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/soc/qcom/qmi_interface.c b/drivers/soc/qcom/qmi_interface.c index 938ca41c56cd..c239a28e503f 100644 --- a/drivers/soc/qcom/qmi_interface.c +++ b/drivers/soc/qcom/qmi_interface.c @@ -318,7 +318,7 @@ int qmi_txn_init(struct qmi_handle *qmi, struct qmi_txn *txn, txn->dest = c_struct; mutex_lock(&qmi->txn_lock); - ret = idr_alloc_cyclic(&qmi->txns, txn, 0, INT_MAX, GFP_KERNEL); + ret = idr_alloc_cyclic(&qmi->txns, txn, 0, U16_MAX, GFP_KERNEL); if (ret < 0) pr_err("failed to allocate transaction id\n"); diff --git a/include/linux/soc/qcom/qmi.h b/include/linux/soc/qcom/qmi.h index f4de33654a60..5efa2b67fa55 100644 --- a/include/linux/soc/qcom/qmi.h +++ b/include/linux/soc/qcom/qmi.h @@ -166,7 +166,7 @@ struct qmi_ops { struct qmi_txn { struct qmi_handle *qmi; - int id; + u16 id; struct mutex lock; struct completion completion; -- cgit v1.2.3 From c9a983058ad6ffa59b950b87e4888a43c12ebb26 Mon Sep 17 00:00:00 2001 From: Alice Michael Date: Fri, 26 Oct 2018 14:33:30 -0700 Subject: virtchnl: white space and reorder White space change. Move the check on the virtchnl_vsi_queue_config_info struct to be close to the struct like all the other similar checks. This keeps it clearer and easier to read. 
Signed-off-by: Alice Michael Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index b2488055fd1d..3130dec40b93 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -171,7 +171,7 @@ struct virtchnl_msg { VIRTCHNL_CHECK_STRUCT_LEN(20, virtchnl_msg); -/* Message descriptions and data structures.*/ +/* Message descriptions and data structures. */ /* VIRTCHNL_OP_VERSION * VF posts its version number to the PF. PF responds with its version number @@ -342,6 +342,8 @@ struct virtchnl_vsi_queue_config_info { struct virtchnl_queue_pair_info qpair[1]; }; +VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info); + /* VIRTCHNL_OP_REQUEST_QUEUES * VF sends this message to request the PF to allocate additional queues to * this VF. Each VF gets a guaranteed number of queues on init but asking for @@ -357,8 +359,6 @@ struct virtchnl_vf_res_request { u16 num_queue_pairs; }; -VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info); - /* VIRTCHNL_OP_CONFIG_IRQ_MAP * VF uses this message to map vectors to queues. * The rxq_map and txq_map fields are bitmaps used to indicate which queues -- cgit v1.2.3 From 843faff87af261bf55eda719a06087af0486a168 Mon Sep 17 00:00:00 2001 From: Alice Michael Date: Fri, 26 Oct 2018 14:33:31 -0700 Subject: virtchnl: Fix off by one error When calculating the valid length for a VIRTCHNL_OP_ENABLE_CHANNELS message, we accidentally allowed messages with one extra virtchnl_channel_info structure on the end. This happened due to an off by one error, because we forgot that valid_len already accounted for one virtchnl_channel_info structure, so we need to subtract one from the num_tc value. 
Signed-off-by: Alice Michael Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 3130dec40b93..7605b5919c3a 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -819,8 +819,8 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, if (msglen >= valid_len) { struct virtchnl_tc_info *vti = (struct virtchnl_tc_info *)msg; - valid_len += vti->num_tc * - sizeof(struct virtchnl_channel_info); + valid_len += (vti->num_tc - 1) * + sizeof(struct virtchnl_channel_info); if (vti->num_tc == 0) err_msg_format = true; } -- cgit v1.2.3 From 43fac3238c1d9363b2a93d8d56c2be0c29c64e6c Mon Sep 17 00:00:00 2001 From: Tony Xie Date: Tue, 30 Oct 2018 18:07:56 +0800 Subject: regmap: add a new macro:REGMAP_IRQ_REG_LINE(_id, _reg_bits) If there are lots of irqs for a device and the register addresses for these irqs are contiguous, we can use this macro to initialize the regmap_irq values. Signed-off-by: Tony Xie Signed-off-by: Mark Brown --- include/linux/regmap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index a367d59c301d..3930f3331652 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1110,6 +1110,12 @@ struct regmap_irq { #define REGMAP_IRQ_REG(_irq, _off, _mask) \ [_irq] = { .reg_offset = (_off), .mask = (_mask) } +#define REGMAP_IRQ_REG_LINE(_id, _reg_bits) \ + [_id] = { \ + .mask = BIT((_id) % (_reg_bits)), \ + .reg_offset = (_id) / (_reg_bits), \ + } + /** * struct regmap_irq_chip - Description of a generic regmap irq_chip. 
* -- cgit v1.2.3 From 7ff4f8035695984c513598e2d49c8277d5d234ca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Nov 2018 15:22:49 -0700 Subject: block: remove dead queue members No more users of ->in_flight[] or ->nr_sorted, get rid of them. Fixes: a1ce35fa4985 ("block: remove dead elevator code") Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e67ad2dd025e..c961329be96b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -486,9 +486,6 @@ struct request_queue { unsigned int dma_pad_mask; unsigned int dma_alignment; - unsigned int nr_sorted; - unsigned int in_flight[2]; - unsigned int rq_timeout; int poll_nsec; -- cgit v1.2.3 From 8f4236d9008b0973a8281256ccfde6913cdec6cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Nov 2018 17:02:04 +0100 Subject: block: remove QUEUE_FLAG_BYPASS and ->bypass Unused since the removal of the legacy request code. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 15 --------------- block/blk-core.c | 21 --------------------- block/blk-mq-debugfs.c | 1 - block/blk-throttle.c | 3 --- include/linux/blk-cgroup.h | 6 +----- include/linux/blkdev.h | 3 --- 6 files changed, 1 insertion(+), 48 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 6c65791bc3fe..a95cddb39f1c 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -270,13 +270,6 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(q->queue_lock); - /* - * This could be the first entry point of blkcg implementation and - * we shouldn't allow anything to go through for a bypassing queue. - */ - if (unlikely(blk_queue_bypass(q))) - return ERR_PTR(blk_queue_dying(q) ? 
-ENODEV : -EBUSY); - blkg = __blkg_lookup(blkcg, q, true); if (blkg) return blkg; @@ -741,14 +734,6 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg, if (!blkcg_policy_enabled(q, pol)) return ERR_PTR(-EOPNOTSUPP); - - /* - * This could be the first entry point of blkcg implementation and - * we shouldn't allow anything to go through for a bypassing queue. - */ - if (unlikely(blk_queue_bypass(q))) - return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY); - return __blkg_lookup(blkcg, q, true /* update_hint */); } diff --git a/block/blk-core.c b/block/blk-core.c index fdc0ad2686c4..1c9b6975cf0a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -370,18 +370,6 @@ void blk_cleanup_queue(struct request_queue *q) blk_set_queue_dying(q); spin_lock_irq(lock); - /* - * A dying queue is permanently in bypass mode till released. Note - * that, unlike blk_queue_bypass_start(), we aren't performing - * synchronize_rcu() after entering bypass mode to avoid the delay - * as some drivers create and destroy a lot of queues while - * probing. This is still safe because blk_release_queue() will be - * called only after the queue refcnt drops to zero and nothing, - * RCU or not, would be traversing the queue by then. - */ - q->bypass_depth++; - queue_flag_set(QUEUE_FLAG_BYPASS, q); - queue_flag_set(QUEUE_FLAG_NOMERGES, q); queue_flag_set(QUEUE_FLAG_NOXMERGES, q); queue_flag_set(QUEUE_FLAG_DYING, q); @@ -589,15 +577,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, q->queue_lock = lock ? : &q->__queue_lock; - /* - * A queue starts its life with bypass turned on to avoid - * unnecessary bypass on/off overhead and nasty surprises during - * init. The initial bypass will be finished when the queue is - * registered by blk_register_queue(). 
- */ - q->bypass_depth = 1; - queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q); - init_waitqueue_head(&q->mq_freeze_wq); /* diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index f021f4817b80..a32bb79d6c95 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -114,7 +114,6 @@ static int queue_pm_only_show(void *data, struct seq_file *m) static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(STOPPED), QUEUE_FLAG_NAME(DYING), - QUEUE_FLAG_NAME(BYPASS), QUEUE_FLAG_NAME(BIDI), QUEUE_FLAG_NAME(NOMERGES), QUEUE_FLAG_NAME(SAME_COMP), diff --git a/block/blk-throttle.c b/block/blk-throttle.c index db1a3a2ae006..8e6f3c9821c2 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2145,9 +2145,6 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, throtl_update_latency_buckets(td); - if (unlikely(blk_queue_bypass(q))) - goto out_unlock; - blk_throtl_assoc_bio(tg, bio); blk_throtl_update_idletime(tg); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1b299e025e83..2c68efc603bd 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -325,16 +325,12 @@ static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, * @q: request_queue of interest * * Lookup blkg for the @blkcg - @q pair. This function should be called - * under RCU read lock and is guaranteed to return %NULL if @q is bypassing - * - see blk_queue_bypass_start() for details. + * under RCU read lock. 
*/ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) { WARN_ON_ONCE(!rcu_read_lock_held()); - - if (unlikely(blk_queue_bypass(q))) - return NULL; return __blkg_lookup(blkcg, q, false); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c961329be96b..dd1e53fd4acf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -548,7 +548,6 @@ struct request_queue { struct mutex sysfs_lock; - int bypass_depth; atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) @@ -586,7 +585,6 @@ struct request_queue { #define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ #define QUEUE_FLAG_DYING 2 /* queue being torn down */ -#define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */ #define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */ @@ -630,7 +628,6 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) -#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ -- cgit v1.2.3 From 079076b3416e78ba2bb3ce38e05e320c388c3120 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Nov 2018 17:02:05 +0100 Subject: block: remove deadline __deadline manipulation helpers No users left since the removal of the legacy request interface, we can remove all the magic bit stealing now and make it a normal field. 
But use WRITE_ONCE/READ_ONCE on the new deadline field, given that we don't seem to have any mechanism to guarantee a new value actually gets seen by other threads. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++-- block/blk-timeout.c | 8 +++++--- block/blk.h | 35 ----------------------------------- include/linux/blkdev.h | 4 +--- 4 files changed, 8 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 411be60d0cb6..4c82b4b4fa3e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -325,7 +325,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->special = NULL; /* tag was already set */ rq->extra_len = 0; - rq->__deadline = 0; + WRITE_ONCE(rq->deadline, 0); rq->timeout = 0; @@ -839,7 +839,7 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next) if (rq->rq_flags & RQF_TIMED_OUT) return false; - deadline = blk_rq_deadline(rq); + deadline = READ_ONCE(rq->deadline); if (time_after_eq(jiffies, deadline)) return true; diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 006cff4390c0..3b0179fbdd6a 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -84,7 +84,7 @@ void blk_abort_request(struct request *req) * immediately and that scan sees the new timeout value. * No need for fancy synchronizations. */ - blk_rq_set_deadline(req, jiffies); + WRITE_ONCE(req->deadline, jiffies); kblockd_schedule_work(&req->q->timeout_work); } EXPORT_SYMBOL_GPL(blk_abort_request); @@ -121,14 +121,16 @@ void blk_add_timer(struct request *req) req->timeout = q->rq_timeout; req->rq_flags &= ~RQF_TIMED_OUT; - blk_rq_set_deadline(req, jiffies + req->timeout); + + expiry = jiffies + req->timeout; + WRITE_ONCE(req->deadline, expiry); /* * If the timer isn't already pending or this timeout is earlier * than an existing one, modify the timer. Round up to next nearest * second. 
*/ - expiry = blk_rq_timeout(round_jiffies_up(blk_rq_deadline(req))); + expiry = blk_rq_timeout(round_jiffies_up(expiry)); if (!timer_pending(&q->timeout) || time_before(expiry, q->timeout.expires)) { diff --git a/block/blk.h b/block/blk.h index 41b64e6e101b..08a5845b03ba 100644 --- a/block/blk.h +++ b/block/blk.h @@ -238,26 +238,6 @@ void blk_account_io_start(struct request *req, bool new_io); void blk_account_io_completion(struct request *req, unsigned int bytes); void blk_account_io_done(struct request *req, u64 now); -/* - * EH timer and IO completion will both attempt to 'grab' the request, make - * sure that only one of them succeeds. Steal the bottom bit of the - * __deadline field for this. - */ -static inline int blk_mark_rq_complete(struct request *rq) -{ - return test_and_set_bit(0, &rq->__deadline); -} - -static inline void blk_clear_rq_complete(struct request *rq) -{ - clear_bit(0, &rq->__deadline); -} - -static inline bool blk_rq_is_complete(struct request *rq) -{ - return test_bit(0, &rq->__deadline); -} - /* * Internal elevator interface */ @@ -322,21 +302,6 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req) q->last_merge = NULL; } -/* - * Steal a bit from this field for legacy IO path atomic IO marking. Note that - * setting the deadline clears the bottom bit, potentially clearing the - * completed bit. The user has to be OK with this (current ones are fine). 
- */ -static inline void blk_rq_set_deadline(struct request *rq, unsigned long time) -{ - rq->__deadline = time & ~0x1UL; -} - -static inline unsigned long blk_rq_deadline(struct request *rq) -{ - return rq->__deadline & ~0x1UL; -} - /* * Internal io_context interface */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dd1e53fd4acf..60507ab7b358 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -224,9 +224,7 @@ struct request { refcount_t ref; unsigned int timeout; - - /* access through blk_rq_set_deadline, blk_rq_deadline */ - unsigned long __deadline; + unsigned long deadline; union { struct __call_single_data csd; -- cgit v1.2.3 From 57d74df90783f6a6b3e79dfdd2a567ce5db3b790 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Nov 2018 17:02:07 +0100 Subject: block: use atomic bitops for ->queue_flags ->queue_flags is generally not set or cleared in the fast path, and also generally set or cleared one flag at a time. Make use of the normal atomic bitops for it so that we don't need to take the queue_lock, which is otherwise mostly unused in the core block layer now. 
Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 54 +++++++----------------------------------------- block/blk-mq.c | 2 +- block/blk-settings.c | 10 ++++----- block/blk-sysfs.c | 28 +++++++++++-------------- block/blk.h | 56 -------------------------------------------------- include/linux/blkdev.h | 1 - 6 files changed, 24 insertions(+), 127 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 1c9b6975cf0a..5c8e66a09d82 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -74,11 +74,7 @@ static struct workqueue_struct *kblockd_workqueue; */ void blk_queue_flag_set(unsigned int flag, struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - queue_flag_set(flag, q); - spin_unlock_irqrestore(q->queue_lock, flags); + set_bit(flag, &q->queue_flags); } EXPORT_SYMBOL(blk_queue_flag_set); @@ -89,11 +85,7 @@ EXPORT_SYMBOL(blk_queue_flag_set); */ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - queue_flag_clear(flag, q); - spin_unlock_irqrestore(q->queue_lock, flags); + clear_bit(flag, &q->queue_flags); } EXPORT_SYMBOL(blk_queue_flag_clear); @@ -107,38 +99,10 @@ EXPORT_SYMBOL(blk_queue_flag_clear); */ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q) { - unsigned long flags; - bool res; - - spin_lock_irqsave(q->queue_lock, flags); - res = queue_flag_test_and_set(flag, q); - spin_unlock_irqrestore(q->queue_lock, flags); - - return res; + return test_and_set_bit(flag, &q->queue_flags); } EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set); -/** - * blk_queue_flag_test_and_clear - atomically test and clear a queue flag - * @flag: flag to be cleared - * @q: request queue - * - * Returns the previous value of @flag - 0 if the flag was not set and 1 if - * the flag was set. 
- */ -bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q) -{ - unsigned long flags; - bool res; - - spin_lock_irqsave(q->queue_lock, flags); - res = queue_flag_test_and_clear(flag, q); - spin_unlock_irqrestore(q->queue_lock, flags); - - return res; -} -EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear); - void blk_rq_init(struct request_queue *q, struct request *rq) { memset(rq, 0, sizeof(*rq)); @@ -368,12 +332,10 @@ void blk_cleanup_queue(struct request_queue *q) /* mark @q DYING, no new request or merges will be allowed afterwards */ mutex_lock(&q->sysfs_lock); blk_set_queue_dying(q); - spin_lock_irq(lock); - queue_flag_set(QUEUE_FLAG_NOMERGES, q); - queue_flag_set(QUEUE_FLAG_NOXMERGES, q); - queue_flag_set(QUEUE_FLAG_DYING, q); - spin_unlock_irq(lock); + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); + blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); + blk_queue_flag_set(QUEUE_FLAG_DYING, q); mutex_unlock(&q->sysfs_lock); /* @@ -384,9 +346,7 @@ void blk_cleanup_queue(struct request_queue *q) rq_qos_exit(q); - spin_lock_irq(lock); - queue_flag_set(QUEUE_FLAG_DEAD, q); - spin_unlock_irq(lock); + blk_queue_flag_set(QUEUE_FLAG_DEAD, q); /* * make sure all in-progress dispatch are completed because diff --git a/block/blk-mq.c b/block/blk-mq.c index 4c82b4b4fa3e..e2717e843727 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2756,7 +2756,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; if (!(set->flags & BLK_MQ_F_SG_MERGE)) - queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); + blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q); q->sg_reserved_size = INT_MAX; diff --git a/block/blk-settings.c b/block/blk-settings.c index cca83590a1dc..3abe831e92c8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -834,16 +834,14 @@ EXPORT_SYMBOL(blk_set_queue_depth); */ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) { - spin_lock_irq(q->queue_lock); if (wc) 
- queue_flag_set(QUEUE_FLAG_WC, q); + blk_queue_flag_set(QUEUE_FLAG_WC, q); else - queue_flag_clear(QUEUE_FLAG_WC, q); + blk_queue_flag_clear(QUEUE_FLAG_WC, q); if (fua) - queue_flag_set(QUEUE_FLAG_FUA, q); + blk_queue_flag_set(QUEUE_FLAG_FUA, q); else - queue_flag_clear(QUEUE_FLAG_FUA, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_clear(QUEUE_FLAG_FUA, q); wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index d4b1b84ba8ca..22fd086eba9f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -316,14 +316,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, if (ret < 0) return ret; - spin_lock_irq(q->queue_lock); - queue_flag_clear(QUEUE_FLAG_NOMERGES, q); - queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); + blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q); + blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); if (nm == 2) - queue_flag_set(QUEUE_FLAG_NOMERGES, q); + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); else if (nm) - queue_flag_set(QUEUE_FLAG_NOXMERGES, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); return ret; } @@ -347,18 +345,16 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) if (ret < 0) return ret; - spin_lock_irq(q->queue_lock); if (val == 2) { - queue_flag_set(QUEUE_FLAG_SAME_COMP, q); - queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); + blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); + blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); } else if (val == 1) { - queue_flag_set(QUEUE_FLAG_SAME_COMP, q); - queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); + blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); + blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); } else if (val == 0) { - queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); - queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); + blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); + blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); } - spin_unlock_irq(q->queue_lock); #endif 
return ret; } @@ -889,7 +885,7 @@ int blk_register_queue(struct gendisk *disk) WARN_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags), "%s is registering an already registered queue\n", kobject_name(&dev->kobj)); - queue_flag_set_unlocked(QUEUE_FLAG_REGISTERED, q); + blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); /* * SCSI probing may synchronously create and destroy a lot of @@ -901,7 +897,7 @@ int blk_register_queue(struct gendisk *disk) * request_queues for non-existent devices never get registered. */ if (!blk_queue_init_done(q)) { - queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); + blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q); percpu_ref_switch_to_percpu(&q->q_usage_counter); } diff --git a/block/blk.h b/block/blk.h index 08a5845b03ba..f2ddc71e93da 100644 --- a/block/blk.h +++ b/block/blk.h @@ -48,62 +48,6 @@ static inline void queue_lockdep_assert_held(struct request_queue *q) lockdep_assert_held(q->queue_lock); } -static inline void queue_flag_set_unlocked(unsigned int flag, - struct request_queue *q) -{ - if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) && - kref_read(&q->kobj.kref)) - lockdep_assert_held(q->queue_lock); - __set_bit(flag, &q->queue_flags); -} - -static inline void queue_flag_clear_unlocked(unsigned int flag, - struct request_queue *q) -{ - if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) && - kref_read(&q->kobj.kref)) - lockdep_assert_held(q->queue_lock); - __clear_bit(flag, &q->queue_flags); -} - -static inline int queue_flag_test_and_clear(unsigned int flag, - struct request_queue *q) -{ - queue_lockdep_assert_held(q); - - if (test_bit(flag, &q->queue_flags)) { - __clear_bit(flag, &q->queue_flags); - return 1; - } - - return 0; -} - -static inline int queue_flag_test_and_set(unsigned int flag, - struct request_queue *q) -{ - queue_lockdep_assert_held(q); - - if (!test_bit(flag, &q->queue_flags)) { - __set_bit(flag, &q->queue_flags); - return 0; - } - - return 1; -} - -static inline void queue_flag_set(unsigned int flag, struct 
request_queue *q) -{ - queue_lockdep_assert_held(q); - __set_bit(flag, &q->queue_flags); -} - -static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) -{ - queue_lockdep_assert_held(q); - __clear_bit(flag, &q->queue_flags); -} - static inline struct blk_flush_queue * blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 60507ab7b358..30d8e0fbd104 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -621,7 +621,6 @@ struct request_queue { void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); -bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) -- cgit v1.2.3 From 6d46964230d182c4b6097379738849a809d791dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Nov 2018 17:02:18 +0100 Subject: block: remove the lock argument to blk_alloc_queue_node With the legacy request path gone there is no real need to override the queue_lock. 
Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 16 +++------------- block/blk-mq.c | 2 +- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/null_blk_main.c | 3 +-- drivers/block/umem.c | 2 +- drivers/lightnvm/core.c | 2 +- drivers/md/dm.c | 2 +- drivers/nvdimm/pmem.c | 2 +- drivers/nvme/host/multipath.c | 2 +- include/linux/blkdev.h | 3 +-- 10 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 5c8e66a09d82..3f94c9de0252 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -393,7 +393,7 @@ EXPORT_SYMBOL(blk_cleanup_queue); struct request_queue *blk_alloc_queue(gfp_t gfp_mask) { - return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL); + return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE); } EXPORT_SYMBOL(blk_alloc_queue); @@ -473,17 +473,8 @@ static void blk_rq_timed_out_timer(struct timer_list *t) * blk_alloc_queue_node - allocate a request queue * @gfp_mask: memory allocation flags * @node_id: NUMA node to allocate memory from - * @lock: For legacy queues, pointer to a spinlock that will be used to e.g. - * serialize calls to the legacy .request_fn() callback. Ignored for - * blk-mq request queues. - * - * Note: pass the queue lock as the third argument to this function instead of - * setting the queue lock pointer explicitly to avoid triggering a sporadic - * crash in the blkcg code. This function namely calls blkcg_init_queue() and - * the queue lock pointer must be set before blkcg_init_queue() is called. */ -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, - spinlock_t *lock) +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { struct request_queue *q; int ret; @@ -534,8 +525,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, #endif mutex_init(&q->sysfs_lock); spin_lock_init(&q->__queue_lock); - - q->queue_lock = lock ? 
: &q->__queue_lock; + q->queue_lock = &q->__queue_lock; init_waitqueue_head(&q->mq_freeze_wq); diff --git a/block/blk-mq.c b/block/blk-mq.c index a3f057fdd045..3b823891b3ef 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2548,7 +2548,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) { struct request_queue *uninit_q, *q; - uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL); + uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); if (!uninit_q) return ERR_PTR(-ENOMEM); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b66c59ce6260..f973a2a845c8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2792,7 +2792,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); if (!q) goto out_no_q; device->rq_queue = q; diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 63c23fcfc4df..62c9654b9ce8 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1659,8 +1659,7 @@ static int null_add_dev(struct nullb_device *dev) } null_init_queues(nullb); } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node, - NULL); + nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 8a27b5adc2b3..aa035cf8a51d 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -888,7 +888,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) card->biotail = &card->bio; spin_lock_init(&card->lock); - card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); + card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); if (!card->queue) goto 
failed_alloc; diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index efb976a863d2..60ab11fcc81c 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -389,7 +389,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_dev; } - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL); + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); if (!tqueue) { ret = -ENOMEM; goto err_disk; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c510179a7f84..a733e4c920af 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1896,7 +1896,7 @@ static struct mapped_device *alloc_dev(int minor) INIT_LIST_HEAD(&md->table_devices); spin_lock_init(&md->uevent_lock); - md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL); + md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id); if (!md->queue) goto bad; md->queue->queuedata = md; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 0e39e3d1846f..f7019294740c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -393,7 +393,7 @@ static int pmem_attach_disk(struct device *dev, return -EBUSY; } - q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL); + q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); if (!q) return -ENOMEM; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5e3cc8c59a39..b82b0d3ca39a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -276,7 +276,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) return 0; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); if (!q) goto out; q->queuedata = head; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 30d8e0fbd104..c4a3a660e3f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1122,8 +1122,7 @@ extern long 
nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, - spinlock_t *lock); +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); -- cgit v1.2.3 From 0d945c1f966b2bcb67bb12be749da0a7fb00201b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Nov 2018 12:17:28 -0700 Subject: block: remove the queue_lock indirection With the legacy request path gone there is no good reason to keep queue_lock as a pointer, we can always use the embedded lock now. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Fixed floppy and blk-cgroup missing conversions and half done edits. Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 2 +- block/bfq-iosched.c | 16 ++++++------ block/blk-cgroup.c | 62 +++++++++++++++++++++++----------------------- block/blk-core.c | 10 +------- block/blk-ioc.c | 14 +++++------ block/blk-iolatency.c | 4 +-- block/blk-mq-sched.c | 4 +-- block/blk-pm.c | 20 +++++++-------- block/blk-pm.h | 6 ++--- block/blk-sysfs.c | 4 +-- block/blk-throttle.c | 22 ++++++++-------- drivers/block/floppy.c | 8 +++--- drivers/block/pktcdvd.c | 4 +-- drivers/ide/ide-pm.c | 10 ++++---- include/linux/blk-cgroup.h | 4 +-- include/linux/blkdev.h | 8 +----- 16 files changed, 92 insertions(+), 106 deletions(-) (limited to 'include/linux') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 9fe5952d117d..a7a1712632b0 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -334,7 +334,7 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) parent = bfqg_parent(bfqg); - lockdep_assert_held(bfqg_to_blkg(bfqg)->q->queue_lock); + lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock); if (unlikely(!parent)) return; diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 
c7636cbefc85..67b22c924aee 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -399,9 +399,9 @@ static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, unsigned long flags; struct bfq_io_cq *icq; - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); icq = icq_to_bic(ioc_lookup_icq(ioc, q)); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); return icq; } @@ -4034,7 +4034,7 @@ static void bfq_update_dispatch_stats(struct request_queue *q, * In addition, the following queue lock guarantees that * bfqq_group(bfqq) exists as well. */ - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); if (idle_timer_disabled) /* * Since the idle timer has been disabled, @@ -4053,7 +4053,7 @@ static void bfq_update_dispatch_stats(struct request_queue *q, bfqg_stats_set_start_empty_time(bfqg); bfqg_stats_update_io_remove(bfqg, rq->cmd_flags); } - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); } #else static inline void bfq_update_dispatch_stats(struct request_queue *q, @@ -4637,11 +4637,11 @@ static void bfq_update_insert_stats(struct request_queue *q, * In addition, the following queue lock guarantees that * bfqq_group(bfqq) exists as well. */ - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags); if (idle_timer_disabled) bfqg_stats_update_idle_time(bfqq_group(bfqq)); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); } #else static inline void bfq_update_insert_stats(struct request_queue *q, @@ -5382,9 +5382,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) } eq->elevator_data = bfqd; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); q->elevator = eq; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); /* * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 3ba23b9bfeb9..0f6b44614165 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -147,7 +147,7 @@ struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); if (blkg && blkg->q == q) { if (update_hint) { - lockdep_assert_held(q->queue_lock); + lockdep_assert_held(&q->queue_lock); rcu_assign_pointer(blkcg->blkg_hint, blkg); } return blkg; @@ -170,7 +170,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, int i, ret; WARN_ON_ONCE(!rcu_read_lock_held()); - lockdep_assert_held(q->queue_lock); + lockdep_assert_held(&q->queue_lock); /* blkg holds a reference to blkcg */ if (!css_tryget_online(&blkcg->css)) { @@ -268,7 +268,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct blkcg_gq *blkg; WARN_ON_ONCE(!rcu_read_lock_held()); - lockdep_assert_held(q->queue_lock); + lockdep_assert_held(&q->queue_lock); blkg = __blkg_lookup(blkcg, q, true); if (blkg) @@ -299,7 +299,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) struct blkcg_gq *parent = blkg->parent; int i; - lockdep_assert_held(blkg->q->queue_lock); + lockdep_assert_held(&blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); /* Something wrong if we are trying to remove same group twice */ @@ -349,7 +349,7 @@ static void blkg_destroy_all(struct request_queue *q) { struct blkcg_gq *blkg, *n; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; @@ -359,7 +359,7 @@ static void blkg_destroy_all(struct request_queue *q) } q->root_blkg = NULL; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); } /* @@ -454,10 +454,10 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { - spin_lock_irq(blkg->q->queue_lock); + spin_lock_irq(&blkg->q->queue_lock); if 
(blkcg_policy_enabled(blkg->q, pol)) total += prfill(sf, blkg->pd[pol->plid], data); - spin_unlock_irq(blkg->q->queue_lock); + spin_unlock_irq(&blkg->q->queue_lock); } rcu_read_unlock(); @@ -655,7 +655,7 @@ u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, struct cgroup_subsys_state *pos_css; u64 sum = 0; - lockdep_assert_held(blkg->q->queue_lock); + lockdep_assert_held(&blkg->q->queue_lock); rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { @@ -698,7 +698,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkg_rwstat sum = { }; int i; - lockdep_assert_held(blkg->q->queue_lock); + lockdep_assert_held(&blkg->q->queue_lock); rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { @@ -729,7 +729,7 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg, struct request_queue *q) { WARN_ON_ONCE(!rcu_read_lock_held()); - lockdep_assert_held(q->queue_lock); + lockdep_assert_held(&q->queue_lock); if (!blkcg_policy_enabled(q, pol)) return ERR_PTR(-EOPNOTSUPP); @@ -750,7 +750,7 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg, */ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx) - __acquires(rcu) __acquires(disk->queue->queue_lock) + __acquires(rcu) __acquires(&disk->queue->queue_lock) { struct gendisk *disk; struct request_queue *q; @@ -778,7 +778,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, q = disk->queue; rcu_read_lock(); - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); blkg = blkg_lookup_check(blkcg, pol, q); if (IS_ERR(blkg)) { @@ -805,7 +805,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, } /* Drop locks to do new blkg allocation with GFP_KERNEL. 
*/ - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); new_blkg = blkg_alloc(pos, q, GFP_KERNEL); @@ -815,7 +815,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, } rcu_read_lock(); - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); blkg = blkg_lookup_check(pos, pol, q); if (IS_ERR(blkg)) { @@ -843,7 +843,7 @@ success: return 0; fail_unlock: - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); fail: put_disk_and_module(disk); @@ -868,9 +868,9 @@ fail: * with blkg_conf_prep(). */ void blkg_conf_finish(struct blkg_conf_ctx *ctx) - __releases(ctx->disk->queue->queue_lock) __releases(rcu) + __releases(&ctx->disk->queue->queue_lock) __releases(rcu) { - spin_unlock_irq(ctx->disk->queue->queue_lock); + spin_unlock_irq(&ctx->disk->queue->queue_lock); rcu_read_unlock(); put_disk_and_module(ctx->disk); } @@ -903,7 +903,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) */ off += scnprintf(buf+off, size-off, "%s ", dname); - spin_lock_irq(blkg->q->queue_lock); + spin_lock_irq(&blkg->q->queue_lock); rwstat = blkg_rwstat_recursive_sum(blkg, NULL, offsetof(struct blkcg_gq, stat_bytes)); @@ -917,7 +917,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]); dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]); - spin_unlock_irq(blkg->q->queue_lock); + spin_unlock_irq(&blkg->q->queue_lock); if (rbytes || wbytes || rios || wios) { has_stats = true; @@ -1038,9 +1038,9 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg) struct blkcg_gq, blkcg_node); struct request_queue *q = blkg->q; - if (spin_trylock(q->queue_lock)) { + if (spin_trylock(&q->queue_lock)) { blkg_destroy(blkg); - spin_unlock(q->queue_lock); + spin_unlock(&q->queue_lock); } else { spin_unlock_irq(&blkcg->lock); cpu_relax(); @@ -1161,12 +1161,12 @@ int blkcg_init_queue(struct request_queue *q) /* Make sure the root blkg exists. 
*/ rcu_read_lock(); - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); blkg = blkg_create(&blkcg_root, q, new_blkg); if (IS_ERR(blkg)) goto err_unlock; q->root_blkg = blkg; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); if (preloaded) @@ -1185,7 +1185,7 @@ err_destroy_all: blkg_destroy_all(q); return ret; err_unlock: - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); if (preloaded) radix_tree_preload_end(); @@ -1200,7 +1200,7 @@ err_unlock: */ void blkcg_drain_queue(struct request_queue *q) { - lockdep_assert_held(q->queue_lock); + lockdep_assert_held(&q->queue_lock); /* * @q could be exiting and already have destroyed all blkgs as @@ -1335,7 +1335,7 @@ pd_prealloc: } } - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); list_for_each_entry(blkg, &q->blkg_list, q_node) { struct blkg_policy_data *pd; @@ -1347,7 +1347,7 @@ pd_prealloc: if (!pd) swap(pd, pd_prealloc); if (!pd) { - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); goto pd_prealloc; } @@ -1361,7 +1361,7 @@ pd_prealloc: __set_bit(pol->plid, q->blkcg_pols); ret = 0; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); out_bypass_end: if (q->mq_ops) blk_mq_unfreeze_queue(q); @@ -1390,7 +1390,7 @@ void blkcg_deactivate_policy(struct request_queue *q, if (q->mq_ops) blk_mq_freeze_queue(q); - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); __clear_bit(pol->plid, q->blkcg_pols); @@ -1403,7 +1403,7 @@ void blkcg_deactivate_policy(struct request_queue *q, } } - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); if (q->mq_ops) blk_mq_unfreeze_queue(q); diff --git a/block/blk-core.c b/block/blk-core.c index 3f94c9de0252..92b6b200e9fb 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -327,8 +327,6 @@ void blk_exit_queue(struct request_queue *q) */ void blk_cleanup_queue(struct request_queue *q) { - spinlock_t *lock = q->queue_lock; - /* mark 
@q DYING, no new request or merges will be allowed afterwards */ mutex_lock(&q->sysfs_lock); blk_set_queue_dying(q); @@ -381,11 +379,6 @@ void blk_cleanup_queue(struct request_queue *q) percpu_ref_exit(&q->q_usage_counter); - spin_lock_irq(lock); - if (q->queue_lock != &q->__queue_lock) - q->queue_lock = &q->__queue_lock; - spin_unlock_irq(lock); - /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } @@ -524,8 +517,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) mutex_init(&q->blk_trace_mutex); #endif mutex_init(&q->sysfs_lock); - spin_lock_init(&q->__queue_lock); - q->queue_lock = &q->__queue_lock; + spin_lock_init(&q->queue_lock); init_waitqueue_head(&q->mq_freeze_wq); diff --git a/block/blk-ioc.c b/block/blk-ioc.c index f91ca6b70d6a..5ed59ac6ae58 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -110,9 +110,9 @@ static void ioc_release_fn(struct work_struct *work) struct io_cq, ioc_node); struct request_queue *q = icq->q; - if (spin_trylock(q->queue_lock)) { + if (spin_trylock(&q->queue_lock)) { ioc_destroy_icq(icq); - spin_unlock(q->queue_lock); + spin_unlock(&q->queue_lock); } else { spin_unlock_irqrestore(&ioc->lock, flags); cpu_relax(); @@ -233,9 +233,9 @@ void ioc_clear_queue(struct request_queue *q) { LIST_HEAD(icq_list); - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); list_splice_init(&q->icq_list, &icq_list); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); __ioc_clear_queue(&icq_list); } @@ -326,7 +326,7 @@ struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q) { struct io_cq *icq; - lockdep_assert_held(q->queue_lock); + lockdep_assert_held(&q->queue_lock); /* * icq's are indexed from @ioc using radix tree and hint pointer, @@ -385,7 +385,7 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, INIT_HLIST_NODE(&icq->ioc_node); /* lock both q and ioc and try to link @icq */ - spin_lock_irq(q->queue_lock); + 
spin_lock_irq(&q->queue_lock); spin_lock(&ioc->lock); if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { @@ -401,7 +401,7 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, } spin_unlock(&ioc->lock); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); radix_tree_preload_end(); return icq; } diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 8edf1b353ad1..5f7f1773be61 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -485,11 +485,11 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) bio_associate_blkcg(bio, &blkcg->css); blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); blkg = blkg_lookup_create(blkcg, q); if (IS_ERR(blkg)) blkg = NULL; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); } if (!blkg) goto out; diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 66fda19be5a3..d084f731d104 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -37,9 +37,9 @@ void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio) struct io_context *ioc = rq_ioc(bio); struct io_cq *icq; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); icq = ioc_lookup_icq(ioc, q); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); if (!icq) { icq = ioc_create_icq(ioc, q, GFP_ATOMIC); diff --git a/block/blk-pm.c b/block/blk-pm.c index f8fdae01bea2..0a028c189897 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -89,12 +89,12 @@ int blk_pre_runtime_suspend(struct request_queue *q) /* Switch q_usage_counter back to per-cpu mode. 
*/ blk_mq_unfreeze_queue(q); - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); if (ret < 0) pm_runtime_mark_last_busy(q->dev); else q->rpm_status = RPM_SUSPENDING; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); if (ret) blk_clear_pm_only(q); @@ -121,14 +121,14 @@ void blk_post_runtime_suspend(struct request_queue *q, int err) if (!q->dev) return; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); if (!err) { q->rpm_status = RPM_SUSPENDED; } else { q->rpm_status = RPM_ACTIVE; pm_runtime_mark_last_busy(q->dev); } - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); if (err) blk_clear_pm_only(q); @@ -151,9 +151,9 @@ void blk_pre_runtime_resume(struct request_queue *q) if (!q->dev) return; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); q->rpm_status = RPM_RESUMING; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); } EXPORT_SYMBOL(blk_pre_runtime_resume); @@ -176,7 +176,7 @@ void blk_post_runtime_resume(struct request_queue *q, int err) if (!q->dev) return; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); if (!err) { q->rpm_status = RPM_ACTIVE; pm_runtime_mark_last_busy(q->dev); @@ -184,7 +184,7 @@ void blk_post_runtime_resume(struct request_queue *q, int err) } else { q->rpm_status = RPM_SUSPENDED; } - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); if (!err) blk_clear_pm_only(q); @@ -207,10 +207,10 @@ EXPORT_SYMBOL(blk_post_runtime_resume); */ void blk_set_runtime_active(struct request_queue *q) { - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); q->rpm_status = RPM_ACTIVE; pm_runtime_mark_last_busy(q->dev); pm_request_autosuspend(q->dev); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); } EXPORT_SYMBOL(blk_set_runtime_active); diff --git a/block/blk-pm.h b/block/blk-pm.h index a8564ea72a41..ea5507d23e75 100644 --- a/block/blk-pm.h +++ b/block/blk-pm.h @@ -21,7 +21,7 @@ static inline void 
blk_pm_mark_last_busy(struct request *rq) static inline void blk_pm_requeue_request(struct request *rq) { - lockdep_assert_held(rq->q->queue_lock); + lockdep_assert_held(&rq->q->queue_lock); if (rq->q->dev && !(rq->rq_flags & RQF_PM)) rq->q->nr_pending--; @@ -30,7 +30,7 @@ static inline void blk_pm_requeue_request(struct request *rq) static inline void blk_pm_add_request(struct request_queue *q, struct request *rq) { - lockdep_assert_held(q->queue_lock); + lockdep_assert_held(&q->queue_lock); if (q->dev && !(rq->rq_flags & RQF_PM)) q->nr_pending++; @@ -38,7 +38,7 @@ static inline void blk_pm_add_request(struct request_queue *q, static inline void blk_pm_put_request(struct request *rq) { - lockdep_assert_held(rq->q->queue_lock); + lockdep_assert_held(&rq->q->queue_lock); if (rq->q->dev && !(rq->rq_flags & RQF_PM)) --rq->q->nr_pending; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 22fd086eba9f..1e370207a20e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -238,10 +238,10 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) return -EINVAL; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); q->limits.max_sectors = max_sectors_kb << 1; q->backing_dev_info->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); return ret; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a665b0950369..d0a23f0bb3ed 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1243,7 +1243,7 @@ static void throtl_pending_timer_fn(struct timer_list *t) bool dispatched; int ret; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); if (throtl_can_upgrade(td, NULL)) throtl_upgrade_state(td); @@ -1266,9 +1266,9 @@ again: break; /* this dispatch windows is still open, relax and repeat */ - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); cpu_relax(); - 
spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); } if (!dispatched) @@ -1290,7 +1290,7 @@ again: queue_work(kthrotld_workqueue, &td->dispatch_work); } out_unlock: - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); } /** @@ -1314,11 +1314,11 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work) bio_list_init(&bio_list_on_stack); - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); for (rw = READ; rw <= WRITE; rw++) while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL))) bio_list_add(&bio_list_on_stack, bio); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); if (!bio_list_empty(&bio_list_on_stack)) { blk_start_plug(&plug); @@ -2141,7 +2141,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw]) goto out; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); throtl_update_latency_buckets(td); @@ -2224,7 +2224,7 @@ again: } out_unlock: - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); out: bio_set_flag(bio, BIO_THROTTLED); @@ -2345,7 +2345,7 @@ static void tg_drain_bios(struct throtl_service_queue *parent_sq) * Dispatch all currently throttled bios on @q through ->make_request_fn(). 
*/ void blk_throtl_drain(struct request_queue *q) - __releases(q->queue_lock) __acquires(q->queue_lock) + __releases(&q->queue_lock) __acquires(&q->queue_lock) { struct throtl_data *td = q->td; struct blkcg_gq *blkg; @@ -2368,7 +2368,7 @@ void blk_throtl_drain(struct request_queue *q) tg_drain_bios(&td->service_queue); rcu_read_unlock(); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); /* all bios now should be in td->service_queue, issue them */ for (rw = READ; rw <= WRITE; rw++) @@ -2376,7 +2376,7 @@ void blk_throtl_drain(struct request_queue *q) NULL))) generic_make_request(bio); - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); } int blk_throtl_init(struct request_queue *q) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a8cfa011c284..eeb4be8d000b 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2255,9 +2255,9 @@ static void request_done(int uptodate) DRS->maxtrack = 1; /* unlock chained buffers */ - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); floppy_end_request(req, 0); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } else { if (rq_data_dir(req) == WRITE) { /* record write error information */ @@ -2269,9 +2269,9 @@ static void request_done(int uptodate) DRWE->last_error_sector = blk_rq_pos(req); DRWE->last_error_generation = DRS->generation; } - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); floppy_end_request(req, BLK_STS_IOERR); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 9381f4e3b221..4adf4c8861cd 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2203,9 +2203,9 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * Some CDRW drives can not handle writes larger than one packet, * even if the size is 
a multiple of the packet size. */ - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); blk_queue_max_hw_sectors(q, pd->settings.size); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); set_bit(PACKET_WRITABLE, &pd->flags); } else { pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index a8c53c98252d..51fe10ac02fa 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -44,15 +44,15 @@ static int ide_pm_execute_rq(struct request *rq) { struct request_queue *q = rq->q; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); if (unlikely(blk_queue_dying(q))) { rq->rq_flags |= RQF_QUIET; scsi_req(rq)->result = -ENXIO; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); blk_mq_end_request(rq, BLK_STS_OK); return -ENXIO; } - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); blk_execute_rq(q, NULL, rq, true); return scsi_req(rq)->result ? -EIO : 0; @@ -214,12 +214,12 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) printk("%s: completing PM request, %s\n", drive->name, (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) ? 
"suspend" : "resume"); #endif - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) blk_mq_stop_hw_queues(q); else drive->dev_flags &= ~IDE_DFLAG_BLOCKED; - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); drive->hwif->rq = NULL; diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 2c68efc603bd..a9e2e2037129 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -717,11 +717,11 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); blkg = blkg_lookup_create(blkcg, q); if (IS_ERR(blkg)) blkg = NULL; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); } throtl = blk_throtl_bio(q, blkg, bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c4a3a660e3f0..1d185f1fc333 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -446,13 +446,7 @@ struct request_queue { */ gfp_t bounce_gfp; - /* - * protects queue structures from reentrancy. ->__queue_lock should - * _never_ be used directly, it is queue private. always use - * ->queue_lock. - */ - spinlock_t __queue_lock; - spinlock_t *queue_lock; + spinlock_t queue_lock; /* * queue kobject -- cgit v1.2.3 From 7fe50ac83f4319c18ed7c634d85cad16bd0bf509 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 12 Nov 2018 14:47:18 -0800 Subject: net: dump more useful information in netdev_rx_csum_fault() Currently netdev_rx_csum_fault() only shows a device name, we need more information about the skb for debugging csum failures. Sample output: ens3: hw csum failure dev features: 0x0000000000014b89 skb len=84 data_len=0 pkt_type=0 gso_size=0 gso_type=0 nr_frags=0 ip_summed=0 csum=0 csum_complete_sw=0 csum_valid=0 csum_level=0 Note, I use pr_err() just to be consistent with the existing one. 
Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++-- net/core/datagram.c | 2 +- net/core/dev.c | 11 +++++++++-- net/core/skbuff.c | 4 ++-- net/sunrpc/socklib.c | 2 +- 5 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97b4233120e4..917ae7b6263e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4343,9 +4343,10 @@ static inline bool can_checksum_protocol(netdev_features_t features, } #ifdef CONFIG_BUG -void netdev_rx_csum_fault(struct net_device *dev); +void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb); #else -static inline void netdev_rx_csum_fault(struct net_device *dev) +static inline void netdev_rx_csum_fault(struct net_device *dev, + struct sk_buff *skb) { } #endif diff --git a/net/core/datagram.c b/net/core/datagram.c index 07983b90d2bd..4bf62b1afa3b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -767,7 +767,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) - netdev_rx_csum_fault(NULL); + netdev_rx_csum_fault(NULL, skb); } return 0; fault: diff --git a/net/core/dev.c b/net/core/dev.c index bf7e0a471186..5927f6a7c301 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3091,10 +3091,17 @@ EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG -void netdev_rx_csum_fault(struct net_device *dev) +void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb) { if (net_ratelimit()) { pr_err("%s: hw csum failure\n", dev ? 
dev->name : ""); + if (dev) + pr_err("dev features: %pNF\n", &dev->features); + pr_err("skb len=%u data_len=%u pkt_type=%u gso_size=%u gso_type=%u nr_frags=%u ip_summed=%u csum=%x csum_complete_sw=%d csum_valid=%d csum_level=%u\n", + skb->len, skb->data_len, skb->pkt_type, + skb_shinfo(skb)->gso_size, skb_shinfo(skb)->gso_type, + skb_shinfo(skb)->nr_frags, skb->ip_summed, skb->csum, + skb->csum_complete_sw, skb->csum_valid, skb->csum_level); dump_stack(); } } @@ -5781,7 +5788,7 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb) if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) - netdev_rx_csum_fault(skb->dev); + netdev_rx_csum_fault(skb->dev, skb); } NAPI_GRO_CB(skb)->csum = wsum; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 396fcb3baad0..fcb1155a00ec 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2653,7 +2653,7 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) - netdev_rx_csum_fault(skb->dev); + netdev_rx_csum_fault(skb->dev, skb); } if (!skb_shared(skb)) skb->csum_valid = !sum; @@ -2673,7 +2673,7 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) - netdev_rx_csum_fault(skb->dev); + netdev_rx_csum_fault(skb->dev, skb); } if (!skb_shared(skb)) { diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 9062967575c4..7e55cfc69697 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -175,7 +175,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) - netdev_rx_csum_fault(skb->dev); + netdev_rx_csum_fault(skb->dev, skb); return 0; no_checksum: if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) -- cgit v1.2.3 From 
1d2f46814d20a55c45ac171739b6885826e0c793 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 15 Nov 2018 09:01:18 +0100 Subject: regulator: wm8994: Pass descriptor instead of GPIO number Instead of passing a global GPIO number for the enable GPIO, pass a descriptor looked up from the device tree node or the board file descriptor table for the regulator. There is a single board file passing the GPIOs for LDO1 and LDO2 through platform data, so augment this to pass descriptors associated with the i2c device as well. The special GPIO enable DT property for the enable GPIO is nonstandard but this was accommodated in commit 6a537d48461deacc57c07ed86d9915e5aa4b3539 "gpio: of: Support regulator nonstandard GPIO properties". Cc: patches@opensource.cirrus.com Acked-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- arch/arm/mach-s3c64xx/mach-crag6410-module.c | 17 +++++++++++++++-- drivers/mfd/wm8994-core.c | 9 --------- drivers/regulator/wm8994-regulator.c | 20 ++++++++++++-------- include/linux/mfd/wm8994/pdata.h | 3 --- 4 files changed, 27 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-s3c64xx/mach-crag6410-module.c b/arch/arm/mach-s3c64xx/mach-crag6410-module.c index 5aa472892465..76c4855a03bc 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410-module.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410-module.c @@ -194,8 +194,8 @@ static struct wm8994_pdata wm8994_pdata = { 0x3, /* IRQ out, active high, CMOS */ }, .ldo = { - { .enable = S3C64XX_GPN(6), .init_data = &wm8994_ldo1, }, - { .enable = S3C64XX_GPN(4), .init_data = &wm8994_ldo2, }, + { .init_data = &wm8994_ldo1, }, + { .init_data = &wm8994_ldo2, }, }, }; @@ -203,6 +203,18 @@ static const struct i2c_board_info wm1277_devs[] = { { I2C_BOARD_INFO("wm8958", 0x1a), /* WM8958 is the superset */ .platform_data = &wm8994_pdata, .irq = GLENFARCLAS_PMIC_IRQ_BASE + WM831X_IRQ_GPIO_2, + .dev_name = "wm8958", + }, +}; + +static struct
gpiod_lookup_table wm8994_gpiod_table = { + .dev_id = "i2c-wm8958", /* I2C device name */ + .table = { + GPIO_LOOKUP("GPION", 6, + "wlf,ldo1ena", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("GPION", 4, + "wlf,ldo2ena", GPIO_ACTIVE_HIGH), + { }, }, }; @@ -381,6 +393,7 @@ static int wlf_gf_module_probe(struct i2c_client *i2c, gpiod_add_lookup_table(&wm5102_reva_gpiod_table); gpiod_add_lookup_table(&wm5102_gpiod_table); + gpiod_add_lookup_table(&wm8994_gpiod_table); if (i < ARRAY_SIZE(gf_mods)) { dev_info(&i2c->dev, "%s revision %d\n", diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c index 22bd6525e09c..04a177efd245 100644 --- a/drivers/mfd/wm8994-core.c +++ b/drivers/mfd/wm8994-core.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -306,14 +305,6 @@ static int wm8994_set_pdata_from_of(struct wm8994 *wm8994) pdata->csnaddr_pd = of_property_read_bool(np, "wlf,csnaddr-pd"); - pdata->ldo[0].enable = of_get_named_gpio(np, "wlf,ldo1ena", 0); - if (pdata->ldo[0].enable < 0) - pdata->ldo[0].enable = 0; - - pdata->ldo[1].enable = of_get_named_gpio(np, "wlf,ldo2ena", 0); - if (pdata->ldo[1].enable < 0) - pdata->ldo[1].enable = 0; - return 0; } #else diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c index 7a4ce6df4f22..d7fec533c403 100644 --- a/drivers/regulator/wm8994-regulator.c +++ b/drivers/regulator/wm8994-regulator.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -129,6 +129,7 @@ static int wm8994_ldo_probe(struct platform_device *pdev) int id = pdev->id % ARRAY_SIZE(pdata->ldo); struct regulator_config config = { }; struct wm8994_ldo *ldo; + struct gpio_desc *gpiod; int ret; dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); @@ -145,12 +146,15 @@ static int wm8994_ldo_probe(struct platform_device *pdev) config.driver_data = ldo; config.regmap = wm8994->regmap; config.init_data = &ldo->init_data; - if (pdata) { - config.ena_gpio = pdata->ldo[id].enable; - 
} else if (wm8994->dev->of_node) { - config.ena_gpio = wm8994->pdata.ldo[id].enable; - config.ena_gpio_initialized = true; - } + + /* Look up LDO enable GPIO from the parent device node */ + gpiod = devm_gpiod_get_optional(pdev->dev.parent, + id ? "wlf,ldo2ena" : "wlf,ldo1ena", + GPIOD_OUT_LOW | + GPIOD_FLAGS_BIT_NONEXCLUSIVE); + if (IS_ERR(gpiod)) + return PTR_ERR(gpiod); + config.ena_gpiod = gpiod; /* Use default constraints if none set up */ if (!pdata || !pdata->ldo[id].init_data || wm8994->dev->of_node) { @@ -159,7 +163,7 @@ static int wm8994_ldo_probe(struct platform_device *pdev) ldo->init_data = wm8994_ldo_default[id]; ldo->init_data.consumer_supplies = &ldo->supply; - if (!config.ena_gpio) + if (!gpiod) ldo->init_data.constraints.valid_ops_mask = 0; } else { ldo->init_data = *pdata->ldo[id].init_data; diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index b19c370fe81a..f346167c0e00 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -20,9 +20,6 @@ #define WM8994_NUM_AIF 3 struct wm8994_ldo_pdata { - /** GPIOs to enable regulator, 0 or less if not available */ - int enable; - const struct regulator_init_data *init_data; }; -- cgit v1.2.3 From 0a020d416d0af0b0c782e2a8363896e756e9121e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Nov 2018 08:22:28 +0000 Subject: lib: introduce initial implementation of object aggregation manager This lib tracks objects which could be of two types: 1) root object 2) nested object - with a "delta" which differentiates it from the associated root object The objects are tracked by a hashtable and reference-counted. User is responsible of implementing callbacks to create/destroy root entity related to each root object and callback to create/destroy nested object delta. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- MAINTAINERS | 8 + include/linux/objagg.h | 46 +++ include/trace/events/objagg.h | 228 ++++++++++++ lib/Kconfig | 3 + lib/Kconfig.debug | 10 + lib/Makefile | 2 + lib/objagg.c | 501 +++++++++++++++++++++++++ lib/test_objagg.c | 835 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 1633 insertions(+) create mode 100644 include/linux/objagg.h create mode 100644 include/trace/events/objagg.h create mode 100644 lib/objagg.c create mode 100644 lib/test_objagg.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index e110e327bf38..3bd775ba51ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10679,6 +10679,14 @@ L: linux-nfc@lists.01.org (moderated for non-subscribers) S: Supported F: drivers/nfc/nxp-nci +OBJAGG +M: Jiri Pirko +L: netdev@vger.kernel.org +S: Supported +F: lib/objagg.c +F: lib/test_objagg.c +F: include/linux/objagg.h + OBJTOOL M: Josh Poimboeuf M: Peter Zijlstra diff --git a/include/linux/objagg.h b/include/linux/objagg.h new file mode 100644 index 000000000000..34f38c186ea0 --- /dev/null +++ b/include/linux/objagg.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#ifndef _OBJAGG_H +#define _OBJAGG_H + +struct objagg_ops { + size_t obj_size; + void * (*delta_create)(void *priv, void *parent_obj, void *obj); + void (*delta_destroy)(void *priv, void *delta_priv); + void * (*root_create)(void *priv, void *obj); + void (*root_destroy)(void *priv, void *root_priv); +}; + +struct objagg; +struct objagg_obj; + +const void *objagg_obj_root_priv(const struct objagg_obj *objagg_obj); +const void *objagg_obj_delta_priv(const struct objagg_obj *objagg_obj); +const void *objagg_obj_raw(const struct objagg_obj *objagg_obj); + +struct objagg_obj *objagg_obj_get(struct objagg *objagg, void *obj); +void objagg_obj_put(struct objagg *objagg, struct objagg_obj *objagg_obj); +struct objagg *objagg_create(const struct objagg_ops *ops, void *priv); +void objagg_destroy(struct objagg *objagg); + +struct objagg_obj_stats { + unsigned int user_count; + unsigned int delta_user_count; /* includes delta object users */ +}; + +struct objagg_obj_stats_info { + struct objagg_obj_stats stats; + struct objagg_obj *objagg_obj; /* associated object */ + bool is_root; +}; + +struct objagg_stats { + unsigned int stats_info_count; + struct objagg_obj_stats_info stats_info[]; +}; + +const struct objagg_stats *objagg_stats_get(struct objagg *objagg); +void objagg_stats_put(const struct objagg_stats *objagg_stats); + +#endif diff --git a/include/trace/events/objagg.h b/include/trace/events/objagg.h new file mode 100644 index 000000000000..fcec0fc9eb0c --- /dev/null +++ b/include/trace/events/objagg.h @@ -0,0 +1,228 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM objagg + +#if !defined(__TRACE_OBJAGG_H) || defined(TRACE_HEADER_MULTI_READ) +#define __TRACE_OBJAGG_H + +#include + +struct objagg; +struct objagg_obj; + +TRACE_EVENT(objagg_create, + TP_PROTO(const struct objagg *objagg), + + TP_ARGS(objagg), + + TP_STRUCT__entry( + __field(const void *, objagg) + ), + + TP_fast_assign( + __entry->objagg = objagg; + ), + + TP_printk("objagg %p", __entry->objagg) +); + +TRACE_EVENT(objagg_destroy, + TP_PROTO(const struct objagg *objagg), + + TP_ARGS(objagg), + + TP_STRUCT__entry( + __field(const void *, objagg) + ), + + TP_fast_assign( + __entry->objagg = objagg; + ), + + TP_printk("objagg %p", __entry->objagg) +); + +TRACE_EVENT(objagg_obj_create, + TP_PROTO(const struct objagg *objagg, + const struct objagg_obj *obj), + + TP_ARGS(objagg, obj), + + TP_STRUCT__entry( + __field(const void *, objagg) + __field(const void *, obj) + ), + + TP_fast_assign( + __entry->objagg = objagg; + __entry->obj = obj; + ), + + TP_printk("objagg %p, obj %p", __entry->objagg, __entry->obj) +); + +TRACE_EVENT(objagg_obj_destroy, + TP_PROTO(const struct objagg *objagg, + const struct objagg_obj *obj), + + TP_ARGS(objagg, obj), + + TP_STRUCT__entry( + __field(const void *, objagg) + __field(const void *, obj) + ), + + TP_fast_assign( + __entry->objagg = objagg; + __entry->obj = obj; + ), + + TP_printk("objagg %p, obj %p", __entry->objagg, __entry->obj) +); + +TRACE_EVENT(objagg_obj_get, + TP_PROTO(const struct objagg *objagg, + const struct objagg_obj *obj, + unsigned int refcount), + + TP_ARGS(objagg, obj, refcount), + + TP_STRUCT__entry( + __field(const void *, objagg) + __field(const void *, obj) + __field(unsigned int, refcount) + ), + + TP_fast_assign( + __entry->objagg = objagg; + __entry->obj = obj; + __entry->refcount = refcount; + ), + + TP_printk("objagg %p, obj %p, refcount %u", + __entry->objagg, __entry->obj, __entry->refcount) +); + +TRACE_EVENT(objagg_obj_put, + 
TP_PROTO(const struct objagg *objagg, + const struct objagg_obj *obj, + unsigned int refcount), + + TP_ARGS(objagg, obj, refcount), + + TP_STRUCT__entry( + __field(const void *, objagg) + __field(const void *, obj) + __field(unsigned int, refcount) + ), + + TP_fast_assign( + __entry->objagg = objagg; + __entry->obj = obj; + __entry->refcount = refcount; + ), + + TP_printk("objagg %p, obj %p, refcount %u", + __entry->objagg, __entry->obj, __entry->refcount) +); + +TRACE_EVENT(objagg_obj_parent_assign, + TP_PROTO(const struct objagg *objagg, + const struct objagg_obj *obj, + const struct objagg_obj *parent, + unsigned int parent_refcount), + + TP_ARGS(objagg, obj, parent, parent_refcount), + + TP_STRUCT__entry( + __field(const void *, objagg) + __field(const void *, obj) + __field(const void *, parent) + __field(unsigned int, parent_refcount) + ), + + TP_fast_assign( + __entry->objagg = objagg; + __entry->obj = obj; + __entry->parent = parent; + __entry->parent_refcount = parent_refcount; + ), + + TP_printk("objagg %p, obj %p, parent %p, parent_refcount %u", + __entry->objagg, __entry->obj, + __entry->parent, __entry->parent_refcount) +); + +TRACE_EVENT(objagg_obj_parent_unassign, + TP_PROTO(const struct objagg *objagg, + const struct objagg_obj *obj, + const struct objagg_obj *parent, + unsigned int parent_refcount), + + TP_ARGS(objagg, obj, parent, parent_refcount), + + TP_STRUCT__entry( + __field(const void *, objagg) + __field(const void *, obj) + __field(const void *, parent) + __field(unsigned int, parent_refcount) + ), + + TP_fast_assign( + __entry->objagg = objagg; + __entry->obj = obj; + __entry->parent = parent; + __entry->parent_refcount = parent_refcount; + ), + + TP_printk("objagg %p, obj %p, parent %p, parent_refcount %u", + __entry->objagg, __entry->obj, + __entry->parent, __entry->parent_refcount) +); + +TRACE_EVENT(objagg_obj_root_create, + TP_PROTO(const struct objagg *objagg, + const struct objagg_obj *obj), + + TP_ARGS(objagg, obj), + + 
TP_STRUCT__entry( + __field(const void *, objagg) + __field(const void *, obj) + ), + + TP_fast_assign( + __entry->objagg = objagg; + __entry->obj = obj; + ), + + TP_printk("objagg %p, obj %p", + __entry->objagg, __entry->obj) +); + +TRACE_EVENT(objagg_obj_root_destroy, + TP_PROTO(const struct objagg *objagg, + const struct objagg_obj *obj), + + TP_ARGS(objagg, obj), + + TP_STRUCT__entry( + __field(const void *, objagg) + __field(const void *, obj) + ), + + TP_fast_assign( + __entry->objagg = objagg; + __entry->obj = obj; + ), + + TP_printk("objagg %p, obj %p", + __entry->objagg, __entry->obj) +); + +#endif /* __TRACE_OBJAGG_H */ + +/* This part must be outside protection */ +#include diff --git a/lib/Kconfig b/lib/Kconfig index a9965f4af4dd..7dbbcfe9cd90 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -624,3 +624,6 @@ config GENERIC_LIB_CMPDI2 config GENERIC_LIB_UCMPDI2 bool + +config OBJAGG + tristate "objagg" if COMPILE_TEST diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1af29b8224fd..b3c91b9e32f8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1976,6 +1976,16 @@ config TEST_MEMCAT_P If unsure, say N. +config TEST_OBJAGG + tristate "Perform selftest on object aggreration manager" + default n + depends on OBJAGG + help + Enable this option to test object aggregation manager on boot + (or module load). + + If unsure, say N. 
+ endif # RUNTIME_TESTING_MENU config MEMTEST diff --git a/lib/Makefile b/lib/Makefile index db06d1237898..f5262d30bfe6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -75,6 +75,7 @@ obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o +obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -274,3 +275,4 @@ obj-$(CONFIG_GENERIC_LIB_LSHRDI3) += lshrdi3.o obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o +obj-$(CONFIG_OBJAGG) += objagg.o diff --git a/lib/objagg.c b/lib/objagg.c new file mode 100644 index 000000000000..c9b457a91153 --- /dev/null +++ b/lib/objagg.c @@ -0,0 +1,501 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +struct objagg { + const struct objagg_ops *ops; + void *priv; + struct rhashtable obj_ht; + struct rhashtable_params ht_params; + struct list_head obj_list; + unsigned int obj_count; +}; + +struct objagg_obj { + struct rhash_head ht_node; /* member of objagg->obj_ht */ + struct list_head list; /* member of objagg->obj_list */ + struct objagg_obj *parent; /* if the object is nested, this + * holds pointer to parent, otherwise NULL + */ + union { + void *delta_priv; /* user delta private */ + void *root_priv; /* user root private */ + }; + unsigned int refcount; /* counts number of users of this object + * including nested objects + */ + struct objagg_obj_stats stats; + unsigned long obj[0]; +}; + +static unsigned int objagg_obj_ref_inc(struct objagg_obj *objagg_obj) +{ + return ++objagg_obj->refcount; +} + +static unsigned int objagg_obj_ref_dec(struct objagg_obj *objagg_obj) +{ + return 
--objagg_obj->refcount; +} + +static void objagg_obj_stats_inc(struct objagg_obj *objagg_obj) +{ + objagg_obj->stats.user_count++; + objagg_obj->stats.delta_user_count++; + if (objagg_obj->parent) + objagg_obj->parent->stats.delta_user_count++; +} + +static void objagg_obj_stats_dec(struct objagg_obj *objagg_obj) +{ + objagg_obj->stats.user_count--; + objagg_obj->stats.delta_user_count--; + if (objagg_obj->parent) + objagg_obj->parent->stats.delta_user_count--; +} + +static bool objagg_obj_is_root(const struct objagg_obj *objagg_obj) +{ + /* Nesting is not supported, so we can use ->parent + * to figure out if the object is root. + */ + return !objagg_obj->parent; +} + +/** + * objagg_obj_root_priv - obtains root private for an object + * @objagg_obj: objagg object instance + * + * Note: all locking must be provided by the caller. + * + * Either the object is root itself when the private is returned + * directly, or the parent is root and its private is returned + * instead. + * + * Returns a user private root pointer. + */ +const void *objagg_obj_root_priv(const struct objagg_obj *objagg_obj) +{ + if (objagg_obj_is_root(objagg_obj)) + return objagg_obj->root_priv; + WARN_ON(!objagg_obj_is_root(objagg_obj->parent)); + return objagg_obj->parent->root_priv; +} +EXPORT_SYMBOL(objagg_obj_root_priv); + +/** + * objagg_obj_delta_priv - obtains delta private for an object + * @objagg_obj: objagg object instance + * + * Note: all locking must be provided by the caller. + * + * Returns user private delta pointer or NULL in case the passed + * object is root. + */ +const void *objagg_obj_delta_priv(const struct objagg_obj *objagg_obj) +{ + if (objagg_obj_is_root(objagg_obj)) + return NULL; + return objagg_obj->delta_priv; +} +EXPORT_SYMBOL(objagg_obj_delta_priv); + +/** + * objagg_obj_raw - obtains object user private pointer + * @objagg_obj: objagg object instance + * + * Note: all locking must be provided by the caller. 
+ * + * Returns user private pointer as was passed to objagg_obj_get() by "obj" arg. + */ +const void *objagg_obj_raw(const struct objagg_obj *objagg_obj) +{ + return objagg_obj->obj; +} +EXPORT_SYMBOL(objagg_obj_raw); + +static struct objagg_obj *objagg_obj_lookup(struct objagg *objagg, void *obj) +{ + return rhashtable_lookup_fast(&objagg->obj_ht, obj, objagg->ht_params); +} + +static int objagg_obj_parent_assign(struct objagg *objagg, + struct objagg_obj *objagg_obj, + struct objagg_obj *parent) +{ + void *delta_priv; + + delta_priv = objagg->ops->delta_create(objagg->priv, parent->obj, + objagg_obj->obj); + if (IS_ERR(delta_priv)) + return PTR_ERR(delta_priv); + + /* User returned a delta private, that means that + * our object can be aggregated into the parent. + */ + objagg_obj->parent = parent; + objagg_obj->delta_priv = delta_priv; + objagg_obj_ref_inc(objagg_obj->parent); + trace_objagg_obj_parent_assign(objagg, objagg_obj, + parent, + parent->refcount); + return 0; +} + +static int objagg_obj_parent_lookup_assign(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + struct objagg_obj *objagg_obj_cur; + int err; + + list_for_each_entry(objagg_obj_cur, &objagg->obj_list, list) { + /* Nesting is not supported. In case the object + * is not root, it cannot be assigned as parent. 
+ */ + if (!objagg_obj_is_root(objagg_obj_cur)) + continue; + err = objagg_obj_parent_assign(objagg, objagg_obj, + objagg_obj_cur); + if (!err) + return 0; + } + return -ENOENT; +} + +static void __objagg_obj_put(struct objagg *objagg, + struct objagg_obj *objagg_obj); + +static void objagg_obj_parent_unassign(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + trace_objagg_obj_parent_unassign(objagg, objagg_obj, + objagg_obj->parent, + objagg_obj->parent->refcount); + objagg->ops->delta_destroy(objagg->priv, objagg_obj->delta_priv); + __objagg_obj_put(objagg, objagg_obj->parent); +} + +static int objagg_obj_root_create(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, + objagg_obj->obj); + if (IS_ERR(objagg_obj->root_priv)) + return PTR_ERR(objagg_obj->root_priv); + + trace_objagg_obj_root_create(objagg, objagg_obj); + return 0; +} + +static void objagg_obj_root_destroy(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + trace_objagg_obj_root_destroy(objagg, objagg_obj); + objagg->ops->root_destroy(objagg->priv, objagg_obj->root_priv); +} + +static int objagg_obj_init(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + int err; + + /* Try to find if the object can be aggregated under an existing one. */ + err = objagg_obj_parent_lookup_assign(objagg, objagg_obj); + if (!err) + return 0; + /* If aggregation is not possible, make the object a root. 
*/ + return objagg_obj_root_create(objagg, objagg_obj); +} + +static void objagg_obj_fini(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + if (!objagg_obj_is_root(objagg_obj)) + objagg_obj_parent_unassign(objagg, objagg_obj); + else + objagg_obj_root_destroy(objagg, objagg_obj); +} + +static struct objagg_obj *objagg_obj_create(struct objagg *objagg, void *obj) +{ + struct objagg_obj *objagg_obj; + int err; + + objagg_obj = kzalloc(sizeof(*objagg_obj) + objagg->ops->obj_size, + GFP_KERNEL); + if (!objagg_obj) + return ERR_PTR(-ENOMEM); + objagg_obj_ref_inc(objagg_obj); + memcpy(objagg_obj->obj, obj, objagg->ops->obj_size); + + err = objagg_obj_init(objagg, objagg_obj); + if (err) + goto err_obj_init; + + err = rhashtable_insert_fast(&objagg->obj_ht, &objagg_obj->ht_node, + objagg->ht_params); + if (err) + goto err_ht_insert; + list_add(&objagg_obj->list, &objagg->obj_list); + objagg->obj_count++; + trace_objagg_obj_create(objagg, objagg_obj); + + return objagg_obj; + +err_ht_insert: + objagg_obj_fini(objagg, objagg_obj); +err_obj_init: + kfree(objagg_obj); + return ERR_PTR(err); +} + +static struct objagg_obj *__objagg_obj_get(struct objagg *objagg, void *obj) +{ + struct objagg_obj *objagg_obj; + + /* First, try to find the object exactly as user passed it, + * perhaps it is already in use. + */ + objagg_obj = objagg_obj_lookup(objagg, obj); + if (objagg_obj) { + objagg_obj_ref_inc(objagg_obj); + return objagg_obj; + } + + return objagg_obj_create(objagg, obj); +} + +/** + * objagg_obj_get - gets an object within objagg instance + * @objagg: objagg instance + * @obj: user-specific private object pointer + * + * Note: all locking must be provided by the caller. + * + * Size of the "obj" memory is specified in "objagg->ops". + * + * There are 3 main options this function wraps: + * 1) The object according to "obj" already exists. In that case + * the reference counter is incremented and the object is returned.
+ * 2) The object does not exist, but it can be aggregated within + * another object. In that case, user ops->delta_create() is called + * to obtain delta data and a new object is created with returned + * user-delta private pointer. + * 3) The object does not exist and cannot be aggregated into + * any of the existing objects. In that case, user ops->root_create() + * is called to create the root and a new object is created with + * returned user-root private pointer. + * + * Returns a pointer to objagg object instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. + */ +struct objagg_obj *objagg_obj_get(struct objagg *objagg, void *obj) +{ + struct objagg_obj *objagg_obj; + + objagg_obj = __objagg_obj_get(objagg, obj); + if (IS_ERR(objagg_obj)) + return objagg_obj; + objagg_obj_stats_inc(objagg_obj); + trace_objagg_obj_get(objagg, objagg_obj, objagg_obj->refcount); + return objagg_obj; +} +EXPORT_SYMBOL(objagg_obj_get); + +static void objagg_obj_destroy(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + trace_objagg_obj_destroy(objagg, objagg_obj); + --objagg->obj_count; + list_del(&objagg_obj->list); + rhashtable_remove_fast(&objagg->obj_ht, &objagg_obj->ht_node, + objagg->ht_params); + objagg_obj_fini(objagg, objagg_obj); + kfree(objagg_obj); +} + +static void __objagg_obj_put(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + if (!objagg_obj_ref_dec(objagg_obj)) + objagg_obj_destroy(objagg, objagg_obj); +} + +/** + * objagg_obj_put - puts an object within objagg instance + * @objagg: objagg instance + * @objagg_obj: objagg object instance + * + * Note: all locking must be provided by the caller. + * + * Symmetric to objagg_obj_get(). 
+ */ +void objagg_obj_put(struct objagg *objagg, struct objagg_obj *objagg_obj) +{ + trace_objagg_obj_put(objagg, objagg_obj, objagg_obj->refcount); + objagg_obj_stats_dec(objagg_obj); + __objagg_obj_put(objagg, objagg_obj); +} +EXPORT_SYMBOL(objagg_obj_put); + +/** + * objagg_create - creates a new objagg instance + * @ops: user-specific callbacks + * @priv: pointer to a private data passed to the ops + * + * Note: all locking must be provided by the caller. + * + * The purpose of the library is to provide an infrastructure to + * aggregate user-specified objects. Library does not care about the type + * of the object. User fills-up ops which take care of the specific + * user object manipulation. + * + * As a very stupid example, consider integer numbers. For example + * number 8 as a root object. That can aggregate number 9 with delta 1, + * number 10 with delta 2, etc. This example is implemented as + * a part of a testing module in test_objagg.c file. + * + * Each objagg instance contains multiple trees. Each tree node is + * represented by "an object". In the current implementation there can be + * only root and leaf nodes. Leaf nodes are called deltas. + * But in general, this can be easily extended for intermediate nodes. + * In that extension, a delta would be associated with all non-root + * nodes. + * + * Returns a pointer to newly created objagg instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro.
+ */ +struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) +{ + struct objagg *objagg; + int err; + + if (WARN_ON(!ops || !ops->root_create || !ops->root_destroy || + !ops->delta_create || !ops->delta_destroy)) + return ERR_PTR(-EINVAL); + objagg = kzalloc(sizeof(*objagg), GFP_KERNEL); + if (!objagg) + return ERR_PTR(-ENOMEM); + objagg->ops = ops; + objagg->priv = priv; + INIT_LIST_HEAD(&objagg->obj_list); + + objagg->ht_params.key_len = ops->obj_size; + objagg->ht_params.key_offset = offsetof(struct objagg_obj, obj); + objagg->ht_params.head_offset = offsetof(struct objagg_obj, ht_node); + + err = rhashtable_init(&objagg->obj_ht, &objagg->ht_params); + if (err) + goto err_rhashtable_init; + + trace_objagg_create(objagg); + return objagg; + +err_rhashtable_init: + kfree(objagg); + return ERR_PTR(err); +} +EXPORT_SYMBOL(objagg_create); + +/** + * objagg_destroy - destroys an objagg instance + * @objagg: objagg instance + * + * Note: all locking must be provided by the caller. + */ +void objagg_destroy(struct objagg *objagg) +{ + trace_objagg_destroy(objagg); + WARN_ON(!list_empty(&objagg->obj_list)); + rhashtable_destroy(&objagg->obj_ht); + kfree(objagg); +} +EXPORT_SYMBOL(objagg_destroy); + +static int objagg_stats_info_sort_cmp_func(const void *a, const void *b) +{ + const struct objagg_obj_stats_info *stats_info1 = a; + const struct objagg_obj_stats_info *stats_info2 = b; + + if (stats_info1->is_root != stats_info2->is_root) + return stats_info2->is_root - stats_info1->is_root; + if (stats_info1->stats.delta_user_count != + stats_info2->stats.delta_user_count) + return stats_info2->stats.delta_user_count - + stats_info1->stats.delta_user_count; + return stats_info2->stats.user_count - stats_info1->stats.user_count; +} + +/** + * objagg_stats_get - obtains stats of the objagg instance + * @objagg: objagg instance + * + * Note: all locking must be provided by the caller.
+ * + * The returned structure contains statistics of all objects + * currently in use, ordered by the following rules: + * 1) Root objects are always on lower indexes than the rest. + * 2) Objects with higher delta user count are always on lower + * indexes. + * 3) In case more objects have the same delta user count, + * the objects are ordered by user count. + * + * Returns a pointer to stats instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. + */ +const struct objagg_stats *objagg_stats_get(struct objagg *objagg) +{ + struct objagg_stats *objagg_stats; + struct objagg_obj *objagg_obj; + size_t alloc_size; + int i; + + alloc_size = sizeof(*objagg_stats) + + sizeof(objagg_stats->stats_info[0]) * objagg->obj_count; + objagg_stats = kzalloc(alloc_size, GFP_KERNEL); + if (!objagg_stats) + return ERR_PTR(-ENOMEM); + + i = 0; + list_for_each_entry(objagg_obj, &objagg->obj_list, list) { + memcpy(&objagg_stats->stats_info[i].stats, &objagg_obj->stats, + sizeof(objagg_stats->stats_info[0].stats)); + objagg_stats->stats_info[i].objagg_obj = objagg_obj; + objagg_stats->stats_info[i].is_root = + objagg_obj_is_root(objagg_obj); + i++; + } + objagg_stats->stats_info_count = i; + + sort(objagg_stats->stats_info, objagg_stats->stats_info_count, + sizeof(struct objagg_obj_stats_info), + objagg_stats_info_sort_cmp_func, NULL); + + return objagg_stats; +} +EXPORT_SYMBOL(objagg_stats_get); + +/** + * objagg_stats_put - puts stats of the objagg instance + * @objagg_stats: objagg instance stats + * + * Note: all locking must be provided by the caller.
+ */ +void objagg_stats_put(const struct objagg_stats *objagg_stats) +{ + kfree(objagg_stats); +} +EXPORT_SYMBOL(objagg_stats_put); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Jiri Pirko "); +MODULE_DESCRIPTION("Object aggregation manager"); diff --git a/lib/test_objagg.c b/lib/test_objagg.c new file mode 100644 index 000000000000..aac5d8e8800c --- /dev/null +++ b/lib/test_objagg.c @@ -0,0 +1,835 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +struct tokey { + unsigned int id; +}; + +#define NUM_KEYS 32 + +static int key_id_index(unsigned int key_id) +{ + if (key_id >= NUM_KEYS) { + WARN_ON(1); + return 0; + } + return key_id; +} + +#define BUF_LEN 128 + +struct world { + unsigned int root_count; + unsigned int delta_count; + char next_root_buf[BUF_LEN]; + struct objagg_obj *objagg_objs[NUM_KEYS]; + unsigned int key_refs[NUM_KEYS]; +}; + +struct root { + struct tokey key; + char buf[BUF_LEN]; +}; + +struct delta { + unsigned int key_id_diff; +}; + +static struct objagg_obj *world_obj_get(struct world *world, + struct objagg *objagg, + unsigned int key_id) +{ + struct objagg_obj *objagg_obj; + struct tokey key; + int err; + + key.id = key_id; + objagg_obj = objagg_obj_get(objagg, &key); + if (IS_ERR(objagg_obj)) { + pr_err("Key %u: Failed to get object.\n", key_id); + return objagg_obj; + } + if (!world->key_refs[key_id_index(key_id)]) { + world->objagg_objs[key_id_index(key_id)] = objagg_obj; + } else if (world->objagg_objs[key_id_index(key_id)] != objagg_obj) { + pr_err("Key %u: God another object for the same key.\n", + key_id); + err = -EINVAL; + goto err_key_id_check; + } + world->key_refs[key_id_index(key_id)]++; + return objagg_obj; + +err_key_id_check: + objagg_obj_put(objagg, objagg_obj); + return ERR_PTR(err); +} + +static void world_obj_put(struct world *world, struct 
objagg *objagg, + unsigned int key_id) +{ + struct objagg_obj *objagg_obj; + + if (!world->key_refs[key_id_index(key_id)]) + return; + objagg_obj = world->objagg_objs[key_id_index(key_id)]; + objagg_obj_put(objagg, objagg_obj); + world->key_refs[key_id_index(key_id)]--; +} + +#define MAX_KEY_ID_DIFF 5 + +static void *delta_create(void *priv, void *parent_obj, void *obj) +{ + struct tokey *parent_key = parent_obj; + struct world *world = priv; + struct tokey *key = obj; + int diff = key->id - parent_key->id; + struct delta *delta; + + if (diff < 0 || diff > MAX_KEY_ID_DIFF) + return ERR_PTR(-EINVAL); + + delta = kzalloc(sizeof(*delta), GFP_KERNEL); + if (!delta) + return ERR_PTR(-ENOMEM); + delta->key_id_diff = diff; + world->delta_count++; + return delta; +} + +static void delta_destroy(void *priv, void *delta_priv) +{ + struct delta *delta = delta_priv; + struct world *world = priv; + + world->delta_count--; + kfree(delta); +} + +static void *root_create(void *priv, void *obj) +{ + struct world *world = priv; + struct tokey *key = obj; + struct root *root; + + root = kzalloc(sizeof(*root), GFP_KERNEL); + if (!root) + return ERR_PTR(-ENOMEM); + memcpy(&root->key, key, sizeof(root->key)); + memcpy(root->buf, world->next_root_buf, sizeof(root->buf)); + world->root_count++; + return root; +} + +static void root_destroy(void *priv, void *root_priv) +{ + struct root *root = root_priv; + struct world *world = priv; + + world->root_count--; + kfree(root); +} + +static int test_nodelta_obj_get(struct world *world, struct objagg *objagg, + unsigned int key_id, bool should_create_root) +{ + unsigned int orig_root_count = world->root_count; + struct objagg_obj *objagg_obj; + const struct root *root; + int err; + + if (should_create_root) + prandom_bytes(world->next_root_buf, + sizeof(world->next_root_buf)); + + objagg_obj = world_obj_get(world, objagg, key_id); + if (IS_ERR(objagg_obj)) { + pr_err("Key %u: Failed to get object.\n", key_id); + return PTR_ERR(objagg_obj); + } + 
if (should_create_root) { + if (world->root_count != orig_root_count + 1) { + pr_err("Key %u: Root was not created\n", key_id); + err = -EINVAL; + goto err_check_root_count; + } + } else { + if (world->root_count != orig_root_count) { + pr_err("Key %u: Root was incorrectly created\n", + key_id); + err = -EINVAL; + goto err_check_root_count; + } + } + root = objagg_obj_root_priv(objagg_obj); + if (root->key.id != key_id) { + pr_err("Key %u: Root has unexpected key id\n", key_id); + err = -EINVAL; + goto err_check_key_id; + } + if (should_create_root && + memcmp(world->next_root_buf, root->buf, sizeof(root->buf))) { + pr_err("Key %u: Buffer does not match the expected content\n", + key_id); + err = -EINVAL; + goto err_check_buf; + } + return 0; + +err_check_buf: +err_check_key_id: +err_check_root_count: + objagg_obj_put(objagg, objagg_obj); + return err; +} + +static int test_nodelta_obj_put(struct world *world, struct objagg *objagg, + unsigned int key_id, bool should_destroy_root) +{ + unsigned int orig_root_count = world->root_count; + + world_obj_put(world, objagg, key_id); + + if (should_destroy_root) { + if (world->root_count != orig_root_count - 1) { + pr_err("Key %u: Root was not destroyed\n", key_id); + return -EINVAL; + } + } else { + if (world->root_count != orig_root_count) { + pr_err("Key %u: Root was incorrectly destroyed\n", + key_id); + return -EINVAL; + } + } + return 0; +} + +static int check_stats_zero(struct objagg *objagg) +{ + const struct objagg_stats *stats; + int err = 0; + + stats = objagg_stats_get(objagg); + if (IS_ERR(stats)) + return PTR_ERR(stats); + + if (stats->stats_info_count != 0) { + pr_err("Stats: Object count is not zero while it should be\n"); + err = -EINVAL; + } + + objagg_stats_put(stats); + return err; +} + +static int check_stats_nodelta(struct objagg *objagg) +{ + const struct objagg_stats *stats; + int i; + int err; + + stats = objagg_stats_get(objagg); + if (IS_ERR(stats)) + return PTR_ERR(stats); + + if 
(stats->stats_info_count != NUM_KEYS) { + pr_err("Stats: Unexpected object count (%u expected, %u returned)\n", + NUM_KEYS, stats->stats_info_count); + err = -EINVAL; + goto stats_put; + } + + for (i = 0; i < stats->stats_info_count; i++) { + if (stats->stats_info[i].stats.user_count != 2) { + pr_err("Stats: incorrect user count\n"); + err = -EINVAL; + goto stats_put; + } + if (stats->stats_info[i].stats.delta_user_count != 2) { + pr_err("Stats: incorrect delta user count\n"); + err = -EINVAL; + goto stats_put; + } + } + err = 0; + +stats_put: + objagg_stats_put(stats); + return err; +} + +static void *delta_create_dummy(void *priv, void *parent_obj, void *obj) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static void delta_destroy_dummy(void *priv, void *delta_priv) +{ +} + +static const struct objagg_ops nodelta_ops = { + .obj_size = sizeof(struct tokey), + .delta_create = delta_create_dummy, + .delta_destroy = delta_destroy_dummy, + .root_create = root_create, + .root_destroy = root_destroy, +}; + +static int test_nodelta(void) +{ + struct world world = {}; + struct objagg *objagg; + int i; + int err; + + objagg = objagg_create(&nodelta_ops, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + err = check_stats_zero(objagg); + if (err) + goto err_stats_first_zero; + + /* First round of gets, the root objects should be created */ + for (i = 0; i < NUM_KEYS; i++) { + err = test_nodelta_obj_get(&world, objagg, i, true); + if (err) + goto err_obj_first_get; + } + + /* Do the second round of gets, all roots are already created, + * make sure that no new root is created + */ + for (i = 0; i < NUM_KEYS; i++) { + err = test_nodelta_obj_get(&world, objagg, i, false); + if (err) + goto err_obj_second_get; + } + + err = check_stats_nodelta(objagg); + if (err) + goto err_stats_nodelta; + + for (i = NUM_KEYS - 1; i >= 0; i--) { + err = test_nodelta_obj_put(&world, objagg, i, false); + if (err) + goto err_obj_first_put; + } + for (i = NUM_KEYS - 1; i >= 0; i--) { + err = 
test_nodelta_obj_put(&world, objagg, i, true); + if (err) + goto err_obj_second_put; + } + + err = check_stats_zero(objagg); + if (err) + goto err_stats_second_zero; + + objagg_destroy(objagg); + return 0; + +err_stats_nodelta: +err_obj_first_put: +err_obj_second_get: + for (i--; i >= 0; i--) + world_obj_put(&world, objagg, i); + + i = NUM_KEYS; +err_obj_first_get: +err_obj_second_put: + for (i--; i >= 0; i--) + world_obj_put(&world, objagg, i); +err_stats_first_zero: +err_stats_second_zero: + objagg_destroy(objagg); + return err; +} + +static const struct objagg_ops delta_ops = { + .obj_size = sizeof(struct tokey), + .delta_create = delta_create, + .delta_destroy = delta_destroy, + .root_create = root_create, + .root_destroy = root_destroy, +}; + +enum action { + ACTION_GET, + ACTION_PUT, +}; + +enum expect_delta { + EXPECT_DELTA_SAME, + EXPECT_DELTA_INC, + EXPECT_DELTA_DEC, +}; + +enum expect_root { + EXPECT_ROOT_SAME, + EXPECT_ROOT_INC, + EXPECT_ROOT_DEC, +}; + +struct expect_stats_info { + struct objagg_obj_stats stats; + bool is_root; + unsigned int key_id; +}; + +struct expect_stats { + unsigned int info_count; + struct expect_stats_info info[NUM_KEYS]; +}; + +struct action_item { + unsigned int key_id; + enum action action; + enum expect_delta expect_delta; + enum expect_root expect_root; + struct expect_stats expect_stats; +}; + +#define EXPECT_STATS(count, ...) 
\ +{ \ + .info_count = count, \ + .info = { __VA_ARGS__ } \ +} + +#define ROOT(key_id, user_count, delta_user_count) \ + {{user_count, delta_user_count}, true, key_id} + +#define DELTA(key_id, user_count) \ + {{user_count, user_count}, false, key_id} + +static const struct action_item action_items[] = { + { + 1, ACTION_GET, EXPECT_DELTA_SAME, EXPECT_ROOT_INC, + EXPECT_STATS(1, ROOT(1, 1, 1)), + }, /* r: 1 d: */ + { + 7, ACTION_GET, EXPECT_DELTA_SAME, EXPECT_ROOT_INC, + EXPECT_STATS(2, ROOT(1, 1, 1), ROOT(7, 1, 1)), + }, /* r: 1, 7 d: */ + { + 3, ACTION_GET, EXPECT_DELTA_INC, EXPECT_ROOT_SAME, + EXPECT_STATS(3, ROOT(1, 1, 2), ROOT(7, 1, 1), + DELTA(3, 1)), + }, /* r: 1, 7 d: 3^1 */ + { + 5, ACTION_GET, EXPECT_DELTA_INC, EXPECT_ROOT_SAME, + EXPECT_STATS(4, ROOT(1, 1, 3), ROOT(7, 1, 1), + DELTA(3, 1), DELTA(5, 1)), + }, /* r: 1, 7 d: 3^1, 5^1 */ + { + 3, ACTION_GET, EXPECT_DELTA_SAME, EXPECT_ROOT_SAME, + EXPECT_STATS(4, ROOT(1, 1, 4), ROOT(7, 1, 1), + DELTA(3, 2), DELTA(5, 1)), + }, /* r: 1, 7 d: 3^1, 3^1, 5^1 */ + { + 1, ACTION_GET, EXPECT_DELTA_SAME, EXPECT_ROOT_SAME, + EXPECT_STATS(4, ROOT(1, 2, 5), ROOT(7, 1, 1), + DELTA(3, 2), DELTA(5, 1)), + }, /* r: 1, 1, 7 d: 3^1, 3^1, 5^1 */ + { + 30, ACTION_GET, EXPECT_DELTA_SAME, EXPECT_ROOT_INC, + EXPECT_STATS(5, ROOT(1, 2, 5), ROOT(7, 1, 1), ROOT(30, 1, 1), + DELTA(3, 2), DELTA(5, 1)), + }, /* r: 1, 1, 7, 30 d: 3^1, 3^1, 5^1 */ + { + 8, ACTION_GET, EXPECT_DELTA_INC, EXPECT_ROOT_SAME, + EXPECT_STATS(6, ROOT(1, 2, 5), ROOT(7, 1, 2), ROOT(30, 1, 1), + DELTA(3, 2), DELTA(5, 1), DELTA(8, 1)), + }, /* r: 1, 1, 7, 30 d: 3^1, 3^1, 5^1, 8^7 */ + { + 8, ACTION_GET, EXPECT_DELTA_SAME, EXPECT_ROOT_SAME, + EXPECT_STATS(6, ROOT(1, 2, 5), ROOT(7, 1, 3), ROOT(30, 1, 1), + DELTA(3, 2), DELTA(8, 2), DELTA(5, 1)), + }, /* r: 1, 1, 7, 30 d: 3^1, 3^1, 5^1, 8^7, 8^7 */ + { + 3, ACTION_PUT, EXPECT_DELTA_SAME, EXPECT_ROOT_SAME, + EXPECT_STATS(6, ROOT(1, 2, 4), ROOT(7, 1, 3), ROOT(30, 1, 1), + DELTA(8, 2), DELTA(3, 1), DELTA(5, 1)), + }, /* r: 1, 
1, 7, 30 d: 3^1, 5^1, 8^7, 8^7 */ + { + 3, ACTION_PUT, EXPECT_DELTA_DEC, EXPECT_ROOT_SAME, + EXPECT_STATS(5, ROOT(1, 2, 3), ROOT(7, 1, 3), ROOT(30, 1, 1), + DELTA(8, 2), DELTA(5, 1)), + }, /* r: 1, 1, 7, 30 d: 5^1, 8^7, 8^7 */ + { + 1, ACTION_PUT, EXPECT_DELTA_SAME, EXPECT_ROOT_SAME, + EXPECT_STATS(5, ROOT(7, 1, 3), ROOT(1, 1, 2), ROOT(30, 1, 1), + DELTA(8, 2), DELTA(5, 1)), + }, /* r: 1, 7, 30 d: 5^1, 8^7, 8^7 */ + { + 1, ACTION_PUT, EXPECT_DELTA_SAME, EXPECT_ROOT_SAME, + EXPECT_STATS(5, ROOT(7, 1, 3), ROOT(30, 1, 1), ROOT(1, 0, 1), + DELTA(8, 2), DELTA(5, 1)), + }, /* r: 7, 30 d: 5^1, 8^7, 8^7 */ + { + 5, ACTION_PUT, EXPECT_DELTA_DEC, EXPECT_ROOT_DEC, + EXPECT_STATS(3, ROOT(7, 1, 3), ROOT(30, 1, 1), + DELTA(8, 2)), + }, /* r: 7, 30 d: 8^7, 8^7 */ + { + 5, ACTION_GET, EXPECT_DELTA_SAME, EXPECT_ROOT_INC, + EXPECT_STATS(4, ROOT(7, 1, 3), ROOT(30, 1, 1), ROOT(5, 1, 1), + DELTA(8, 2)), + }, /* r: 7, 30, 5 d: 8^7, 8^7 */ + { + 6, ACTION_GET, EXPECT_DELTA_INC, EXPECT_ROOT_SAME, + EXPECT_STATS(5, ROOT(7, 1, 3), ROOT(5, 1, 2), ROOT(30, 1, 1), + DELTA(8, 2), DELTA(6, 1)), + }, /* r: 7, 30, 5 d: 8^7, 8^7, 6^5 */ + { + 8, ACTION_GET, EXPECT_DELTA_SAME, EXPECT_ROOT_SAME, + EXPECT_STATS(5, ROOT(7, 1, 4), ROOT(5, 1, 2), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(6, 1)), + }, /* r: 7, 30, 5 d: 8^7, 8^7, 8^7, 6^5 */ + { + 8, ACTION_PUT, EXPECT_DELTA_SAME, EXPECT_ROOT_SAME, + EXPECT_STATS(5, ROOT(7, 1, 3), ROOT(5, 1, 2), ROOT(30, 1, 1), + DELTA(8, 2), DELTA(6, 1)), + }, /* r: 7, 30, 5 d: 8^7, 8^7, 6^5 */ + { + 8, ACTION_PUT, EXPECT_DELTA_SAME, EXPECT_ROOT_SAME, + EXPECT_STATS(5, ROOT(7, 1, 2), ROOT(5, 1, 2), ROOT(30, 1, 1), + DELTA(8, 1), DELTA(6, 1)), + }, /* r: 7, 30, 5 d: 8^7, 6^5 */ + { + 8, ACTION_PUT, EXPECT_DELTA_DEC, EXPECT_ROOT_SAME, + EXPECT_STATS(4, ROOT(5, 1, 2), ROOT(7, 1, 1), ROOT(30, 1, 1), + DELTA(6, 1)), + }, /* r: 7, 30, 5 d: 6^5 */ + { + 8, ACTION_GET, EXPECT_DELTA_INC, EXPECT_ROOT_SAME, + EXPECT_STATS(5, ROOT(5, 1, 3), ROOT(7, 1, 1), ROOT(30, 1, 1), + DELTA(6, 1), 
DELTA(8, 1)), + }, /* r: 7, 30, 5 d: 6^5, 8^5 */ + { + 7, ACTION_PUT, EXPECT_DELTA_SAME, EXPECT_ROOT_DEC, + EXPECT_STATS(4, ROOT(5, 1, 3), ROOT(30, 1, 1), + DELTA(6, 1), DELTA(8, 1)), + }, /* r: 30, 5 d: 6^5, 8^5 */ + { + 30, ACTION_PUT, EXPECT_DELTA_SAME, EXPECT_ROOT_DEC, + EXPECT_STATS(3, ROOT(5, 1, 3), + DELTA(6, 1), DELTA(8, 1)), + }, /* r: 5 d: 6^5, 8^5 */ + { + 5, ACTION_PUT, EXPECT_DELTA_SAME, EXPECT_ROOT_SAME, + EXPECT_STATS(3, ROOT(5, 0, 2), + DELTA(6, 1), DELTA(8, 1)), + }, /* r: d: 6^5, 8^5 */ + { + 6, ACTION_PUT, EXPECT_DELTA_DEC, EXPECT_ROOT_SAME, + EXPECT_STATS(2, ROOT(5, 0, 1), + DELTA(8, 1)), + }, /* r: d: 6^5 */ + { + 8, ACTION_PUT, EXPECT_DELTA_DEC, EXPECT_ROOT_DEC, + EXPECT_STATS(0, ), + }, /* r: d: */ +}; + +static int check_expect(struct world *world, + const struct action_item *action_item, + unsigned int orig_delta_count, + unsigned int orig_root_count) +{ + unsigned int key_id = action_item->key_id; + + switch (action_item->expect_delta) { + case EXPECT_DELTA_SAME: + if (orig_delta_count != world->delta_count) { + pr_err("Key %u: Delta count changed while expected to remain the same.\n", + key_id); + return -EINVAL; + } + break; + case EXPECT_DELTA_INC: + if (WARN_ON(action_item->action == ACTION_PUT)) + return -EINVAL; + if (orig_delta_count + 1 != world->delta_count) { + pr_err("Key %u: Delta count was not incremented.\n", + key_id); + return -EINVAL; + } + break; + case EXPECT_DELTA_DEC: + if (WARN_ON(action_item->action == ACTION_GET)) + return -EINVAL; + if (orig_delta_count - 1 != world->delta_count) { + pr_err("Key %u: Delta count was not decremented.\n", + key_id); + return -EINVAL; + } + break; + } + + switch (action_item->expect_root) { + case EXPECT_ROOT_SAME: + if (orig_root_count != world->root_count) { + pr_err("Key %u: Root count changed while expected to remain the same.\n", + key_id); + return -EINVAL; + } + break; + case EXPECT_ROOT_INC: + if (WARN_ON(action_item->action == ACTION_PUT)) + return -EINVAL; + if 
(orig_root_count + 1 != world->root_count) { + pr_err("Key %u: Root count was not incremented.\n", + key_id); + return -EINVAL; + } + break; + case EXPECT_ROOT_DEC: + if (WARN_ON(action_item->action == ACTION_GET)) + return -EINVAL; + if (orig_root_count - 1 != world->root_count) { + pr_err("Key %u: Root count was not decremented.\n", + key_id); + return -EINVAL; + } + } + + return 0; +} + +static unsigned int obj_to_key_id(struct objagg_obj *objagg_obj) +{ + const struct tokey *root_key; + const struct delta *delta; + unsigned int key_id; + + root_key = objagg_obj_root_priv(objagg_obj); + key_id = root_key->id; + delta = objagg_obj_delta_priv(objagg_obj); + if (delta) + key_id += delta->key_id_diff; + return key_id; +} + +static int +check_expect_stats_nums(const struct objagg_obj_stats_info *stats_info, + const struct expect_stats_info *expect_stats_info, + const char **errmsg) +{ + if (stats_info->is_root != expect_stats_info->is_root) { + if (errmsg) + *errmsg = "Incorrect root/delta indication"; + return -EINVAL; + } + if (stats_info->stats.user_count != + expect_stats_info->stats.user_count) { + if (errmsg) + *errmsg = "Incorrect user count"; + return -EINVAL; + } + if (stats_info->stats.delta_user_count != + expect_stats_info->stats.delta_user_count) { + if (errmsg) + *errmsg = "Incorrect delta user count"; + return -EINVAL; + } + return 0; +} + +static int +check_expect_stats_key_id(const struct objagg_obj_stats_info *stats_info, + const struct expect_stats_info *expect_stats_info, + const char **errmsg) +{ + if (obj_to_key_id(stats_info->objagg_obj) != + expect_stats_info->key_id) { + if (errmsg) + *errmsg = "incorrect key id"; + return -EINVAL; + } + return 0; +} + +static int check_expect_stats_neigh(const struct objagg_stats *stats, + const struct expect_stats *expect_stats, + int pos) +{ + int i; + int err; + + for (i = pos - 1; i >= 0; i--) { + err = check_expect_stats_nums(&stats->stats_info[i], + &expect_stats->info[pos], NULL); + if (err) + break; 
+ err = check_expect_stats_key_id(&stats->stats_info[i], + &expect_stats->info[pos], NULL); + if (!err) + return 0; + } + for (i = pos + 1; i < stats->stats_info_count; i++) { + err = check_expect_stats_nums(&stats->stats_info[i], + &expect_stats->info[pos], NULL); + if (err) + break; + err = check_expect_stats_key_id(&stats->stats_info[i], + &expect_stats->info[pos], NULL); + if (!err) + return 0; + } + return -EINVAL; +} + +static int __check_expect_stats(const struct objagg_stats *stats, + const struct expect_stats *expect_stats, + const char **errmsg) +{ + int i; + int err; + + if (stats->stats_info_count != expect_stats->info_count) { + *errmsg = "Unexpected object count"; + return -EINVAL; + } + + for (i = 0; i < stats->stats_info_count; i++) { + err = check_expect_stats_nums(&stats->stats_info[i], + &expect_stats->info[i], errmsg); + if (err) + return err; + err = check_expect_stats_key_id(&stats->stats_info[i], + &expect_stats->info[i], errmsg); + if (err) { + /* It is possible that one of the neighbor stats with + * same numbers have the correct key id, so check it + */ + err = check_expect_stats_neigh(stats, expect_stats, i); + if (err) + return err; + } + } + return 0; +} + +static int check_expect_stats(struct objagg *objagg, + const struct expect_stats *expect_stats, + const char **errmsg) +{ + const struct objagg_stats *stats; + int err; + + stats = objagg_stats_get(objagg); + if (IS_ERR(stats)) + return PTR_ERR(stats); + err = __check_expect_stats(stats, expect_stats, errmsg); + objagg_stats_put(stats); + return err; +} + +static int test_delta_action_item(struct world *world, + struct objagg *objagg, + const struct action_item *action_item, + bool inverse) +{ + unsigned int orig_delta_count = world->delta_count; + unsigned int orig_root_count = world->root_count; + unsigned int key_id = action_item->key_id; + enum action action = action_item->action; + struct objagg_obj *objagg_obj; + const char *errmsg; + int err; + + if (inverse) + action = action 
== ACTION_GET ? ACTION_PUT : ACTION_GET; + + switch (action) { + case ACTION_GET: + objagg_obj = world_obj_get(world, objagg, key_id); + if (IS_ERR(objagg_obj)) + return PTR_ERR(objagg_obj); + break; + case ACTION_PUT: + world_obj_put(world, objagg, key_id); + break; + } + + if (inverse) + return 0; + err = check_expect(world, action_item, + orig_delta_count, orig_root_count); + if (err) + goto errout; + + err = check_expect_stats(objagg, &action_item->expect_stats, &errmsg); + if (err) { + pr_err("Key %u: Stats: %s\n", action_item->key_id, errmsg); + goto errout; + } + + return 0; + +errout: + /* This can only happen when action is not inversed. + * So in case of an error, cleanup by doing inverse action. + */ + test_delta_action_item(world, objagg, action_item, true); + return err; +} + +static int test_delta(void) +{ + struct world world = {}; + struct objagg *objagg; + int i; + int err; + + objagg = objagg_create(&delta_ops, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < ARRAY_SIZE(action_items); i++) { + err = test_delta_action_item(&world, objagg, + &action_items[i], false); + if (err) + goto err_do_action_item; + } + + objagg_destroy(objagg); + return 0; + +err_do_action_item: + for (i--; i >= 0; i--) + test_delta_action_item(&world, objagg, &action_items[i], true); + + objagg_destroy(objagg); + return err; +} + +static int __init test_objagg_init(void) +{ + int err; + + err = test_nodelta(); + if (err) + return err; + return test_delta(); +} + +static void __exit test_objagg_exit(void) +{ +} + +module_init(test_objagg_init); +module_exit(test_objagg_exit); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Jiri Pirko "); +MODULE_DESCRIPTION("Test module for objagg"); -- cgit v1.2.3 From 344e9ffcbd1898e1dc04085564a6e05c30ea8199 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Nov 2018 12:22:51 -0700 Subject: block: add queue_is_mq() helper Various spots check for q->mq_ops being non-NULL, but provide a helper to do this 
instead. Where the ->mq_ops != NULL check is redundant, remove it. Since mq == rq-based now that legacy is gone, get rid of the queue_is_rq_based() and just use queue_is_mq() everywhere. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 8 ++++---- block/blk-core.c | 12 ++++++------ block/blk-flush.c | 3 +-- block/blk-mq.c | 2 +- block/blk-sysfs.c | 14 +++++++------- block/blk-throttle.c | 2 +- block/blk-wbt.c | 2 +- block/blk-zoned.c | 2 +- block/bsg.c | 2 +- block/elevator.c | 11 +++++------ block/genhd.c | 8 ++++---- drivers/md/dm-rq.c | 2 +- drivers/md/dm-table.c | 4 ++-- include/linux/blkdev.h | 6 +----- 14 files changed, 36 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0f6b44614165..63d226a084cd 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1324,7 +1324,7 @@ int blkcg_activate_policy(struct request_queue *q, if (blkcg_policy_enabled(q, pol)) return 0; - if (q->mq_ops) + if (queue_is_mq(q)) blk_mq_freeze_queue(q); pd_prealloc: if (!pd_prealloc) { @@ -1363,7 +1363,7 @@ pd_prealloc: spin_unlock_irq(&q->queue_lock); out_bypass_end: - if (q->mq_ops) + if (queue_is_mq(q)) blk_mq_unfreeze_queue(q); if (pd_prealloc) pol->pd_free_fn(pd_prealloc); @@ -1387,7 +1387,7 @@ void blkcg_deactivate_policy(struct request_queue *q, if (!blkcg_policy_enabled(q, pol)) return; - if (q->mq_ops) + if (queue_is_mq(q)) blk_mq_freeze_queue(q); spin_lock_irq(&q->queue_lock); @@ -1405,7 +1405,7 @@ void blkcg_deactivate_policy(struct request_queue *q, spin_unlock_irq(&q->queue_lock); - if (q->mq_ops) + if (queue_is_mq(q)) blk_mq_unfreeze_queue(q); } EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); diff --git a/block/blk-core.c b/block/blk-core.c index 92b6b200e9fb..0b684a520a11 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -232,7 +232,7 @@ void blk_sync_queue(struct request_queue *q) del_timer_sync(&q->timeout); cancel_work_sync(&q->timeout_work); - if (q->mq_ops) { + 
if (queue_is_mq(q)) { struct blk_mq_hw_ctx *hctx; int i; @@ -281,7 +281,7 @@ void blk_set_queue_dying(struct request_queue *q) */ blk_freeze_queue_start(q); - if (q->mq_ops) + if (queue_is_mq(q)) blk_mq_wake_waiters(q); /* Make blk_queue_enter() reexamine the DYING flag. */ @@ -356,7 +356,7 @@ void blk_cleanup_queue(struct request_queue *q) * blk_freeze_queue() should be enough for cases of passthrough * request. */ - if (q->mq_ops && blk_queue_init_done(q)) + if (queue_is_mq(q) && blk_queue_init_done(q)) blk_mq_quiesce_queue(q); /* for synchronous bio-based driver finish in-flight integrity i/o */ @@ -374,7 +374,7 @@ void blk_cleanup_queue(struct request_queue *q) blk_exit_queue(q); - if (q->mq_ops) + if (queue_is_mq(q)) blk_mq_free_queue(q); percpu_ref_exit(&q->q_usage_counter); @@ -982,7 +982,7 @@ generic_make_request_checks(struct bio *bio) * For a REQ_NOWAIT based request, return -EOPNOTSUPP * if queue is not a request based queue. */ - if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q)) + if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) goto not_supported; if (should_fail_bio(bio)) @@ -1657,7 +1657,7 @@ EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); */ int blk_lld_busy(struct request_queue *q) { - if (q->mq_ops && q->mq_ops->busy) + if (queue_is_mq(q) && q->mq_ops->busy) return q->mq_ops->busy(q); return 0; diff --git a/block/blk-flush.c b/block/blk-flush.c index fcd18b158fd6..a3fc7191c694 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -273,8 +273,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, * assigned to empty flushes, and we deadlock if we are expecting * other requests to make progress. Don't defer for that case. 
*/ - if (!list_empty(&fq->flush_data_in_flight) && - !(q->mq_ops && q->elevator) && + if (!list_empty(&fq->flush_data_in_flight) && q->elevator && time_before(jiffies, fq->flush_pending_since + FLUSH_PENDING_TIMEOUT)) return; diff --git a/block/blk-mq.c b/block/blk-mq.c index 3b823891b3ef..32b246ed44c0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -150,7 +150,7 @@ void blk_freeze_queue_start(struct request_queue *q) freeze_depth = atomic_inc_return(&q->mq_freeze_depth); if (freeze_depth == 1) { percpu_ref_kill(&q->q_usage_counter); - if (q->mq_ops) + if (queue_is_mq(q)) blk_mq_run_hw_queues(q, false); } } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 1e370207a20e..80eef48fddc8 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -68,7 +68,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) unsigned long nr; int ret, err; - if (!q->mq_ops) + if (!queue_is_mq(q)) return -EINVAL; ret = queue_var_store(&nr, page, count); @@ -835,12 +835,12 @@ static void __blk_release_queue(struct work_struct *work) blk_queue_free_zone_bitmaps(q); - if (q->mq_ops) + if (queue_is_mq(q)) blk_mq_release(q); blk_trace_shutdown(q); - if (q->mq_ops) + if (queue_is_mq(q)) blk_mq_debugfs_unregister(q); bioset_exit(&q->bio_split); @@ -914,7 +914,7 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } - if (q->mq_ops) { + if (queue_is_mq(q)) { __blk_mq_register_dev(dev, q); blk_mq_debugfs_register(q); } @@ -925,7 +925,7 @@ int blk_register_queue(struct gendisk *disk) blk_throtl_register_queue(q); - if ((q->mq_ops && q->elevator)) { + if (q->elevator) { ret = elv_register_queue(q); if (ret) { mutex_unlock(&q->sysfs_lock); @@ -974,7 +974,7 @@ void blk_unregister_queue(struct gendisk *disk) * Remove the sysfs attributes before unregistering the queue data * structures that can be modified through sysfs. 
*/ - if (q->mq_ops) + if (queue_is_mq(q)) blk_mq_unregister_dev(disk_to_dev(disk), q); mutex_unlock(&q->sysfs_lock); @@ -983,7 +983,7 @@ void blk_unregister_queue(struct gendisk *disk) blk_trace_remove_sysfs(disk_to_dev(disk)); mutex_lock(&q->sysfs_lock); - if (q->mq_ops && q->elevator) + if (q->elevator) elv_unregister_queue(q); mutex_unlock(&q->sysfs_lock); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index d0a23f0bb3ed..8f0a104770ee 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2456,7 +2456,7 @@ void blk_throtl_register_queue(struct request_queue *q) td->throtl_slice = DFL_THROTL_SLICE_HD; #endif - td->track_bio_latency = !queue_is_rq_based(q); + td->track_bio_latency = !queue_is_mq(q); if (!td->track_bio_latency) blk_stat_enable_accounting(q); } diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 9f142b84dc85..d051ebfb4852 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -701,7 +701,7 @@ void wbt_enable_default(struct request_queue *q) if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags)) return; - if (q->mq_ops && IS_ENABLED(CONFIG_BLK_WBT_MQ)) + if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ)) wbt_init(q); } EXPORT_SYMBOL_GPL(wbt_enable_default); diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 13ba2011a306..e9c332b1d9da 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -421,7 +421,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk) * BIO based queues do not use a scheduler so only q->nr_zones * needs to be updated so that the sysfs exposed value is correct. 
*/ - if (!queue_is_rq_based(q)) { + if (!queue_is_mq(q)) { q->nr_zones = nr_zones; return 0; } diff --git a/block/bsg.c b/block/bsg.c index 9a442c23a715..44f6028b9567 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -471,7 +471,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent, /* * we need a proper transport to send commands, not a stacked device */ - if (!queue_is_rq_based(q)) + if (!queue_is_mq(q)) return 0; bcd = &q->bsg_dev; diff --git a/block/elevator.c b/block/elevator.c index 796436270682..f05e90d4e695 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -667,7 +667,7 @@ static int __elevator_change(struct request_queue *q, const char *name) /* * Special case for mq, turn off scheduling */ - if (q->mq_ops && !strncmp(name, "none", 4)) + if (!strncmp(name, "none", 4)) return elevator_switch(q, NULL); strlcpy(elevator_name, name, sizeof(elevator_name)); @@ -685,8 +685,7 @@ static int __elevator_change(struct request_queue *q, const char *name) static inline bool elv_support_iosched(struct request_queue *q) { - if (q->mq_ops && q->tag_set && (q->tag_set->flags & - BLK_MQ_F_NO_SCHED)) + if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)) return false; return true; } @@ -696,7 +695,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, { int ret; - if (!q->mq_ops || !elv_support_iosched(q)) + if (!queue_is_mq(q) || !elv_support_iosched(q)) return count; ret = __elevator_change(q, name); @@ -713,7 +712,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) struct elevator_type *__e; int len = 0; - if (!queue_is_rq_based(q)) + if (!queue_is_mq(q)) return sprintf(name, "none\n"); if (!q->elevator) @@ -732,7 +731,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) } spin_unlock(&elv_list_lock); - if (q->mq_ops && q->elevator) + if (q->elevator) len += sprintf(name+len, "none"); len += sprintf(len+name, "\n"); diff --git a/block/genhd.c b/block/genhd.c index cff6bdf27226..0145bcb0cc76 
100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -47,7 +47,7 @@ static void disk_release_events(struct gendisk *disk); void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw) { - if (q->mq_ops) + if (queue_is_mq(q)) return; atomic_inc(&part->in_flight[rw]); @@ -57,7 +57,7 @@ void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw) void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw) { - if (q->mq_ops) + if (queue_is_mq(q)) return; atomic_dec(&part->in_flight[rw]); @@ -68,7 +68,7 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw) void part_in_flight(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]) { - if (q->mq_ops) { + if (queue_is_mq(q)) { blk_mq_in_flight(q, part, inflight); return; } @@ -85,7 +85,7 @@ void part_in_flight(struct request_queue *q, struct hd_struct *part, void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]) { - if (q->mq_ops) { + if (queue_is_mq(q)) { blk_mq_in_flight_rw(q, part, inflight); return; } diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 7cd36e4d1310..1f1fe9a618ea 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -43,7 +43,7 @@ static unsigned dm_get_blk_mq_queue_depth(void) int dm_request_based(struct mapped_device *md) { - return queue_is_rq_based(md->queue); + return queue_is_mq(md->queue); } void dm_start_queue(struct request_queue *q) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9038c302d5c2..844f7d0f2ef8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -919,12 +919,12 @@ static int device_is_rq_based(struct dm_target *ti, struct dm_dev *dev, struct request_queue *q = bdev_get_queue(dev->bdev); struct verify_rq_based_data *v = data; - if (q->mq_ops) + if (queue_is_mq(q)) v->mq_count++; else v->sq_count++; - return queue_is_rq_based(q); + return queue_is_mq(q); } static int 
dm_table_determine_type(struct dm_table *t) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1d185f1fc333..41aaa05e42c1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -656,11 +656,7 @@ static inline bool blk_account_rq(struct request *rq) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) -/* - * Driver can handle struct request, if it either has an old style - * request_fn defined, or is blk-mq based. - */ -static inline bool queue_is_rq_based(struct request_queue *q) +static inline bool queue_is_mq(struct request_queue *q) { return q->mq_ops; } -- cgit v1.2.3 From 0619317ff8baa2da9238191ad5167ed3618c16d9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 13 Nov 2018 21:16:54 -0700 Subject: block: add polled wakeup task helper If we're polling for IO on a device that doesn't use interrupts, then IO completion loop (and wake of task) is done by submitting task itself. If that is the case, then we don't need to enter the wake_up_process() function, we can simply mark ourselves as TASK_RUNNING. 
Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 4 ++-- fs/iomap.c | 2 +- include/linux/blkdev.h | 13 +++++++++++++ mm/page_io.c | 2 +- 4 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/block_dev.c b/fs/block_dev.c index c039abfb2052..9fe56672cfe5 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -181,7 +181,7 @@ static void blkdev_bio_end_io_simple(struct bio *bio) struct task_struct *waiter = bio->bi_private; WRITE_ONCE(bio->bi_private, NULL); - wake_up_process(waiter); + blk_wake_io_task(waiter); } static ssize_t @@ -305,7 +305,7 @@ static void blkdev_bio_end_io(struct bio *bio) struct task_struct *waiter = dio->waiter; WRITE_ONCE(dio->waiter, NULL); - wake_up_process(waiter); + blk_wake_io_task(waiter); } } diff --git a/fs/iomap.c b/fs/iomap.c index f61d13dfdf09..b0462b363bad 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1525,7 +1525,7 @@ static void iomap_dio_bio_end_io(struct bio *bio) if (dio->wait_for_completion) { struct task_struct *waiter = dio->submit.waiter; WRITE_ONCE(dio->submit.waiter, NULL); - wake_up_process(waiter); + blk_wake_io_task(waiter); } else if (dio->flags & IOMAP_DIO_WRITE) { struct inode *inode = file_inode(dio->iocb->ki_filp); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 41aaa05e42c1..91c44f7a7f62 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1772,4 +1772,17 @@ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, #endif /* CONFIG_BLOCK */ +static inline void blk_wake_io_task(struct task_struct *waiter) +{ + /* + * If we're polling, the task itself is doing the completions. For + * that case, we don't need to signal a wakeup, it's enough to just + * mark us as RUNNING. 
+ */ + if (waiter == current) + __set_current_state(TASK_RUNNING); + else + wake_up_process(waiter); +} + #endif diff --git a/mm/page_io.c b/mm/page_io.c index d4d1c89bcddd..57572ff46016 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -140,7 +140,7 @@ out: unlock_page(page); WRITE_ONCE(bio->bi_private, NULL); bio_put(bio); - wake_up_process(waiter); + blk_wake_io_task(waiter); put_task_struct(waiter); } -- cgit v1.2.3 From 2b78eae147a13ab2ca7caa121dd3fca2eecf8613 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Nov 2018 09:10:01 +0100 Subject: block: remove the rq_alloc_data request_queue field Reviewed-by: Omar Sandoval Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 91c44f7a7f62..1ad6eafc43f2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -567,7 +567,6 @@ struct request_queue { bool mq_sysfs_init_done; size_t cmd_size; - void *rq_alloc_data; struct work_struct release_work; -- cgit v1.2.3 From 49b623732e4af1853186ecf859e2c371228074af Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 7 Nov 2018 16:45:21 +0100 Subject: iio: st-accel: add support for lis3de This commit add support for STMicroelectronics lis3de accelerometer. 
Datasheet for this device can be found here: https://www.st.com/resource/en/datasheet/lis3de.pdf Signed-off-by: Heiko Stuebner Acked-by: Rob Herring Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/st-sensors.txt | 1 + drivers/iio/accel/Kconfig | 2 +- drivers/iio/accel/st_accel.h | 1 + drivers/iio/accel/st_accel_core.c | 1 + drivers/iio/accel/st_accel_i2c.c | 5 +++++ drivers/iio/accel/st_accel_spi.c | 5 +++++ include/linux/iio/common/st_sensors.h | 2 +- 7 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/iio/st-sensors.txt b/Documentation/devicetree/bindings/iio/st-sensors.txt index 07f1767c7ee6..ddcb95509599 100644 --- a/Documentation/devicetree/bindings/iio/st-sensors.txt +++ b/Documentation/devicetree/bindings/iio/st-sensors.txt @@ -48,6 +48,7 @@ Accelerometers: - st,lis3l02dq - st,lis2dw12 - st,lis3dhh +- st,lis3de Gyroscopes: - st,l3g4200d-gyro diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index 7993a67bd351..898839ca164a 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -223,7 +223,7 @@ config IIO_ST_ACCEL_3AXIS Say yes here to build support for STMicroelectronics accelerometers: LSM303DLH, LSM303DLHC, LIS3DH, LSM330D, LSM330DL, LSM330DLC, LIS331DLH, LSM303DL, LSM303DLM, LSM330, LIS2DH12, H3LIS331DL, - LNG2DM + LNG2DM, LIS3DE This driver can also be built as a module. 
If so, these modules will be created: diff --git a/drivers/iio/accel/st_accel.h b/drivers/iio/accel/st_accel.h index 2f931e4837e5..fd53258656ca 100644 --- a/drivers/iio/accel/st_accel.h +++ b/drivers/iio/accel/st_accel.h @@ -56,6 +56,7 @@ enum st_accel_type { #define LNG2DM_ACCEL_DEV_NAME "lng2dm" #define LIS2DW12_ACCEL_DEV_NAME "lis2dw12" #define LIS3DHH_ACCEL_DEV_NAME "lis3dhh" +#define LIS3DE_ACCEL_DEV_NAME "lis3de" /** * struct st_sensors_platform_data - default accel platform data diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 3e6fd5a8ac5b..f7b471121508 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -103,6 +103,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { [4] = LSM330DLC_ACCEL_DEV_NAME, [5] = LSM303AGR_ACCEL_DEV_NAME, [6] = LIS2DH12_ACCEL_DEV_NAME, + [7] = LIS3DE_ACCEL_DEV_NAME, }, .ch = (struct iio_chan_spec *)st_accel_12bit_channels, .odr = { diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c index 2ca5d1f6ade0..de8ae4327094 100644 --- a/drivers/iio/accel/st_accel_i2c.c +++ b/drivers/iio/accel/st_accel_i2c.c @@ -98,6 +98,10 @@ static const struct of_device_id st_accel_of_match[] = { .compatible = "st,lis2dw12", .data = LIS2DW12_ACCEL_DEV_NAME, }, + { + .compatible = "st,lis3de", + .data = LIS3DE_ACCEL_DEV_NAME, + }, {}, }; MODULE_DEVICE_TABLE(of, st_accel_of_match); @@ -135,6 +139,7 @@ static const struct i2c_device_id st_accel_id_table[] = { { LIS331DL_ACCEL_DEV_NAME }, { LIS3LV02DL_ACCEL_DEV_NAME }, { LIS2DW12_ACCEL_DEV_NAME }, + { LIS3DE_ACCEL_DEV_NAME }, {}, }; MODULE_DEVICE_TABLE(i2c, st_accel_id_table); diff --git a/drivers/iio/accel/st_accel_spi.c b/drivers/iio/accel/st_accel_spi.c index dcc9bd243a52..73bfb5d04e2b 100644 --- a/drivers/iio/accel/st_accel_spi.c +++ b/drivers/iio/accel/st_accel_spi.c @@ -90,6 +90,10 @@ static const struct of_device_id st_accel_of_match[] = { .compatible = "st,lis3dhh", .data = 
LIS3DHH_ACCEL_DEV_NAME, }, + { + .compatible = "st,lis3de", + .data = LIS3DE_ACCEL_DEV_NAME, + }, {} }; MODULE_DEVICE_TABLE(of, st_accel_of_match); @@ -143,6 +147,7 @@ static const struct spi_device_id st_accel_id_table[] = { { LIS3LV02DL_ACCEL_DEV_NAME }, { LIS2DW12_ACCEL_DEV_NAME }, { LIS3DHH_ACCEL_DEV_NAME }, + { LIS3DE_ACCEL_DEV_NAME }, {}, }; MODULE_DEVICE_TABLE(spi, st_accel_id_table); diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index f9bd6e8ab138..8092b8e7f37e 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -40,7 +40,7 @@ #define ST_SENSORS_DEFAULT_STAT_ADDR 0x27 #define ST_SENSORS_MAX_NAME 17 -#define ST_SENSORS_MAX_4WAI 7 +#define ST_SENSORS_MAX_4WAI 8 #define ST_SENSORS_LSM_CHANNELS(device_type, mask, index, mod, \ ch2, s, endian, rbits, sbits, addr) \ -- cgit v1.2.3 From c91c1c844ebd868ad15bcfc866879fca1079234a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 11 Nov 2018 15:15:33 +0100 Subject: iio: imu: st_lsm6dsx: add i2c embedded controller support i2c controller embedded in lsm6dx series can connect up to four slave devices using accelerometer sensor as trigger for i2c read/write operations. Introduce sensor hub support for lsm6dso sensor. Add register map for lis2mdl magnetometer sensor. 
In order to perform single read/write operations st_lsm6dsx driver relies on SLV0 channel (hw FIFO is not supported yet) Signed-off-by: Lorenzo Bianconi Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/Makefile | 3 +- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 112 ++++ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 135 +++-- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c | 702 +++++++++++++++++++++++++ include/linux/platform_data/st_sensors_pdata.h | 2 + 5 files changed, 911 insertions(+), 43 deletions(-) create mode 100644 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c (limited to 'include/linux') diff --git a/drivers/iio/imu/st_lsm6dsx/Makefile b/drivers/iio/imu/st_lsm6dsx/Makefile index 35919febea2a..e5f733ce6e11 100644 --- a/drivers/iio/imu/st_lsm6dsx/Makefile +++ b/drivers/iio/imu/st_lsm6dsx/Makefile @@ -1,4 +1,5 @@ -st_lsm6dsx-y := st_lsm6dsx_core.o st_lsm6dsx_buffer.o +st_lsm6dsx-y := st_lsm6dsx_core.o st_lsm6dsx_buffer.o \ + st_lsm6dsx_shub.o obj-$(CONFIG_IIO_ST_LSM6DSX) += st_lsm6dsx.o obj-$(CONFIG_IIO_ST_LSM6DSX_I2C) += st_lsm6dsx_i2c.o diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index 2beb4f563892..d20746eb3d2d 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -43,6 +43,24 @@ enum st_lsm6dsx_hw_id { * ST_LSM6DSX_TAGGED_SAMPLE_SIZE) #define ST_LSM6DSX_SHIFT_VAL(val, mask) (((val) << __ffs(mask)) & (mask)) +#define ST_LSM6DSX_CHANNEL(chan_type, addr, mod, scan_idx) \ +{ \ + .type = chan_type, \ + .address = addr, \ + .modified = 1, \ + .channel2 = mod, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ + BIT(IIO_CHAN_INFO_SCALE), \ + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .scan_index = scan_idx, \ + .scan_type = { \ + .sign = 's', \ + .realbits = 16, \ + .storagebits = 16, \ + .endianness = IIO_LE, \ + }, \ +} + struct st_lsm6dsx_reg { u8 addr; u8 mask; @@ -50,6 +68,28 @@ struct st_lsm6dsx_reg { struct st_lsm6dsx_hw; 
+struct st_lsm6dsx_odr { + u16 hz; + u8 val; +}; + +#define ST_LSM6DSX_ODR_LIST_SIZE 6 +struct st_lsm6dsx_odr_table_entry { + struct st_lsm6dsx_reg reg; + struct st_lsm6dsx_odr odr_avl[ST_LSM6DSX_ODR_LIST_SIZE]; +}; + +struct st_lsm6dsx_fs { + u32 gain; + u8 val; +}; + +#define ST_LSM6DSX_FS_LIST_SIZE 4 +struct st_lsm6dsx_fs_table_entry { + struct st_lsm6dsx_reg reg; + struct st_lsm6dsx_fs fs_avl[ST_LSM6DSX_FS_LIST_SIZE]; +}; + /** * struct st_lsm6dsx_fifo_ops - ST IMU FIFO settings * @read_fifo: Read FIFO callback. @@ -84,6 +124,66 @@ struct st_lsm6dsx_hw_ts_settings { struct st_lsm6dsx_reg decimator; }; +/** + * struct st_lsm6dsx_shub_settings - ST IMU hw i2c controller settings + * @page_mux: register page mux info (addr + mask). + * @master_en: master config register info (addr + mask). + * @pullup_en: i2c controller pull-up register info (addr + mask). + * @aux_sens: aux sensor register info (addr + mask). + * @shub_out: sensor hub first output register info. + * @slv0_addr: slave0 address in secondary page. + * @dw_slv0_addr: slave0 write register address in secondary page. + */ +struct st_lsm6dsx_shub_settings { + struct st_lsm6dsx_reg page_mux; + struct st_lsm6dsx_reg master_en; + struct st_lsm6dsx_reg pullup_en; + struct st_lsm6dsx_reg aux_sens; + u8 shub_out; + u8 slv0_addr; + u8 dw_slv0_addr; +}; + +enum st_lsm6dsx_ext_sensor_id { + ST_LSM6DSX_ID_MAGN, +}; + +/** + * struct st_lsm6dsx_ext_dev_settings - i2c controller slave settings + * @i2c_addr: I2c slave address list. + * @wai: Wai address info. + * @id: external sensor id. + * @odr: Output data rate of the sensor [Hz]. + * @gain: Configured sensor sensitivity. + * @temp_comp: Temperature compensation register info (addr + mask). + * @pwr_table: Power on register info (addr + mask). + * @off_canc: Offset cancellation register info (addr + mask). + * @bdu: Block data update register info (addr + mask). + * @out: Output register info. 
+ */ +struct st_lsm6dsx_ext_dev_settings { + u8 i2c_addr[2]; + struct { + u8 addr; + u8 val; + } wai; + enum st_lsm6dsx_ext_sensor_id id; + struct st_lsm6dsx_odr_table_entry odr_table; + struct st_lsm6dsx_fs_table_entry fs_table; + struct st_lsm6dsx_reg temp_comp; + struct { + struct st_lsm6dsx_reg reg; + u8 off_val; + u8 on_val; + } pwr_table; + struct st_lsm6dsx_reg off_canc; + struct st_lsm6dsx_reg bdu; + struct { + u8 addr; + u8 len; + } out; +}; + /** * struct st_lsm6dsx_settings - ST IMU sensor settings * @wai: Sensor WhoAmI default value. @@ -93,6 +193,7 @@ struct st_lsm6dsx_hw_ts_settings { * @batch: List of FIFO batching register info (addr + mask). * @fifo_ops: Sensor hw FIFO parameters. * @ts_settings: Hw timer related settings. + * @shub_settings: i2c controller related settings. */ struct st_lsm6dsx_settings { u8 wai; @@ -102,6 +203,7 @@ struct st_lsm6dsx_settings { struct st_lsm6dsx_reg batch[ST_LSM6DSX_MAX_ID]; struct st_lsm6dsx_fifo_ops fifo_ops; struct st_lsm6dsx_hw_ts_settings ts_settings; + struct st_lsm6dsx_shub_settings shub_settings; }; enum st_lsm6dsx_sensor_id { @@ -129,6 +231,7 @@ enum st_lsm6dsx_fifo_mode { * @sip: Number of samples in a given pattern. * @decimator: FIFO decimation factor. * @ts_ref: Sensor timestamp reference for hw one. 
+ * @ext_info: Sensor settings if it is connected to i2c controller */ struct st_lsm6dsx_sensor { char name[32]; @@ -142,6 +245,11 @@ struct st_lsm6dsx_sensor { u8 sip; u8 decimator; s64 ts_ref; + + struct { + const struct st_lsm6dsx_ext_dev_settings *settings; + u8 addr; + } ext_info; }; /** @@ -181,6 +289,7 @@ struct st_lsm6dsx_hw { const struct st_lsm6dsx_settings *settings; }; +static const unsigned long st_lsm6dsx_available_scan_masks[] = {0x7, 0x0}; extern const struct dev_pm_ops st_lsm6dsx_pm_ops; int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id, const char *name, @@ -197,6 +306,9 @@ int st_lsm6dsx_set_fifo_mode(struct st_lsm6dsx_hw *hw, int st_lsm6dsx_read_fifo(struct st_lsm6dsx_hw *hw); int st_lsm6dsx_read_tagged_fifo(struct st_lsm6dsx_hw *hw); int st_lsm6dsx_check_odr(struct st_lsm6dsx_sensor *sensor, u16 odr, u8 *val); +int st_lsm6dsx_shub_probe(struct st_lsm6dsx_hw *hw, const char *name); +int st_lsm6dsx_shub_set_enable(struct st_lsm6dsx_sensor *sensor, bool enable); +int st_lsm6dsx_set_page(struct st_lsm6dsx_hw *hw, bool enable); static inline int st_lsm6dsx_update_bits_locked(struct st_lsm6dsx_hw *hw, unsigned int addr, diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 28ddedbd1304..149080acd859 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -88,17 +88,6 @@ #define ST_LSM6DSX_GYRO_FS_1000_GAIN IIO_DEGREE_TO_RAD(35000) #define ST_LSM6DSX_GYRO_FS_2000_GAIN IIO_DEGREE_TO_RAD(70000) -struct st_lsm6dsx_odr { - u16 hz; - u8 val; -}; - -#define ST_LSM6DSX_ODR_LIST_SIZE 6 -struct st_lsm6dsx_odr_table_entry { - struct st_lsm6dsx_reg reg; - struct st_lsm6dsx_odr odr_avl[ST_LSM6DSX_ODR_LIST_SIZE]; -}; - static const struct st_lsm6dsx_odr_table_entry st_lsm6dsx_odr_table[] = { [ST_LSM6DSX_ID_ACC] = { .reg = { @@ -126,17 +115,6 @@ static const struct st_lsm6dsx_odr_table_entry st_lsm6dsx_odr_table[] = { } }; -struct st_lsm6dsx_fs 
{ - u32 gain; - u8 val; -}; - -#define ST_LSM6DSX_FS_LIST_SIZE 4 -struct st_lsm6dsx_fs_table_entry { - struct st_lsm6dsx_reg reg; - struct st_lsm6dsx_fs fs_avl[ST_LSM6DSX_FS_LIST_SIZE]; -}; - static const struct st_lsm6dsx_fs_table_entry st_lsm6dsx_fs_table[] = { [ST_LSM6DSX_ID_ACC] = { .reg = { @@ -342,27 +320,30 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .mask = GENMASK(7, 6), }, }, + .shub_settings = { + .page_mux = { + .addr = 0x01, + .mask = BIT(6), + }, + .master_en = { + .addr = 0x14, + .mask = BIT(2), + }, + .pullup_en = { + .addr = 0x14, + .mask = BIT(3), + }, + .aux_sens = { + .addr = 0x14, + .mask = GENMASK(1, 0), + }, + .shub_out = 0x02, + .slv0_addr = 0x15, + .dw_slv0_addr = 0x21, + } }, }; -#define ST_LSM6DSX_CHANNEL(chan_type, addr, mod, scan_idx) \ -{ \ - .type = chan_type, \ - .address = addr, \ - .modified = 1, \ - .channel2 = mod, \ - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ - BIT(IIO_CHAN_INFO_SCALE), \ - .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), \ - .scan_index = scan_idx, \ - .scan_type = { \ - .sign = 's', \ - .realbits = 16, \ - .storagebits = 16, \ - .endianness = IIO_LE, \ - }, \ -} - static const struct iio_chan_spec st_lsm6dsx_acc_channels[] = { ST_LSM6DSX_CHANNEL(IIO_ACCEL, ST_LSM6DSX_REG_ACC_OUT_X_L_ADDR, IIO_MOD_X, 0), @@ -383,6 +364,21 @@ static const struct iio_chan_spec st_lsm6dsx_gyro_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(3), }; +int st_lsm6dsx_set_page(struct st_lsm6dsx_hw *hw, bool enable) +{ + const struct st_lsm6dsx_shub_settings *hub_settings; + unsigned int data; + int err; + + hub_settings = &hw->settings->shub_settings; + data = ST_LSM6DSX_SHIFT_VAL(enable, hub_settings->page_mux.mask); + err = regmap_update_bits(hw->regmap, hub_settings->page_mux.addr, + hub_settings->page_mux.mask, data); + usleep_range(100, 150); + + return err; +} + static int st_lsm6dsx_check_whoami(struct st_lsm6dsx_hw *hw, int id) { int err, i, j, data; @@ -736,8 +732,6 @@ static const struct 
iio_info st_lsm6dsx_gyro_info = { .hwfifo_set_watermark = st_lsm6dsx_set_watermark, }; -static const unsigned long st_lsm6dsx_available_scan_masks[] = {0x7, 0x0}; - static int st_lsm6dsx_of_get_drdy_pin(struct st_lsm6dsx_hw *hw, int *drdy_pin) { struct device_node *np = hw->dev->of_node; @@ -776,6 +770,51 @@ static int st_lsm6dsx_get_drdy_reg(struct st_lsm6dsx_hw *hw, u8 *drdy_reg) return err; } +static int st_lsm6dsx_init_shub(struct st_lsm6dsx_hw *hw) +{ + const struct st_lsm6dsx_shub_settings *hub_settings; + struct device_node *np = hw->dev->of_node; + struct st_sensors_platform_data *pdata; + unsigned int data; + int err = 0; + + hub_settings = &hw->settings->shub_settings; + + pdata = (struct st_sensors_platform_data *)hw->dev->platform_data; + if ((np && of_property_read_bool(np, "st,pullups")) || + (pdata && pdata->pullups)) { + err = st_lsm6dsx_set_page(hw, true); + if (err < 0) + return err; + + data = ST_LSM6DSX_SHIFT_VAL(1, hub_settings->pullup_en.mask); + err = regmap_update_bits(hw->regmap, + hub_settings->pullup_en.addr, + hub_settings->pullup_en.mask, data); + + st_lsm6dsx_set_page(hw, false); + + if (err < 0) + return err; + } + + if (hub_settings->aux_sens.addr) { + /* configure aux sensors */ + err = st_lsm6dsx_set_page(hw, true); + if (err < 0) + return err; + + data = ST_LSM6DSX_SHIFT_VAL(3, hub_settings->aux_sens.mask); + err = regmap_update_bits(hw->regmap, + hub_settings->aux_sens.addr, + hub_settings->aux_sens.mask, data); + + st_lsm6dsx_set_page(hw, false); + } + + return err; +} + static int st_lsm6dsx_init_hw_timer(struct st_lsm6dsx_hw *hw) { const struct st_lsm6dsx_hw_ts_settings *ts_settings; @@ -856,6 +895,10 @@ static int st_lsm6dsx_init_device(struct st_lsm6dsx_hw *hw) if (err < 0) return err; + err = st_lsm6dsx_init_shub(hw); + if (err < 0) + return err; + return st_lsm6dsx_init_hw_timer(hw); } @@ -909,6 +952,7 @@ static struct iio_dev *st_lsm6dsx_alloc_iiodev(struct st_lsm6dsx_hw *hw, int st_lsm6dsx_probe(struct device *dev, int 
irq, int hw_id, const char *name, struct regmap *regmap) { + const struct st_lsm6dsx_shub_settings *hub_settings; struct st_lsm6dsx_hw *hw; int i, err; @@ -944,6 +988,13 @@ int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id, const char *name, if (err < 0) return err; + hub_settings = &hw->settings->shub_settings; + if (hub_settings->master_en.addr) { + err = st_lsm6dsx_shub_probe(hw, name); + if (err < 0) + return err; + } + if (hw->irq > 0) { err = st_lsm6dsx_fifo_setup(hw); if (err < 0) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c new file mode 100644 index 000000000000..9c66e88a1c3a --- /dev/null +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c @@ -0,0 +1,702 @@ +/* + * STMicroelectronics st_lsm6dsx i2c controller driver + * + * i2c controller embedded in lsm6dx series can connect up to four + * slave devices using accelerometer sensor as trigger for i2c + * read/write operations. Current implementation relies on SLV0 channel + * for slave configuration and SLV{1,2,3} to read data and push them into + * the hw FIFO + * + * Copyright (C) 2018 Lorenzo Bianconi + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + */ +#include +#include +#include +#include +#include + +#include "st_lsm6dsx.h" + +#define ST_LSM6DSX_MAX_SLV_NUM 3 +#define ST_LSM6DSX_SLV_ADDR(n, base) ((base) + (n) * 3) +#define ST_LSM6DSX_SLV_SUB_ADDR(n, base) ((base) + 1 + (n) * 3) +#define ST_LSM6DSX_SLV_CONFIG(n, base) ((base) + 2 + (n) * 3) + +#define ST_LS6DSX_READ_OP_MASK GENMASK(2, 0) + +static const struct st_lsm6dsx_ext_dev_settings st_lsm6dsx_ext_dev_table[] = { + /* LIS2MDL */ + { + .i2c_addr = { 0x1e }, + .wai = { + .addr = 0x4f, + .val = 0x40, + }, + .id = ST_LSM6DSX_ID_MAGN, + .odr_table = { + .reg = { + .addr = 0x60, + .mask = GENMASK(3, 2), + }, + .odr_avl[0] = { 10, 0x0 }, + .odr_avl[1] = { 20, 0x1 }, + .odr_avl[2] = { 50, 0x2 }, + .odr_avl[3] = { 100, 0x3 }, + }, + .fs_table = { + .fs_avl[0] = { + .gain = 1500, + .val = 0x0, + }, /* 1500 uG/LSB */ + }, + .temp_comp = { + .addr = 0x60, + .mask = BIT(7), + }, + .pwr_table = { + .reg = { + .addr = 0x60, + .mask = GENMASK(1, 0), + }, + .off_val = 0x2, + .on_val = 0x0, + }, + .off_canc = { + .addr = 0x61, + .mask = BIT(1), + }, + .bdu = { + .addr = 0x62, + .mask = BIT(4), + }, + .out = { + .addr = 0x68, + .len = 6, + }, + }, +}; + +static void st_lsm6dsx_shub_wait_complete(struct st_lsm6dsx_hw *hw) +{ + struct st_lsm6dsx_sensor *sensor; + + sensor = iio_priv(hw->iio_devs[ST_LSM6DSX_ID_ACC]); + msleep((2000U / sensor->odr) + 1); +} + +/** + * st_lsm6dsx_shub_read_reg - read i2c controller register + * + * Read st_lsm6dsx i2c controller register + */ +static int st_lsm6dsx_shub_read_reg(struct st_lsm6dsx_hw *hw, u8 addr, + u8 *data, int len) +{ + const struct st_lsm6dsx_shub_settings *hub_settings; + int err; + + mutex_lock(&hw->page_lock); + + hub_settings = &hw->settings->shub_settings; + err = st_lsm6dsx_set_page(hw, true); + if (err < 0) + goto out; + + err = regmap_bulk_read(hw->regmap, addr, data, len); + + st_lsm6dsx_set_page(hw, false); +out: + mutex_unlock(&hw->page_lock); + + return err; +} + +/** + * st_lsm6dsx_shub_write_reg - 
write i2c controller register + * + * Write st_lsm6dsx i2c controller register + */ +static int st_lsm6dsx_shub_write_reg(struct st_lsm6dsx_hw *hw, u8 addr, + u8 *data, int len) +{ + int err; + + mutex_lock(&hw->page_lock); + err = st_lsm6dsx_set_page(hw, true); + if (err < 0) + goto out; + + err = regmap_bulk_write(hw->regmap, addr, data, len); + + st_lsm6dsx_set_page(hw, false); +out: + mutex_unlock(&hw->page_lock); + + return err; +} + +static int st_lsm6dsx_shub_master_enable(struct st_lsm6dsx_sensor *sensor, + bool enable) +{ + const struct st_lsm6dsx_shub_settings *hub_settings; + struct st_lsm6dsx_hw *hw = sensor->hw; + unsigned int data; + int err; + + /* enable acc sensor as trigger */ + err = st_lsm6dsx_sensor_set_enable(sensor, enable); + if (err < 0) + return err; + + mutex_lock(&hw->page_lock); + + hub_settings = &hw->settings->shub_settings; + err = st_lsm6dsx_set_page(hw, true); + if (err < 0) + goto out; + + data = ST_LSM6DSX_SHIFT_VAL(enable, hub_settings->master_en.mask); + err = regmap_update_bits(hw->regmap, hub_settings->master_en.addr, + hub_settings->master_en.mask, data); + + st_lsm6dsx_set_page(hw, false); +out: + mutex_unlock(&hw->page_lock); + + return err; +} + +/** + * st_lsm6dsx_shub_read - read data from slave device register + * + * Read data from slave device register. 
SLV0 is used for + * one-shot read operation + */ +static int +st_lsm6dsx_shub_read(struct st_lsm6dsx_sensor *sensor, u8 addr, + u8 *data, int len) +{ + const struct st_lsm6dsx_shub_settings *hub_settings; + struct st_lsm6dsx_hw *hw = sensor->hw; + u8 config[3], slv_addr; + int err; + + hub_settings = &hw->settings->shub_settings; + slv_addr = ST_LSM6DSX_SLV_ADDR(0, hub_settings->slv0_addr); + + config[0] = (sensor->ext_info.addr << 1) | 1; + config[1] = addr; + config[2] = len & ST_LS6DSX_READ_OP_MASK; + + err = st_lsm6dsx_shub_write_reg(hw, slv_addr, config, + sizeof(config)); + if (err < 0) + return err; + + err = st_lsm6dsx_shub_master_enable(sensor, true); + if (err < 0) + return err; + + st_lsm6dsx_shub_wait_complete(hw); + + err = st_lsm6dsx_shub_read_reg(hw, hub_settings->shub_out, data, + len & ST_LS6DSX_READ_OP_MASK); + + st_lsm6dsx_shub_master_enable(sensor, false); + + memset(config, 0, sizeof(config)); + return st_lsm6dsx_shub_write_reg(hw, slv_addr, config, + sizeof(config)); +} + +/** + * st_lsm6dsx_shub_write - write data to slave device register + * + * Write data to slave device register.
SLV0 is used for + * one-shot write operation + */ +static int +st_lsm6dsx_shub_write(struct st_lsm6dsx_sensor *sensor, u8 addr, + u8 *data, int len) +{ + const struct st_lsm6dsx_shub_settings *hub_settings; + struct st_lsm6dsx_hw *hw = sensor->hw; + u8 config[2], slv_addr; + int err, i; + + hub_settings = &hw->settings->shub_settings; + slv_addr = ST_LSM6DSX_SLV_ADDR(0, hub_settings->slv0_addr); + config[0] = sensor->ext_info.addr << 1; + for (i = 0 ; i < len; i++) { + config[1] = addr + i; + + err = st_lsm6dsx_shub_write_reg(hw, slv_addr, config, + sizeof(config)); + if (err < 0) + return err; + + err = st_lsm6dsx_shub_write_reg(hw, hub_settings->dw_slv0_addr, + &data[i], 1); + if (err < 0) + return err; + + err = st_lsm6dsx_shub_master_enable(sensor, true); + if (err < 0) + return err; + + st_lsm6dsx_shub_wait_complete(hw); + + st_lsm6dsx_shub_master_enable(sensor, false); + } + + memset(config, 0, sizeof(config)); + return st_lsm6dsx_shub_write_reg(hw, slv_addr, config, sizeof(config)); +} + +static int +st_lsm6dsx_shub_write_with_mask(struct st_lsm6dsx_sensor *sensor, + u8 addr, u8 mask, u8 val) +{ + int err; + u8 data; + + err = st_lsm6dsx_shub_read(sensor, addr, &data, sizeof(data)); + if (err < 0) + return err; + + data = ((data & ~mask) | (val << __ffs(mask) & mask)); + + return st_lsm6dsx_shub_write(sensor, addr, &data, sizeof(data)); +} + +static int +st_lsm6dsx_shub_get_odr_val(struct st_lsm6dsx_sensor *sensor, + u16 odr, u16 *val) +{ + const struct st_lsm6dsx_ext_dev_settings *settings; + int i; + + settings = sensor->ext_info.settings; + for (i = 0; i < ST_LSM6DSX_ODR_LIST_SIZE; i++) + if (settings->odr_table.odr_avl[i].hz == odr) + break; + + if (i == ST_LSM6DSX_ODR_LIST_SIZE) + return -EINVAL; + + *val = settings->odr_table.odr_avl[i].val; + return 0; +} + +static int +st_lsm6dsx_shub_set_odr(struct st_lsm6dsx_sensor *sensor, u16 odr) +{ + const struct st_lsm6dsx_ext_dev_settings *settings; + u16 val; + int err; + + err = 
st_lsm6dsx_shub_get_odr_val(sensor, odr, &val); + if (err < 0) + return err; + + settings = sensor->ext_info.settings; + return st_lsm6dsx_shub_write_with_mask(sensor, + settings->odr_table.reg.addr, + settings->odr_table.reg.mask, + val); +} + +int st_lsm6dsx_shub_set_enable(struct st_lsm6dsx_sensor *sensor, bool enable) +{ + const struct st_lsm6dsx_ext_dev_settings *settings; + int err; + + settings = sensor->ext_info.settings; + if (enable) { + err = st_lsm6dsx_shub_set_odr(sensor, sensor->odr); + if (err < 0) + return err; + } else { + err = st_lsm6dsx_shub_write_with_mask(sensor, + settings->odr_table.reg.addr, + settings->odr_table.reg.mask, 0); + if (err < 0) + return err; + } + + if (settings->pwr_table.reg.addr) { + u8 val; + + val = enable ? settings->pwr_table.on_val + : settings->pwr_table.off_val; + err = st_lsm6dsx_shub_write_with_mask(sensor, + settings->pwr_table.reg.addr, + settings->pwr_table.reg.mask, val); + if (err < 0) + return err; + } + + return st_lsm6dsx_shub_master_enable(sensor, enable); +} + +static int +st_lsm6dsx_shub_read_oneshot(struct st_lsm6dsx_sensor *sensor, + struct iio_chan_spec const *ch, + int *val) +{ + int err, delay, len = ch->scan_type.realbits >> 3; + __le16 data; + + err = st_lsm6dsx_shub_set_enable(sensor, true); + if (err < 0) + return err; + + delay = 1000000 / sensor->odr; + usleep_range(delay, 2 * delay); + + err = st_lsm6dsx_shub_read(sensor, ch->address, (u8 *)&data, len); + if (err < 0) + return err; + + st_lsm6dsx_shub_set_enable(sensor, false); + + switch (len) { + case 2: + *val = (s16)le16_to_cpu(data); + break; + default: + return -EINVAL; + } + + return IIO_VAL_INT; +} + +static int +st_lsm6dsx_shub_read_raw(struct iio_dev *iio_dev, + struct iio_chan_spec const *ch, + int *val, int *val2, long mask) +{ + struct st_lsm6dsx_sensor *sensor = iio_priv(iio_dev); + int ret; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + ret = iio_device_claim_direct_mode(iio_dev); + if (ret) + break; + + ret = 
st_lsm6dsx_shub_read_oneshot(sensor, ch, val); + iio_device_release_direct_mode(iio_dev); + break; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = sensor->odr; + ret = IIO_VAL_INT; + break; + case IIO_CHAN_INFO_SCALE: + *val = 0; + *val2 = sensor->gain; + ret = IIO_VAL_INT_PLUS_MICRO; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int +st_lsm6dsx_shub_write_raw(struct iio_dev *iio_dev, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + struct st_lsm6dsx_sensor *sensor = iio_priv(iio_dev); + int err; + + err = iio_device_claim_direct_mode(iio_dev); + if (err) + return err; + + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: { + u16 data; + + err = st_lsm6dsx_shub_get_odr_val(sensor, val, &data); + if (!err) + sensor->odr = val; + break; + } + default: + err = -EINVAL; + break; + } + + iio_device_release_direct_mode(iio_dev); + + return err; +} + +static ssize_t +st_lsm6dsx_shub_sampling_freq_avail(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct st_lsm6dsx_sensor *sensor = iio_priv(dev_get_drvdata(dev)); + const struct st_lsm6dsx_ext_dev_settings *settings; + int i, len = 0; + + settings = sensor->ext_info.settings; + for (i = 0; i < ST_LSM6DSX_ODR_LIST_SIZE; i++) { + u16 val = settings->odr_table.odr_avl[i].hz; + + if (val > 0) + len += scnprintf(buf + len, PAGE_SIZE - len, "%d ", + val); + } + buf[len - 1] = '\n'; + + return len; +} + +static ssize_t st_lsm6dsx_shub_scale_avail(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct st_lsm6dsx_sensor *sensor = iio_priv(dev_get_drvdata(dev)); + const struct st_lsm6dsx_ext_dev_settings *settings; + int i, len = 0; + + settings = sensor->ext_info.settings; + for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) { + u16 val = settings->fs_table.fs_avl[i].gain; + + if (val > 0) + len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ", + val); + } + buf[len - 1] = '\n'; + + return len; +} + +static 
IIO_DEV_ATTR_SAMP_FREQ_AVAIL(st_lsm6dsx_shub_sampling_freq_avail); +static IIO_DEVICE_ATTR(in_scale_available, 0444, + st_lsm6dsx_shub_scale_avail, NULL, 0); +static struct attribute *st_lsm6dsx_ext_attributes[] = { + &iio_dev_attr_sampling_frequency_available.dev_attr.attr, + &iio_dev_attr_in_scale_available.dev_attr.attr, + NULL, +}; + +static const struct attribute_group st_lsm6dsx_ext_attribute_group = { + .attrs = st_lsm6dsx_ext_attributes, +}; + +static const struct iio_info st_lsm6dsx_ext_info = { + .attrs = &st_lsm6dsx_ext_attribute_group, + .read_raw = st_lsm6dsx_shub_read_raw, + .write_raw = st_lsm6dsx_shub_write_raw, + .hwfifo_set_watermark = st_lsm6dsx_set_watermark, +}; + +static struct iio_dev * +st_lsm6dsx_shub_alloc_iiodev(struct st_lsm6dsx_hw *hw, + enum st_lsm6dsx_sensor_id id, + const struct st_lsm6dsx_ext_dev_settings *info, + u8 i2c_addr, const char *name) +{ + struct iio_chan_spec *ext_channels; + struct st_lsm6dsx_sensor *sensor; + struct iio_dev *iio_dev; + + iio_dev = devm_iio_device_alloc(hw->dev, sizeof(*sensor)); + if (!iio_dev) + return NULL; + + iio_dev->modes = INDIO_DIRECT_MODE; + iio_dev->dev.parent = hw->dev; + iio_dev->info = &st_lsm6dsx_ext_info; + + sensor = iio_priv(iio_dev); + sensor->id = id; + sensor->hw = hw; + sensor->odr = info->odr_table.odr_avl[0].hz; + sensor->gain = info->fs_table.fs_avl[0].gain; + sensor->ext_info.settings = info; + sensor->ext_info.addr = i2c_addr; + sensor->watermark = 1; + + switch (info->id) { + case ST_LSM6DSX_ID_MAGN: { + const struct iio_chan_spec magn_channels[] = { + ST_LSM6DSX_CHANNEL(IIO_MAGN, info->out.addr, + IIO_MOD_X, 0), + ST_LSM6DSX_CHANNEL(IIO_MAGN, info->out.addr + 2, + IIO_MOD_Y, 1), + ST_LSM6DSX_CHANNEL(IIO_MAGN, info->out.addr + 4, + IIO_MOD_Z, 2), + IIO_CHAN_SOFT_TIMESTAMP(3), + }; + + ext_channels = devm_kzalloc(hw->dev, sizeof(magn_channels), + GFP_KERNEL); + if (!ext_channels) + return NULL; + + memcpy(ext_channels, magn_channels, sizeof(magn_channels)); + 
iio_dev->available_scan_masks = st_lsm6dsx_available_scan_masks; + iio_dev->channels = ext_channels; + iio_dev->num_channels = ARRAY_SIZE(magn_channels); + + scnprintf(sensor->name, sizeof(sensor->name), "%s_magn", + name); + break; + } + default: + return NULL; + } + iio_dev->name = sensor->name; + + return iio_dev; +} + +static int st_lsm6dsx_shub_init_device(struct st_lsm6dsx_sensor *sensor) +{ + const struct st_lsm6dsx_ext_dev_settings *settings; + int err; + + settings = sensor->ext_info.settings; + if (settings->bdu.addr) { + err = st_lsm6dsx_shub_write_with_mask(sensor, + settings->bdu.addr, + settings->bdu.mask, 1); + if (err < 0) + return err; + } + + if (settings->temp_comp.addr) { + err = st_lsm6dsx_shub_write_with_mask(sensor, + settings->temp_comp.addr, + settings->temp_comp.mask, 1); + if (err < 0) + return err; + } + + if (settings->off_canc.addr) { + err = st_lsm6dsx_shub_write_with_mask(sensor, + settings->off_canc.addr, + settings->off_canc.mask, 1); + if (err < 0) + return err; + } + + return 0; +} + +static int +st_lsm6dsx_shub_check_wai(struct st_lsm6dsx_hw *hw, u8 *i2c_addr, + const struct st_lsm6dsx_ext_dev_settings *settings) +{ + const struct st_lsm6dsx_shub_settings *hub_settings; + struct st_lsm6dsx_sensor *sensor; + u8 config[3], data, slv_addr; + bool found = false; + int i, err; + + hub_settings = &hw->settings->shub_settings; + slv_addr = ST_LSM6DSX_SLV_ADDR(0, hub_settings->slv0_addr); + sensor = iio_priv(hw->iio_devs[ST_LSM6DSX_ID_ACC]); + + for (i = 0; i < ARRAY_SIZE(settings->i2c_addr); i++) { + if (!settings->i2c_addr[i]) + continue; + + /* read wai slave register */ + config[0] = (settings->i2c_addr[i] << 1) | 0x1; + config[1] = settings->wai.addr; + config[2] = 0x1; + + err = st_lsm6dsx_shub_write_reg(hw, slv_addr, config, + sizeof(config)); + if (err < 0) + return err; + + err = st_lsm6dsx_shub_master_enable(sensor, true); + if (err < 0) + return err; + + st_lsm6dsx_shub_wait_complete(hw); + + err = 
st_lsm6dsx_shub_read_reg(hw, + hub_settings->shub_out, + &data, sizeof(data)); + + st_lsm6dsx_shub_master_enable(sensor, false); + + if (err < 0) + return err; + + if (data != settings->wai.val) + continue; + + *i2c_addr = settings->i2c_addr[i]; + found = true; + break; + } + + /* reset SLV0 channel */ + memset(config, 0, sizeof(config)); + err = st_lsm6dsx_shub_write_reg(hw, slv_addr, config, + sizeof(config)); + if (err < 0) + return err; + + return found ? 0 : -ENODEV; +} + +int st_lsm6dsx_shub_probe(struct st_lsm6dsx_hw *hw, const char *name) +{ + enum st_lsm6dsx_sensor_id id = ST_LSM6DSX_ID_EXT0; + struct st_lsm6dsx_sensor *sensor; + int err, i, num_ext_dev = 0; + u8 i2c_addr = 0; + + for (i = 0; i < ARRAY_SIZE(st_lsm6dsx_ext_dev_table); i++) { + err = st_lsm6dsx_shub_check_wai(hw, &i2c_addr, + &st_lsm6dsx_ext_dev_table[i]); + if (err == -ENODEV) + continue; + else if (err < 0) + return err; + + hw->iio_devs[id] = st_lsm6dsx_shub_alloc_iiodev(hw, id, + &st_lsm6dsx_ext_dev_table[i], + i2c_addr, name); + if (!hw->iio_devs[id]) + return -ENOMEM; + + sensor = iio_priv(hw->iio_devs[id]); + err = st_lsm6dsx_shub_init_device(sensor); + if (err < 0) + return err; + + if (++num_ext_dev >= ST_LSM6DSX_MAX_SLV_NUM) + break; + id++; + } + + return 0; +} diff --git a/include/linux/platform_data/st_sensors_pdata.h b/include/linux/platform_data/st_sensors_pdata.h index f8274b0c6888..728193111c2f 100644 --- a/include/linux/platform_data/st_sensors_pdata.h +++ b/include/linux/platform_data/st_sensors_pdata.h @@ -18,11 +18,13 @@ * Accelerometer DRDY on LSM330 available only on pin 1 (see datasheet). * @open_drain: set the interrupt line to be open drain if possible. * @spi_3wire: enable spi-3wire mode. + * @pullups: enable/disable i2c controller pullup resistors. 
*/ struct st_sensors_platform_data { u8 drdy_int_pin; bool open_drain; bool spi_3wire; + bool pullups; }; #endif /* ST_SENSORS_PDATA_H */ -- cgit v1.2.3 From eee3919c5f2949a8b7b1e9fa239d153be1538656 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 12 Nov 2018 15:10:28 +0100 Subject: gpio: drop broken to_gpio_irq_chip() helper Drop the broken to_gpio_irq_chip() container_of() helper, which would break the build for anyone who tries to use it. Specifically, struct gpio_irq_chip only holds a pointer to a struct irq_chip so using container_of() on an irq-chip pointer makes no sense. Fixes: da80ff81a8f5 ("gpio: Move irqchip into struct gpio_irq_chip") Cc: Thierry Reding Cc: Grygorii Strashko Signed-off-by: Johan Hovold Reviewed-by: Bartosz Golaszewski Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index f70d976e1395..9c8d5d491680 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -166,11 +166,6 @@ struct gpio_irq_chip { */ void (*irq_disable)(struct irq_data *data); }; - -static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) -{ - return container_of(chip, struct gpio_irq_chip, chip); -} #endif /** -- cgit v1.2.3 From 5109f9fd6a76116090b34a192d4a957d2ad0621e Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Sat, 10 Nov 2018 19:58:34 +0100 Subject: net/skbuff: add macros for VLAN_PRESENT bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wrap VLAN_PRESENT bit using macro like PKT_TYPE_* and CLONED_*, as used by BPF code. Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7dcfb5591dc3..99f38779332c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -816,6 +816,12 @@ struct sk_buff { __u32 priority; int skb_iif; __u32 hash; +#define PKT_VLAN_PRESENT_BIT 4 // CFI (12-th bit) in TCI +#ifdef __BIG_ENDIAN +#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, vlan_tci) +#else +#define PKT_VLAN_PRESENT_OFFSET() (offsetof(struct sk_buff, vlan_tci) + 1) +#endif __be16 vlan_proto; __u16 vlan_tci; #if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) -- cgit v1.2.3 From 0c4b2d370514cb4f3454dd3b18f031d2651fab73 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Sat, 10 Nov 2018 19:58:36 +0100 Subject: net: remove VLAN_TAG_PRESENT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace VLAN_TAG_PRESENT with single bit flag and free up VLAN.CFI overload. Now VLAN.CFI is visible in networking stack and can be passed around intact. Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- arch/mips/net/bpf_jit.c | 3 --- arch/powerpc/net/bpf_jit_comp.c | 3 --- arch/sparc/net/bpf_jit_comp_32.c | 4 ---- include/linux/if_vlan.h | 11 ++++++----- include/linux/skbuff.h | 16 +++++++++------- lib/test_bpf.c | 14 ++++++++------ net/core/filter.c | 6 ------ 7 files changed, 23 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index de4c6372ad9a..3a0e34f4e615 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1164,9 +1164,6 @@ jmp_cmp: vlan_tci) != 2); off = offsetof(struct sk_buff, vlan_tci); emit_half_load_unsigned(r_A, r_skb, off, ctx); -#ifdef VLAN_TAG_PRESENT - emit_andi(r_A, r_A, (u16)~VLAN_TAG_PRESENT, ctx); -#endif break; case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: ctx->flags |= SEEN_SKB | SEEN_A; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index dc4a2f54e829..91d223cf512b 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -383,9 +383,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); -#ifdef VLAN_TAG_PRESENT - PPC_ANDI(r_A, r_A, ~VLAN_TAG_PRESENT); -#endif break; case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET()); diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c index 48f3c04dd179..84cc8f7f83e9 100644 --- a/arch/sparc/net/bpf_jit_comp_32.c +++ b/arch/sparc/net/bpf_jit_comp_32.c @@ -553,10 +553,6 @@ void bpf_jit_compile(struct bpf_prog *fp) break; case BPF_ANC | SKF_AD_VLAN_TAG: emit_skb_load16(vlan_tci, r_A); -#ifdef VLAN_TAG_PRESENT - emit_loadimm(~VLAN_TAG_PRESENT, r_TMP); - emit_and(r_A, r_TMP, r_A); -#endif break; case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: __emit_skb_load8(__pkt_vlan_present_offset, r_A); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 1be5230921b5..7a541eadf78e 100644 --- 
a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -66,7 +66,6 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) #define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ #define VLAN_PRIO_SHIFT 13 #define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ -#define VLAN_TAG_PRESENT VLAN_CFI_MASK #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ #define VLAN_N_VID 4096 @@ -78,8 +77,8 @@ static inline bool is_vlan_dev(const struct net_device *dev) return dev->priv_flags & IFF_802_1Q_VLAN; } -#define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) -#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) +#define skb_vlan_tag_present(__skb) ((__skb)->vlan_present) +#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) #define skb_vlan_tag_get_prio(__skb) (((__skb)->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT) @@ -480,7 +479,7 @@ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, */ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) { - skb->vlan_tci = 0; + skb->vlan_present = 0; } /** @@ -492,6 +491,7 @@ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) */ static inline void __vlan_hwaccel_copy_tag(struct sk_buff *dst, const struct sk_buff *src) { + dst->vlan_present = src->vlan_present; dst->vlan_proto = src->vlan_proto; dst->vlan_tci = src->vlan_tci; } @@ -526,7 +526,8 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { skb->vlan_proto = vlan_proto; - skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci; + skb->vlan_tci = vlan_tci; + skb->vlan_present = 1; } /** diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 99f38779332c..b9aa0d1b21cf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -777,6 +777,14 @@ struct sk_buff { __u8 encap_hdr_csum:1; __u8 csum_valid:1; +#ifdef __BIG_ENDIAN_BITFIELD 
+#define PKT_VLAN_PRESENT_BIT 7 +#else +#define PKT_VLAN_PRESENT_BIT 0 +#endif +#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) + __u8 __pkt_vlan_present_offset[0]; + __u8 vlan_present:1; __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 csum_not_inet:1; @@ -784,8 +792,8 @@ struct sk_buff { #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif - __u8 ipvs_property:1; + __u8 ipvs_property:1; __u8 inner_protocol_type:1; __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV @@ -816,12 +824,6 @@ struct sk_buff { __u32 priority; int skb_iif; __u32 hash; -#define PKT_VLAN_PRESENT_BIT 4 // CFI (12-th bit) in TCI -#ifdef __BIG_ENDIAN -#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, vlan_tci) -#else -#define PKT_VLAN_PRESENT_OFFSET() (offsetof(struct sk_buff, vlan_tci) + 1) -#endif __be16 vlan_proto; __u16 vlan_tci; #if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index aa22bcaec1dc..f3e570722a7e 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -39,6 +39,7 @@ #define SKB_HASH 0x1234aaab #define SKB_QUEUE_MAP 123 #define SKB_VLAN_TCI 0xffff +#define SKB_VLAN_PRESENT 1 #define SKB_DEV_IFINDEX 577 #define SKB_DEV_TYPE 588 @@ -725,8 +726,8 @@ static struct bpf_test tests[] = { CLASSIC, { }, { - { 1, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT }, - { 10, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT } + { 1, SKB_VLAN_TCI }, + { 10, SKB_VLAN_TCI } }, }, { @@ -739,8 +740,8 @@ static struct bpf_test tests[] = { CLASSIC, { }, { - { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, - { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } + { 1, SKB_VLAN_PRESENT }, + { 10, SKB_VLAN_PRESENT } }, }, { @@ -5289,8 +5290,8 @@ static struct bpf_test tests[] = { #endif { }, { - { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, - { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } + { 1, SKB_VLAN_PRESENT }, + { 10, SKB_VLAN_PRESENT } }, .fill_helper = bpf_fill_maxinsns6, .expected_errcode = -ENOTSUPP, @@ -6493,6 +6494,7 @@ static 
struct sk_buff *populate_skb(char *buf, int size) skb->hash = SKB_HASH; skb->queue_mapping = SKB_QUEUE_MAP; skb->vlan_tci = SKB_VLAN_TCI; + skb->vlan_present = SKB_VLAN_PRESENT; skb->vlan_proto = htons(ETH_P_IP); dev_net_set(&dev, &init_net); skb->dev = &dev; diff --git a/net/core/filter.c b/net/core/filter.c index c151b906df53..10acbc00ff6c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -301,9 +301,6 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, offsetof(struct sk_buff, vlan_tci)); -#ifdef VLAN_TAG_PRESENT - *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, ~VLAN_TAG_PRESENT); -#endif break; case SKF_AD_VLAN_TAG_PRESENT: *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET()); @@ -6152,9 +6149,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, vlan_tci, 2, target_size)); -#ifdef VLAN_TAG_PRESENT - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, ~VLAN_TAG_PRESENT); -#endif break; case offsetof(struct __sk_buff, cb[0]) ... -- cgit v1.2.3 From 7f600f14dfac4ba4aee6283a415cdad2925d7791 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 12 Nov 2018 18:05:24 -0800 Subject: net: remove unused skb_send_sock() Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 1 - net/core/skbuff.c | 13 ------------- 2 files changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b9aa0d1b21cf..a2e8297a5b00 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3335,7 +3335,6 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, unsigned int flags); int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len); -int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f95ab41c9fb9..a1be7f19d998 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2364,19 +2364,6 @@ error: } EXPORT_SYMBOL_GPL(skb_send_sock_locked); -/* Send skb data on a socket. */ -int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len) -{ - int ret = 0; - - lock_sock(sk); - ret = skb_send_sock_locked(sk, skb, offset, len); - release_sock(sk); - - return ret; -} -EXPORT_SYMBOL_GPL(skb_send_sock); - /** * skb_store_bits - store bits from kernel buffer to skb * @skb: destination buffer -- cgit v1.2.3 From f0aef2d018643187101199d8af1dd5ea3a43a3b7 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 13 Nov 2018 13:20:24 +0200 Subject: iio: ad_sigma_delta: Allow to provide custom data register address Some newer devices from the Sigma-Delta ADC family do have their data register at a different address than the current default address. Add a parameter to the ad_sigma_delta_info struct which allows to override the default address. 
Signed-off-by: Lars-Peter Clausen Signed-off-by: Stefan Popa Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 22 +++++++++++++++++----- include/linux/iio/adc/ad_sigma_delta.h | 3 +++ 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index fc9510716ac7..ff5f2da2e1b1 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -278,6 +278,7 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, { struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev); unsigned int sample, raw_sample; + unsigned int data_reg; int ret = 0; if (iio_buffer_enabled(indio_dev)) @@ -305,7 +306,12 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, if (ret < 0) goto out; - ret = ad_sd_read_reg(sigma_delta, AD_SD_REG_DATA, + if (sigma_delta->info->data_reg != 0) + data_reg = sigma_delta->info->data_reg; + else + data_reg = AD_SD_REG_DATA; + + ret = ad_sd_read_reg(sigma_delta, data_reg, DIV_ROUND_UP(chan->scan_type.realbits + chan->scan_type.shift, 8), &raw_sample); @@ -392,6 +398,7 @@ static irqreturn_t ad_sd_trigger_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev); unsigned int reg_size; + unsigned int data_reg; uint8_t data[16]; int ret; @@ -401,18 +408,23 @@ static irqreturn_t ad_sd_trigger_handler(int irq, void *p) indio_dev->channels[0].scan_type.shift; reg_size = DIV_ROUND_UP(reg_size, 8); + if (sigma_delta->info->data_reg != 0) + data_reg = sigma_delta->info->data_reg; + else + data_reg = AD_SD_REG_DATA; + switch (reg_size) { case 4: case 2: case 1: - ret = ad_sd_read_reg_raw(sigma_delta, AD_SD_REG_DATA, - reg_size, &data[0]); + ret = ad_sd_read_reg_raw(sigma_delta, data_reg, reg_size, + &data[0]); break; case 3: /* We store 24 bit samples in a 32 bit word. Keep the upper * byte set to zero. 
*/ - ret = ad_sd_read_reg_raw(sigma_delta, AD_SD_REG_DATA, - reg_size, &data[1]); + ret = ad_sd_read_reg_raw(sigma_delta, data_reg, reg_size, + &data[1]); break; } diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 730ead1a46df..7e84351fa2c0 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -39,6 +39,8 @@ struct iio_dev; * if there is just one read-only sample data shift register. * @addr_shift: Shift of the register address in the communications register. * @read_mask: Mask for the communications register having the read bit set. + * @data_reg: Address of the data register, if 0 the default address of 0x3 will + * be used. */ struct ad_sigma_delta_info { int (*set_channel)(struct ad_sigma_delta *, unsigned int channel); @@ -47,6 +49,7 @@ struct ad_sigma_delta_info { bool has_registers; unsigned int addr_shift; unsigned int read_mask; + unsigned int data_reg; }; /** -- cgit v1.2.3 From 9a5ee462302512b7f3929c19f0711715613ac418 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Nov 2018 07:24:24 -0800 Subject: net: align pcpu_sw_netstats and pcpu_lstats structs Do not risk spanning these small structures on two cache lines, it is absolutely not worth it. For 32bit arches, the hint might not be enough, but we do not really care anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 917ae7b6263e..086e64d88597 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2389,13 +2389,13 @@ struct pcpu_sw_netstats { u64 tx_packets; u64 tx_bytes; struct u64_stats_sync syncp; -}; +} __aligned(4 * sizeof(u64)); struct pcpu_lstats { u64 packets; u64 bytes; struct u64_stats_sync syncp; -}; +} __aligned(2 * sizeof(u64)); #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ -- cgit v1.2.3 From 0c5eaa7749726b2e4667a5e3668c3eb8516e7440 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 16 Nov 2018 15:06:55 -0600 Subject: of: Drop full path from full_name for PDT systems Now that there are no more users of path_component_name for Sparc outside of the PDT code and all users of device_node.full_name are converted to use "%pOF" printf specifier, we can align Sparc with FDT and store just the base node name and unit address in full_name. This makes path_component_name redundant, so it can be removed. As full_name is used by printf specifiers, set it as early as possible. Cc: Frank Rowand Signed-off-by: Rob Herring Signed-off-by: David S. 
Miller --- drivers/of/pdt.c | 50 ++++++++++++++------------------------------------ include/linux/of.h | 1 - 2 files changed, 14 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c index 013e65de074a..c1633041621d 100644 --- a/drivers/of/pdt.c +++ b/drivers/of/pdt.c @@ -32,24 +32,7 @@ unsigned int of_pdt_unique_id __initdata; static char * __init of_pdt_build_full_name(struct device_node *dp) { - int len, ourlen, plen; - char *n; - - dp->path_component_name = build_path_component(dp); - - plen = strlen(dp->parent->full_name); - ourlen = strlen(dp->path_component_name); - len = ourlen + plen + 2; - - n = prom_early_alloc(len); - strcpy(n, dp->parent->full_name); - if (!of_node_is_root(dp->parent)) { - strcpy(n + plen, "/"); - plen++; - } - strcpy(n + plen, dp->path_component_name); - - return n; + return build_path_component(dp); } #else /* CONFIG_SPARC */ @@ -60,23 +43,21 @@ static inline void irq_trans_init(struct device_node *dp) { } static char * __init of_pdt_build_full_name(struct device_node *dp) { static int failsafe_id = 0; /* for generating unique names on failure */ + const char *name; + char path[256]; char *buf; int len; - if (of_pdt_prom_ops->pkg2path(dp->phandle, NULL, 0, &len)) - goto failsafe; - - buf = prom_early_alloc(len + 1); - if (of_pdt_prom_ops->pkg2path(dp->phandle, buf, len, &len)) - goto failsafe; - return buf; + if (!of_pdt_prom_ops->pkg2path(dp->phandle, path, sizeof(path), &len)) { + name = kbasename(path); + buf = prom_early_alloc(strlen(name) + 1); + strcpy(buf, name); + return buf; + } - failsafe: - buf = prom_early_alloc(strlen(dp->parent->full_name) + - strlen(dp->name) + 16); - sprintf(buf, "%s/%s@unknown%i", - of_node_is_root(dp->parent) ? 
"" : dp->parent->full_name, - dp->name, failsafe_id++); + name = of_get_property(dp, "name", &len); + buf = prom_early_alloc(len + 16); + sprintf(buf, "%s@unknown%i", name, failsafe_id++); pr_err("%s: pkg2path failed; assigning %s\n", __func__, buf); return buf; } @@ -181,6 +162,8 @@ static struct device_node * __init of_pdt_create_node(phandle node, dp->properties = of_pdt_build_prop_list(node); + dp->full_name = of_pdt_build_full_name(dp); + irq_trans_init(dp); return dp; @@ -204,8 +187,6 @@ static struct device_node * __init of_pdt_build_tree(struct device_node *parent, ret = dp; prev_sibling = dp; - dp->full_name = of_pdt_build_full_name(dp); - dp->child = of_pdt_build_tree(dp, of_pdt_prom_ops->getchild(node)); if (of_pdt_build_more) @@ -228,9 +209,6 @@ void __init of_pdt_build_devicetree(phandle root_node, struct of_pdt_ops *ops) of_pdt_prom_ops = ops; of_root = of_pdt_create_node(root_node, NULL); -#if defined(CONFIG_SPARC) - of_root->path_component_name = ""; -#endif of_root->full_name = "/"; of_root->child = of_pdt_build_tree(of_root, diff --git a/include/linux/of.h b/include/linux/of.h index a5aee3c438ad..0fe5bef81a7e 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -66,7 +66,6 @@ struct device_node { unsigned long _flags; void *data; #if defined(CONFIG_SPARC) - const char *path_component_name; unsigned int unique_id; struct of_irq_controller *irq_trans; #endif -- cgit v1.2.3 From f8702f9e4aa7b45131af3df5531d6e3835269141 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 19 Nov 2018 00:56:17 +0300 Subject: regulator: core: Use ww_mutex for regulators locking Wait/wound mutex shall be used in order to avoid lockups on locking of coupled regulators. 
Signed-off-by: Dmitry Osipenko Suggested-by: Lucas Stach Signed-off-by: Mark Brown --- drivers/regulator/core.c | 403 ++++++++++++++++++++++++++-------- drivers/regulator/da9210-regulator.c | 4 +- drivers/regulator/stpmic1_regulator.c | 4 +- drivers/regulator/wm8350-regulator.c | 4 +- include/linux/regulator/driver.h | 6 +- 5 files changed, 317 insertions(+), 104 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 783ec9c74104..47ccd35c7965 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -50,6 +50,8 @@ #define rdev_dbg(rdev, fmt, ...) \ pr_debug("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__) +static DEFINE_WW_CLASS(regulator_ww_class); +static DEFINE_MUTEX(regulator_nesting_mutex); static DEFINE_MUTEX(regulator_list_mutex); static LIST_HEAD(regulator_map_list); static LIST_HEAD(regulator_ena_gpio_list); @@ -154,7 +156,7 @@ static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev) /** * regulator_lock_nested - lock a single regulator * @rdev: regulator source - * @subclass: mutex subclass used for lockdep + * @ww_ctx: w/w mutex acquire context * * This function can be called many times by one task on * a single regulator and its mutex will be locked only @@ -162,24 +164,52 @@ static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev) * than the one, which initially locked the mutex, it will * wait on mutex. 
*/ -static void regulator_lock_nested(struct regulator_dev *rdev, - unsigned int subclass) +static inline int regulator_lock_nested(struct regulator_dev *rdev, + struct ww_acquire_ctx *ww_ctx) { - if (!mutex_trylock(&rdev->mutex)) { - if (rdev->mutex_owner == current) { + bool lock = false; + int ret = 0; + + mutex_lock(&regulator_nesting_mutex); + + if (ww_ctx || !ww_mutex_trylock(&rdev->mutex)) { + if (rdev->mutex_owner == current) rdev->ref_cnt++; - return; + else + lock = true; + + if (lock) { + mutex_unlock(&regulator_nesting_mutex); + ret = ww_mutex_lock(&rdev->mutex, ww_ctx); + mutex_lock(&regulator_nesting_mutex); } - mutex_lock_nested(&rdev->mutex, subclass); + } else { + lock = true; } - rdev->ref_cnt = 1; - rdev->mutex_owner = current; + if (lock && ret != -EDEADLK) { + rdev->ref_cnt++; + rdev->mutex_owner = current; + } + + mutex_unlock(&regulator_nesting_mutex); + + return ret; } -static inline void regulator_lock(struct regulator_dev *rdev) +/** + * regulator_lock - lock a single regulator + * @rdev: regulator source + * + * This function can be called many times by one task on + * a single regulator and its mutex will be locked only + * once. If a task, which is calling this function is other + * than the one, which initially locked the mutex, it will + * wait on mutex. + */ +void regulator_lock(struct regulator_dev *rdev) { - regulator_lock_nested(rdev, 0); + regulator_lock_nested(rdev, NULL); } /** @@ -189,52 +219,48 @@ static inline void regulator_lock(struct regulator_dev *rdev) * This function unlocks the mutex when the * reference counter reaches 0. 
*/ -static void regulator_unlock(struct regulator_dev *rdev) +void regulator_unlock(struct regulator_dev *rdev) { - if (rdev->ref_cnt != 0) { - rdev->ref_cnt--; + mutex_lock(&regulator_nesting_mutex); - if (!rdev->ref_cnt) { - rdev->mutex_owner = NULL; - mutex_unlock(&rdev->mutex); - } + if (--rdev->ref_cnt == 0) { + rdev->mutex_owner = NULL; + ww_mutex_unlock(&rdev->mutex); } + + WARN_ON_ONCE(rdev->ref_cnt < 0); + + mutex_unlock(&regulator_nesting_mutex); } -static int regulator_lock_recursive(struct regulator_dev *rdev, - unsigned int subclass) +static void regulator_unlock_recursive(struct regulator_dev *rdev, + unsigned int n_coupled) { struct regulator_dev *c_rdev; int i; - for (i = 0; i < rdev->coupling_desc.n_coupled; i++) { - c_rdev = rdev->coupling_desc.coupled_rdevs[i]; + for (i = n_coupled; i > 0; i--) { + c_rdev = rdev->coupling_desc.coupled_rdevs[i - 1]; if (!c_rdev) continue; - regulator_lock_nested(c_rdev, subclass++); - if (c_rdev->supply) - subclass = - regulator_lock_recursive(c_rdev->supply->rdev, - subclass); - } + regulator_unlock_recursive( + c_rdev->supply->rdev, + c_rdev->coupling_desc.n_coupled); - return subclass; + regulator_unlock(c_rdev); + } } -/** - * regulator_unlock_dependent - unlock regulator's suppliers and coupled - * regulators - * @rdev: regulator source - * - * Unlock all regulators related with rdev by coupling or suppling. 
- */ -static void regulator_unlock_dependent(struct regulator_dev *rdev) +static int regulator_lock_recursive(struct regulator_dev *rdev, + struct regulator_dev **new_contended_rdev, + struct regulator_dev **old_contended_rdev, + struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *c_rdev; - int i; + int i, err; for (i = 0; i < rdev->coupling_desc.n_coupled; i++) { c_rdev = rdev->coupling_desc.coupled_rdevs[i]; @@ -242,23 +268,95 @@ static void regulator_unlock_dependent(struct regulator_dev *rdev) if (!c_rdev) continue; - regulator_unlock(c_rdev); + if (c_rdev != *old_contended_rdev) { + err = regulator_lock_nested(c_rdev, ww_ctx); + if (err) { + if (err == -EDEADLK) { + *new_contended_rdev = c_rdev; + goto err_unlock; + } - if (c_rdev->supply) - regulator_unlock_dependent(c_rdev->supply->rdev); + /* shouldn't happen */ + WARN_ON_ONCE(err != -EALREADY); + } + } else { + *old_contended_rdev = NULL; + } + + if (c_rdev->supply) { + err = regulator_lock_recursive(c_rdev->supply->rdev, + new_contended_rdev, + old_contended_rdev, + ww_ctx); + if (err) { + regulator_unlock(c_rdev); + goto err_unlock; + } + } } + + return 0; + +err_unlock: + regulator_unlock_recursive(rdev, i); + + return err; +} + +/** + * regulator_unlock_dependent - unlock regulator's suppliers and coupled + * regulators + * @rdev: regulator source + * @ww_ctx: w/w mutex acquire context + * + * Unlock all regulators related with rdev by coupling or suppling. + */ +static void regulator_unlock_dependent(struct regulator_dev *rdev, + struct ww_acquire_ctx *ww_ctx) +{ + regulator_unlock_recursive(rdev, rdev->coupling_desc.n_coupled); + ww_acquire_fini(ww_ctx); } /** * regulator_lock_dependent - lock regulator's suppliers and coupled regulators * @rdev: regulator source + * @ww_ctx: w/w mutex acquire context * * This function as a wrapper on regulator_lock_recursive(), which locks * all regulators related with rdev by coupling or suppling. 
*/ -static inline void regulator_lock_dependent(struct regulator_dev *rdev) +static void regulator_lock_dependent(struct regulator_dev *rdev, + struct ww_acquire_ctx *ww_ctx) { - regulator_lock_recursive(rdev, 0); + struct regulator_dev *new_contended_rdev = NULL; + struct regulator_dev *old_contended_rdev = NULL; + int err; + + mutex_lock(&regulator_list_mutex); + + ww_acquire_init(ww_ctx, &regulator_ww_class); + + do { + if (new_contended_rdev) { + ww_mutex_lock_slow(&new_contended_rdev->mutex, ww_ctx); + old_contended_rdev = new_contended_rdev; + old_contended_rdev->ref_cnt++; + } + + err = regulator_lock_recursive(rdev, + &new_contended_rdev, + &old_contended_rdev, + ww_ctx); + + if (old_contended_rdev) + regulator_unlock(old_contended_rdev); + + } while (err == -EDEADLK); + + ww_acquire_done(ww_ctx); + + mutex_unlock(&regulator_list_mutex); } /** @@ -772,7 +870,7 @@ static int drms_uA_update(struct regulator_dev *rdev) int current_uA = 0, output_uV, input_uV, err; unsigned int mode; - lockdep_assert_held_once(&rdev->mutex); + lockdep_assert_held_once(&rdev->mutex.base); /* * first check to see if we can set modes at all, otherwise just @@ -2274,7 +2372,20 @@ static int _regulator_enable(struct regulator_dev *rdev) { int ret; - lockdep_assert_held_once(&rdev->mutex); + lockdep_assert_held_once(&rdev->mutex.base); + + if (rdev->supply) { + ret = _regulator_enable(rdev->supply->rdev); + if (ret < 0) + return ret; + } + + /* balance only if there are regulators coupled */ + if (rdev->coupling_desc.n_coupled > 1) { + ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); + if (ret < 0) + goto err_disable_supply; + } /* check voltage and requested load before enabling */ if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS)) @@ -2285,18 +2396,20 @@ static int _regulator_enable(struct regulator_dev *rdev) ret = _regulator_is_enabled(rdev); if (ret == -EINVAL || ret == 0) { if (!regulator_ops_is_valid(rdev, - REGULATOR_CHANGE_STATUS)) - return -EPERM; + 
REGULATOR_CHANGE_STATUS)) { + ret = -EPERM; + goto err_disable_supply; + } ret = _regulator_do_enable(rdev); if (ret < 0) - return ret; + goto err_disable_supply; _notifier_call_chain(rdev, REGULATOR_EVENT_ENABLE, NULL); } else if (ret < 0) { rdev_err(rdev, "is_enabled() failed: %d\n", ret); - return ret; + goto err_disable_supply; } /* Fallthrough on positive return values - already enabled */ } @@ -2304,6 +2417,12 @@ static int _regulator_enable(struct regulator_dev *rdev) rdev->use_count++; return 0; + +err_disable_supply: + if (rdev->supply) + _regulator_disable(rdev->supply->rdev); + + return ret; } /** @@ -2320,30 +2439,15 @@ static int _regulator_enable(struct regulator_dev *rdev) int regulator_enable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; + struct ww_acquire_ctx ww_ctx; int ret = 0; if (regulator->always_on) return 0; - if (rdev->supply) { - ret = regulator_enable(rdev->supply); - if (ret != 0) - return ret; - } - - regulator_lock_dependent(rdev); - /* balance only if there are regulators coupled */ - if (rdev->coupling_desc.n_coupled > 1) { - ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); - if (ret != 0) - goto unlock; - } + regulator_lock_dependent(rdev, &ww_ctx); ret = _regulator_enable(rdev); -unlock: - regulator_unlock_dependent(rdev); - - if (ret != 0 && rdev->supply) - regulator_disable(rdev->supply); + regulator_unlock_dependent(rdev, &ww_ctx); return ret; } @@ -2385,7 +2489,7 @@ static int _regulator_disable(struct regulator_dev *rdev) { int ret = 0; - lockdep_assert_held_once(&rdev->mutex); + lockdep_assert_held_once(&rdev->mutex.base); if (WARN(rdev->use_count <= 0, "unbalanced disables for %s\n", rdev_get_name(rdev))) @@ -2423,6 +2527,12 @@ static int _regulator_disable(struct regulator_dev *rdev) rdev->use_count--; } + if (ret == 0 && rdev->coupling_desc.n_coupled > 1) + ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); + + if (ret == 0 && rdev->supply) + ret = 
_regulator_disable(rdev->supply->rdev); + return ret; } @@ -2441,19 +2551,15 @@ static int _regulator_disable(struct regulator_dev *rdev) int regulator_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; + struct ww_acquire_ctx ww_ctx; int ret = 0; if (regulator->always_on) return 0; - regulator_lock_dependent(rdev); + regulator_lock_dependent(rdev, &ww_ctx); ret = _regulator_disable(rdev); - if (rdev->coupling_desc.n_coupled > 1) - regulator_balance_voltage(rdev, PM_SUSPEND_ON); - regulator_unlock_dependent(rdev); - - if (ret == 0 && rdev->supply) - regulator_disable(rdev->supply); + regulator_unlock_dependent(rdev, &ww_ctx); return ret; } @@ -2464,7 +2570,7 @@ static int _regulator_force_disable(struct regulator_dev *rdev) { int ret = 0; - lockdep_assert_held_once(&rdev->mutex); + lockdep_assert_held_once(&rdev->mutex.base); ret = _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | REGULATOR_EVENT_PRE_DISABLE, NULL); @@ -2497,14 +2603,15 @@ static int _regulator_force_disable(struct regulator_dev *rdev) int regulator_force_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; + struct ww_acquire_ctx ww_ctx; int ret; - regulator_lock_dependent(rdev); + regulator_lock_dependent(rdev, &ww_ctx); regulator->uA_load = 0; ret = _regulator_force_disable(regulator->rdev); if (rdev->coupling_desc.n_coupled > 1) regulator_balance_voltage(rdev, PM_SUSPEND_ON); - regulator_unlock_dependent(rdev); + regulator_unlock_dependent(rdev, &ww_ctx); if (rdev->supply) while (rdev->open_count--) @@ -2518,9 +2625,10 @@ static void regulator_disable_work(struct work_struct *work) { struct regulator_dev *rdev = container_of(work, struct regulator_dev, disable_work.work); + struct ww_acquire_ctx ww_ctx; int count, i, ret; - regulator_lock(rdev); + regulator_lock_dependent(rdev, &ww_ctx); BUG_ON(!rdev->deferred_disables); @@ -2541,7 +2649,10 @@ static void regulator_disable_work(struct work_struct *work) rdev_err(rdev, 
"Deferred disable failed: %d\n", ret); } - regulator_unlock(rdev); + if (rdev->coupling_desc.n_coupled > 1) + regulator_balance_voltage(rdev, PM_SUSPEND_ON); + + regulator_unlock_dependent(rdev, &ww_ctx); if (rdev->supply) { for (i = 0; i < count; i++) { @@ -2652,9 +2763,9 @@ int regulator_is_enabled(struct regulator *regulator) if (regulator->always_on) return 1; - regulator_lock_dependent(regulator->rdev); + regulator_lock(regulator->rdev); ret = _regulator_is_enabled(regulator->rdev); - regulator_unlock_dependent(regulator->rdev); + regulator_unlock(regulator->rdev); return ret; } @@ -3268,7 +3379,7 @@ static int regulator_get_optimal_voltage(struct regulator_dev *rdev, int tmp_min = 0; int tmp_max = INT_MAX; - lockdep_assert_held_once(&c_rdevs[i]->mutex); + lockdep_assert_held_once(&c_rdevs[i]->mutex.base); ret = regulator_check_consumers(c_rdevs[i], &tmp_min, @@ -3479,14 +3590,15 @@ out: */ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) { - int ret = 0; + struct ww_acquire_ctx ww_ctx; + int ret; - regulator_lock_dependent(regulator->rdev); + regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = regulator_set_voltage_unlocked(regulator, min_uV, max_uV, PM_SUSPEND_ON); - regulator_unlock_dependent(regulator->rdev); + regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } @@ -3558,18 +3670,19 @@ static int _regulator_set_suspend_voltage(struct regulator *regulator, int regulator_set_suspend_voltage(struct regulator *regulator, int min_uV, int max_uV, suspend_state_t state) { - int ret = 0; + struct ww_acquire_ctx ww_ctx; + int ret; /* PM_SUSPEND_ON is handled by regulator_set_voltage() */ if (regulator_check_states(state) || state == PM_SUSPEND_ON) return -EINVAL; - regulator_lock_dependent(regulator->rdev); + regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = _regulator_set_suspend_voltage(regulator, min_uV, max_uV, state); - regulator_unlock_dependent(regulator->rdev); + 
regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } @@ -3759,13 +3872,12 @@ static int _regulator_get_voltage(struct regulator_dev *rdev) */ int regulator_get_voltage(struct regulator *regulator) { + struct ww_acquire_ctx ww_ctx; int ret; - regulator_lock_dependent(regulator->rdev); - + regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = _regulator_get_voltage(regulator->rdev); - - regulator_unlock_dependent(regulator->rdev); + regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } @@ -4301,7 +4413,7 @@ EXPORT_SYMBOL_GPL(regulator_bulk_free); int regulator_notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { - lockdep_assert_held_once(&rdev->mutex); + lockdep_assert_held_once(&rdev->mutex.base); _notifier_call_chain(rdev, event, data); return NOTIFY_DONE; @@ -4669,7 +4781,7 @@ regulator_register(const struct regulator_desc *regulator_desc, rdev->dev.of_node = of_node_get(config->of_node); } - mutex_init(&rdev->mutex); + ww_mutex_init(&rdev->mutex, &regulator_ww_class); rdev->reg_data = config->driver_data; rdev->owner = regulator_desc->owner; rdev->desc = regulator_desc; @@ -5026,8 +5138,6 @@ static void regulator_summary_show_subtree(struct seq_file *s, if (!rdev) return; - regulator_lock_nested(rdev, level); - opmode = _regulator_get_mode_unlocked(rdev); seq_printf(s, "%*s%-*s %3d %4d %6d %7s ", level * 3 + 1, "", @@ -5084,8 +5194,101 @@ static void regulator_summary_show_subtree(struct seq_file *s, class_for_each_device(&regulator_class, NULL, &summary_data, regulator_summary_show_children); +} + +struct summary_lock_data { + struct ww_acquire_ctx *ww_ctx; + struct regulator_dev **new_contended_rdev; + struct regulator_dev **old_contended_rdev; +}; + +static int regulator_summary_lock_one(struct device *dev, void *data) +{ + struct regulator_dev *rdev = dev_to_rdev(dev); + struct summary_lock_data *lock_data = data; + int ret = 0; + + if (rdev != *lock_data->old_contended_rdev) { + ret =
regulator_lock_nested(rdev, lock_data->ww_ctx); + + if (ret == -EDEADLK) + *lock_data->new_contended_rdev = rdev; + else + WARN_ON_ONCE(ret); + } else { + *lock_data->old_contended_rdev = NULL; + } + + return ret; +} + +static int regulator_summary_unlock_one(struct device *dev, void *data) +{ + struct regulator_dev *rdev = dev_to_rdev(dev); + struct summary_lock_data *lock_data = data; + + if (lock_data) { + if (rdev == *lock_data->new_contended_rdev) + return -EDEADLK; + } regulator_unlock(rdev); + + return 0; +} + +static int regulator_summary_lock_all(struct ww_acquire_ctx *ww_ctx, + struct regulator_dev **new_contended_rdev, + struct regulator_dev **old_contended_rdev) +{ + struct summary_lock_data lock_data; + int ret; + + lock_data.ww_ctx = ww_ctx; + lock_data.new_contended_rdev = new_contended_rdev; + lock_data.old_contended_rdev = old_contended_rdev; + + ret = class_for_each_device(&regulator_class, NULL, &lock_data, + regulator_summary_lock_one); + if (ret) + class_for_each_device(&regulator_class, NULL, &lock_data, + regulator_summary_unlock_one); + + return ret; +} + +static void regulator_summary_lock(struct ww_acquire_ctx *ww_ctx) +{ + struct regulator_dev *new_contended_rdev = NULL; + struct regulator_dev *old_contended_rdev = NULL; + int err; + + ww_acquire_init(ww_ctx, &regulator_ww_class); + + do { + if (new_contended_rdev) { + ww_mutex_lock_slow(&new_contended_rdev->mutex, ww_ctx); + old_contended_rdev = new_contended_rdev; + old_contended_rdev->ref_cnt++; + } + + err = regulator_summary_lock_all(ww_ctx, + &new_contended_rdev, + &old_contended_rdev); + + if (old_contended_rdev) + regulator_unlock(old_contended_rdev); + + } while (err == -EDEADLK); + + ww_acquire_done(ww_ctx); +} + +static void regulator_summary_unlock(struct ww_acquire_ctx *ww_ctx) +{ + class_for_each_device(&regulator_class, NULL, NULL, + regulator_summary_unlock_one); + ww_acquire_fini(ww_ctx); } static int regulator_summary_show_roots(struct device *dev, void *data) @@ -5101,12 +5304,18
@@ static int regulator_summary_show_roots(struct device *dev, void *data) static int regulator_summary_show(struct seq_file *s, void *data) { + struct ww_acquire_ctx ww_ctx; + seq_puts(s, " regulator use open bypass opmode voltage current min max\n"); seq_puts(s, "---------------------------------------------------------------------------------------\n"); + regulator_summary_lock(&ww_ctx); + class_for_each_device(&regulator_class, NULL, s, regulator_summary_show_roots); + regulator_summary_unlock(&ww_ctx); + return 0; } diff --git a/drivers/regulator/da9210-regulator.c b/drivers/regulator/da9210-regulator.c index d0496d6b0934..84dba64ed11e 100644 --- a/drivers/regulator/da9210-regulator.c +++ b/drivers/regulator/da9210-regulator.c @@ -131,7 +131,7 @@ static irqreturn_t da9210_irq_handler(int irq, void *data) if (error < 0) goto error_i2c; - mutex_lock(&chip->rdev->mutex); + regulator_lock(chip->rdev); if (val & DA9210_E_OVCURR) { regulator_notifier_call_chain(chip->rdev, @@ -157,7 +157,7 @@ static irqreturn_t da9210_irq_handler(int irq, void *data) handled |= DA9210_E_VMAX; } - mutex_unlock(&chip->rdev->mutex); + regulator_unlock(chip->rdev); if (handled) { /* Clear handled events */ diff --git a/drivers/regulator/stpmic1_regulator.c b/drivers/regulator/stpmic1_regulator.c index e15634edb8ce..eac0848a78c7 100644 --- a/drivers/regulator/stpmic1_regulator.c +++ b/drivers/regulator/stpmic1_regulator.c @@ -489,14 +489,14 @@ static irqreturn_t stpmic1_curlim_irq_handler(int irq, void *data) { struct regulator_dev *rdev = (struct regulator_dev *)data; - mutex_lock(&rdev->mutex); + regulator_lock(rdev, NULL); /* Send an overcurrent notification */ regulator_notifier_call_chain(rdev, REGULATOR_EVENT_OVER_CURRENT, NULL); - mutex_unlock(&rdev->mutex); + regulator_unlock(rdev); return IRQ_HANDLED; } diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c index 8ad11b074b49..a1c7dfee5c37 100644 --- a/drivers/regulator/wm8350-regulator.c +++
b/drivers/regulator/wm8350-regulator.c @@ -1153,7 +1153,7 @@ static irqreturn_t pmic_uv_handler(int irq, void *data) { struct regulator_dev *rdev = (struct regulator_dev *)data; - mutex_lock(&rdev->mutex); + regulator_lock(rdev); if (irq == WM8350_IRQ_CS1 || irq == WM8350_IRQ_CS2) regulator_notifier_call_chain(rdev, REGULATOR_EVENT_REGULATION_OUT, @@ -1162,7 +1162,7 @@ static irqreturn_t pmic_uv_handler(int irq, void *data) regulator_notifier_call_chain(rdev, REGULATOR_EVENT_UNDER_VOLTAGE, NULL); - mutex_unlock(&rdev->mutex); + regulator_unlock(rdev); return IRQ_HANDLED; } diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index a05d37d0efa1..7065031f0846 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -20,6 +20,7 @@ #include #include #include +#include struct gpio_desc; struct regmap; @@ -462,7 +463,7 @@ struct regulator_dev { struct coupling_desc coupling_desc; struct blocking_notifier_head notifier; - struct mutex mutex; /* consumer lock */ + struct ww_mutex mutex; /* consumer lock */ struct task_struct *mutex_owner; int ref_cnt; struct module *owner; @@ -545,4 +546,7 @@ int regulator_set_active_discharge_regmap(struct regulator_dev *rdev, bool enable); void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data); +void regulator_lock(struct regulator_dev *rdev); +void regulator_unlock(struct regulator_dev *rdev); + #endif -- cgit v1.2.3 From 85f4d4b65fdd67f1d6dc9eeb1d91923cef07eb6a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Nov 2018 13:30:55 -0700 Subject: block: have ->poll_fn() return number of entries polled We currently only really support sync poll, ie poll with 1 IO in flight. This prepares us for supporting async poll. Note that the returned value isn't necessarily 100% accurate. If poll races with IRQ completion, we assume that the fact that the task is now runnable means we found at least one entry. In reality it could be more than 1, or not even 1. 
This is fine, the caller will just need to take this into account. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 18 +++++++++--------- drivers/nvme/host/multipath.c | 4 ++-- include/linux/blkdev.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 7fc4abb4cc36..52b1c97cd7c6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -38,7 +38,7 @@ #include "blk-mq-sched.h" #include "blk-rq-qos.h" -static bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie); +static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); @@ -3305,7 +3305,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, return true; } -static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) +static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) { struct request_queue *q = hctx->queue; long state; @@ -3318,7 +3318,7 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) * straight to the busy poll loop. 
*/ if (blk_mq_poll_hybrid_sleep(q, hctx, rq)) - return true; + return 1; hctx->poll_considered++; @@ -3332,30 +3332,30 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) if (ret > 0) { hctx->poll_success++; __set_current_state(TASK_RUNNING); - return true; + return ret; } if (signal_pending_state(state, current)) __set_current_state(TASK_RUNNING); if (current->state == TASK_RUNNING) - return true; + return 1; if (ret < 0) break; cpu_relax(); } __set_current_state(TASK_RUNNING); - return false; + return 0; } -static bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) +static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie) { struct blk_mq_hw_ctx *hctx; struct request *rq; if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - return false; + return 0; hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; if (!blk_qc_t_is_internal(cookie)) @@ -3369,7 +3369,7 @@ static bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) * so we should be safe with just the NULL check. 
*/ if (!rq) - return false; + return 0; } return __blk_mq_poll(hctx, rq); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 8b841f39734c..f9eeb3b58632 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -220,11 +220,11 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, return ret; } -static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) +static int nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) { struct nvme_ns_head *head = q->queuedata; struct nvme_ns *ns; - bool found = false; + int found = 0; int srcu_idx; srcu_idx = srcu_read_lock(&head->srcu); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1ad6eafc43f2..e97c0a3b2262 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -283,7 +283,7 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); -typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); +typedef int (poll_q_fn) (struct request_queue *q, blk_qc_t); struct bio_vec; typedef int (dma_drain_needed_fn)(struct request *); -- cgit v1.2.3 From 0fe3c7fceb500de2d0adfb9dcf292580cd43ea38 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Fri, 16 Nov 2018 12:16:35 -0500 Subject: audit: localize audit_log_session_info prototype The audit_log_session_info() function is only used in kernel/audit*, so move its prototype to kernel/audit.h Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 2 -- kernel/audit.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 9334fbef7bae..58cf665f597e 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -115,8 +115,6 @@ extern int audit_classify_compat_syscall(int abi, unsigned syscall); struct filename; -extern void 
audit_log_session_info(struct audit_buffer *ab); - #define AUDIT_OFF 0 #define AUDIT_ON 1 #define AUDIT_LOCKED 2 diff --git a/kernel/audit.h b/kernel/audit.h index 214e14948370..9a3828bd387b 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -210,6 +210,8 @@ struct audit_context { extern bool audit_ever_enabled; +extern void audit_log_session_info(struct audit_buffer *ab); + extern void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, struct inode *inode); -- cgit v1.2.3 From 92f806d678e5136e4777b21e5ed5368482ac9ea9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 19 Nov 2018 11:37:31 -0700 Subject: nvme-fc: remove ->poll implementation It's specifically looking for a given request, which we will not be supporting going forward. Also kill the qla2xxx poll implementation as that's the only user of the nvme-fc poll, and the now unused ->poll_queue() hook. Reviewed-by: Christoph Hellwig Reviewed-by: James Smart Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 33 --------------------------------- drivers/scsi/qla2xxx/qla_nvme.c | 12 ------------ include/linux/nvme-fc-driver.h | 1 - 3 files changed, 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 98c3c77f48f6..de797c641265 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2302,38 +2302,6 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); } -static struct blk_mq_tags * -nvme_fc_tagset(struct nvme_fc_queue *queue) -{ - if (queue->qnum == 0) - return queue->ctrl->admin_tag_set.tags[queue->qnum]; - - return queue->ctrl->tag_set.tags[queue->qnum - 1]; -} - -static int -nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) - -{ - struct nvme_fc_queue *queue = hctx->driver_data; - struct nvme_fc_ctrl *ctrl = queue->ctrl; - struct request *req; - struct nvme_fc_fcp_op *op; - - req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); - if (!req) - return 
0; - - op = blk_mq_rq_to_pdu(req); - - if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && - (ctrl->lport->ops->poll_queue)) - ctrl->lport->ops->poll_queue(&ctrl->lport->localport, - queue->lldd_handle); - - return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); -} - static void nvme_fc_submit_async_event(struct nvme_ctrl *arg) { @@ -2404,7 +2372,6 @@ static const struct blk_mq_ops nvme_fc_mq_ops = { .init_request = nvme_fc_init_request, .exit_request = nvme_fc_exit_request, .init_hctx = nvme_fc_init_hctx, - .poll = nvme_fc_poll, .timeout = nvme_fc_timeout, }; diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 7e78e7eff783..fccc733145fc 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -272,17 +272,6 @@ static void qla_nvme_fcp_abort(struct nvme_fc_local_port *lport, schedule_work(&priv->abort_work); } -static void qla_nvme_poll(struct nvme_fc_local_port *lport, void *hw_queue_handle) -{ - struct qla_qpair *qpair = hw_queue_handle; - unsigned long flags; - struct scsi_qla_host *vha = lport->private; - - spin_lock_irqsave(&qpair->qp_lock, flags); - qla24xx_process_response_queue(vha, qpair->rsp); - spin_unlock_irqrestore(&qpair->qp_lock, flags); -} - static inline int qla2x00_start_nvme_mq(srb_t *sp) { unsigned long flags; @@ -578,7 +567,6 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = { .ls_abort = qla_nvme_ls_abort, .fcp_io = qla_nvme_post_cmd, .fcp_abort = qla_nvme_fcp_abort, - .poll_queue = qla_nvme_poll, .max_hw_queues = 8, .max_sgl_segments = 128, .max_dif_sgl_segments = 64, diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 496ff759f84c..f4ab3b1925ac 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -403,7 +403,6 @@ struct nvme_fc_port_template { void **handle); void (*delete_queue)(struct nvme_fc_local_port *, unsigned int qidx, void *handle); - void (*poll_queue)(struct nvme_fc_local_port *, void 
*handle); int (*ls_req)(struct nvme_fc_local_port *, struct nvme_fc_remote_port *, struct nvmefc_ls_req *); -- cgit v1.2.3 From e2b3fa5af70c1e646270f6c7c799414f5e904d7a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 20 Nov 2018 10:52:34 +0900 Subject: block: Remove bio->bi_ioc bio->bi_ioc is never set so always NULL. Remove references to it in bio_disassociate_task() and in rq_ioc() and delete this field from struct bio. With this change, rq_ioc() always returns current->io_context without the need for a bio argument. Further simplify the code and make it more readable by also removing this helper, which also allows to simplify blk_mq_sched_assign_ioc() by removing its bio argument. Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Adam Manzanares Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/bio.c | 4 ---- block/blk-core.c | 2 +- block/blk-mq-sched.c | 4 ++-- block/blk-mq-sched.h | 2 +- block/blk-mq.c | 4 ++-- block/blk.h | 16 ---------------- include/linux/blk_types.h | 3 +-- 7 files changed, 7 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 4f4d9884443b..03895cc0d74a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -2027,10 +2027,6 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) */ void bio_disassociate_task(struct bio *bio) { - if (bio->bi_ioc) { - put_io_context(bio->bi_ioc); - bio->bi_ioc = NULL; - } if (bio->bi_css) { css_put(bio->bi_css); bio->bi_css = NULL; diff --git a/block/blk-core.c b/block/blk-core.c index d6e8ab9ca99d..492648c96992 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -813,7 +813,7 @@ out: void blk_init_request_from_bio(struct request *req, struct bio *bio) { - struct io_context *ioc = rq_ioc(bio); + struct io_context *ioc = current->io_context; if (bio->bi_opf & REQ_RAHEAD) req->cmd_flags |= REQ_FAILFAST_MASK; diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index d084f731d104..13b8dc332541 100644 
--- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -31,10 +31,10 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data); -void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio) +void blk_mq_sched_assign_ioc(struct request *rq) { struct request_queue *q = rq->q; - struct io_context *ioc = rq_ioc(bio); + struct io_context *ioc = current->io_context; struct io_cq *icq; spin_lock_irq(&q->queue_lock); diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 7ff5671bf128..0f719c8532ae 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -8,7 +8,7 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q, void (*exit)(struct blk_mq_hw_ctx *)); -void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio); +void blk_mq_sched_assign_ioc(struct request *rq); void blk_mq_sched_request_inserted(struct request *rq); bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, diff --git a/block/blk-mq.c b/block/blk-mq.c index 52b1c97cd7c6..174384eaace7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -389,8 +389,8 @@ static struct request *blk_mq_get_request(struct request_queue *q, if (!op_is_flush(data->cmd_flags)) { rq->elv.icq = NULL; if (e && e->type->ops.prepare_request) { - if (e->type->icq_cache && rq_ioc(bio)) - blk_mq_sched_assign_ioc(rq, bio); + if (e->type->icq_cache) + blk_mq_sched_assign_ioc(rq); e->type->ops.prepare_request(rq, bio); rq->rq_flags |= RQF_ELVPRIV; diff --git a/block/blk.h b/block/blk.h index 816a9abb87cd..610948157a5b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -254,22 +254,6 @@ void ioc_clear_queue(struct request_queue *q); int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); -/** - * rq_ioc - determine io_context for request allocation - * @bio: request being allocated is for this bio (can be %NULL) - * - * Determine io_context to use for request allocation for @bio. 
May return - * %NULL if %current->io_context doesn't exist. - */ -static inline struct io_context *rq_ioc(struct bio *bio) -{ -#ifdef CONFIG_BLK_CGROUP - if (bio && bio->bi_ioc) - return bio->bi_ioc; -#endif - return current->io_context; -} - /** * create_io_context - try to create task->io_context * @gfp_mask: allocation mask diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dbdbfbd6a987..c0ba1a038ff3 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -174,10 +174,9 @@ struct bio { void *bi_private; #ifdef CONFIG_BLK_CGROUP /* - * Optional ioc and css associated with this bio. Put on bio + * Optional css associated with this bio. Put on bio * release. Read comment on top of bio_associate_current(). */ - struct io_context *bi_ioc; struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; -- cgit v1.2.3 From 64845a1ddd655574886eb48e9a5eaeeb9b05bf0d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 20 Nov 2018 10:52:35 +0900 Subject: block: Introduce get_current_ioprio() Define get_current_ioprio() as an inline helper to obtain the caller I/O priority from its task I/O context. Use this helper in blk_init_request_from_bio() to set a request ioprio. 
Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/blk-core.c | 6 +----- include/linux/ioprio.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 492648c96992..4450d3c08f25 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -813,18 +813,14 @@ out: void blk_init_request_from_bio(struct request *req, struct bio *bio) { - struct io_context *ioc = current->io_context; - if (bio->bi_opf & REQ_RAHEAD) req->cmd_flags |= REQ_FAILFAST_MASK; req->__sector = bio->bi_iter.bi_sector; if (ioprio_valid(bio_prio(bio))) req->ioprio = bio_prio(bio); - else if (ioc) - req->ioprio = ioc->ioprio; else - req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); + req->ioprio = get_current_ioprio(); req->write_hint = bio->bi_write_hint; blk_rq_bio_prep(req->q, req, bio); } diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 9e30ed6443db..e9bfe6972aed 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -70,6 +70,19 @@ static inline int task_nice_ioclass(struct task_struct *task) return IOPRIO_CLASS_BE; } +/* + * If the calling process has set an I/O priority, use that. Otherwise, return + * the default I/O priority. 
+ */ +static inline int get_current_ioprio(void) +{ + struct io_context *ioc = current->io_context; + + if (ioc) + return ioc->ioprio; + return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); +} + /* * For inheritance, return the highest of the two given priorities */ -- cgit v1.2.3 From 20578bdfd0418efb11ec316229e670d085cd574a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 20 Nov 2018 10:52:38 +0900 Subject: block: Initialize BIO I/O priority early For the synchronous I/O path case (read(), write() etc system calls), a BIO I/O priority is not initialized until the execution of blk_init_request_from_bio() when the BIO is submitted and a request initialized for the BIO execution. This is due to the ki_ioprio field of the struct kiocb defined on stack being always initialized to IOPRIO_CLASS_NONE, regardless of the calling process I/O context ioprio value set with ioprio_set(). This late initialization can result in the BIO being merged to pending requests even when the I/O priorities differ. Fix this by initializing the ki_ioprio field of the on-stack struct kiocb using the get_current_ioprio() helper, ensuring that all BIOs allocated and submitted for the system call execution see the correct intended I/O priority early. With this, since a BIO I/O priority is always set to the intended effective value for both the sync and async path, blk_init_request_from_bio() can be simplified.
Reviewed-by: Christoph Hellwig Reviewed-by: Adam Manzanares Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +---- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index dde30b08aa14..04f5be473638 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -814,10 +814,7 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_FAILFAST_MASK; req->__sector = bio->bi_iter.bi_sector; - if (ioprio_valid(bio_prio(bio))) - req->ioprio = bio_prio(bio); - else - req->ioprio = get_current_ioprio(); + req->ioprio = bio_prio(bio); req->write_hint = bio->bi_write_hint; blk_rq_bio_prep(req->q, req, bio); } diff --git a/include/linux/fs.h b/include/linux/fs.h index c95c0807471f..a1ab233e6469 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2021,7 +2021,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) .ki_filp = filp, .ki_flags = iocb_flags(filp), .ki_hint = ki_hint_validate(file_write_hint(filp)), - .ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0), + .ki_ioprio = get_current_ioprio(), }; } -- cgit v1.2.3 From 890d8d23ec3c9eca847be0593c0cf5f650b97271 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 19 Nov 2018 15:21:42 -0800 Subject: net: sched: gred: add basic Qdisc offload Add basic offload for the GRED Qdisc. Inform the drivers any time Qdisc or virtual queue configuration changes. Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + include/net/pkt_cls.h | 36 ++++++++++++++++++++++++++++++++++++ net/sched/sch_gred.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 086e64d88597..4b4207ebd5c0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -846,6 +846,7 @@ enum tc_setup_type { TC_SETUP_QDISC_MQ, TC_SETUP_QDISC_ETF, TC_SETUP_ROOT_QDISC, + TC_SETUP_QDISC_GRED, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index c497ada7f591..c9198797aaed 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -868,6 +868,42 @@ struct tc_red_qopt_offload { }; }; +enum tc_gred_command { + TC_GRED_REPLACE, + TC_GRED_DESTROY, +}; + +struct tc_gred_vq_qopt_offload_params { + bool present; + u32 limit; + u32 prio; + u32 min; + u32 max; + bool is_ecn; + bool is_harddrop; + u32 probability; + /* Only need backlog, see struct tc_prio_qopt_offload_params */ + u32 *backlog; +}; + +struct tc_gred_qopt_offload_params { + bool grio_on; + bool wred_on; + unsigned int dp_cnt; + unsigned int dp_def; + struct gnet_stats_queue *qstats; + struct tc_gred_vq_qopt_offload_params tab[MAX_DPs]; +}; + +struct tc_gred_qopt_offload { + enum tc_gred_command command; + u32 handle; + u32 parent; + union { + struct tc_gred_qopt_offload_params set; + }; +}; + enum tc_prio_command { TC_PRIO_REPLACE, TC_PRIO_DESTROY, diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 8b8c325f48bc..908c9d1dfdf8 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -311,6 +312,48 @@ static void gred_reset(struct Qdisc *sch) } } +static void gred_offload(struct Qdisc *sch, enum tc_gred_command command) +{ + struct gred_sched *table = qdisc_priv(sch); + struct net_device 
*dev = qdisc_dev(sch); + struct tc_gred_qopt_offload opt = { + .command = command, + .handle = sch->handle, + .parent = sch->parent, + }; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return; + + if (command == TC_GRED_REPLACE) { + unsigned int i; + + opt.set.grio_on = gred_rio_mode(table); + opt.set.wred_on = gred_wred_mode(table); + opt.set.dp_cnt = table->DPs; + opt.set.dp_def = table->def; + + for (i = 0; i < table->DPs; i++) { + struct gred_sched_data *q = table->tab[i]; + + if (!q) + continue; + opt.set.tab[i].present = true; + opt.set.tab[i].limit = q->limit; + opt.set.tab[i].prio = q->prio; + opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog; + opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog; + opt.set.tab[i].is_ecn = gred_use_ecn(q); + opt.set.tab[i].is_harddrop = gred_use_harddrop(q); + opt.set.tab[i].probability = q->parms.max_P; + opt.set.tab[i].backlog = &q->backlog; + } + opt.set.qstats = &sch->qstats; + } + + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt); +} + static inline void gred_destroy_vq(struct gred_sched_data *q) { kfree(q); @@ -385,6 +428,7 @@ static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps, } } + gred_offload(sch, TC_GRED_REPLACE); return 0; } @@ -630,6 +674,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_unlock(sch); kfree(prealloc); + + gred_offload(sch, TC_GRED_REPLACE); return 0; err_unlock_free: @@ -815,6 +861,7 @@ static void gred_destroy(struct Qdisc *sch) if (table->tab[i]) gred_destroy_vq(table->tab[i]); } + gred_offload(sch, TC_GRED_DESTROY); } static struct Qdisc_ops gred_qdisc_ops __read_mostly = { -- cgit v1.2.3 From f1abf67217de91f5cd3c757ae857632ca565099a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 16 Nov 2018 19:19:30 -0800 Subject: regulator: Fix return value of _set_load() stub The stub implementation of _set_load() returns a mode value which is within the bounds of valid return codes for success (the documentation 
just says that failures are negative error codes) but not sensible or what the actual implementation does. Fix it to just return 0. Reported-by: Cheng-Yi Chiang Signed-off-by: Mark Brown Reviewed-by: Douglas Anderson Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 25602afd4844..f3f76051e8b0 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -508,7 +508,7 @@ static inline int regulator_get_error_flags(struct regulator *regulator, static inline int regulator_set_load(struct regulator *regulator, int load_uA) { - return REGULATOR_MODE_NORMAL; + return 0; } static inline int regulator_allow_bypass(struct regulator *regulator, -- cgit v1.2.3 From 01598ba6b1a863fbd819fc5c36c27886e5072164 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 11 Nov 2018 18:48:44 +0200 Subject: docs/mm: update kmalloc kernel-doc description Add references to GFP documentation and the memory-allocation.rst and remove GFP_USER, GFP_DMA and GFP_NOIO descriptions. While on it slightly change the formatting so that the list of GFP flags will be rendered as "description" in the generated html. Signed-off-by: Mike Rapoport Signed-off-by: Jonathan Corbet --- Documentation/core-api/memory-allocation.rst | 2 + include/linux/slab.h | 55 ++++++++++++++-------------- 2 files changed, 29 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/memory-allocation.rst b/Documentation/core-api/memory-allocation.rst index f8bb9aa120c4..39f35ebdc82f 100644 --- a/Documentation/core-api/memory-allocation.rst +++ b/Documentation/core-api/memory-allocation.rst @@ -1,3 +1,5 @@ +.. 
_memory_allocation: + ======================= Memory Allocation Guide ======================= diff --git a/include/linux/slab.h b/include/linux/slab.h index 918f374e7156..4a342eb488f6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -486,48 +486,47 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * kmalloc is the normal method of allocating memory * for objects smaller than page size in the kernel. * - * The @flags argument may be one of: + * The @flags argument may be one of the GFP flags defined at + * include/linux/gfp.h and described at + * :ref:`Documentation/core-api/mm-api.rst ` * - * %GFP_USER - Allocate memory on behalf of user. May sleep. + * The recommended usage of the @flags is described at + * :ref:`Documentation/core-api/memory-allocation.rst ` * - * %GFP_KERNEL - Allocate normal kernel ram. May sleep. + * Below is a brief outline of the most useful GFP flags * - * %GFP_ATOMIC - Allocation will not sleep. May use emergency pools. - * For example, use this inside interrupt handlers. + * %GFP_KERNEL + * Allocate normal kernel ram. May sleep. * - * %GFP_HIGHUSER - Allocate pages from high memory. + * %GFP_NOWAIT + * Allocation will not sleep. * - * %GFP_NOIO - Do not do any I/O at all while trying to get memory. + * %GFP_ATOMIC + * Allocation will not sleep. May use emergency pools. * - * %GFP_NOFS - Do not make any fs calls while trying to get memory. - * - * %GFP_NOWAIT - Allocation will not sleep. - * - * %__GFP_THISNODE - Allocate node-local memory only. - * - * %GFP_DMA - Allocation suitable for DMA. - * Should only be used for kmalloc() caches. Otherwise, use a - * slab created with SLAB_DMA. + * %GFP_HIGHUSER + * Allocate memory from high memory on behalf of user. * * Also it is possible to set different flags by OR'ing * in one or more of the following additional @flags: * - * %__GFP_HIGH - This allocation has high priority and may use emergency pools. 
- * - * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail - * (think twice before using). + * %__GFP_HIGH + * This allocation has high priority and may use emergency pools. * - * %__GFP_NORETRY - If memory is not immediately available, - * then give up at once. + * %__GFP_NOFAIL + * Indicate that this allocation is in no way allowed to fail + * (think twice before using). * - * %__GFP_NOWARN - If allocation fails, don't issue any warnings. + * %__GFP_NORETRY + * If memory is not immediately available, + * then give up at once. * - * %__GFP_RETRY_MAYFAIL - Try really hard to succeed the allocation but fail - * eventually. + * %__GFP_NOWARN + * If allocation fails, don't issue any warnings. * - * There are other flags available as well, but these are not intended - * for general use, and so are not documented here. For a full list of - * potential flags, always refer to linux/gfp.h. + * %__GFP_RETRY_MAYFAIL + * Try really hard to succeed the allocation but fail + * eventually. */ static __always_inline void *kmalloc(size_t size, gfp_t flags) { -- cgit v1.2.3 From 6afe76a6723975391d06c42a422370a588395f84 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 6 Nov 2018 17:05:30 +0100 Subject: spi: spi-mem: Add missing word in the SPI_MEM_DATA_OUT description Missing 'to' in the SPI_MEM_DATA_OUT description. 
Signed-off-by: Boris Brezillon Reviewed-by: Miquel Raynal Signed-off-by: Mark Brown --- include/linux/spi/spi-mem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 69ee30456864..867839cc69a7 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -58,7 +58,7 @@ * enum spi_mem_data_dir - describes the direction of a SPI memory data * transfer from the controller perspective * @SPI_MEM_DATA_IN: data coming from the SPI memory - * @SPI_MEM_DATA_OUT: data sent the SPI memory + * @SPI_MEM_DATA_OUT: data sent to the SPI memory */ enum spi_mem_data_dir { SPI_MEM_DATA_IN, -- cgit v1.2.3 From 0ebb261a0b2d090de618a383d2378d4a00834958 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 6 Nov 2018 17:05:31 +0100 Subject: spi: spi-mem: Add SPI_MEM_NO_DATA to the spi_mem_data_dir enum When defining spi_mem_op templates we don't necessarily know the size that will be passed when the template is actually used, and basing the supports_op() check on op->data.nbytes to know whether there will be data transferred for a specific operation is thus not possible. Add SPI_MEM_NO_DATA to the spi_mem_data_dir enum so that we can base our checks on op->data.dir instead of op->data.nbytes.
Signed-off-by: Boris Brezillon Reviewed-by: Miquel Raynal Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 2 +- include/linux/spi/spi-mem.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index 62a7b80801d2..967f581bca4f 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -142,7 +142,7 @@ static bool spi_mem_default_supports_op(struct spi_mem *mem, spi_check_buswidth_req(mem, op->dummy.buswidth, true)) return false; - if (op->data.nbytes && + if (op->data.dir != SPI_MEM_NO_DATA && spi_check_buswidth_req(mem, op->data.buswidth, op->data.dir == SPI_MEM_DATA_OUT)) return false; diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 867839cc69a7..250b6f5c47c2 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -57,10 +57,12 @@ /** * enum spi_mem_data_dir - describes the direction of a SPI memory data * transfer from the controller perspective + * @SPI_MEM_NO_DATA: no data transferred * @SPI_MEM_DATA_IN: data coming from the SPI memory * @SPI_MEM_DATA_OUT: data sent to the SPI memory */ enum spi_mem_data_dir { + SPI_MEM_NO_DATA, SPI_MEM_DATA_IN, SPI_MEM_DATA_OUT, }; -- cgit v1.2.3 From aa167f3fed0c37e0e4c707d4331d827661f46644 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 6 Nov 2018 17:05:33 +0100 Subject: spi: spi-mem: Add a new API to support direct mapping Most modern SPI controllers can directly map a SPI memory (or a portion of the SPI memory) in the CPU address space. Most of the time this brings significant performance improvements as it automates the whole process of sending SPI memory operations every time a new region is accessed. This new API allows SPI memory drivers to create direct mappings and then use them to access the memory instead of using spi_mem_exec_op(). 
Signed-off-by: Boris Brezillon Reviewed-by: Miquel Raynal Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 204 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/spi/spi-mem.h | 80 +++++++++++++++++ 2 files changed, 284 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index 7916e655afc8..b12a7974b665 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -432,6 +432,210 @@ int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op) } EXPORT_SYMBOL_GPL(spi_mem_adjust_op_size); +static ssize_t spi_mem_no_dirmap_read(struct spi_mem_dirmap_desc *desc, + u64 offs, size_t len, void *buf) +{ + struct spi_mem_op op = desc->info.op_tmpl; + int ret; + + op.addr.val = desc->info.offset + offs; + op.data.buf.in = buf; + op.data.nbytes = len; + ret = spi_mem_adjust_op_size(desc->mem, &op); + if (ret) + return ret; + + ret = spi_mem_exec_op(desc->mem, &op); + if (ret) + return ret; + + return op.data.nbytes; +} + +static ssize_t spi_mem_no_dirmap_write(struct spi_mem_dirmap_desc *desc, + u64 offs, size_t len, const void *buf) +{ + struct spi_mem_op op = desc->info.op_tmpl; + int ret; + + op.addr.val = desc->info.offset + offs; + op.data.buf.out = buf; + op.data.nbytes = len; + ret = spi_mem_adjust_op_size(desc->mem, &op); + if (ret) + return ret; + + ret = spi_mem_exec_op(desc->mem, &op); + if (ret) + return ret; + + return op.data.nbytes; +} + +/** + * spi_mem_dirmap_create() - Create a direct mapping descriptor + * @mem: SPI mem device this direct mapping should be created for + * @info: direct mapping information + * + * This function is creating a direct mapping descriptor which can then be used + * to access the memory using spi_mem_dirmap_read() or spi_mem_dirmap_write(). 
+ * If the SPI controller driver does not support direct mapping, this function + * fallback to an implementation using spi_mem_exec_op(), so that the caller + * doesn't have to bother implementing a fallback on his own. + * + * Return: a valid pointer in case of success, and ERR_PTR() otherwise. + */ +struct spi_mem_dirmap_desc * +spi_mem_dirmap_create(struct spi_mem *mem, + const struct spi_mem_dirmap_info *info) +{ + struct spi_controller *ctlr = mem->spi->controller; + struct spi_mem_dirmap_desc *desc; + int ret = -ENOTSUPP; + + /* Make sure the number of address cycles is between 1 and 8 bytes. */ + if (!info->op_tmpl.addr.nbytes || info->op_tmpl.addr.nbytes > 8) + return ERR_PTR(-EINVAL); + + /* data.dir should either be SPI_MEM_DATA_IN or SPI_MEM_DATA_OUT. */ + if (info->op_tmpl.data.dir == SPI_MEM_NO_DATA) + return ERR_PTR(-EINVAL); + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + return ERR_PTR(-ENOMEM); + + desc->mem = mem; + desc->info = *info; + if (ctlr->mem_ops && ctlr->mem_ops->dirmap_create) + ret = ctlr->mem_ops->dirmap_create(desc); + + if (ret) { + desc->nodirmap = true; + if (!spi_mem_supports_op(desc->mem, &desc->info.op_tmpl)) + ret = -ENOTSUPP; + else + ret = 0; + } + + if (ret) { + kfree(desc); + return ERR_PTR(ret); + } + + return desc; +} +EXPORT_SYMBOL_GPL(spi_mem_dirmap_create); + +/** + * spi_mem_dirmap_destroy() - Destroy a direct mapping descriptor + * @desc: the direct mapping descriptor to destroy + * @info: direct mapping information + * + * This function destroys a direct mapping descriptor previously created by + * spi_mem_dirmap_create(). 
+ */ +void spi_mem_dirmap_destroy(struct spi_mem_dirmap_desc *desc) +{ + struct spi_controller *ctlr = desc->mem->spi->controller; + + if (!desc->nodirmap && ctlr->mem_ops && ctlr->mem_ops->dirmap_destroy) + ctlr->mem_ops->dirmap_destroy(desc); +} +EXPORT_SYMBOL_GPL(spi_mem_dirmap_destroy); + +/** + * spi_mem_dirmap_read() - Read data through a direct mapping + * @desc: direct mapping descriptor + * @offs: offset to start reading from. Note that this is not an absolute + * offset, but the offset within the direct mapping which already has + * its own offset + * @len: length in bytes + * @buf: destination buffer. This buffer must be DMA-able + * + * This function reads data from a memory device using a direct mapping + * previously instantiated with spi_mem_dirmap_create(). + * + * Return: the amount of data read from the memory device or a negative error + * code. Note that the returned size might be smaller than @len, and the caller + * is responsible for calling spi_mem_dirmap_read() again when that happens. + */ +ssize_t spi_mem_dirmap_read(struct spi_mem_dirmap_desc *desc, + u64 offs, size_t len, void *buf) +{ + struct spi_controller *ctlr = desc->mem->spi->controller; + ssize_t ret; + + if (desc->info.op_tmpl.data.dir != SPI_MEM_DATA_IN) + return -EINVAL; + + if (!len) + return 0; + + if (desc->nodirmap) { + ret = spi_mem_no_dirmap_read(desc, offs, len, buf); + } else if (ctlr->mem_ops && ctlr->mem_ops->dirmap_read) { + ret = spi_mem_access_start(desc->mem); + if (ret) + return ret; + + ret = ctlr->mem_ops->dirmap_read(desc, offs, len, buf); + + spi_mem_access_end(desc->mem); + } else { + ret = -ENOTSUPP; + } + + return ret; +} +EXPORT_SYMBOL_GPL(spi_mem_dirmap_read); + +/** + * spi_mem_dirmap_write() - Write data through a direct mapping + * @desc: direct mapping descriptor + * @offs: offset to start writing from.
Note that this is not an absolute + * offset, but the offset within the direct mapping which already has + * its own offset + * @len: length in bytes + * @buf: source buffer. This buffer must be DMA-able + * + * This function writes data to a memory device using a direct mapping + * previously instantiated with spi_mem_dirmap_create(). + * + * Return: the amount of data written to the memory device or a negative error + * code. Note that the returned size might be smaller than @len, and the caller + * is responsible for calling spi_mem_dirmap_write() again when that happens. + */ +ssize_t spi_mem_dirmap_write(struct spi_mem_dirmap_desc *desc, + u64 offs, size_t len, const void *buf) +{ + struct spi_controller *ctlr = desc->mem->spi->controller; + ssize_t ret; + + if (desc->info.op_tmpl.data.dir != SPI_MEM_DATA_OUT) + return -EINVAL; + + if (!len) + return 0; + + if (desc->nodirmap) { + ret = spi_mem_no_dirmap_write(desc, offs, len, buf); + } else if (ctlr->mem_ops && ctlr->mem_ops->dirmap_write) { + ret = spi_mem_access_start(desc->mem); + if (ret) + return ret; + + ret = ctlr->mem_ops->dirmap_write(desc, offs, len, buf); + + spi_mem_access_end(desc->mem); + } else { + ret = -ENOTSUPP; + } + + return ret; +} +EXPORT_SYMBOL_GPL(spi_mem_dirmap_write); + static inline struct spi_mem_driver *to_spi_mem_drv(struct device_driver *drv) { return container_of(drv, struct spi_mem_driver, spidrv.driver); diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 250b6f5c47c2..3fe24500c5ee 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -124,6 +124,49 @@ struct spi_mem_op { .data = __data, \ } +/** + * struct spi_mem_dirmap_info - Direct mapping information + * @op_tmpl: operation template that should be used by the direct mapping when + * the memory device is accessed + * @offset: absolute offset this direct mapping is pointing to + * @length: length in byte of this direct mapping + * + * These information are used by the 
controller specific implementation to know + * the portion of memory that is directly mapped and the spi_mem_op that should + * be used to access the device. + * A direct mapping is only valid for one direction (read or write) and this + * direction is directly encoded in the ->op_tmpl.data.dir field. + */ +struct spi_mem_dirmap_info { + struct spi_mem_op op_tmpl; + u64 offset; + u64 length; +}; + +/** + * struct spi_mem_dirmap_desc - Direct mapping descriptor + * @mem: the SPI memory device this direct mapping is attached to + * @info: information passed at direct mapping creation time + * @nodirmap: set to 1 if the SPI controller does not implement + * ->mem_ops->dirmap_create() or when this function returned an + * error. If @nodirmap is true, all spi_mem_dirmap_{read,write}() + * calls will use spi_mem_exec_op() to access the memory. This is a + * degraded mode that allows spi_mem drivers to use the same code + * no matter whether the controller supports direct mapping or not + * @priv: field pointing to controller specific data + * + * Common part of a direct mapping descriptor. This object is created by + * spi_mem_dirmap_create() and controller implementation of ->dirmap_create() + * can create/attach direct mapping resources to the descriptor in the ->priv + * field. + */ +struct spi_mem_dirmap_desc { + struct spi_mem *mem; + struct spi_mem_dirmap_info info; + unsigned int nodirmap; + void *priv; +}; + /** * struct spi_mem - describes a SPI memory device * @spi: the underlying SPI device @@ -179,10 +222,32 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem) * Note that if the implementation of this function allocates memory * dynamically, then it should do so with devm_xxx(), as we don't * have a ->free_name() function. + * @dirmap_create: create a direct mapping descriptor that can later be used to + * access the memory device.
This method is optional + * @dirmap_destroy: destroy a memory descriptor previously created by + * ->dirmap_create() + * @dirmap_read: read data from the memory device using the direct mapping + * created by ->dirmap_create(). The function can return less + * data than requested (for example when the request is crossing + * the currently mapped area), and the caller of + * spi_mem_dirmap_read() is responsible for calling it again in + * this case. + * @dirmap_write: write data to the memory device using the direct mapping + * created by ->dirmap_create(). The function can return less + * data than requested (for example when the request is crossing + * the currently mapped area), and the caller of + * spi_mem_dirmap_write() is responsible for calling it again in + * this case. * * This interface should be implemented by SPI controllers providing an * high-level interface to execute SPI memory operation, which is usually the * case for QSPI controllers. + * + * Note on ->dirmap_{read,write}(): drivers should avoid accessing the direct + * mapping from the CPU because doing that can stall the CPU waiting for the + * SPI mem transaction to finish, and this will make real-time maintainers + * unhappy and might make your system less reactive. Instead, drivers should + * use DMA to access this direct mapping.
*/ struct spi_controller_mem_ops { int (*adjust_op_size)(struct spi_mem *mem, struct spi_mem_op *op); @@ -191,6 +256,12 @@ struct spi_controller_mem_ops { int (*exec_op)(struct spi_mem *mem, const struct spi_mem_op *op); const char *(*get_name)(struct spi_mem *mem); + int (*dirmap_create)(struct spi_mem_dirmap_desc *desc); + void (*dirmap_destroy)(struct spi_mem_dirmap_desc *desc); + ssize_t (*dirmap_read)(struct spi_mem_dirmap_desc *desc, + u64 offs, size_t len, void *buf); + ssize_t (*dirmap_write)(struct spi_mem_dirmap_desc *desc, + u64 offs, size_t len, const void *buf); }; /** @@ -251,6 +322,15 @@ int spi_mem_exec_op(struct spi_mem *mem, const char *spi_mem_get_name(struct spi_mem *mem); +struct spi_mem_dirmap_desc * +spi_mem_dirmap_create(struct spi_mem *mem, + const struct spi_mem_dirmap_info *info); +void spi_mem_dirmap_destroy(struct spi_mem_dirmap_desc *desc); +ssize_t spi_mem_dirmap_read(struct spi_mem_dirmap_desc *desc, + u64 offs, size_t len, void *buf); +ssize_t spi_mem_dirmap_write(struct spi_mem_dirmap_desc *desc, + u64 offs, size_t len, const void *buf); + int spi_mem_driver_register_with_owner(struct spi_mem_driver *drv, struct module *owner); -- cgit v1.2.3 From 1e86ace4c140fd5a693e266c9b23409358f25381 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:31 -0800 Subject: net/mlx5: EQ, Use the right place to store/read IRQ affinity hint Currently the cpu affinity hint mask for completion EQs is stored and read from the wrong place, since reading and storing is done from the same index, there is no actual issue with that, but internal irq_info for completion EQs starts at MLX5_EQ_VEC_COMP_BASE offset in irq_info array, this patch changes the code to use the correct offset to store and read the IRQ affinity hint.
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 14 ++++++++------ include/linux/mlx5/driver.h | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1243edbedc9e..2839c30dd3a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1760,7 +1760,7 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) { - return cpumask_first(priv->mdev->priv.irq_info[ix].mask); + return cpumask_first(priv->mdev->priv.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask); } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 28132c7dc05f..d5cea0a36e6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -640,18 +640,19 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) { struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int irq = pci_irq_vector(mdev->pdev, vecidx); - if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&priv->irq_info[vecidx].mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); return -ENOMEM; } cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[i].mask); + priv->irq_info[vecidx].mask); if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[i].mask)) + irq_set_affinity_hint(irq, 
priv->irq_info[vecidx].mask)) mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); return 0; @@ -659,11 +660,12 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) { + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); + int irq = pci_irq_vector(mdev->pdev, vecidx); irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[i].mask); + free_cpumask_var(priv->irq_info[vecidx].mask); } static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index aa5963b5d38e..7d4ed995b4ce 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1309,7 +1309,7 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - return dev->priv.irq_info[vector].mask; + return dev->priv.irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; } #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From 4de45c758636c37efd313589f91c739f613fbe7d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:32 -0800 Subject: net/mlx5: EQ, Remove unused fields and structures Some fields and structures are not referenced nor used by the driver, remove them. 
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 11 ----------- include/linux/mlx5/driver.h | 3 --- 2 files changed, 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index aeab0c4f60f4..fd5926daa0a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -78,17 +78,6 @@ enum { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) -struct map_eq_in { - u64 mask; - u32 reserved; - u32 unmap_eqn; -}; - -struct cre_des_eq { - u8 reserved[15]; - u8 eqn; -}; - static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7d4ed995b4ce..15cf6727a62d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -398,7 +398,6 @@ struct mlx5_eq { unsigned int irqn; u8 eqn; int nent; - u64 mask; struct list_head list; int index; struct mlx5_rsc_debug *dbg; @@ -478,8 +477,6 @@ struct mlx5_core_srq { }; struct mlx5_eq_table { - void __iomem *update_ci; - void __iomem *update_arm_ci; struct list_head comp_eqs_list; struct mlx5_eq pages_eq; struct mlx5_eq async_eq; -- cgit v1.2.3 From 2883f352571b9b830561ca21b8a666936366a120 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:33 -0800 Subject: net/mlx5: EQ, No need to store eq index as a field eq->index is used only for completion EQs and is assigned to be the completion eq index, it is used only when traversing the completion eqs list, and it can be calculated dynamically, thus remove the eq->index field. 
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++-- include/linux/mlx5/driver.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d5cea0a36e6a..f5e6d375a8cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -702,10 +702,11 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq, *n; int err = -ENOENT; + int i = 0; spin_lock(&table->lock); list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - if (eq->index == vector) { + if (i++ == vector) { *eqn = eq->eqn; *irqn = eq->irqn; err = 0; @@ -797,7 +798,6 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) goto clean; } mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); - eq->index = i; spin_lock(&table->lock); list_add_tail(&eq->list, &table->comp_eqs_list); spin_unlock(&table->lock); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 15cf6727a62d..4b62d71825c1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -399,7 +399,6 @@ struct mlx5_eq { u8 eqn; int nent; struct list_head list; - int index; struct mlx5_rsc_debug *dbg; enum mlx5_eq_type type; union { -- cgit v1.2.3 From aaa553a64438640ee4e41a2c1027c3435a75c0e7 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:34 -0800 Subject: net/mlx5: EQ, Remove redundant completion EQ list lock Completion EQs list is only modified on driver load/unload, locking is not required, remove it. 
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/main.c | 17 +++-------------- include/linux/mlx5/driver.h | 3 --- 3 files changed, 3 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index fd5926daa0a6..e75272503027 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -810,8 +810,6 @@ int mlx5_eq_init(struct mlx5_core_dev *dev) { int err; - spin_lock_init(&dev->priv.eq_table.lock); - err = mlx5_eq_debugfs_init(dev); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f5e6d375a8cc..f692c2a42130 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -704,7 +704,6 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int err = -ENOENT; int i = 0; - spin_lock(&table->lock); list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { if (i++ == vector) { *eqn = eq->eqn; @@ -713,7 +712,6 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, break; } } - spin_unlock(&table->lock); return err; } @@ -724,14 +722,11 @@ struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq; - spin_lock(&table->lock); - list_for_each_entry(eq, &table->comp_eqs_list, list) - if (eq->eqn == eqn) { - spin_unlock(&table->lock); + list_for_each_entry(eq, &table->comp_eqs_list, list) { + if (eq->eqn == eqn) return eq; - } + } - spin_unlock(&table->lock); return ERR_PTR(-ENOENT); } @@ -747,17 +742,13 @@ static void free_comp_eqs(struct mlx5_core_dev *dev) dev->rmap = NULL; } #endif - spin_lock(&table->lock); 
list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); - spin_unlock(&table->lock); if (mlx5_destroy_unmap_eq(dev, eq)) mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); kfree(eq); - spin_lock(&table->lock); } - spin_unlock(&table->lock); } static int alloc_comp_eqs(struct mlx5_core_dev *dev) @@ -798,9 +789,7 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) goto clean; } mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); - spin_lock(&table->lock); list_add_tail(&eq->list, &table->comp_eqs_list); - spin_unlock(&table->lock); } return 0; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4b62d71825c1..852e397c7624 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -484,9 +484,6 @@ struct mlx5_eq_table { struct mlx5_eq pfault_eq; #endif int num_comp_vectors; - /* protect EQs list - */ - spinlock_t lock; }; struct mlx5_uars_page { -- cgit v1.2.3 From d674a9aa434409826b2408609be493739e61e6f6 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:37 -0800 Subject: net/mlx5: EQ, irq_info and rmap belong to eq_table irq_info and rmap are EQ properties of the driver, and only needed for EQ objects, move them to the eq_table EQs database structure. 
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +-- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 40 ++++++++++++----------- include/linux/mlx5/driver.h | 10 +++--- 3 files changed, 28 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2839c30dd3a0..32ea47c28324 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1760,7 +1760,7 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) { - return cpumask_first(priv->mdev->priv.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask); + return cpumask_first(priv->mdev->priv.eq_table.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask); } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, @@ -4960,7 +4960,7 @@ int mlx5e_netdev_init(struct net_device *netdev, netif_carrier_off(netdev); #ifdef CONFIG_MLX5_EN_ARFS - netdev->rx_cpu_rmap = mdev->rmap; + netdev->rx_cpu_rmap = mdev->priv.eq_table.rmap; #endif return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 44ccd4206104..70f62f10065e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -694,7 +694,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_in; - snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", + snprintf(priv->eq_table.irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); eq->eqn = MLX5_GET(create_eq_out, out, eq_number); @@ -702,7 +702,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, eq->dev = dev; eq->doorbell = priv->uar->map + 
MLX5_EQ_DOORBEL_OFFSET; err = request_irq(eq->irqn, handler, 0, - priv->irq_info[vecidx].name, eq); + priv->eq_table.irq_info[vecidx].name, eq); if (err) goto err_eq; @@ -952,17 +952,18 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; int vecidx = MLX5_EQ_VEC_COMP_BASE + i; int irq = pci_irq_vector(mdev->pdev, vecidx); + struct mlx5_irq_info *irq_info = &priv->eq_table.irq_info[vecidx]; - if (!zalloc_cpumask_var(&priv->irq_info[vecidx].mask, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); return -ENOMEM; } cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[vecidx].mask); + irq_info->mask); if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[vecidx].mask)) + irq_set_affinity_hint(irq, irq_info->mask)) mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); return 0; @@ -973,9 +974,10 @@ static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) int vecidx = MLX5_EQ_VEC_COMP_BASE + i; struct mlx5_priv *priv = &mdev->priv; int irq = pci_irq_vector(mdev->pdev, vecidx); + struct mlx5_irq_info *irq_info = &priv->eq_table.irq_info[vecidx]; irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[vecidx].mask); + free_cpumask_var(irq_info->mask); } static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) @@ -1014,9 +1016,9 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; + if (table->rmap) { + free_irq_cpu_rmap(table->rmap); + table->rmap = NULL; } #endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { @@ -1042,8 +1044,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) ncomp_vec = table->num_comp_vectors; nent = MLX5_COMP_EQ_SIZE; #ifdef CONFIG_RFS_ACCEL - dev->rmap = 
alloc_irq_cpu_rmap(ncomp_vec); - if (!dev->rmap) + table->rmap = alloc_irq_cpu_rmap(ncomp_vec); + if (!table->rmap) return -ENOMEM; #endif for (i = 0; i < ncomp_vec; i++) { @@ -1056,7 +1058,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) } #ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, vecidx)); + irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, vecidx, nent, 0, @@ -1126,9 +1128,9 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; + if (table->rmap) { + free_irq_cpu_rmap(table->rmap); + table->rmap = NULL; } #endif list_for_each_entry(eq, &table->comp_eqs_list, list) @@ -1160,8 +1162,8 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; - priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); - if (!priv->irq_info) + table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); + if (!table->irq_info) return -ENOMEM; nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, @@ -1176,7 +1178,7 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) return 0; err_free_irq_info: - kfree(priv->irq_info); + kfree(table->irq_info); return err; } @@ -1185,7 +1187,7 @@ static void free_irq_vectors(struct mlx5_core_dev *dev) struct mlx5_priv *priv = &dev->priv; pci_free_irq_vectors(dev->pdev); - kfree(priv->irq_info); + kfree(priv->eq_table.irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 852e397c7624..dcc3f7aa8572 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -484,6 +484,10 @@ struct mlx5_eq_table { struct mlx5_eq pfault_eq; #endif int num_comp_vectors; + struct 
mlx5_irq_info *irq_info; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif }; struct mlx5_uars_page { @@ -640,7 +644,6 @@ struct mlx5_port_module_event_stats { struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; - struct mlx5_irq_info *irq_info; /* pages stuff */ struct workqueue_struct *pg_wq; @@ -851,9 +854,6 @@ struct mlx5_core_dev { } roce; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; -#endif -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; #endif struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; @@ -1302,7 +1302,7 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - return dev->priv.irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; + return dev->priv.eq_table.irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; } #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From f2f3df5501391bc784c8462dc97d989c2194fb74 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:38 -0800 Subject: net/mlx5: EQ, Privatize eq_table and friends Move unnecessary EQ table structures and declaration from the public include/linux/mlx5/driver.h into the private area of mlx5_core and into eq.c/eq.h. Introduce new mlx5 EQ APIs: mlx5_comp_vectors_count(dev); mlx5_comp_irq_get_affinity_mask(dev, vector); And use them from mlx5_ib or mlx5e netdevice instead of direct access to mlx5_core internal structures. 
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 5 +- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 5 +- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 102 ++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/health.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 77 ++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 15 --- include/linux/mlx5/driver.h | 87 +----------------- 12 files changed, 179 insertions(+), 135 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e9c428071df3..6fbc0cba1bac 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5337,7 +5337,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); + return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector); } /* The mlx5_ib_multiport_mutex should be held when calling this function */ @@ -5701,8 +5701,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->ib_dev.phys_port_cnt = dev->num_ports; - dev->ib_dev.num_comp_vectors = - dev->mdev->priv.eq_table.num_comp_vectors; + dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); dev->ib_dev.dev.parent = &mdev->pdev->dev; mutex_init(&dev->cap_mask_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 4b85abb5c9f7..6e55d2f37c6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -38,6 +38,7 @@ #include #include #include "mlx5_core.h" +#include "lib/eq.h" #define TASKLET_MAX_TIME 2 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) @@ -124,7 +125,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, goto err_cmd; /* Add to async EQ CQ tree to recv async events */ - err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq); + err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq); if (err) goto err_cq_add; @@ -157,7 +158,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; int err; - err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq); + err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index b76766fb6c67..a11e22d0b0cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -36,6 +36,7 @@ #include #include #include "mlx5_core.h" +#include "lib/eq.h" enum { QP_PID, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d7fbd5b6ac95..aea74856c702 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -178,8 +178,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return is_kdump_kernel() ? 
MLX5E_MIN_NUM_CHANNELS : - min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); + min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); } /* Use this function to get max num channels after netdev was created */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 32ea47c28324..c23caade31bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -49,6 +49,7 @@ #include "lib/clock.h" #include "en/port.h" #include "en/xdp.h" +#include "lib/eq.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -1758,11 +1759,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } -static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) -{ - return cpumask_first(priv->mdev->priv.eq_table.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask); -} - static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1913,9 +1909,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); struct net_dim_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; unsigned int irq; int err; @@ -4960,7 +4956,7 @@ int mlx5e_netdev_init(struct net_device *netdev, netif_carrier_off(netdev); #ifdef CONFIG_MLX5_EN_ARFS - netdev->rx_cpu_rmap = mdev->priv.eq_table.rmap; + netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev); #endif return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 70f62f10065e..32ce20221c44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -38,6 +38,7 @@ #include #endif #include "mlx5_core.h" 
+#include "lib/eq.h" #include "fpga/core.h" #include "eswitch.h" #include "lib/clock.h" @@ -65,6 +66,26 @@ enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; +struct mlx5_irq_info { + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + +struct mlx5_eq_table { + struct list_head comp_eqs_list; + struct mlx5_eq pages_eq; + struct mlx5_eq async_eq; + struct mlx5_eq cmd_eq; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + struct mlx5_eq pfault_eq; +#endif + int num_comp_vectors; + struct mlx5_irq_info *irq_info; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ (1ull << MLX5_EVENT_TYPE_COMM_EST) | \ (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \ @@ -633,10 +654,11 @@ static void init_eq_buf(struct mlx5_eq *eq) } } -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type) +static int +mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, enum mlx5_eq_type type) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; @@ -694,7 +716,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_in; - snprintf(priv->eq_table.irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", + snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); eq->eqn = MLX5_GET(create_eq_out, out, eq_number); @@ -702,7 +724,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; err = request_irq(eq->irqn, handler, 0, - priv->eq_table.irq_info[vecidx].name, eq); + eq_table->irq_info[vecidx].name, eq); if (err) goto err_eq; @@ -746,7 +768,7 @@ err_buf: return err; } 
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +static int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { int err; @@ -806,25 +828,35 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) return 0; } -int mlx5_eq_init(struct mlx5_core_dev *dev) +int mlx5_eq_table_init(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *eq_table; int err; + eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL); + if (!eq_table) + return -ENOMEM; + + dev->priv.eq_table = eq_table; + err = mlx5_eq_debugfs_init(dev); + if (err) + kvfree(eq_table); return err; } -void mlx5_eq_cleanup(struct mlx5_core_dev *dev) +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) { mlx5_eq_debugfs_cleanup(dev); + kvfree(dev->priv.eq_table); } /* Async EQs */ static int create_async_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; @@ -916,7 +948,7 @@ err1: static void destroy_async_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; int err; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING @@ -945,6 +977,11 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) err); } +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) +{ + return &dev->priv.eq_table->async_eq; +} + /* Completion EQs */ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -952,7 +989,7 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; int vecidx = MLX5_EQ_VEC_COMP_BASE + i; int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table.irq_info[vecidx]; + struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var 
failed"); @@ -974,7 +1011,7 @@ static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) int vecidx = MLX5_EQ_VEC_COMP_BASE + i; struct mlx5_priv *priv = &mdev->priv; int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table.irq_info[vecidx]; + struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; irq_set_affinity_hint(irq, NULL); free_cpumask_var(irq_info->mask); @@ -985,7 +1022,7 @@ static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) int err; int i; - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { + for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { err = set_comp_irq_affinity_hint(mdev, i); if (err) goto err_out; @@ -1004,13 +1041,13 @@ static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) { int i; - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) + for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) clear_comp_irq_affinity_hint(mdev, i); } static void destroy_comp_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq *eq, *n; clear_comp_irqs_affinity_hints(dev); @@ -1032,7 +1069,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) static int create_comp_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq *eq; int ncomp_vec; @@ -1088,7 +1125,7 @@ clean: int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq *eq, *n; int err = -ENOENT; int i = 0; @@ -1106,9 +1143,32 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, } EXPORT_SYMBOL(mlx5_vector2eqn); +unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev 
*dev) +{ + return dev->priv.eq_table->num_comp_vectors; +} +EXPORT_SYMBOL(mlx5_comp_vectors_count); + +struct cpumask * +mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) +{ + /* TODO: consider irq_get_affinity_mask(irq) */ + return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; +} +EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); + +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + return dev->priv.eq_table->rmap; +#else + return NULL; +#endif +} + struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq *eq; list_for_each_entry(eq, &table->comp_eqs_list, list) { @@ -1122,7 +1182,7 @@ struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq *eq; clear_comp_irqs_affinity_hints(dev); @@ -1149,7 +1209,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) static int alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = &priv->eq_table; + struct mlx5_eq_table *table = priv->eq_table; int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
MLX5_CAP_GEN(dev, max_num_eqs) : 1 << MLX5_CAP_GEN(dev, log_max_eq); @@ -1187,7 +1247,7 @@ static void free_irq_vectors(struct mlx5_core_dev *dev) struct mlx5_priv *priv = &dev->priv; pci_free_irq_vectors(dev->pdev); - kfree(priv->eq_table.irq_info); + kfree(priv->eq_table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d004957328f9..324606227b1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -38,6 +38,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" +#include "lib/eq.h" #define UPLINK_VPORT 0xFFFF diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 43118de8ee99..b5be6f0b9ed5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -38,6 +38,7 @@ #include #include #include "mlx5_core.h" +#include "lib/eq.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h new file mode 100644 index 000000000000..48ee37797b3f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_EQ_H__ +#define __LIB_MLX5_EQ_H__ +#include + +#define MLX5_MAX_IRQ_NAME (32) + +enum { + MLX5_EQ_VEC_PAGES = 0, + MLX5_EQ_VEC_CMD = 1, + MLX5_EQ_VEC_ASYNC = 2, + MLX5_EQ_VEC_PFAULT = 3, + MLX5_EQ_VEC_COMP_BASE, +}; + +struct mlx5_eq_tasklet { + struct list_head list; + struct list_head process_list; + struct tasklet_struct task; + spinlock_t lock; /* lock completion tasklet list */ +}; + +struct mlx5_eq_pagefault { + struct work_struct work; + spinlock_t lock; /* Pagefaults spinlock */ + 
struct workqueue_struct *wq; + mempool_t *pool; +}; + +struct mlx5_cq_table { + spinlock_t lock; /* protect radix tree */ + struct radix_tree_root tree; +}; + +struct mlx5_eq { + struct mlx5_core_dev *dev; + struct mlx5_cq_table cq_table; + __be32 __iomem *doorbell; + u32 cons_index; + struct mlx5_frag_buf buf; + int size; + unsigned int irqn; + u8 eqn; + int nent; + struct list_head list; + struct mlx5_rsc_debug *dbg; + enum mlx5_eq_type type; + union { + struct mlx5_eq_tasklet tasklet_ctx; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + struct mlx5_eq_pagefault pf_ctx; +#endif + }; +}; + +int mlx5_eq_table_init(struct mlx5_core_dev *dev); +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_eq_table_create(struct mlx5_core_dev *dev); +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); +void mlx5_cq_tasklet_cb(unsigned long data); +struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 21cc9bbc2563..5d11ef92c8b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -54,6 +54,7 @@ #include #include "mlx5_core.h" #include "fs_core.h" +#include "lib/eq.h" #include "lib/mpfs.h" #include "eswitch.h" #include "lib/mlx5.h" @@ -728,7 +729,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv 
*priv) goto out; } - err = mlx5_eq_init(dev); + err = mlx5_eq_table_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize eq\n"); goto out; @@ -802,7 +803,7 @@ err_tables_cleanup: mlx5_cq_debugfs_cleanup(dev); err_eq_cleanup: - mlx5_eq_cleanup(dev); + mlx5_eq_table_cleanup(dev); out: return err; @@ -823,7 +824,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); - mlx5_eq_cleanup(dev); + mlx5_eq_table_cleanup(dev); } static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 3fa6d26875fe..4d39adcfb0eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -124,21 +124,6 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); -int mlx5_eq_init(struct mlx5_core_dev *dev); -void mlx5_eq_cleanup(struct mlx5_core_dev *dev); -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type); -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_table_create(struct mlx5_core_dev *dev); -void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); -/* This function should only be called after mlx5_cmd_force_teardown_hca */ -void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); -void mlx5_cq_tasklet_cb(unsigned long data); void mlx5_cmd_comp_handler(struct mlx5_core_dev 
*dev, u64 vec, bool forced); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index dcc3f7aa8572..4d6246cb6c19 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -84,18 +84,6 @@ enum { MLX5_MAX_PORTS = 2, }; -enum { - MLX5_EQ_VEC_PAGES = 0, - MLX5_EQ_VEC_CMD = 1, - MLX5_EQ_VEC_ASYNC = 2, - MLX5_EQ_VEC_PFAULT = 3, - MLX5_EQ_VEC_COMP_BASE, -}; - -enum { - MLX5_MAX_IRQ_NAME = 32 -}; - enum { MLX5_ATOMIC_MODE_OFFSET = 16, MLX5_ATOMIC_MODE_IB_COMP = 1, @@ -366,49 +354,6 @@ struct mlx5_frag_buf_ctrl { u8 log_frag_strides; }; -struct mlx5_eq_tasklet { - struct list_head list; - struct list_head process_list; - struct tasklet_struct task; - /* lock on completion tasklet list */ - spinlock_t lock; -}; - -struct mlx5_eq_pagefault { - struct work_struct work; - /* Pagefaults lock */ - spinlock_t lock; - struct workqueue_struct *wq; - mempool_t *pool; -}; - -struct mlx5_cq_table { - /* protect radix tree */ - spinlock_t lock; - struct radix_tree_root tree; -}; - -struct mlx5_eq { - struct mlx5_core_dev *dev; - struct mlx5_cq_table cq_table; - __be32 __iomem *doorbell; - u32 cons_index; - struct mlx5_frag_buf buf; - int size; - unsigned int irqn; - u8 eqn; - int nent; - struct list_head list; - struct mlx5_rsc_debug *dbg; - enum mlx5_eq_type type; - union { - struct mlx5_eq_tasklet tasklet_ctx; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct mlx5_eq_pagefault pf_ctx; -#endif - }; -}; - struct mlx5_core_psv { u32 psv_idx; struct psv_layout { @@ -475,21 +420,6 @@ struct mlx5_core_srq { u16 uid; }; -struct mlx5_eq_table { - struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq async_eq; - struct mlx5_eq cmd_eq; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct mlx5_eq pfault_eq; -#endif - int num_comp_vectors; - struct mlx5_irq_info *irq_info; -#ifdef 
CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif -}; - struct mlx5_uars_page { void __iomem *map; bool wc; @@ -572,11 +502,6 @@ struct mlx5_core_sriov { int enabled_vfs; }; -struct mlx5_irq_info { - cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; -}; - struct mlx5_fc_stats { spinlock_t counters_idr_lock; /* protects counters_idr */ struct idr counters_idr; @@ -594,6 +519,7 @@ struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; struct mlx5_pagefault; +struct mlx5_eq_table; struct mlx5_rate_limit { u32 rate; @@ -643,7 +569,7 @@ struct mlx5_port_module_event_stats { struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; - struct mlx5_eq_table eq_table; + struct mlx5_eq_table *eq_table; /* pages stuff */ struct workqueue_struct *pg_wq; @@ -1148,6 +1074,9 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, bool map_wc, bool fast_path); void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); +unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev); +struct cpumask * +mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector); unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, @@ -1299,10 +1228,4 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; -static inline const struct cpumask * -mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) -{ - return dev->priv.eq_table.irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; -} - #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From 16d760839ceef510cf95cbfadc069c4473c7a277 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:39 -0800 Subject: net/mlx5: EQ, Different EQ types In mlx5 we have three types of usages for EQs, 1. Asynchronous EQs, used internally by mlx5 core for a. FW command completions b. FW page requests c. one EQ for all other Asynchronous events 2. 
Completion EQs, used for CQ completion (we create one per core) 3. *Special type of EQ (page fault) used for RDMA on demand paging (ODP). *The 3rd type shouldn't be special at least in mlx5 core, it is yet another async events EQ with specific use case, it will be removed in the next two patches, and will completely move its logic to mlx5_ib, as it is rdma specific. In this patch we remove use case (eq type) specific fields from struct mlx5_eq into new eq type specific structures. struct mlx5_eq_async; struct mlx5_eq_comp; struct mlx5_eq_pagefault; Separate between their type specific flows. In the future we will allow users to create their own generic EQs. For now we will allow only one for ODP in next patches. We will introduce event listeners registration API for those who want to receive mlx5 async events. After that mlx5 eq handling will be clean from feature/user specific handling. Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 376 ++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 53 +-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 4 - include/linux/mlx5/cq.h | 2 +- include/linux/mlx5/driver.h | 10 +- 10 files changed, 270 insertions(+), 199 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 6e55d2f37c6d..713a17ee3751 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -93,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32
dout[MLX5_ST_SZ_DW(destroy_cq_out)]; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; int err; - eq = mlx5_eqn2eq(dev, eqn); + eq = mlx5_eqn2comp_eq(dev, eqn); if (IS_ERR(eq)) return PTR_ERR(eq); @@ -120,7 +120,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, INIT_LIST_HEAD(&cq->tasklet_ctx.list); /* Add to comp EQ CQ tree to recv comp events */ - err = mlx5_eq_add_cq(eq, cq); + err = mlx5_eq_add_cq(&eq->core, cq); if (err) goto err_cmd; @@ -140,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, return 0; err_cq_add: - mlx5_eq_del_cq(eq, cq); + mlx5_eq_del_cq(&eq->core, cq); err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); @@ -162,7 +162,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) if (err) return err; - err = mlx5_eq_del_cq(cq->eq, cq); + err = mlx5_eq_del_cq(&cq->eq->core, cq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c23caade31bf..0d495a6b3949 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -320,7 +320,7 @@ static void mlx5e_enable_async_events(struct mlx5e_priv *priv) static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); - synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC)); + mlx5_eq_synchronize_async_irq(priv->mdev); } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, @@ -4117,17 +4117,17 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, struct mlx5e_txqsq *sq) { - struct mlx5_eq *eq = sq->cq.mcq.eq; + struct mlx5_eq_comp *eq = sq->cq.mcq.eq; u32 eqe_count; netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 
0x%x\n", - eq->eqn, eq->cons_index, eq->irqn); + eq->core.eqn, eq->core.cons_index, eq->core.irqn); eqe_count = mlx5_eq_poll_irq_disabled(eq); if (!eqe_count) return false; - netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn); + netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn); sq->channel->stats->eq_rearm++; return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 32ce20221c44..252c9f0569b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -72,13 +72,16 @@ struct mlx5_irq_info { }; struct mlx5_eq_table { - struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq async_eq; - struct mlx5_eq cmd_eq; + struct list_head comp_eqs_list; + struct mlx5_eq pages_eq; + struct mlx5_eq async_eq; + struct mlx5_eq cmd_eq; + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct mlx5_eq pfault_eq; + struct mlx5_eq_pagefault pfault_eq; #endif + struct mutex lock; /* sync async eqs creations */ + u8 num_async_eqs; int num_comp_vectors; struct mlx5_irq_info *irq_info; #ifdef CONFIG_RFS_ACCEL @@ -224,24 +227,24 @@ static void eqe_pf_action(struct work_struct *work) struct mlx5_pagefault *pfault = container_of(work, struct mlx5_pagefault, work); - struct mlx5_eq *eq = pfault->eq; + struct mlx5_eq_pagefault *eq = pfault->eq; - mlx5_core_page_fault(eq->dev, pfault); - mempool_free(pfault, eq->pf_ctx.pool); + mlx5_core_page_fault(eq->core.dev, pfault); + mempool_free(pfault, eq->pool); } -static void eq_pf_process(struct mlx5_eq *eq) +static void eq_pf_process(struct mlx5_eq_pagefault *eq) { - struct mlx5_core_dev *dev = eq->dev; + struct mlx5_core_dev *dev = eq->core.dev; struct mlx5_eqe_page_fault *pf_eqe; struct mlx5_pagefault *pfault; struct mlx5_eqe *eqe; int set_ci = 0; - while ((eqe = next_eqe_sw(eq))) { - pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC); + while ((eqe = 
next_eqe_sw(&eq->core))) { + pfault = mempool_alloc(eq->pool, GFP_ATOMIC); if (!pfault) { - schedule_work(&eq->pf_ctx.work); + schedule_work(&eq->work); break; } @@ -311,30 +314,30 @@ static void eq_pf_process(struct mlx5_eq *eq) pfault->eq = eq; INIT_WORK(&pfault->work, eqe_pf_action); - queue_work(eq->pf_ctx.wq, &pfault->work); + queue_work(eq->wq, &pfault->work); - ++eq->cons_index; + ++eq->core.cons_index; ++set_ci; if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); + eq_update_ci(&eq->core, 0); set_ci = 0; } } - eq_update_ci(eq, 1); + eq_update_ci(&eq->core, 1); } static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) { - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_pagefault *eq = eq_ptr; unsigned long flags; - if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) { + if (spin_trylock_irqsave(&eq->lock, flags)) { eq_pf_process(eq); - spin_unlock_irqrestore(&eq->pf_ctx.lock, flags); + spin_unlock_irqrestore(&eq->lock, flags); } else { - schedule_work(&eq->pf_ctx.work); + schedule_work(&eq->work); } return IRQ_HANDLED; @@ -352,35 +355,61 @@ static void mempool_refill(mempool_t *pool) static void eq_pf_action(struct work_struct *work) { - struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work); + struct mlx5_eq_pagefault *eq = + container_of(work, struct mlx5_eq_pagefault, work); - mempool_refill(eq->pf_ctx.pool); + mempool_refill(eq->pool); - spin_lock_irq(&eq->pf_ctx.lock); + spin_lock_irq(&eq->lock); eq_pf_process(eq); - spin_unlock_irq(&eq->pf_ctx.lock); + spin_unlock_irq(&eq->lock); } -static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name) +static int +create_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) { - spin_lock_init(&pf_ctx->lock); - INIT_WORK(&pf_ctx->work, eq_pf_action); + int err; - pf_ctx->wq = alloc_workqueue(name, - WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, - MLX5_NUM_CMD_EQE); - if (!pf_ctx->wq) + spin_lock_init(&eq->lock); + INIT_WORK(&eq->work, eq_pf_action); + + eq->pool = 
mempool_create_kmalloc_pool(MLX5_NUM_PF_DRAIN, + sizeof(struct mlx5_pagefault)); + if (!eq->pool) return -ENOMEM; - pf_ctx->pool = mempool_create_kmalloc_pool - (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault)); - if (!pf_ctx->pool) + eq->wq = alloc_workqueue("mlx5_page_fault", + WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, + MLX5_NUM_CMD_EQE); + if (!eq->wq) { + err = -ENOMEM; + goto err_mempool; + } + + err = mlx5_create_async_eq(dev, &eq->core, MLX5_NUM_ASYNC_EQE, + 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + "mlx5_page_fault_eq", mlx5_eq_pf_int); + if (err) goto err_wq; return 0; err_wq: - destroy_workqueue(pf_ctx->wq); - return -ENOMEM; + destroy_workqueue(eq->wq); +err_mempool: + mempool_destroy(eq->pool); + return err; +} + +static int destroy_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) +{ + int err; + + err = mlx5_destroy_async_eq(dev, &eq->core); + cancel_work_sync(&eq->work); + destroy_workqueue(eq->wq); + mempool_destroy(eq->pool); + + return err; } int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, @@ -444,37 +473,88 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn) +static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type) { struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); + mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); return; } - ++cq->arm_sn; - - cq->comp(cq); + cq->event(cq, event_type); mlx5_cq_put(cq); } -static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type) +static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) { - struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); + struct mlx5_eq_comp *eq_comp = eq_ptr; + struct mlx5_eq *eq = eq_ptr; + struct mlx5_eqe *eqe; + int set_ci = 0; + u32 cqn = -1; - if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Async event for bogus CQ 
0x%x\n", cqn); - return; + while ((eqe = next_eqe_sw(eq))) { + struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + dma_rmb(); + /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */ + cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; + + cq = mlx5_eq_cq_get(eq, cqn); + if (likely(cq)) { + ++cq->arm_sn; + cq->comp(cq); + mlx5_cq_put(cq); + } else { + mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); + } + + ++eq->cons_index; + ++set_ci; + + /* The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MLX5_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { + eq_update_ci(eq, 0); + set_ci = 0; + } } - cq->event(cq, event_type); + eq_update_ci(eq, 1); - mlx5_cq_put(cq); + if (cqn != -1) + tasklet_schedule(&eq_comp->tasklet_ctx.task); + + return IRQ_HANDLED; } -static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) +/* Some architectures don't latch interrupts when they are disabled, so using + * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to + * avoid losing them. It is not recommended to use it, unless this is the last + * resort. 
+ */ +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) +{ + u32 count_eqe; + + disable_irq(eq->core.irqn); + count_eqe = eq->core.cons_index; + mlx5_eq_comp_int(eq->core.irqn, eq); + count_eqe = eq->core.cons_index - count_eqe; + enable_irq(eq->core.irqn); + + return count_eqe; +} + +static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; struct mlx5_core_dev *dev = eq->dev; @@ -494,10 +574,6 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type)); switch (eqe->type) { - case MLX5_EVENT_TYPE_COMP: - cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; - mlx5_eq_cq_completion(eq, cqn); - break; case MLX5_EVENT_TYPE_DCT_DRAINED: rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); @@ -619,30 +695,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) eq_update_ci(eq, 1); - if (cqn != -1) - tasklet_schedule(&eq->tasklet_ctx.task); - return IRQ_HANDLED; } -/* Some architectures don't latch interrupts when they are disabled, so using - * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to - * avoid losing them. It is not recommended to use it, unless this is the last - * resort. 
- */ -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq) -{ - u32 count_eqe; - - disable_irq(eq->irqn); - count_eqe = eq->cons_index; - mlx5_eq_int(eq->irqn, eq); - count_eqe = eq->cons_index - count_eqe; - enable_irq(eq->irqn); - - return count_eqe; -} - static void init_eq_buf(struct mlx5_eq *eq) { struct mlx5_eqe *eqe; @@ -656,13 +711,12 @@ static void init_eq_buf(struct mlx5_eq *eq) static int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, enum mlx5_eq_type type) + int nent, u64 mask, const char *name, irq_handler_t handler) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - irq_handler_t handler; __be64 *pas; void *eqc; int inlen; @@ -674,20 +728,12 @@ mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); - eq->type = type; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) return err; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (type == MLX5_EQ_TYPE_PF) - handler = mlx5_eq_pf_int; - else -#endif - handler = mlx5_eq_int; - init_eq_buf(eq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + @@ -732,21 +778,6 @@ mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_irq; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (type == MLX5_EQ_TYPE_PF) { - err = init_pf_ctx(&eq->pf_ctx, name); - if (err) - goto err_irq; - } else -#endif - { - INIT_LIST_HEAD(&eq->tasklet_ctx.list); - INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); - spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, - (unsigned long)&eq->tasklet_ctx); - } - /* EQs are created in ARMED state */ eq_update_ci(eq, 1); @@ -780,15 
+811,6 @@ static int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) eq->eqn); synchronize_irq(eq->irqn); - if (eq->type == MLX5_EQ_TYPE_COMP) { - tasklet_disable(&eq->tasklet_ctx.task); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - } else if (eq->type == MLX5_EQ_TYPE_PF) { - cancel_work_sync(&eq->pf_ctx.work); - destroy_workqueue(eq->pf_ctx.wq); - mempool_destroy(eq->pf_ctx.pool); -#endif - } mlx5_buf_free(dev, &eq->buf); return err; @@ -841,8 +863,15 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) err = mlx5_eq_debugfs_init(dev); if (err) - kvfree(eq_table); + goto kvfree_eq_table; + mutex_init(&eq_table->lock); + + return 0; + +kvfree_eq_table: + kvfree(eq_table); + dev->priv.eq_table = NULL; return err; } @@ -854,6 +883,43 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ +int mlx5_create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + int nent, u64 mask, const char *name, irq_handler_t handler) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + u8 vecdix; + int err; + + mutex_lock(&eq_table->lock); + if (eq_table->num_async_eqs >= MLX5_EQ_MAX_ASYNC_EQS) { + err = -ENOSPC; + goto unlock; + } + + vecdix = eq_table->num_async_eqs + 1; + + err = mlx5_create_map_eq(dev, eq, vecdix, nent, mask, name, handler); + if (!err) + eq_table->num_async_eqs++; + +unlock: + mutex_unlock(&eq_table->lock); + return err; +} + +int mlx5_destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + mutex_lock(&eq_table->lock); + err = mlx5_destroy_unmap_eq(dev, eq); + if (!err) + eq_table->num_async_eqs--; + mutex_unlock(&eq_table->lock); + return err; +} + static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -887,9 +953,9 @@ static int create_async_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); - 
err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, - MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, - "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC); + err = mlx5_create_async_eq(dev, &table->cmd_eq, MLX5_NUM_CMD_EQE, + 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", + mlx5_eq_async_int); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); return err; @@ -897,19 +963,15 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_events(dev); - err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, - MLX5_NUM_ASYNC_EQE, async_event_mask, - "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC); + err = mlx5_create_async_eq(dev, &table->async_eq, MLX5_NUM_ASYNC_EQE, + async_event_mask, "mlx5_async_eq", mlx5_eq_async_int); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } - err = mlx5_create_map_eq(dev, &table->pages_eq, - MLX5_EQ_VEC_PAGES, - /* TODO: sriov max_vf + */ 1, - 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", - MLX5_EQ_TYPE_ASYNC); + err = mlx5_create_async_eq(dev, &table->pages_eq, /* TODO: sriov max_vf + */ 1, + 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", mlx5_eq_async_int); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; @@ -917,12 +979,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_create_map_eq(dev, &table->pfault_eq, - MLX5_EQ_VEC_PFAULT, - MLX5_NUM_ASYNC_EQE, - 1 << MLX5_EVENT_TYPE_PAGE_FAULT, - "mlx5_page_fault_eq", - MLX5_EQ_TYPE_PF); + err = create_pf_eq(dev, &table->pfault_eq); if (err) { mlx5_core_warn(dev, "failed to create page fault EQ %d\n", err); @@ -932,17 +989,17 @@ static int create_async_eqs(struct mlx5_core_dev *dev) return err; err3: - mlx5_destroy_unmap_eq(dev, &table->pages_eq); + mlx5_destroy_async_eq(dev, &table->pages_eq); #else return err; #endif err2: - mlx5_destroy_unmap_eq(dev, &table->async_eq); + mlx5_destroy_async_eq(dev, 
&table->async_eq); err1: mlx5_cmd_use_polling(dev); - mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + mlx5_destroy_async_eq(dev, &table->cmd_eq); return err; } @@ -953,25 +1010,25 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); + err = destroy_pf_eq(dev, &table->pfault_eq); if (err) mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n", err); } #endif - err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); + err = mlx5_destroy_async_eq(dev, &table->pages_eq); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = mlx5_destroy_unmap_eq(dev, &table->async_eq); + err = mlx5_destroy_async_eq(dev, &table->async_eq); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + err = mlx5_destroy_async_eq(dev, &table->cmd_eq); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -982,6 +1039,16 @@ struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) return &dev->priv.eq_table->async_eq; } +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->async_eq.irqn); +} + +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); +} + /* Completion EQs */ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -1048,7 +1115,7 @@ static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq *eq, *n; + struct mlx5_eq_comp *eq, *n; clear_comp_irqs_affinity_hints(dev); @@ -1060,9 +1127,10 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) #endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { 
list_del(&eq->list); - if (mlx5_destroy_unmap_eq(dev, eq)) - mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", - eq->eqn); + if (mlx5_destroy_unmap_eq(dev, &eq->core)) + mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", + eq->core.eqn); + tasklet_disable(&eq->tasklet_ctx.task); kfree(eq); } } @@ -1071,7 +1139,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; char name[MLX5_MAX_IRQ_NAME]; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; int ncomp_vec; int nent; int err; @@ -1094,17 +1162,23 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) goto clean; } + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + #ifdef CONFIG_RFS_ACCEL irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); - err = mlx5_create_map_eq(dev, eq, vecidx, nent, 0, - name, MLX5_EQ_TYPE_COMP); + err = mlx5_create_map_eq(dev, &eq->core, vecidx, nent, 0, + name, mlx5_eq_comp_int); if (err) { kfree(eq); goto clean; } - mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); } @@ -1126,14 +1200,14 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq *eq, *n; + struct mlx5_eq_comp *eq, *n; int err = -ENOENT; int i = 0; list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { if (i++ == vector) { - *eqn = eq->eqn; - *irqn = eq->irqn; + *eqn = eq->core.eqn; + *irqn = eq->core.irqn; err = 0; break; } @@ -1166,13 +1240,13 @@ struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) 
#endif } -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; list_for_each_entry(eq, &table->comp_eqs_list, list) { - if (eq->eqn == eqn) + if (eq->core.eqn == eqn) return eq; } @@ -1183,7 +1257,7 @@ struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; clear_comp_irqs_affinity_hints(dev); @@ -1194,14 +1268,14 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) } #endif list_for_each_entry(eq, &table->comp_eqs_list, list) - free_irq(eq->irqn, eq); + free_irq(eq->core.irqn, eq); free_irq(table->pages_eq.irqn, &table->pages_eq); free_irq(table->async_eq.irqn, &table->async_eq); free_irq(table->cmd_eq.irqn, &table->cmd_eq); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (MLX5_CAP_GEN(dev, pg)) - free_irq(table->pfault_eq.irqn, &table->pfault_eq); + free_irq(table->pfault_eq.core.irqn, &table->pfault_eq.core); #endif pci_free_irq_vectors(dev->pdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 324606227b1a..2346b6ba3d54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1568,7 +1568,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) /* Mark this vport as disabled to discard new events */ vport->enabled = false; - synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC)); + mlx5_eq_synchronize_async_irq(esw->dev); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c 
b/drivers/net/ethernet/mellanox/mlx5/core/health.c index b5be6f0b9ed5..066883003aea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -85,7 +85,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev) u64 vector; /* wait for pending handlers to complete */ - synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD)); + mlx5_eq_synchronize_cmd_irq(dev); spin_lock_irqsave(&dev->cmd.alloc_lock, flags); vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); if (!vector) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 48ee37797b3f..706d58383dbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -8,11 +8,8 @@ #define MLX5_MAX_IRQ_NAME (32) enum { - MLX5_EQ_VEC_PAGES = 0, - MLX5_EQ_VEC_CMD = 1, - MLX5_EQ_VEC_ASYNC = 2, - MLX5_EQ_VEC_PFAULT = 3, - MLX5_EQ_VEC_COMP_BASE, + MLX5_EQ_MAX_ASYNC_EQS = 4, /* mlx5_core needs at least 3 */ + MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, }; struct mlx5_eq_tasklet { @@ -22,13 +19,6 @@ struct mlx5_eq_tasklet { spinlock_t lock; /* lock completion tasklet list */ }; -struct mlx5_eq_pagefault { - struct work_struct work; - spinlock_t lock; /* Pagefaults spinlock */ - struct workqueue_struct *wq; - mempool_t *pool; -}; - struct mlx5_cq_table { spinlock_t lock; /* protect radix tree */ struct radix_tree_root tree; @@ -44,29 +34,48 @@ struct mlx5_eq { unsigned int irqn; u8 eqn; int nent; - struct list_head list; struct mlx5_rsc_debug *dbg; - enum mlx5_eq_type type; - union { - struct mlx5_eq_tasklet tasklet_ctx; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct mlx5_eq_pagefault pf_ctx; -#endif - }; +}; + +struct mlx5_eq_comp { + struct mlx5_eq core; /* Must be first */ + struct mlx5_eq_tasklet tasklet_ctx; + struct list_head list; +}; + +struct mlx5_eq_pagefault { + struct mlx5_eq core; /* Must be first */ 
+ struct work_struct work; + spinlock_t lock; /* Pagefaults spinlock */ + struct workqueue_struct *wq; + mempool_t *pool; }; int mlx5_eq_table_init(struct mlx5_core_dev *dev); void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_eq_table_create(struct mlx5_core_dev *dev); void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + int nent, u64 mask, const char *name, + irq_handler_t handler); +int mlx5_destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); + int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); void mlx5_cq_tasklet_cb(unsigned long data); struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev); +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); + /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 5d11ef92c8b6..3de83fe65f2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -53,8 +53,8 @@ #endif #include #include "mlx5_core.h" -#include "fs_core.h" #include "lib/eq.h" +#include 
"fs_core.h" #include "lib/mpfs.h" #include "eswitch.h" #include "lib/mlx5.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 4d39adcfb0eb..4728b027cb9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -125,10 +125,6 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); -int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); -void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 31a750570c38..28b757a64029 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -60,7 +60,7 @@ struct mlx5_core_cq { } tasklet_ctx; int reset_notify_added; struct list_head reset_notify; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; u16 uid; }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4d6246cb6c19..fe9b552aa649 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -210,14 +210,6 @@ enum mlx5_port_status { MLX5_PORT_DOWN = 2, }; -enum mlx5_eq_type { - MLX5_EQ_TYPE_COMP, - MLX5_EQ_TYPE_ASYNC, -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - MLX5_EQ_TYPE_PF, -#endif -}; - struct mlx5_bfreg_info { u32 *sys_pages; int num_low_latency_bfregs; @@ -692,7 +684,7 @@ struct mlx5_pagefault { } rdma; }; - struct mlx5_eq *eq; + struct mlx5_eq_pagefault *eq; struct work_struct work; }; -- cgit v1.2.3 From 7701707cb94ed4d1e63ae4fa5ef62a2345ef9db7 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:40 
-0800 Subject: net/mlx5: EQ, Generic EQ Add mlx5_eq_{create/destroy}_generic APIs and EQE access methods, for mlx5 core consumers generic EQs. This API will be used in downstream patch to move page fault (RDMA ODP) EQ logic into mlx5_ib rdma driver, hence it will use a generic EQ. Current mlx5 EQ allocation scheme: On load mlx5 allocates 4 (for async) + #cores (for data completions) MSIX vectors, mlx5 core will assign 3 MSIX vectors for internal async EQs and will use all of the #cores MSIX vectors for completion EQs, (One vector is going to be reserved for a generic EQ). After this patch an external user (e.g mlx5_ib) of mlx5_core can use this new API to create new generic EQs with the reserved msix vector index for that eq. Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 243 +++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 12 +- include/linux/mlx5/eq.h | 39 ++++ 3 files changed, 221 insertions(+), 73 deletions(-) create mode 100644 include/linux/mlx5/eq.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 252c9f0569b1..ec1f5018546e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #ifdef CONFIG_RFS_ACCEL #include @@ -69,6 +70,7 @@ enum { struct mlx5_irq_info { cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; + void *context; /* dev_id provided to request_irq */ }; struct mlx5_eq_table { @@ -81,7 +83,6 @@ struct mlx5_eq_table { struct mlx5_eq_pagefault pfault_eq; #endif struct mutex lock; /* sync async eqs creations */ - u8 num_async_eqs; int num_comp_vectors; struct mlx5_irq_info *irq_info; #ifdef CONFIG_RFS_ACCEL @@ -229,19 +230,19 @@ static void eqe_pf_action(struct work_struct *work) work); struct 
mlx5_eq_pagefault *eq = pfault->eq; - mlx5_core_page_fault(eq->core.dev, pfault); + mlx5_core_page_fault(eq->core->dev, pfault); mempool_free(pfault, eq->pool); } static void eq_pf_process(struct mlx5_eq_pagefault *eq) { - struct mlx5_core_dev *dev = eq->core.dev; + struct mlx5_core_dev *dev = eq->core->dev; struct mlx5_eqe_page_fault *pf_eqe; struct mlx5_pagefault *pfault; struct mlx5_eqe *eqe; int set_ci = 0; - while ((eqe = next_eqe_sw(&eq->core))) { + while ((eqe = next_eqe_sw(eq->core))) { pfault = mempool_alloc(eq->pool, GFP_ATOMIC); if (!pfault) { schedule_work(&eq->work); @@ -316,16 +317,16 @@ static void eq_pf_process(struct mlx5_eq_pagefault *eq) INIT_WORK(&pfault->work, eqe_pf_action); queue_work(eq->wq, &pfault->work); - ++eq->core.cons_index; + ++eq->core->cons_index; ++set_ci; if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(&eq->core, 0); + eq_update_ci(eq->core, 0); set_ci = 0; } } - eq_update_ci(&eq->core, 1); + eq_update_ci(eq->core, 1); } static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) @@ -368,6 +369,7 @@ static void eq_pf_action(struct work_struct *work) static int create_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) { + struct mlx5_eq_param param = {}; int err; spin_lock_init(&eq->lock); @@ -386,11 +388,19 @@ create_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) goto err_mempool; } - err = mlx5_create_async_eq(dev, &eq->core, MLX5_NUM_ASYNC_EQE, - 1 << MLX5_EVENT_TYPE_PAGE_FAULT, - "mlx5_page_fault_eq", mlx5_eq_pf_int); - if (err) + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PFAULT_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + .nent = MLX5_NUM_ASYNC_EQE, + .context = eq, + .handler = mlx5_eq_pf_int + }; + + eq->core = mlx5_eq_create_generic(dev, "mlx5_page_fault_eq", &param); + if (IS_ERR(eq->core)) { + err = PTR_ERR(eq->core); goto err_wq; + } return 0; err_wq: @@ -404,7 +414,7 @@ static int destroy_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq { int err; - err =
mlx5_destroy_async_eq(dev, &eq->core); + err = mlx5_eq_destroy_generic(dev, eq->core); cancel_work_sync(&eq->work); destroy_workqueue(eq->wq); mempool_destroy(eq->pool); @@ -710,25 +720,29 @@ static void init_eq_buf(struct mlx5_eq *eq) } static int -mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, irq_handler_t handler) +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, + struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; + u8 vecidx = param->index; __be64 *pas; void *eqc; int inlen; u32 *in; int err; + if (eq_table->irq_info[vecidx].context) + return -EEXIST; + /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); - eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); + eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) @@ -749,7 +763,7 @@ mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, mlx5_fill_page_array(&eq->buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); - MLX5_SET64(create_eq_in, in, event_bitmask, mask); + MLX5_SET64(create_eq_in, in, event_bitmask, param->mask); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); @@ -764,13 +778,15 @@ mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); + eq_table->irq_info[vecidx].context = param->context; + eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; 
eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, handler, 0, - eq_table->irq_info[vecidx].name, eq); + err = request_irq(eq->irqn, param->handler, 0, + eq_table->irq_info[vecidx].name, param->context); if (err) goto err_eq; @@ -799,12 +815,19 @@ err_buf: return err; } -static int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + struct mlx5_irq_info *irq_info; int err; + irq_info = &eq_table->irq_info[eq->vecidx]; + mlx5_debug_eq_remove(dev, eq); - free_irq(eq->irqn, eq); + + free_irq(eq->irqn, irq_info->context); + irq_info->context = NULL; + err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -883,48 +906,38 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -int mlx5_create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - int nent, u64 mask, const char *name, irq_handler_t handler) +static int create_async_eq(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq *eq, struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; - u8 vecdix; int err; mutex_lock(&eq_table->lock); - if (eq_table->num_async_eqs >= MLX5_EQ_MAX_ASYNC_EQS) { + if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { err = -ENOSPC; goto unlock; } - vecdix = eq_table->num_async_eqs + 1; - - err = mlx5_create_map_eq(dev, eq, vecdix, nent, mask, name, handler); - if (!err) - eq_table->num_async_eqs++; - + err = create_map_eq(dev, eq, name, param); unlock: mutex_unlock(&eq_table->lock); return err; } -int mlx5_destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; mutex_lock(&eq_table->lock); - err = mlx5_destroy_unmap_eq(dev, eq); - 
if (!err) - eq_table->num_async_eqs--; + err = destroy_unmap_eq(dev, eq); mutex_unlock(&eq_table->lock); return err; } -static int create_async_eqs(struct mlx5_core_dev *dev) +static u64 gather_async_events_mask(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = dev->priv.eq_table; u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; - int err; if (MLX5_VPORT_MANAGER(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); @@ -953,9 +966,23 @@ static int create_async_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); - err = mlx5_create_async_eq(dev, &table->cmd_eq, MLX5_NUM_CMD_EQE, - 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", - mlx5_eq_async_int); + return async_event_mask; +} + +static int create_async_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_param param = {}; + int err; + + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_CMD_IDX, + .mask = 1ull << MLX5_EVENT_TYPE_CMD, + .nent = MLX5_NUM_CMD_EQE, + .context = &table->cmd_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); return err; @@ -963,15 +990,27 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_events(dev); - err = mlx5_create_async_eq(dev, &table->async_eq, MLX5_NUM_ASYNC_EQE, - async_event_mask, "mlx5_async_eq", mlx5_eq_async_int); + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_ASYNC_IDX, + .mask = gather_async_events_mask(dev), + .nent = MLX5_NUM_ASYNC_EQE, + .context = &table->async_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } - err = mlx5_create_async_eq(dev, &table->pages_eq, /* TODO: sriov max_vf + */ 1, - 1 <<
MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", mlx5_eq_async_int); + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PAGEREQ_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, + .nent = /* TODO: sriov max_vf + */ 1, + .context = &table->pages_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; @@ -989,17 +1028,17 @@ static int create_async_eqs(struct mlx5_core_dev *dev) return err; err3: - mlx5_destroy_async_eq(dev, &table->pages_eq); + destroy_async_eq(dev, &table->pages_eq); #else return err; #endif err2: - mlx5_destroy_async_eq(dev, &table->async_eq); + destroy_async_eq(dev, &table->async_eq); err1: mlx5_cmd_use_polling(dev); - mlx5_destroy_async_eq(dev, &table->cmd_eq); + destroy_async_eq(dev, &table->cmd_eq); return err; } @@ -1017,18 +1056,18 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) } #endif - err = mlx5_destroy_async_eq(dev, &table->pages_eq); + err = destroy_async_eq(dev, &table->pages_eq); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = mlx5_destroy_async_eq(dev, &table->async_eq); + err = destroy_async_eq(dev, &table->async_eq); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = mlx5_destroy_async_eq(dev, &table->cmd_eq); + err = destroy_async_eq(dev, &table->cmd_eq); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -1049,6 +1088,77 @@ void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); } +/* Generic EQ API for mlx5_core consumers + * Needed For RDMA ODP EQ for now + */ +struct mlx5_eq * +mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq_param *param) +{ + struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); + int err; + + if (!eq) + return ERR_PTR(-ENOMEM); + + err =
create_async_eq(dev, name, eq, param); + if (err) { + kvfree(eq); + eq = ERR_PTR(err); + } + + return eq; +} +EXPORT_SYMBOL(mlx5_eq_create_generic); + +int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (IS_ERR(eq)) + return -EINVAL; + + err = destroy_async_eq(dev, eq); + if (err) + goto out; + + kvfree(eq); +out: + return err; +} +EXPORT_SYMBOL(mlx5_eq_destroy_generic); + +struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) +{ + u32 ci = eq->cons_index + cc; + struct mlx5_eqe *eqe; + + eqe = get_eqe(eq, ci & (eq->nent - 1)); + eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + if (eqe) + dma_rmb(); + + return eqe; +} +EXPORT_SYMBOL(mlx5_eq_get_eqe); + +void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val; + + eq->cons_index += cc; + val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} +EXPORT_SYMBOL(mlx5_eq_update_ci); + /* Completion EQs */ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -1127,7 +1237,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) #endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); - if (mlx5_destroy_unmap_eq(dev, &eq->core)) + if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", eq->core.eqn); tasklet_disable(&eq->tasklet_ctx.task); @@ -1155,6 +1265,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) #endif for (i = 0; i < ncomp_vec; i++) { int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + struct mlx5_eq_param param = {}; eq = kzalloc(sizeof(*eq), GFP_KERNEL); if (!eq) { @@ -1172,8 +1283,14 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) 
irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); - err = mlx5_create_map_eq(dev, &eq->core, vecidx, nent, 0, - name, mlx5_eq_comp_int); + param = (struct mlx5_eq_param) { + .index = vecidx, + .mask = 0, + .nent = nent, + .context = &eq->core, + .handler = mlx5_eq_comp_int + }; + err = create_map_eq(dev, &eq->core, name, &param); if (err) { kfree(eq); goto clean; @@ -1257,7 +1374,7 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq_comp *eq; + int i, max_eqs; clear_comp_irqs_affinity_hints(dev); @@ -1267,16 +1384,16 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) table->rmap = NULL; } #endif - list_for_each_entry(eq, &table->comp_eqs_list, list) - free_irq(eq->core.irqn, eq); - free_irq(table->pages_eq.irqn, &table->pages_eq); - free_irq(table->async_eq.irqn, &table->async_eq); - free_irq(table->cmd_eq.irqn, &table->cmd_eq); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) - free_irq(table->pfault_eq.core.irqn, &table->pfault_eq.core); -#endif + mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ + max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; + for (i = max_eqs - 1; i >= 0; i--) { + if (!table->irq_info[i].context) + continue; + free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); + table->irq_info[i].context = NULL; + } + mutex_unlock(&table->lock); pci_free_irq_vectors(dev->pdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 706d58383dbd..db32057ad054 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -7,11 +7,6 @@ #define MLX5_MAX_IRQ_NAME (32) -enum { - MLX5_EQ_MAX_ASYNC_EQS = 4, /* mlx5_core needs at least 3 */ -
MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, -}; - struct mlx5_eq_tasklet { struct list_head list; struct list_head process_list; @@ -31,6 +26,7 @@ struct mlx5_eq { u32 cons_index; struct mlx5_frag_buf buf; int size; + unsigned int vecidx; unsigned int irqn; u8 eqn; int nent; @@ -44,7 +40,7 @@ struct mlx5_eq_comp { }; struct mlx5_eq_pagefault { - struct mlx5_eq core; /* Must be first */ + struct mlx5_eq *core; struct work_struct work; spinlock_t lock; /* Pagefaults spinlock */ struct workqueue_struct *wq; @@ -55,10 +51,6 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev); void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_eq_table_create(struct mlx5_core_dev *dev); void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); -int mlx5_create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - int nent, u64 mask, const char *name, - irq_handler_t handler); -int mlx5_destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h new file mode 100644 index 000000000000..c733673ba5f6 --- /dev/null +++ b/include/linux/mlx5/eq.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#ifndef MLX5_CORE_EQ_H +#define MLX5_CORE_EQ_H + +#include + +enum { + MLX5_EQ_PAGEREQ_IDX = 0, + MLX5_EQ_CMD_IDX = 1, + MLX5_EQ_ASYNC_IDX = 2, + /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */ + MLX5_EQ_PFAULT_IDX = 3, + MLX5_EQ_MAX_ASYNC_EQS, + /* completion eqs vector indices start here */ + MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, +}; + +struct mlx5_eq; + +struct mlx5_eq_param { + u8 index; + int nent; + u64 mask; + void *context; + irq_handler_t handler; +}; + +struct mlx5_eq * +mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq_param *param); +int +mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq); + +struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc); +void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm); + +#endif /* MLX5_CORE_EQ_H */ -- cgit v1.2.3 From d5d284b829a6eb7127df24d1bd3896a698981e62 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 19 Nov 2018 10:52:41 -0800 Subject: {net,IB}/mlx5: Move Page fault EQ and ODP logic to RDMA Use the new generic EQ API to move all ODP RDMA data structures and logic from the mlx5 core driver into the mlx5_ib driver.
Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Tariq Toukan Acked-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 10 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 15 +- drivers/infiniband/hw/mlx5/odp.c | 281 ++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 34 --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 252 ------------------ drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 8 - drivers/net/ethernet/mellanox/mlx5/core/main.c | 17 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 - include/linux/mlx5/driver.h | 49 ---- include/linux/mlx5/eq.h | 21 ++ 10 files changed, 308 insertions(+), 381 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6fbc0cba1bac..fcf4a0328a90 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6040,6 +6040,11 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } +void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_ib_odp_cleanup_one(dev); +} + int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { @@ -6225,7 +6230,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_dev_res_cleanup), STAGE_CREATE(MLX5_IB_STAGE_ODP, mlx5_ib_stage_odp_init, - NULL), + mlx5_ib_stage_odp_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_stage_counters_init, mlx5_ib_stage_counters_cleanup), @@ -6395,9 +6400,6 @@ static struct mlx5_interface mlx5_ib_interface = { .add = mlx5_ib_add, .remove = mlx5_ib_remove, .event = mlx5_ib_event, -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - .pfault = mlx5_ib_pfault, -#endif .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b651a7a6fde9..27999fd32356 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -880,6 +880,15 @@ struct mlx5_ib_lb_state { bool enabled; }; +struct mlx5_ib_pf_eq { + struct mlx5_ib_dev *dev; + struct mlx5_eq *core; + struct work_struct work; + spinlock_t lock; /* Pagefaults spinlock */ + struct workqueue_struct *wq; + mempool_t *pool; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; const struct uverbs_object_tree_def *driver_trees[7]; @@ -902,6 +911,8 @@ struct mlx5_ib_dev { #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; u64 odp_max_size; + struct mlx5_ib_pf_eq odp_pf_eq; + /* * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler. @@ -1158,9 +1169,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); -void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, - struct mlx5_pagefault *pfault); int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); +void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, @@ -1175,6 +1185,7 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) } static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } +static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 7d784b40e017..416d141322a0 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -37,6 +37,46 @@ #include "mlx5_ib.h" #include "cmd.h" +#include + +/* Contains the details of a pagefault. 
*/ +struct mlx5_pagefault { + u32 bytes_committed; + u32 token; + u8 event_subtype; + u8 type; + union { + /* Initiator or send message responder pagefault details. */ + struct { + /* Received packet size, only valid for responders. */ + u32 packet_size; + /* + * Number of resource holding WQE, depends on type. + */ + u32 wq_num; + /* + * WQE index. Refers to either the send queue or + * receive queue, according to event_subtype. + */ + u16 wqe_index; + } wqe; + /* RDMA responder pagefault details */ + struct { + u32 r_key; + /* + * Received packet size, minimal size page fault + * resolution required for forward progress. + */ + u32 packet_size; + u32 rdma_op_len; + u64 rdma_va; + } rdma; + }; + + struct mlx5_ib_pf_eq *eq; + struct work_struct work; +}; + #define MAX_PREFETCH_LEN (4*1024*1024U) /* Timeout in ms to wait for an active mmu notifier to complete when handling @@ -304,14 +344,20 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, { int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ? 
pfault->wqe.wq_num : pfault->token; - int ret = mlx5_core_page_fault_resume(dev->mdev, - pfault->token, - wq_num, - pfault->type, - error); - if (ret) - mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n", - wq_num); + u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { }; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { }; + int err; + + MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME); + MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type); + MLX5_SET(page_fault_resume_in, in, token, pfault->token); + MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); + MLX5_SET(page_fault_resume_in, in, error, !!error); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + if (err) + mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n", + wq_num, err); } static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, @@ -1196,10 +1242,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } } -void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, - struct mlx5_pagefault *pfault) +static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { - struct mlx5_ib_dev *dev = context; u8 event_subtype = pfault->event_subtype; switch (event_subtype) { @@ -1216,6 +1260,203 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, } } +static void mlx5_ib_eqe_pf_action(struct work_struct *work) +{ + struct mlx5_pagefault *pfault = container_of(work, + struct mlx5_pagefault, + work); + struct mlx5_ib_pf_eq *eq = pfault->eq; + + mlx5_ib_pfault(eq->dev, pfault); + mempool_free(pfault, eq->pool); +} + +static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq) +{ + struct mlx5_eqe_page_fault *pf_eqe; + struct mlx5_pagefault *pfault; + struct mlx5_eqe *eqe; + int cc = 0; + + while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) { + pfault = mempool_alloc(eq->pool, GFP_ATOMIC); + if (!pfault) { + schedule_work(&eq->work); + break; + } + + pf_eqe = 
&eqe->data.page_fault; + pfault->event_subtype = eqe->sub_type; + pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); + + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", + eqe->sub_type, pfault->bytes_committed); + + switch (eqe->sub_type) { + case MLX5_PFAULT_SUBTYPE_RDMA: + /* RDMA based event */ + pfault->type = + be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; + pfault->token = + be32_to_cpu(pf_eqe->rdma.pftype_token) & + MLX5_24BIT_MASK; + pfault->rdma.r_key = + be32_to_cpu(pf_eqe->rdma.r_key); + pfault->rdma.packet_size = + be16_to_cpu(pf_eqe->rdma.packet_length); + pfault->rdma.rdma_op_len = + be32_to_cpu(pf_eqe->rdma.rdma_op_len); + pfault->rdma.rdma_va = + be64_to_cpu(pf_eqe->rdma.rdma_va); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", + pfault->type, pfault->token, + pfault->rdma.r_key); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", + pfault->rdma.rdma_op_len, + pfault->rdma.rdma_va); + break; + + case MLX5_PFAULT_SUBTYPE_WQE: + /* WQE based event */ + pfault->type = + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; + pfault->token = + be32_to_cpu(pf_eqe->wqe.token); + pfault->wqe.wq_num = + be32_to_cpu(pf_eqe->wqe.pftype_wq) & + MLX5_24BIT_MASK; + pfault->wqe.wqe_index = + be16_to_cpu(pf_eqe->wqe.wqe_index); + pfault->wqe.packet_size = + be16_to_cpu(pf_eqe->wqe.packet_length); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", + pfault->type, pfault->token, + pfault->wqe.wq_num, + pfault->wqe.wqe_index); + break; + + default: + mlx5_ib_warn(eq->dev, + "Unsupported page fault event sub-type: 0x%02hhx\n", + eqe->sub_type); + /* Unsupported page faults should still be + * resolved by the page fault handler + */ + } + + pfault->eq = eq; + INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action); + queue_work(eq->wq, &pfault->work); + + cc = mlx5_eq_update_cc(eq->core, ++cc); + } + + 
mlx5_eq_update_ci(eq->core, cc, 1); +} + +static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr) +{ + struct mlx5_ib_pf_eq *eq = eq_ptr; + unsigned long flags; + + if (spin_trylock_irqsave(&eq->lock, flags)) { + mlx5_ib_eq_pf_process(eq); + spin_unlock_irqrestore(&eq->lock, flags); + } else { + schedule_work(&eq->work); + } + + return IRQ_HANDLED; +} + +/* mempool_refill() was proposed but unfortunately wasn't accepted + * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html + * Cheap workaround. + */ +static void mempool_refill(mempool_t *pool) +{ + while (pool->curr_nr < pool->min_nr) + mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); +} + +static void mlx5_ib_eq_pf_action(struct work_struct *work) +{ + struct mlx5_ib_pf_eq *eq = + container_of(work, struct mlx5_ib_pf_eq, work); + + mempool_refill(eq->pool); + + spin_lock_irq(&eq->lock); + mlx5_ib_eq_pf_process(eq); + spin_unlock_irq(&eq->lock); +} + +enum { + MLX5_IB_NUM_PF_EQE = 0x1000, + MLX5_IB_NUM_PF_DRAIN = 64, +}; + +static int +mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) +{ + struct mlx5_eq_param param = {}; + int err; + + INIT_WORK(&eq->work, mlx5_ib_eq_pf_action); + spin_lock_init(&eq->lock); + eq->dev = dev; + + eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN, + sizeof(struct mlx5_pagefault)); + if (!eq->pool) + return -ENOMEM; + + eq->wq = alloc_workqueue("mlx5_ib_page_fault", + WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, + MLX5_NUM_CMD_EQE); + if (!eq->wq) { + err = -ENOMEM; + goto err_mempool; + } + + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PFAULT_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + .nent = MLX5_IB_NUM_PF_EQE, + .context = eq, + .handler = mlx5_ib_eq_pf_int + }; + eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param); + if (IS_ERR(eq->core)) { + err = PTR_ERR(eq->core); + goto err_wq; + } + + return 0; +err_wq: + destroy_workqueue(eq->wq); +err_mempool: + mempool_destroy(eq->pool); + return err; +} +
+static int +mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) +{ + int err; + + err = mlx5_eq_destroy_generic(dev->mdev, eq->core); + cancel_work_sync(&eq->work); + destroy_workqueue(eq->wq); + mempool_destroy(eq->pool); + + return err; +} + void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) { if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) @@ -1244,7 +1485,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { - int ret; + int ret = 0; if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); @@ -1254,7 +1495,20 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) } } - return 0; + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return ret; + + ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq); + + return ret; +} + +void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev) +{ + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return; + + mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq); } int mlx5_ib_odp_init(void) @@ -1264,4 +1518,3 @@ int mlx5_ib_odp_init(void) return 0; } - diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 37ba7c78859d..7eedbea38a78 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -139,17 +139,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (dev_ctx->intf->pfault) { - if (priv->pfault) { - mlx5_core_err(dev, "multiple page fault handlers not supported"); - } else { - priv->pfault_ctx = dev_ctx->context; - priv->pfault = dev_ctx->intf->pfault; - } - } -#endif spin_unlock_irq(&priv->ctx_lock); } @@ -179,15 +168,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (!dev_ctx) return; -#ifdef 
CONFIG_INFINIBAND_ON_DEMAND_PAGING - spin_lock_irq(&priv->ctx_lock); - if (priv->pfault == dev_ctx->intf->pfault) - priv->pfault = NULL; - spin_unlock_irq(&priv->ctx_lock); - - synchronize_srcu(&priv->pfault_srcu); -#endif - spin_lock_irq(&priv->ctx_lock); list_del(&dev_ctx->list); spin_unlock_irq(&priv->ctx_lock); @@ -447,20 +427,6 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, spin_unlock_irqrestore(&priv->ctx_lock, flags); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -void mlx5_core_page_fault(struct mlx5_core_dev *dev, - struct mlx5_pagefault *pfault) -{ - struct mlx5_priv *priv = &dev->priv; - int srcu_idx; - - srcu_idx = srcu_read_lock(&priv->pfault_srcu); - if (priv->pfault) - priv->pfault(dev, priv->pfault_ctx, pfault); - srcu_read_unlock(&priv->pfault_srcu, srcu_idx); -} -#endif - void mlx5_dev_list_lock(void) { mutex_lock(&mlx5_intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ec1f5018546e..895401609c63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -56,13 +56,6 @@ enum { MLX5_EQ_STATE_ALWAYS_ARMED = 0xb, }; -enum { - MLX5_NUM_SPARE_EQE = 0x80, - MLX5_NUM_ASYNC_EQE = 0x1000, - MLX5_NUM_CMD_EQE = 32, - MLX5_NUM_PF_DRAIN = 64, -}; - enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; @@ -79,9 +72,6 @@ struct mlx5_eq_table { struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct mlx5_eq_pagefault pfault_eq; -#endif struct mutex lock; /* sync async eqs creations */ int num_comp_vectors; struct mlx5_irq_info *irq_info; @@ -222,224 +212,6 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm) mb(); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -static void eqe_pf_action(struct work_struct *work) -{ - struct mlx5_pagefault *pfault = container_of(work, - struct mlx5_pagefault, - work); - struct mlx5_eq_pagefault *eq = pfault->eq; - - 
mlx5_core_page_fault(eq->core->dev, pfault); - mempool_free(pfault, eq->pool); -} - -static void eq_pf_process(struct mlx5_eq_pagefault *eq) -{ - struct mlx5_core_dev *dev = eq->core->dev; - struct mlx5_eqe_page_fault *pf_eqe; - struct mlx5_pagefault *pfault; - struct mlx5_eqe *eqe; - int set_ci = 0; - - while ((eqe = next_eqe_sw(eq->core))) { - pfault = mempool_alloc(eq->pool, GFP_ATOMIC); - if (!pfault) { - schedule_work(&eq->work); - break; - } - - dma_rmb(); - pf_eqe = &eqe->data.page_fault; - pfault->event_subtype = eqe->sub_type; - pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); - - mlx5_core_dbg(dev, - "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", - eqe->sub_type, pfault->bytes_committed); - - switch (eqe->sub_type) { - case MLX5_PFAULT_SUBTYPE_RDMA: - /* RDMA based event */ - pfault->type = - be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; - pfault->token = - be32_to_cpu(pf_eqe->rdma.pftype_token) & - MLX5_24BIT_MASK; - pfault->rdma.r_key = - be32_to_cpu(pf_eqe->rdma.r_key); - pfault->rdma.packet_size = - be16_to_cpu(pf_eqe->rdma.packet_length); - pfault->rdma.rdma_op_len = - be32_to_cpu(pf_eqe->rdma.rdma_op_len); - pfault->rdma.rdma_va = - be64_to_cpu(pf_eqe->rdma.rdma_va); - mlx5_core_dbg(dev, - "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", - pfault->type, pfault->token, - pfault->rdma.r_key); - mlx5_core_dbg(dev, - "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", - pfault->rdma.rdma_op_len, - pfault->rdma.rdma_va); - break; - - case MLX5_PFAULT_SUBTYPE_WQE: - /* WQE based event */ - pfault->type = - (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; - pfault->token = - be32_to_cpu(pf_eqe->wqe.token); - pfault->wqe.wq_num = - be32_to_cpu(pf_eqe->wqe.pftype_wq) & - MLX5_24BIT_MASK; - pfault->wqe.wqe_index = - be16_to_cpu(pf_eqe->wqe.wqe_index); - pfault->wqe.packet_size = - be16_to_cpu(pf_eqe->wqe.packet_length); - mlx5_core_dbg(dev, - "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", - 
pfault->type, pfault->token, - pfault->wqe.wq_num, - pfault->wqe.wqe_index); - break; - - default: - mlx5_core_warn(dev, - "Unsupported page fault event sub-type: 0x%02hhx\n", - eqe->sub_type); - /* Unsupported page faults should still be - * resolved by the page fault handler - */ - } - - pfault->eq = eq; - INIT_WORK(&pfault->work, eqe_pf_action); - queue_work(eq->wq, &pfault->work); - - ++eq->core->cons_index; - ++set_ci; - - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq->core, 0); - set_ci = 0; - } - } - - eq_update_ci(eq->core, 1); -} - -static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) -{ - struct mlx5_eq_pagefault *eq = eq_ptr; - unsigned long flags; - - if (spin_trylock_irqsave(&eq->lock, flags)) { - eq_pf_process(eq); - spin_unlock_irqrestore(&eq->lock, flags); - } else { - schedule_work(&eq->work); - } - - return IRQ_HANDLED; -} - -/* mempool_refill() was proposed but unfortunately wasn't accepted - * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html - * Chip workaround. 
- */ -static void mempool_refill(mempool_t *pool) -{ - while (pool->curr_nr < pool->min_nr) - mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); -} - -static void eq_pf_action(struct work_struct *work) -{ - struct mlx5_eq_pagefault *eq = - container_of(work, struct mlx5_eq_pagefault, work); - - mempool_refill(eq->pool); - - spin_lock_irq(&eq->lock); - eq_pf_process(eq); - spin_unlock_irq(&eq->lock); -} - -static int -create_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) -{ - struct mlx5_eq_param param = {}; - int err; - - spin_lock_init(&eq->lock); - INIT_WORK(&eq->work, eq_pf_action); - - eq->pool = mempool_create_kmalloc_pool(MLX5_NUM_PF_DRAIN, - sizeof(struct mlx5_pagefault)); - if (!eq->pool) - return -ENOMEM; - - eq->wq = alloc_workqueue("mlx5_page_fault", - WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, - MLX5_NUM_CMD_EQE); - if (!eq->wq) { - err = -ENOMEM; - goto err_mempool; - } - - param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PFAULT_IDX, - .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, - .nent = MLX5_NUM_ASYNC_EQE, - .context = eq, - .handler = mlx5_eq_pf_int - }; - - eq->core = mlx5_eq_create_generic(dev, "mlx5_page_fault_eq", &param); - if (IS_ERR(eq->core)) { - err = PTR_ERR(eq->core); - goto err_wq; - } - - return 0; -err_wq: - destroy_workqueue(eq->wq); -err_mempool: - mempool_destroy(eq->pool); - return err; -} - -static int destroy_pf_eq(struct mlx5_core_dev *dev, struct mlx5_eq_pagefault *eq) -{ - int err; - - err = mlx5_eq_destroy_generic(dev, eq->core); - cancel_work_sync(&eq->work); - destroy_workqueue(eq->wq); - mempool_destroy(eq->pool); - - return err; -} - -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, - u32 wq_num, u8 type, int error) -{ - u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; - - MLX5_SET(page_fault_resume_in, in, opcode, - MLX5_CMD_OP_PAGE_FAULT_RESUME); - MLX5_SET(page_fault_resume_in, in, error, !!error); -
MLX5_SET(page_fault_resume_in, in, page_fault_type, type); - MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); - MLX5_SET(page_fault_resume_in, in, token, token); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} -EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); -#endif - static void general_event_handler(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) { @@ -1016,22 +788,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) goto err2; } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) { - err = create_pf_eq(dev, &table->pfault_eq); - if (err) { - mlx5_core_warn(dev, "failed to create page fault EQ %d\n", - err); - goto err3; - } - } - - return err; -err3: - destroy_async_eq(dev, &table->pages_eq); -#else return err; -#endif err2: destroy_async_eq(dev, &table->async_eq); @@ -1047,15 +804,6 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) { - err = destroy_pf_eq(dev, &table->pfault_eq); - if (err) - mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n", - err); - } -#endif - err = destroy_async_eq(dev, &table->pages_eq); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index db32057ad054..4cc2d442cef6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -39,14 +39,6 @@ struct mlx5_eq_comp { struct list_head list; }; -struct mlx5_eq_pagefault { - struct mlx5_eq *core; - struct work_struct work; - spinlock_t lock; /* Pagefaults spinlock */ - struct workqueue_struct *wq; - mempool_t *pool; -}; - int mlx5_eq_table_init(struct mlx5_core_dev *dev); void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_eq_table_create(struct mlx5_core_dev *dev); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3de83fe65f2b..91022f141855 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1169,14 +1169,6 @@ static int init_one(struct pci_dev *pdev, INIT_LIST_HEAD(&priv->waiting_events_list); priv->is_accum_events = false; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - err = init_srcu_struct(&priv->pfault_srcu); - if (err) { - dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n", - err); - goto clean_dev; - } -#endif mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); INIT_LIST_HEAD(&priv->bfregs.reg_head.list); @@ -1185,7 +1177,7 @@ static int init_one(struct pci_dev *pdev, err = mlx5_pci_init(dev, priv); if (err) { dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); - goto clean_srcu; + goto clean_dev; } err = mlx5_health_init(dev); @@ -1218,11 +1210,7 @@ clean_health: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); -clean_srcu: -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&priv->pfault_srcu); clean_dev: -#endif devlink_free(devlink); return err; @@ -1246,9 +1234,6 @@ static void remove_one(struct pci_dev *pdev) mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&priv->pfault_srcu); -#endif devlink_free(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 4728b027cb9e..21727d9eeb84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -100,8 +100,6 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); -void mlx5_core_page_fault(struct mlx5_core_dev *dev, - 
struct mlx5_pagefault *pfault); void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fe9b552aa649..f41e6713df10 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -510,7 +510,6 @@ struct mlx5_fc_stats { struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; -struct mlx5_pagefault; struct mlx5_eq_table; struct mlx5_rate_limit { @@ -619,13 +618,6 @@ struct mlx5_priv { struct mlx5_port_module_event_stats pme_stats; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - void (*pfault)(struct mlx5_core_dev *dev, - void *context, - struct mlx5_pagefault *pfault); - void *pfault_ctx; - struct srcu_struct pfault_srcu; -#endif struct mlx5_bfreg_data bfregs; struct mlx5_uars_page *uar; }; @@ -650,44 +642,6 @@ enum mlx5_pagefault_type_flags { MLX5_PFAULT_RDMA = 1 << 2, }; -/* Contains the details of a pagefault. */ -struct mlx5_pagefault { - u32 bytes_committed; - u32 token; - u8 event_subtype; - u8 type; - union { - /* Initiator or send message responder pagefault details. */ - struct { - /* Received packet size, only valid for responders. */ - u32 packet_size; - /* - * Number of resource holding WQE, depends on type. - */ - u32 wq_num; - /* - * WQE index. Refers to either the send queue or - * receive queue, according to event_subtype. - */ - u16 wqe_index; - } wqe; - /* RDMA responder pagefault details */ - struct { - u32 r_key; - /* - * Received packet size, minimal size page fault - * resolution required for forward progress. 
- */ - u32 packet_size; - u32 rdma_op_len; - u64 rdma_va; - } rdma; - }; - - struct mlx5_eq_pagefault *eq; - struct work_struct work; -}; - struct mlx5_td { struct list_head tirs_list; u32 tdn; @@ -1118,9 +1072,6 @@ struct mlx5_interface { void (*detach)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); - void (*pfault)(struct mlx5_core_dev *dev, - void *context, - struct mlx5_pagefault *pfault); void * (*get_dev)(void *context); int protocol; struct list_head list; diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index c733673ba5f6..71d82c5a1a02 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -17,6 +17,10 @@ enum { MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, }; +#define MLX5_NUM_CMD_EQE (32) +#define MLX5_NUM_ASYNC_EQE (0x1000) +#define MLX5_NUM_SPARE_EQE (0x80) + struct mlx5_eq; struct mlx5_eq_param { @@ -36,4 +40,21 @@ mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq); struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc); void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm); +/* The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create EQs with MLX5_NUM_SPARE_EQE extra entries, + * so we must update our consumer index at + * least that often. + * + * mlx5_eq_update_cc must be called on every EQE @EQ irq handler + */ +static inline u32 mlx5_eq_update_cc(struct mlx5_eq *eq, u32 cc) +{ + if (unlikely(cc >= MLX5_NUM_SPARE_EQE)) { + mlx5_eq_update_ci(eq, cc, 0); + cc = 0; + } + return cc; +} + #endif /* MLX5_CORE_EQ_H */ -- cgit v1.2.3 From 838e96904ff3fc6c30e5ebbc611474669856e3c0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:11 -0800 Subject: bpf: Introduce bpf_func_info This patch added interface to load a program with the following additional information: . prog_btf_fd . 
func_info, func_info_rec_size and func_info_cnt where func_info provides the function range and type_id corresponding to each function. The func_info_rec_size field is introduced in the UAPI to specify the size of struct bpf_func_info passed from user space. This is intended to make the bpf_func_info structure growable in the future. If the kernel gets a different bpf_func_info size from userspace, it will try to handle the user request with the part of bpf_func_info it can understand. In this patch, the kernel can understand struct bpf_func_info { __u32 insn_offset; __u32 type_id; }; If the user passes a bpf func_info record size of 16 bytes, the kernel can still handle the part of each record covered by the above definition. If the verifier agrees with the function range provided by the user, the bpf_prog ksym for each function will use the func name provided in the type_id, which is supposed to provide better encoding as it is not limited by the 16-byte program name limit, and this is better for a bpf program which contains multiple subprograms. The bpf_prog_info interface is also extended to return btf_id, func_info, func_info_rec_size and func_info_cnt to userspace, so userspace can print out the function prototype for each xlated function. The insn_offset in the returned func_info corresponds to the insn offset for xlated functions. With the other jit-related fields in bpf_prog_info, userspace can also print out function prototypes for each jited function.
Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 5 +- include/linux/bpf_verifier.h | 1 + include/linux/btf.h | 2 + include/uapi/linux/bpf.h | 13 +++++ kernel/bpf/btf.c | 4 +- kernel/bpf/core.c | 13 +++++ kernel/bpf/syscall.c | 59 +++++++++++++++++++-- kernel/bpf/verifier.c | 120 ++++++++++++++++++++++++++++++++++++++++++- 8 files changed, 209 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 987815152629..7f0e225bf630 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -316,6 +316,8 @@ struct bpf_prog_aux { void *security; #endif struct bpf_prog_offload *offload; + struct btf *btf; + u32 type_id; /* type id for this prog/func */ union { struct work_struct work; struct rcu_head rcu; @@ -527,7 +529,8 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) } /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, + union bpf_attr __user *uattr); void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); /* Map specifics */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 11f5df1092d9..204382f46fd8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -204,6 +204,7 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) struct bpf_subprog_info { u32 start; /* insn idx of function entry point */ u16 stack_depth; /* max. 
stack depth used by this function */ + u32 type_id; /* btf type_id for this subprog */ }; /* single container for all structs diff --git a/include/linux/btf.h b/include/linux/btf.h index e076c4697049..7f2c0a4a45ea 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -46,5 +46,7 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m); int btf_get_fd_by_id(u32 id); u32 btf_id(const struct btf *btf); +const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); +const char *btf_name_by_offset(const struct btf *btf, u32 offset); #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 05d95290b848..c1554aa07465 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -338,6 +338,10 @@ union bpf_attr { * (context accesses, allowed helpers, etc). */ __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2638,6 +2642,10 @@ struct bpf_prog_info { __u32 nr_jited_func_lens; __aligned_u64 jited_ksyms; __aligned_u64 jited_func_lens; + __u32 btf_id; + __u32 func_info_rec_size; + __aligned_u64 func_info; + __u32 func_info_cnt; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2949,4 +2957,9 @@ struct bpf_flow_keys { }; }; +struct bpf_func_info { + __u32 insn_offset; + __u32 type_id; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 6a2be79b73fc..69da9169819a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -474,7 +474,7 @@ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) return !*src; } -static const char *btf_name_by_offset(const struct btf *btf, u32 offset) +const char *btf_name_by_offset(const struct btf *btf, u32 offset) { if (!offset) return 
"(anon)"; @@ -484,7 +484,7 @@ static const char *btf_name_by_offset(const struct btf *btf, u32 offset) return "(invalid-name-offset)"; } -static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) +const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) { if (type_id > btf->nr_types) return NULL; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1a796e0799ec..16d77012ad3e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -21,12 +21,14 @@ * Kris Katterjohn - Added many additional checks in bpf_check_classic() */ +#include #include #include #include #include #include #include +#include #include #include #include @@ -390,6 +392,8 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog, static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) { const char *end = sym + KSYM_NAME_LEN; + const struct btf_type *type; + const char *func_name; BUILD_BUG_ON(sizeof("bpf_prog_") + sizeof(prog->tag) * 2 + @@ -404,6 +408,15 @@ static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); + + /* prog->aux->name will be ignored if full btf name is available */ + if (prog->aux->btf) { + type = btf_type_by_id(prog->aux->btf, prog->aux->type_id); + func_name = btf_name_by_offset(prog->aux->btf, type->name_off); + snprintf(sym, (size_t)(end - sym), "_%s", func_name); + return; + } + if (prog->aux->name[0]) snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name); else diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cf5040fd5434..998377808102 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1213,6 +1213,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); bpf_prog_kallsyms_del_all(prog); + btf_put(prog->aux->btf); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } @@ -1437,9 +1438,9 
@@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type +#define BPF_PROG_LOAD_LAST_FIELD func_info_cnt -static int bpf_prog_load(union bpf_attr *attr) +static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog; @@ -1525,7 +1526,7 @@ static int bpf_prog_load(union bpf_attr *attr) goto free_prog; /* run eBPF verifier */ - err = bpf_check(&prog, attr); + err = bpf_check(&prog, attr, uattr); if (err < 0) goto free_used_maps; @@ -2079,6 +2080,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, info.xlated_prog_len = 0; info.nr_jited_ksyms = 0; info.nr_jited_func_lens = 0; + info.func_info_cnt = 0; goto done; } @@ -2216,6 +2218,55 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } } + if (prog->aux->btf) { + u32 ucnt, urec_size; + + info.btf_id = btf_id(prog->aux->btf); + + ucnt = info.func_info_cnt; + info.func_info_cnt = prog->aux->func_cnt ? 
: 1; + urec_size = info.func_info_rec_size; + info.func_info_rec_size = sizeof(struct bpf_func_info); + if (ucnt) { + /* expect passed-in urec_size is what the kernel expects */ + if (urec_size != info.func_info_rec_size) + return -EINVAL; + + if (bpf_dump_raw_ok()) { + struct bpf_func_info kern_finfo; + char __user *user_finfo; + u32 i, insn_offset; + + user_finfo = u64_to_user_ptr(info.func_info); + if (prog->aux->func_cnt) { + ucnt = min_t(u32, info.func_info_cnt, ucnt); + insn_offset = 0; + for (i = 0; i < ucnt; i++) { + kern_finfo.insn_offset = insn_offset; + kern_finfo.type_id = prog->aux->func[i]->aux->type_id; + if (copy_to_user(user_finfo, &kern_finfo, + sizeof(kern_finfo))) + return -EFAULT; + + /* func[i]->len holds the prog len */ + insn_offset += prog->aux->func[i]->len; + user_finfo += urec_size; + } + } else { + kern_finfo.insn_offset = 0; + kern_finfo.type_id = prog->aux->type_id; + if (copy_to_user(user_finfo, &kern_finfo, + sizeof(kern_finfo))) + return -EFAULT; + } + } else { + info.func_info_cnt = 0; + } + } + } else { + info.func_info_cnt = 0; + } + done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) @@ -2501,7 +2552,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz err = map_get_next_key(&attr); break; case BPF_PROG_LOAD: - err = bpf_prog_load(&attr); + err = bpf_prog_load(&attr, uattr); break; case BPF_OBJ_PIN: err = bpf_obj_pin(&attr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b5222aa61d54..f102c4fd0c5a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11,10 +11,12 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
*/ +#include #include #include #include #include +#include #include #include #include @@ -4639,6 +4641,114 @@ err_free: return ret; } +/* The minimum supported BTF func info size */ +#define MIN_BPF_FUNCINFO_SIZE 8 +#define MAX_FUNCINFO_REC_SIZE 252 + +static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env, + union bpf_attr *attr, union bpf_attr __user *uattr) +{ + u32 i, nfuncs, urec_size, min_size, prev_offset; + u32 krec_size = sizeof(struct bpf_func_info); + struct bpf_func_info krecord = {}; + const struct btf_type *type; + void __user *urecord; + struct btf *btf; + int ret = 0; + + nfuncs = attr->func_info_cnt; + if (!nfuncs) + return 0; + + if (nfuncs != env->subprog_cnt) { + verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); + return -EINVAL; + } + + urec_size = attr->func_info_rec_size; + if (urec_size < MIN_BPF_FUNCINFO_SIZE || + urec_size > MAX_FUNCINFO_REC_SIZE || + urec_size % sizeof(u32)) { + verbose(env, "invalid func info rec size %u\n", urec_size); + return -EINVAL; + } + + btf = btf_get_by_fd(attr->prog_btf_fd); + if (IS_ERR(btf)) { + verbose(env, "unable to get btf from fd\n"); + return PTR_ERR(btf); + } + + urecord = u64_to_user_ptr(attr->func_info); + min_size = min_t(u32, krec_size, urec_size); + + for (i = 0; i < nfuncs; i++) { + ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); + if (ret) { + if (ret == -E2BIG) { + verbose(env, "nonzero tailing record in func info"); + /* set the size kernel expects so loader can zero + * out the rest of the record. 
+ */ + if (put_user(min_size, &uattr->func_info_rec_size)) + ret = -EFAULT; + } + goto free_btf; + } + + if (copy_from_user(&krecord, urecord, min_size)) { + ret = -EFAULT; + goto free_btf; + } + + /* check insn_offset */ + if (i == 0) { + if (krecord.insn_offset) { + verbose(env, + "nonzero insn_offset %u for the first func info record", + krecord.insn_offset); + ret = -EINVAL; + goto free_btf; + } + } else if (krecord.insn_offset <= prev_offset) { + verbose(env, + "same or smaller insn offset (%u) than previous func info record (%u)", + krecord.insn_offset, prev_offset); + ret = -EINVAL; + goto free_btf; + } + + if (env->subprog_info[i].start != krecord.insn_offset) { + verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); + ret = -EINVAL; + goto free_btf; + } + + /* check type_id */ + type = btf_type_by_id(btf, krecord.type_id); + if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) { + verbose(env, "invalid type id %d in func info", + krecord.type_id); + ret = -EINVAL; + goto free_btf; + } + + if (i == 0) + prog->aux->type_id = krecord.type_id; + env->subprog_info[i].type_id = krecord.type_id; + + prev_offset = krecord.insn_offset; + urecord += urec_size; + } + + prog->aux->btf = btf; + return 0; + +free_btf: + btf_put(btf); + return ret; +} + /* check %cur's range satisfies %old's */ static bool range_within(struct bpf_reg_state *old, struct bpf_reg_state *cur) @@ -5939,6 +6049,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; + /* the btf will be freed only at prog->aux */ + func[i]->aux->btf = prog->aux->btf; + func[i]->aux->type_id = env->subprog_info[i].type_id; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { err = -ENOTSUPP; @@ -6325,7 +6438,8 @@ static void free_states(struct bpf_verifier_env *env) kfree(env->explored_states); } -int bpf_check(struct bpf_prog **prog, union 
bpf_attr *attr) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, + union bpf_attr __user *uattr) { struct bpf_verifier_env *env; struct bpf_verifier_log *log; @@ -6397,6 +6511,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (ret < 0) goto skip_full_check; + ret = check_btf_func(env->prog, env, attr, uattr); + if (ret < 0) + goto skip_full_check; + ret = do_check(env); if (env->cur_state) { free_verifier_state(env->cur_state, true); -- cgit v1.2.3 From f6161a8f3036caa45f225486be39783e99e0fa29 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 20 Nov 2018 14:08:20 -0800 Subject: bpf: fix a compilation error when CONFIG_BPF_SYSCALL is not defined Kernel test robot (lkp@intel.com) reports a compilation error at https://www.spinics.net/lists/netdev/msg534913.html introduced by commit 838e96904ff3 ("bpf: Introduce bpf_func_info"). If CONFIG_BPF is defined and CONFIG_BPF_SYSCALL is not defined, the following error will appear: kernel/bpf/core.c:414: undefined reference to `btf_type_by_id' kernel/bpf/core.c:415: undefined reference to `btf_name_by_offset' When CONFIG_BPF_SYSCALL is not defined, let us define stub inline functions for btf_type_by_id() and btf_name_by_offset() in include/linux/btf.h. This way, the compilation failure can be avoided. 
Fixes: 838e96904ff3 ("bpf: Introduce bpf_func_info") Reported-by: kbuild test robot Cc: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index 7f2c0a4a45ea..8c2199b5d250 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -46,7 +46,21 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m); int btf_get_fd_by_id(u32 id); u32 btf_id(const struct btf *btf); + +#ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); +#else +static inline const struct btf_type *btf_type_by_id(const struct btf *btf, + u32 type_id) +{ + return NULL; +} +static inline const char *btf_name_by_offset(const struct btf *btf, + u32 offset) +{ + return NULL; +} +#endif #endif -- cgit v1.2.3 From 1db4909e76f64a85f4aaa187f0f683f5c85a471d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 20 Nov 2018 09:44:35 +0800 Subject: blk-mq: not embed .mq_kobj and ctx->kobj into queue instance Even though .mq_kobj, ctx->kobj and q->kobj share same lifetime from block layer's view, actually they don't because userspace may grab one kobject anytime via sysfs. This patch fixes the issue by the following approach: 1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing all ctxs 2) free all allocated ctxs and the 'blk_mq_ctxs' instance in release handler of .mq_kobj 3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that .mq_kobj is always released after all ctxs are freed. This patch fixes kernel panic issue during booting when DEBUG_KOBJECT_RELEASE is enabled. 
Reported-by: Guenter Roeck Cc: "jianchao.wang" Tested-by: Guenter Roeck Reviewed-by: Greg Kroah-Hartman Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 34 ++++++++++++++++++++++++---------- block/blk-mq.c | 39 ++++++++++++++++++++++++++++++++------- block/blk-mq.h | 6 ++++++ include/linux/blkdev.h | 2 +- 4 files changed, 63 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 3d25b9c419e9..6efef1f679f0 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -15,6 +15,18 @@ static void blk_mq_sysfs_release(struct kobject *kobj) { + struct blk_mq_ctxs *ctxs = container_of(kobj, struct blk_mq_ctxs, kobj); + + free_percpu(ctxs->queue_ctx); + kfree(ctxs); +} + +static void blk_mq_ctx_sysfs_release(struct kobject *kobj) +{ + struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj); + + /* ctx->ctxs won't be released until all ctx are freed */ + kobject_put(&ctx->ctxs->kobj); } static void blk_mq_hw_sysfs_release(struct kobject *kobj) @@ -213,7 +225,7 @@ static struct kobj_type blk_mq_ktype = { static struct kobj_type blk_mq_ctx_ktype = { .sysfs_ops = &blk_mq_sysfs_ops, .default_attrs = default_ctx_attrs, - .release = blk_mq_sysfs_release, + .release = blk_mq_ctx_sysfs_release, }; static struct kobj_type blk_mq_hw_ktype = { @@ -245,7 +257,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) if (!hctx->nr_ctx) return 0; - ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num); + ret = kobject_add(&hctx->kobj, q->mq_kobj, "%u", hctx->queue_num); if (ret) return ret; @@ -268,8 +280,8 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); - kobject_del(&q->mq_kobj); + kobject_uevent(q->mq_kobj, KOBJ_REMOVE); + kobject_del(q->mq_kobj); kobject_put(&dev->kobj); q->mq_sysfs_init_done = false; @@ -289,7 +301,7 @@ 
void blk_mq_sysfs_deinit(struct request_queue *q) ctx = per_cpu_ptr(q->queue_ctx, cpu); kobject_put(&ctx->kobj); } - kobject_put(&q->mq_kobj); + kobject_put(q->mq_kobj); } void blk_mq_sysfs_init(struct request_queue *q) @@ -297,10 +309,12 @@ void blk_mq_sysfs_init(struct request_queue *q) struct blk_mq_ctx *ctx; int cpu; - kobject_init(&q->mq_kobj, &blk_mq_ktype); + kobject_init(q->mq_kobj, &blk_mq_ktype); for_each_possible_cpu(cpu) { ctx = per_cpu_ptr(q->queue_ctx, cpu); + + kobject_get(q->mq_kobj); kobject_init(&ctx->kobj, &blk_mq_ctx_ktype); } } @@ -313,11 +327,11 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q) WARN_ON_ONCE(!q->kobj.parent); lockdep_assert_held(&q->sysfs_lock); - ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); + ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) goto out; - kobject_uevent(&q->mq_kobj, KOBJ_ADD); + kobject_uevent(q->mq_kobj, KOBJ_ADD); queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); @@ -334,8 +348,8 @@ unreg: while (--i >= 0) blk_mq_unregister_hctx(q->queue_hw_ctx[i]); - kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); - kobject_del(&q->mq_kobj); + kobject_uevent(q->mq_kobj, KOBJ_REMOVE); + kobject_del(q->mq_kobj); kobject_put(&dev->kobj); return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 174384eaace7..b16204df65d1 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2515,6 +2515,34 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, mutex_unlock(&set->tag_list_lock); } +/* All allocations will be freed in release handler of q->mq_kobj */ +static int blk_mq_alloc_ctxs(struct request_queue *q) +{ + struct blk_mq_ctxs *ctxs; + int cpu; + + ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL); + if (!ctxs) + return -ENOMEM; + + ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx); + if (!ctxs->queue_ctx) + goto fail; + + for_each_possible_cpu(cpu) { + struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu); + ctx->ctxs = 
ctxs; + } + + q->mq_kobj = &ctxs->kobj; + q->queue_ctx = ctxs->queue_ctx; + + return 0; + fail: + kfree(ctxs); + return -ENOMEM; +} + /* * It is the actual release handler for mq, but we do it from * request queue's release handler for avoiding use-after-free @@ -2540,8 +2568,6 @@ void blk_mq_release(struct request_queue *q) * both share lifetime with request queue. */ blk_mq_sysfs_deinit(q); - - free_percpu(q->queue_ctx); } struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) @@ -2731,8 +2757,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->poll_cb) goto err_exit; - q->queue_ctx = alloc_percpu(struct blk_mq_ctx); - if (!q->queue_ctx) + if (blk_mq_alloc_ctxs(q)) goto err_exit; /* init q->mq_kobj and sw queues' kobjects */ @@ -2742,7 +2767,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node); if (!q->queue_hw_ctx) - goto err_percpu; + goto err_sys_init; blk_mq_realloc_hw_ctxs(set, q); if (!q->nr_hw_queues) @@ -2794,8 +2819,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, err_hctxs: kfree(q->queue_hw_ctx); -err_percpu: - free_percpu(q->queue_ctx); +err_sys_init: + blk_mq_sysfs_deinit(q); err_exit: q->mq_ops = NULL; return ERR_PTR(-ENOMEM); diff --git a/block/blk-mq.h b/block/blk-mq.h index facb6e9ddce4..9ae8e9f8f8b1 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -7,6 +7,11 @@ struct blk_mq_tag_set; +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + /** * struct blk_mq_ctx - State for a software queue facing the submitting CPUs */ @@ -27,6 +32,7 @@ struct blk_mq_ctx { unsigned long ____cacheline_aligned_in_smp rq_completed[2]; struct request_queue *queue; + struct blk_mq_ctxs *ctxs; struct kobject kobj; } ____cacheline_aligned_in_smp; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 
e97c0a3b2262..9b53db06ad08 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -456,7 +456,7 @@ struct request_queue { /* * mq queue kobject */ - struct kobject mq_kobj; + struct kobject *mq_kobj; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity integrity; -- cgit v1.2.3 From 342e53bd8548e07c6a734d2d3a6437ad6e6d3b09 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Oct 2018 13:28:07 +0100 Subject: arm64: perf: Add support for Armv8.1 PMCEID register format Armv8.1 allocated the upper 32-bits of the PMCEID registers to describe the common architectural and microarchitecture events beginning at 0x4000. Add support for these registers to our probing code, so that we can advertise the SPE events when they are supported by the CPU. Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 25 ++++++++++++++++++------- include/linux/perf/arm_pmu.h | 4 +++- 2 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index ac1c5c41501d..1a783df6f234 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -183,12 +183,10 @@ #define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC #define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED -/* PMUv3 HW events mapping. */ - /* * ARMv8 Architectural defined events, not all of these may - * be supported on any given implementation. Undefined events will - * be disabled at run-time. + * be supported on any given implementation. Unsupported events will + * be disabled at run-time based on the PMCEID registers. 
*/ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, @@ -434,7 +432,13 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj, pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); - if (test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) + if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS && + test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) + return attr->mode; + + pmu_attr->id -= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; + if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS && + test_bit(pmu_attr->id, cpu_pmu->pmceid_ext_bitmap)) return attr->mode; return 0; @@ -1061,6 +1065,7 @@ static void __armv8pmu_probe_pmu(void *info) struct armv8pmu_probe_info *probe = info; struct arm_pmu *cpu_pmu = probe->pmu; u64 dfr0; + u64 pmceid_raw[2]; u32 pmceid[2]; int pmuver; @@ -1079,11 +1084,17 @@ static void __armv8pmu_probe_pmu(void *info) /* Add the CPU cycles counter */ cpu_pmu->num_events += 1; - pmceid[0] = read_sysreg(pmceid0_el0); - pmceid[1] = read_sysreg(pmceid1_el0); + pmceid[0] = pmceid_raw[0] = read_sysreg(pmceid0_el0); + pmceid[1] = pmceid_raw[1] = read_sysreg(pmceid1_el0); bitmap_from_arr32(cpu_pmu->pmceid_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); + + pmceid[0] = pmceid_raw[0] >> 32; + pmceid[1] = pmceid_raw[1] >> 32; + + bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, + pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index bf309ff6f244..4641e850b204 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -102,8 +102,10 @@ struct arm_pmu { int (*filter_match)(struct perf_event *event); int num_events; bool secure_access; /* 32-bit ARM only */ -#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 +#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); +#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000 + 
DECLARE_BITMAP(pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct hlist_node node; -- cgit v1.2.3 From cbb72a3c19eff0ea3ccb0b068eca189063c86174 Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Wed, 7 Nov 2018 19:40:58 +0000 Subject: drivers/perf: xgene: Add CPU hotplug support If the CPU assigned to the xgene PMU is taken offline, then subsequent perf invocations on the PMU will fail: # echo 0 > /sys/devices/system/cpu/cpu0/online # perf stat -a -e l3c0/cycle-count/,l3c0/write/ sleep 1 Error: The sys_perf_event_open() syscall returned with 19 (No such device) for event (l3c0/cycle-count/). /bin/dmesg may provide additional information. No CONFIG_PERF_EVENTS=y kernel support configured? This patch implements a hotplug notifier in the xgene PMU driver so that the PMU context is migrated to another online CPU should its assigned CPU disappear. Acked-by: Mark Rutland Signed-off-by: Hoan Tran [will: Made naming of new cpuhp_state enum entry consistent] Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 80 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/cpuhotplug.h | 1 + 2 files changed, 74 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 0e31f1392a53..0dc9ff0f8894 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -130,12 +131,14 @@ struct xgene_pmu_ops { struct xgene_pmu { struct device *dev; + struct hlist_node node; int version; void __iomem *pcppmu_csr; u32 mcb_active_mask; u32 mc_active_mask; u32 l3c_active_mask; cpumask_t cpu; + int irq; raw_spinlock_t lock; const struct xgene_pmu_ops *ops; struct list_head l3cpmus; @@ -1806,6 +1809,53 @@ static const struct acpi_device_id xgene_pmu_acpi_match[] = { MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match); #endif +static int 
xgene_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct xgene_pmu *xgene_pmu = hlist_entry_safe(node, struct xgene_pmu, + node); + + if (cpumask_empty(&xgene_pmu->cpu)) + cpumask_set_cpu(cpu, &xgene_pmu->cpu); + + /* Overflow interrupt also should use the same CPU */ + WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu)); + + return 0; +} + +static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct xgene_pmu *xgene_pmu = hlist_entry_safe(node, struct xgene_pmu, + node); + struct xgene_pmu_dev_ctx *ctx; + unsigned int target; + + if (!cpumask_test_and_clear_cpu(cpu, &xgene_pmu->cpu)) + return 0; + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) { + perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target); + } + list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) { + perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target); + } + list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) { + perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target); + } + list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) { + perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target); + } + + cpumask_set_cpu(target, &xgene_pmu->cpu); + /* Overflow interrupt also should use the same CPU */ + WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu)); + + return 0; +} + static int xgene_pmu_probe(struct platform_device *pdev) { const struct xgene_pmu_data *dev_data; @@ -1815,6 +1865,14 @@ static int xgene_pmu_probe(struct platform_device *pdev) int irq, rc; int version; + /* Install a hook to update the reader CPU in case it goes offline */ + rc = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, + "CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE", + xgene_pmu_online_cpu, + xgene_pmu_offline_cpu); + if (rc) + return rc; + xgene_pmu = devm_kzalloc(&pdev->dev, sizeof(*xgene_pmu), GFP_KERNEL); if (!xgene_pmu) return -ENOMEM; @@ -1865,6 +1923,7 @@ 
static int xgene_pmu_probe(struct platform_device *pdev) dev_err(&pdev->dev, "No IRQ resource\n"); return -EINVAL; } + rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr, IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(&pdev->dev), xgene_pmu); @@ -1873,6 +1932,8 @@ static int xgene_pmu_probe(struct platform_device *pdev) return rc; } + xgene_pmu->irq = irq; + raw_spin_lock_init(&xgene_pmu->lock); /* Check for active MCBs and MCUs */ @@ -1883,13 +1944,11 @@ static int xgene_pmu_probe(struct platform_device *pdev) xgene_pmu->mc_active_mask = 0x1; } - /* Pick one core to use for cpumask attributes */ - cpumask_set_cpu(smp_processor_id(), &xgene_pmu->cpu); - - /* Make sure that the overflow interrupt is handled by this CPU */ - rc = irq_set_affinity(irq, &xgene_pmu->cpu); + /* Add this instance to the list used by the hotplug callback */ + rc = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, + &xgene_pmu->node); if (rc) { - dev_err(&pdev->dev, "Failed to set interrupt affinity!\n"); + dev_err(&pdev->dev, "Error %d registering hotplug", rc); return rc; } @@ -1897,13 +1956,18 @@ static int xgene_pmu_probe(struct platform_device *pdev) rc = xgene_pmu_probe_pmu_dev(xgene_pmu, pdev); if (rc) { dev_err(&pdev->dev, "No PMU perf devices found!\n"); - return rc; + goto out_unregister; } /* Enable interrupt */ xgene_pmu->ops->unmask_int(xgene_pmu); return 0; + +out_unregister: + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, + &xgene_pmu->node); + return rc; } static void @@ -1924,6 +1988,8 @@ static int xgene_pmu_remove(struct platform_device *pdev) xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->iobpmus); xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcbpmus); xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcpmus); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, + &xgene_pmu->node); return 0; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e0cd2baa8380..d007a319dfd4 100644 --- a/include/linux/cpuhotplug.h +++ 
b/include/linux/cpuhotplug.h @@ -164,6 +164,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, + CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, -- cgit v1.2.3 From a2e768b861108d846b6df21074cff738660b45b7 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 20 Nov 2018 13:20:31 +0100 Subject: net/vlan: introduce skb_vlan_tag_get_cfi() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Abstract CFI/DEI bit access consistently with other VLAN tag fields. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 7a541eadf78e..4cca4da7a6de 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -65,7 +65,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) #define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ #define VLAN_PRIO_SHIFT 13 -#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ +#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator / Drop Eligible Indicator */ #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ #define VLAN_N_VID 4096 @@ -80,6 +80,7 @@ static inline bool is_vlan_dev(const struct net_device *dev) #define skb_vlan_tag_present(__skb) ((__skb)->vlan_present) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) +#define skb_vlan_tag_get_cfi(__skb) (!!((__skb)->vlan_tci & VLAN_CFI_MASK)) #define skb_vlan_tag_get_prio(__skb) (((__skb)->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT) static inline int vlan_get_rx_ctag_filter_info(struct net_device *dev) -- cgit v1.2.3 From 085ddc87d05fdf649ccee7a7da42110e9e1c6311 Mon 
Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Nov 2018 08:02:41 +0000 Subject: bridge: Allow querying bridge port flags Allow querying bridge port flags so that drivers capable of performing VxLAN learning will update the bridge driver only if learning is enabled on its bridge port corresponding to the VxLAN device. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 6 ++++++ net/bridge/br_if.c | 12 ++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index c20c7e197d07..ef7c3d376b21 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -119,6 +119,7 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid, struct net_device *br_fdb_find_port(const struct net_device *br_dev, const unsigned char *addr, __u16 vid); +bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag); #else static inline struct net_device * br_fdb_find_port(const struct net_device *br_dev, @@ -127,6 +128,11 @@ br_fdb_find_port(const struct net_device *br_dev, { return NULL; } +static inline bool +br_port_flag_is_set(const struct net_device *dev, unsigned long flag) +{ + return false; +} #endif #endif diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 9b46d2dc4c22..d4863f5679ac 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -741,3 +741,15 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask) if (mask & BR_NEIGH_SUPPRESS) br_recalculate_neigh_suppress_enabled(br); } + +bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag) +{ + struct net_bridge_port *p; + + p = br_port_get_rtnl_rcu(dev); + if (!p) + return false; + + return p->flags & flag; +} +EXPORT_SYMBOL_GPL(br_port_flag_is_set); -- cgit v1.2.3 From d491324f966518fbd3f4c627a3e9766d018a4eef Mon Sep 17 00:00:00 2001 From: Songjun Wu Date: Thu, 22 Nov 2018 15:47:35 
+0800 Subject: include: Add lantiq.h in include/linux/ In some existing lantiq driver, the C codes include lantiq_soc.h header file directly. ./arch/mips/include/asm/mach-lantiq/falcon/lantiq_soc.h ./arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h Those drivers need to be extended to support more platform. lantiq.h is added in include/linux/ to make it globally available and provides some wrapper codes. Signed-off-by: Songjun Wu Signed-off-by: Greg Kroah-Hartman --- include/linux/lantiq.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/lantiq.h (limited to 'include/linux') diff --git a/include/linux/lantiq.h b/include/linux/lantiq.h new file mode 100644 index 000000000000..67921169d84d --- /dev/null +++ b/include/linux/lantiq.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_LANTIQ_H +#define __LINUX_LANTIQ_H + +#ifdef CONFIG_LANTIQ +#include +#else + +#ifndef LTQ_EARLY_ASC +#define LTQ_EARLY_ASC 0 +#endif + +#ifndef CPHYSADDR +#define CPHYSADDR(a) 0 +#endif + +static inline struct clk *clk_get_fpi(void) +{ + return NULL; +} +#endif /* CONFIG_LANTIQ */ +#endif /* __LINUX_LANTIQ_H */ -- cgit v1.2.3 From 5451781dadf85000665e0e2c3288e9e0f34b860a Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 20 Nov 2018 09:52:53 -0800 Subject: regulator: core: Only count load for enabled consumers In general when the consumer of a regulator requests that the regulator be disabled it no longer will be drawing much load from the regulator--it should just be the leakage current and that should be very close to 0. Up to this point the regulator framework has continued to count a consumer's load request for disabled regulators. 
This has led to code patterns that look like this: enable_my_thing(): regulator_set_load(reg, load_uA) regulator_enable(reg) disable_my_thing(): regulator_disable(reg) regulator_set_load(reg, 0) Sometimes disable_my_thing() sets a nominal (<= 100 uA) load instead of setting a 0 uA load. I will make the assertion that nearly all (if not all) places where we set a nominal load of 100 uA or less we end up with a result that is the same as if we had set a load of 0 uA. Specifically: - The whole point of setting the load is to help set the operating mode of the regulator. Higher loads may need less efficient operating modes. - The only time this matters at all is if there is another consumer of the regulator that wants the regulator on. If there are no other consumers of the regulator then the regulator will turn off and we don't care about the operating mode. - If there's another consumer that actually wants the regulator on then presumably it is requesting a load that makes our nominal <= 100 uA load insignificant. 
A quick survey of the existing callers to regulator_set_load() to see how everyone uses it: Signed-off-by: Douglas Anderson Signed-off-by: Mark Brown --- drivers/regulator/core.c | 193 ++++++++++++++++++++++++++++----------- drivers/regulator/internal.h | 2 + include/linux/regulator/driver.h | 1 - 3 files changed, 144 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index ff5ca185bb8f..26a0c523ed86 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -99,7 +99,7 @@ struct regulator_supply_alias { }; static int _regulator_is_enabled(struct regulator_dev *rdev); -static int _regulator_disable(struct regulator_dev *rdev); +static int _regulator_disable(struct regulator *regulator); static int _regulator_get_voltage(struct regulator_dev *rdev); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); @@ -764,8 +764,10 @@ static ssize_t regulator_total_uA_show(struct device *dev, int uA = 0; regulator_lock(rdev); - list_for_each_entry(regulator, &rdev->consumer_list, list) - uA += regulator->uA_load; + list_for_each_entry(regulator, &rdev->consumer_list, list) { + if (regulator->enable_count) + uA += regulator->uA_load; + } regulator_unlock(rdev); return sprintf(buf, "%d\n", uA); } @@ -938,8 +940,10 @@ static int drms_uA_update(struct regulator_dev *rdev) return -EINVAL; /* calc total requested load */ - list_for_each_entry(sibling, &rdev->consumer_list, list) - current_uA += sibling->uA_load; + list_for_each_entry(sibling, &rdev->consumer_list, list) { + if (sibling->enable_count) + current_uA += sibling->uA_load; + } current_uA += rdev->constraints->system_load; @@ -2024,6 +2028,9 @@ static void _regulator_put(struct regulator *regulator) lockdep_assert_held_once(&regulator_list_mutex); + /* Docs say you must disable before calling regulator_put() */ + WARN_ON(regulator->enable_count); + rdev = 
regulator->rdev; debugfs_remove_recursive(regulator->debugfs); @@ -2417,15 +2424,75 @@ static int _regulator_do_enable(struct regulator_dev *rdev) return 0; } +/** + * _regulator_handle_consumer_enable - handle that a consumer enabled + * @regulator: regulator source + * + * Some things on a regulator consumer (like the contribution towards total + * load on the regulator) only have an effect when the consumer wants the + * regulator enabled. Explained in example with two consumers of the same + * regulator: + * consumer A: set_load(100); => total load = 0 + * consumer A: regulator_enable(); => total load = 100 + * consumer B: set_load(1000); => total load = 100 + * consumer B: regulator_enable(); => total load = 1100 + * consumer A: regulator_disable(); => total_load = 1000 + * + * This function (together with _regulator_handle_consumer_disable) is + * responsible for keeping track of the refcount for a given regulator consumer + * and applying / unapplying these things. + * + * Returns 0 upon no error; -error upon error. + */ +static int _regulator_handle_consumer_enable(struct regulator *regulator) +{ + struct regulator_dev *rdev = regulator->rdev; + + lockdep_assert_held_once(&rdev->mutex.base); + + regulator->enable_count++; + if (regulator->uA_load && regulator->enable_count == 1) + return drms_uA_update(rdev); + + return 0; +} + +/** + * _regulator_handle_consumer_disable - handle that a consumer disabled + * @regulator: regulator source + * + * The opposite of _regulator_handle_consumer_enable(). + * + * Returns 0 upon no error; -error upon error. 
+ */ +static int _regulator_handle_consumer_disable(struct regulator *regulator) +{ + struct regulator_dev *rdev = regulator->rdev; + + lockdep_assert_held_once(&rdev->mutex.base); + + if (!regulator->enable_count) { + rdev_err(rdev, "Underflow of regulator enable count\n"); + return -EINVAL; + } + + regulator->enable_count--; + if (regulator->uA_load && regulator->enable_count == 0) + return drms_uA_update(rdev); + + return 0; +} + /* locks held by regulator_enable() */ -static int _regulator_enable(struct regulator_dev *rdev) +static int _regulator_enable(struct regulator *regulator) { + struct regulator_dev *rdev = regulator->rdev; int ret; lockdep_assert_held_once(&rdev->mutex.base); if (rdev->supply) { - ret = _regulator_enable(rdev->supply->rdev); + ret = _regulator_enable(rdev->supply); if (ret < 0) return ret; } @@ -2437,9 +2504,9 @@ static int _regulator_enable(struct regulator_dev *rdev) goto err_disable_supply; } - /* check voltage and requested load before enabling */ - if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS)) - drms_uA_update(rdev); + ret = _regulator_handle_consumer_enable(regulator); + if (ret < 0) + goto err_disable_supply; if (rdev->use_count == 0) { /* The regulator may on if it's not switchable or left on */ @@ -2448,18 +2515,18 @@ static int _regulator_enable(struct regulator_dev *rdev) if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { ret = -EPERM; - goto err_disable_supply; + goto err_consumer_disable; } ret = _regulator_do_enable(rdev); if (ret < 0) - goto err_disable_supply; + goto err_consumer_disable; _notifier_call_chain(rdev, REGULATOR_EVENT_ENABLE, NULL); } else if (ret < 0) { rdev_err(rdev, "is_enabled() failed: %d\n", ret); - goto err_disable_supply; + goto err_consumer_disable; } /* Fallthrough on positive return values - already enabled */ } @@ -2468,9 +2535,12 @@ static int _regulator_enable(struct regulator_dev *rdev) return 0; +err_consumer_disable: + _regulator_handle_consumer_disable(regulator); + 
err_disable_supply: if (rdev->supply) - _regulator_disable(rdev->supply->rdev); + _regulator_disable(rdev->supply); return ret; } @@ -2490,13 +2560,10 @@ int regulator_enable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct ww_acquire_ctx ww_ctx; - int ret = 0; - - if (regulator->always_on) - return 0; + int ret; regulator_lock_dependent(rdev, &ww_ctx); - ret = _regulator_enable(rdev); + ret = _regulator_enable(regulator); regulator_unlock_dependent(rdev, &ww_ctx); return ret; @@ -2535,8 +2602,9 @@ static int _regulator_do_disable(struct regulator_dev *rdev) } /* locks held by regulator_disable() */ -static int _regulator_disable(struct regulator_dev *rdev) +static int _regulator_disable(struct regulator *regulator) { + struct regulator_dev *rdev = regulator->rdev; int ret = 0; lockdep_assert_held_once(&rdev->mutex.base); @@ -2571,17 +2639,17 @@ static int _regulator_disable(struct regulator_dev *rdev) rdev->use_count = 0; } else if (rdev->use_count > 1) { - if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS)) - drms_uA_update(rdev); - rdev->use_count--; } + if (ret == 0) + ret = _regulator_handle_consumer_disable(regulator); + if (ret == 0 && rdev->coupling_desc.n_coupled > 1) ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); if (ret == 0 && rdev->supply) - ret = _regulator_disable(rdev->supply->rdev); + ret = _regulator_disable(rdev->supply); return ret; } @@ -2602,13 +2670,10 @@ int regulator_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct ww_acquire_ctx ww_ctx; - int ret = 0; - - if (regulator->always_on) - return 0; + int ret; regulator_lock_dependent(rdev, &ww_ctx); - ret = _regulator_disable(rdev); + ret = _regulator_disable(regulator); regulator_unlock_dependent(rdev, &ww_ctx); return ret; @@ -2657,10 +2722,17 @@ int regulator_force_disable(struct regulator *regulator) int ret; regulator_lock_dependent(rdev, &ww_ctx); - regulator->uA_load = 0; + ret = 
_regulator_force_disable(regulator->rdev); + if (rdev->coupling_desc.n_coupled > 1) regulator_balance_voltage(rdev, PM_SUSPEND_ON); + + if (regulator->uA_load) { + regulator->uA_load = 0; + ret = drms_uA_update(rdev); + } + regulator_unlock_dependent(rdev, &ww_ctx); if (rdev->supply) @@ -2677,14 +2749,11 @@ static void regulator_disable_work(struct work_struct *work) disable_work.work); struct ww_acquire_ctx ww_ctx; int count, i, ret; + struct regulator *regulator; + int total_count = 0; regulator_lock_dependent(rdev, &ww_ctx); - BUG_ON(!rdev->deferred_disables); - - count = rdev->deferred_disables; - rdev->deferred_disables = 0; - /* * Workqueue functions queue the new work instance while the previous * work instance is being processed. Cancel the queued work instance @@ -2693,11 +2762,22 @@ static void regulator_disable_work(struct work_struct *work) */ cancel_delayed_work(&rdev->disable_work); - for (i = 0; i < count; i++) { - ret = _regulator_disable(rdev); - if (ret != 0) - rdev_err(rdev, "Deferred disable failed: %d\n", ret); + list_for_each_entry(regulator, &rdev->consumer_list, list) { + count = regulator->deferred_disables; + + if (!count) + continue; + + total_count += count; + regulator->deferred_disables = 0; + + for (i = 0; i < count; i++) { + ret = _regulator_disable(regulator); + if (ret != 0) + rdev_err(rdev, "Deferred disable failed: %d\n", ret); + } } + WARN_ON(!total_count); if (rdev->coupling_desc.n_coupled > 1) regulator_balance_voltage(rdev, PM_SUSPEND_ON); @@ -2731,14 +2811,11 @@ int regulator_disable_deferred(struct regulator *regulator, int ms) { struct regulator_dev *rdev = regulator->rdev; - if (regulator->always_on) - return 0; - if (!ms) return regulator_disable(regulator); regulator_lock(rdev); - rdev->deferred_disables++; + regulator->deferred_disables++; mod_delayed_work(system_power_efficient_wq, &rdev->disable_work, msecs_to_jiffies(ms)); regulator_unlock(rdev); @@ -4145,16 +4222,30 @@ EXPORT_SYMBOL_GPL(regulator_get_error_flags); 
* DRMS will sum the total requested load on the regulator and change * to the most efficient operating mode if platform constraints allow. * + * NOTE: when a regulator consumer requests to have a regulator + * disabled then any load that consumer requested no longer counts + * toward the total requested load. If the regulator is re-enabled + * then the previously requested load will start counting again. + * + * If a regulator is an always-on regulator then an individual consumer's + * load will still be removed if that consumer is fully disabled. + * * On error a negative errno is returned. */ int regulator_set_load(struct regulator *regulator, int uA_load) { struct regulator_dev *rdev = regulator->rdev; - int ret; + int old_uA_load; + int ret = 0; regulator_lock(rdev); + old_uA_load = regulator->uA_load; regulator->uA_load = uA_load; - ret = drms_uA_update(rdev); + if (regulator->enable_count && old_uA_load != uA_load) { + ret = drms_uA_update(rdev); + if (ret < 0) + regulator->uA_load = old_uA_load; + } regulator_unlock(rdev); return ret; @@ -4325,11 +4416,8 @@ int regulator_bulk_enable(int num_consumers, int ret = 0; for (i = 0; i < num_consumers; i++) { - if (consumers[i].consumer->always_on) - consumers[i].ret = 0; - else - async_schedule_domain(regulator_bulk_enable_async, - &consumers[i], &async_domain); + async_schedule_domain(regulator_bulk_enable_async, + &consumers[i], &async_domain); } async_synchronize_full_domain(&async_domain); @@ -5225,8 +5313,11 @@ static void regulator_summary_show_subtree(struct seq_file *s, switch (rdev->desc->type) { case REGULATOR_VOLTAGE: - seq_printf(s, "%37dmA %5dmV %5dmV", + seq_printf(s, "%3d %33dmA%c%5dmV %5dmV", + consumer->enable_count, consumer->uA_load / 1000, + consumer->uA_load && !consumer->enable_count ? 
+ '*' : ' ', consumer->voltage[PM_SUSPEND_ON].min_uV / 1000, consumer->voltage[PM_SUSPEND_ON].max_uV / 1000); break; diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h index 943926a156f2..6017f15c5d75 100644 --- a/drivers/regulator/internal.h +++ b/drivers/regulator/internal.h @@ -42,6 +42,8 @@ struct regulator { unsigned int always_on:1; unsigned int bypass:1; int uA_load; + unsigned int enable_count; + unsigned int deferred_disables; struct regulator_voltage voltage[REGULATOR_STATES_NUM]; const char *supply_name; struct device_attribute dev_attr; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 7065031f0846..389bcaf7900f 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -474,7 +474,6 @@ struct regulator_dev { struct regmap *regmap; struct delayed_work disable_work; - int deferred_disables; void *reg_data; /* regulator_dev data */ -- cgit v1.2.3 From 41c9e132c5cc3e5f28cf44032ff82f7614a42989 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sat, 10 Nov 2018 21:29:03 +0100 Subject: rtc: nvmem: remove nvmem from struct rtc_device Using devm_nvmem_register allows to avoid tracking the nvmem pointer in the rtc_device structure. This ultimately allows to register multiple nvmem devices from an RTC driver. 
Signed-off-by: Alexandre Belloni --- drivers/rtc/nvmem.c | 24 ++++++++++-------------- include/linux/rtc.h | 1 - 2 files changed, 10 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/rtc/nvmem.c b/drivers/rtc/nvmem.c index 2a7220d8b02d..ebdfe8e3a1a0 100644 --- a/drivers/rtc/nvmem.c +++ b/drivers/rtc/nvmem.c @@ -25,11 +25,9 @@ rtc_nvram_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { - struct rtc_device *rtc = attr->private; - dev_warn_once(kobj_to_dev(kobj), nvram_warning); - return nvmem_device_read(rtc->nvmem, off, count, buf); + return nvmem_device_read(attr->private, off, count, buf); } static ssize_t @@ -37,14 +35,13 @@ rtc_nvram_write(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { - struct rtc_device *rtc = attr->private; - dev_warn_once(kobj_to_dev(kobj), nvram_warning); - return nvmem_device_write(rtc->nvmem, off, count, buf); + return nvmem_device_write(attr->private, off, count, buf); } -static int rtc_nvram_register(struct rtc_device *rtc, size_t size) +static int rtc_nvram_register(struct rtc_device *rtc, + struct nvmem_device *nvmem, size_t size) { int err; @@ -56,7 +53,7 @@ static int rtc_nvram_register(struct rtc_device *rtc, size_t size) rtc->nvram->attr.name = "nvram"; rtc->nvram->attr.mode = 0644; - rtc->nvram->private = rtc; + rtc->nvram->private = nvmem; sysfs_bin_attr_init(rtc->nvram); @@ -85,21 +82,20 @@ static void rtc_nvram_unregister(struct rtc_device *rtc) int rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config) { - if (!IS_ERR_OR_NULL(rtc->nvmem)) - return -EBUSY; + struct nvmem_device *nvmem; if (!nvmem_config) return -ENODEV; nvmem_config->dev = rtc->dev.parent; nvmem_config->owner = rtc->owner; - rtc->nvmem = devm_nvmem_register(rtc->dev.parent, nvmem_config); - if (IS_ERR(rtc->nvmem)) - return PTR_ERR(rtc->nvmem); + nvmem = 
devm_nvmem_register(rtc->dev.parent, nvmem_config); + if (IS_ERR(nvmem)) + return PTR_ERR(nvmem); /* Register the old ABI */ if (rtc->nvram_old_abi) - rtc_nvram_register(rtc, nvmem_config->size); + rtc_nvram_register(rtc, nvmem, nvmem_config->size); return 0; } diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 311375dbb673..58147b057acd 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -138,7 +138,6 @@ struct rtc_device { bool registered; - struct nvmem_device *nvmem; /* Old ABI support */ bool nvram_old_abi; struct bin_attribute *nvram; -- cgit v1.2.3 From 6fe07ce35e8ad870ba1cf82e0481e0fc0f526eff Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 21 Nov 2018 20:28:39 +0000 Subject: x86/resctrl: Rename the config option INTEL_RDT to RESCTRL The resource control feature is supported by both Intel and AMD. So, rename CONFIG_INTEL_RDT to the vendor-neutral CONFIG_RESCTRL. Now CONFIG_RESCTRL will be used for both Intel and AMD to enable Resource Control support. Update the texts in config and condition accordingly. [ bp: Simplify Kconfig text. ] Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Brijesh Singh Cc: "Chang S. Bae" Cc: David Miller Cc: David Woodhouse Cc: Dmitry Safonov Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Kate Stewart Cc: "Kirill A. Shutemov" Cc: Cc: Mauro Carvalho Chehab Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Pu Wen Cc: Cc: "Rafael J. 
Wysocki" Cc: Reinette Chatre Cc: Rian Hunter Cc: Sherry Hurwitz Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Thomas Lendacky Cc: Tony Luck Cc: Vitaly Kuznetsov Cc: Link: https://lkml.kernel.org/r/20181121202811.4492-9-babu.moger@amd.com --- arch/x86/Kconfig | 22 +++++++++++++++------- arch/x86/include/asm/resctrl_sched.h | 4 ++-- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/resctrl/Makefile | 4 ++-- include/linux/sched.h | 2 +- 5 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9d734f3c8234..2d0577e805d2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -448,15 +448,23 @@ config RETPOLINE code are eliminated. Since this includes the syscall entry path, it is not entirely pointless. -config INTEL_RDT - bool "Intel Resource Director Technology support" - depends on X86 && CPU_SUP_INTEL +config RESCTRL + bool "Resource Control support" + depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) select KERNFS help - Select to enable resource allocation and monitoring which are - sub-features of Intel Resource Director Technology(RDT). More - information about RDT can be found in the Intel x86 - Architecture Software Developer Manual. + Enable Resource Control support. + + Provide support for the allocation and monitoring of system resources + usage by the CPU. + + Intel calls this Intel Resource Director Technology + (Intel(R) RDT). More information about RDT can be found in the + Intel x86 Architecture Software Developer Manual. + + AMD calls this AMD Platform Quality of Service (AMD QoS). + More information about AMD QoS can be found in the AMD64 Technology + Platform Quality of Service Extensions manual. Say N if unsure. 
diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h index 6e082697a613..54990fe2a3ae 100644 --- a/arch/x86/include/asm/resctrl_sched.h +++ b/arch/x86/include/asm/resctrl_sched.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_RESCTRL_SCHED_H #define _ASM_X86_RESCTRL_SCHED_H -#ifdef CONFIG_INTEL_RDT +#ifdef CONFIG_RESCTRL #include #include @@ -88,6 +88,6 @@ static inline void resctrl_sched_in(void) static inline void resctrl_sched_in(void) {} -#endif /* CONFIG_INTEL_RDT */ +#endif /* CONFIG_RESCTRL */ #endif /* _ASM_X86_RESCTRL_SCHED_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 8501d16dd642..dc4acaa1549d 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ -obj-$(CONFIG_INTEL_RDT) += resctrl/ +obj-$(CONFIG_RESCTRL) += resctrl/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile index fa3cb91d7849..6895049ceef7 100644 --- a/arch/x86/kernel/cpu/resctrl/Makefile +++ b/arch/x86/kernel/cpu/resctrl/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_INTEL_RDT) += core.o rdtgroup.o monitor.o -obj-$(CONFIG_INTEL_RDT) += ctrlmondata.o pseudo_lock.o +obj-$(CONFIG_RESCTRL) += core.o rdtgroup.o monitor.o +obj-$(CONFIG_RESCTRL) += ctrlmondata.o pseudo_lock.o CFLAGS_pseudo_lock.o = -I$(src) diff --git a/include/linux/sched.h b/include/linux/sched.h index a51c13c2b1a0..7952dfba2c76 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -993,7 +993,7 @@ struct task_struct { /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; #endif -#ifdef CONFIG_INTEL_RDT +#ifdef CONFIG_RESCTRL u32 closid; u32 rmid; #endif -- cgit v1.2.3 From e45678973dcbb131f29a6c90b0ea3829f38eeab8 Mon Sep 17 00:00:00 2001 
From: Daniel Jurgens Date: Wed, 21 Nov 2018 17:12:05 +0200 Subject: {net, IB}/mlx4: Initialize CQ buffers in the driver when possible Perform CQ initialization in the driver when the capability is supported by the FW. When passing the CQ to HW indicate that the CQ buffer has been pre-initialized. Doing so decreases CQ creation time. Testing on P8 showed a single 2048 entry CQ creation time was reduced from ~395us to ~170us, which is 2.3x faster. Signed-off-by: Daniel Jurgens Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/cq.c | 9 +++- drivers/net/ethernet/mellanox/mlx4/cq.c | 71 ++++++++++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 2 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 3 ++ include/linux/mlx4/device.h | 4 +- 5 files changed, 82 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 82adc0d1d30e..43512347b4f0 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -181,6 +181,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_cq *cq; struct mlx4_uar *uar; + void *buf_addr; int err; if (entries < 1 || entries > dev->dev->caps.max_cqes) @@ -211,6 +212,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, goto err_cq; } + buf_addr = (void *)(unsigned long)ucmd.buf_addr; + err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem, ucmd.buf_addr, entries); if (err) @@ -237,6 +240,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (err) goto err_db; + buf_addr = &cq->buf.buf; + uar = &dev->priv_uar; cq->mcq.usage = MLX4_RES_USAGE_DRIVER; } @@ -246,7 +251,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, cq->db.dma, &cq->mcq, vector, 0, - !!(cq->create_flags & 
IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION)); + !!(cq->create_flags & + IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION), + buf_addr, !!context); if (err) goto err_dbmap; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index d8e9a323122e..db909b6069b5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -144,9 +144,9 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) } static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, - int cq_num) + int cq_num, u8 opmod) { - return mlx4_cmd(dev, mailbox->dma, cq_num, 0, + return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } @@ -287,11 +287,61 @@ static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) __mlx4_cq_free_icm(dev, cqn); } +static int mlx4_init_user_cqes(void *buf, int entries, int cqe_size) +{ + int entries_per_copy = PAGE_SIZE / cqe_size; + void *init_ents; + int err = 0; + int i; + + init_ents = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!init_ents) + return -ENOMEM; + + /* Populate a list of CQ entries to reduce the number of + * copy_to_user calls. 0xcc is the initialization value + * required by the FW. 
+ */ + memset(init_ents, 0xcc, PAGE_SIZE); + + if (entries_per_copy < entries) { + for (i = 0; i < entries / entries_per_copy; i++) { + err = copy_to_user(buf, init_ents, PAGE_SIZE); + if (err) + goto out; + + buf += PAGE_SIZE; + } + } else { + err = copy_to_user(buf, init_ents, entries * cqe_size); + } + +out: + kfree(init_ents); + + return err; +} + +static void mlx4_init_kernel_cqes(struct mlx4_buf *buf, + int entries, + int cqe_size) +{ + int i; + + if (buf->nbufs == 1) + memset(buf->direct.buf, 0xcc, entries * cqe_size); + else + for (i = 0; i < buf->npages; i++) + memset(buf->page_list[i].buf, 0xcc, + 1UL << buf->page_shift); +} + int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, unsigned vector, int collapsed, - int timestamp_en) + int timestamp_en, void *buf_addr, bool user_cq) { + bool sw_cq_init = dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SW_CQ_INIT; struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; struct mlx4_cmd_mailbox *mailbox; @@ -336,7 +386,20 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); cq_context->db_rec_addr = cpu_to_be64(db_rec); - err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn); + if (sw_cq_init) { + if (user_cq) { + err = mlx4_init_user_cqes(buf_addr, nent, + dev->caps.cqe_size); + if (err) + sw_cq_init = false; + } else { + mlx4_init_kernel_cqes(buf_addr, nent, + dev->caps.cqe_size); + } + } + + err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn, sw_cq_init); + mlx4_free_cmd_mailbox(dev, mailbox); if (err) goto err_radix; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 1e487acb4667..062a88fcc5d6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -143,7 +143,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, cq->mcq.usage = 
MLX4_RES_USAGE_DRIVER; err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, - cq->vector, 0, timestamp_en); + cq->vector, 0, timestamp_en, &cq->wqres.buf, false); if (err) goto free_eq; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index babcfd9c0571..7df728f1e5b5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -166,6 +166,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [37] = "sl to vl mapping table change event support", [38] = "user MAC support", [39] = "Report driver version to FW support", + [40] = "SW CQ initialization support", }; int i; @@ -1098,6 +1099,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM; if (field32 & (1 << 21)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS; + if (field32 & (1 << 23)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SW_CQ_INIT; for (i = 1; i <= dev_cap->num_ports; i++) { err = mlx4_QUERY_PORT(dev, i, dev_cap->port_cap + i); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index dca6ab4eaa99..36e412c3d657 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -226,6 +226,7 @@ enum { MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37, MLX4_DEV_CAP_FLAG2_USER_MAC_EN = 1ULL << 38, MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW = 1ULL << 39, + MLX4_DEV_CAP_FLAG2_SW_CQ_INIT = 1ULL << 40, }; enum { @@ -1136,7 +1137,8 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, - unsigned vector, int collapsed, int timestamp_en); + unsigned int vector, int collapsed, int timestamp_en, + void *buf_addr, bool user_cq); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct 
mlx4_dev *dev, int cnt, int align, int *base, u8 flags, u8 usage); -- cgit v1.2.3 From 89f579ce99f7e028e81885d3965f973c0f787611 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 22 Nov 2018 10:04:09 +0800 Subject: x86/headers: Fix -Wmissing-prototypes warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building the kernel with W=1 we get a lot of -Wmissing-prototypes warnings, which are trivial in nature and easy to fix - and which may mask some real future bugs if the prototypes get out of sync with the function definition. This patch fixes most of -Wmissing-prototypes warnings which are in the root directory of arch/x86/kernel, not including the subdirectories. These are the warnings fixed in this patch: arch/x86/kernel/signal.c:865:17: warning: no previous prototype for ‘sys32_x32_rt_sigreturn’ [-Wmissing-prototypes] arch/x86/kernel/signal_compat.c:164:6: warning: no previous prototype for ‘sigaction_compat_abi’ [-Wmissing-prototypes] arch/x86/kernel/traps.c:625:46: warning: no previous prototype for ‘sync_regs’ [-Wmissing-prototypes] arch/x86/kernel/traps.c:640:24: warning: no previous prototype for ‘fixup_bad_iret’ [-Wmissing-prototypes] arch/x86/kernel/traps.c:929:13: warning: no previous prototype for ‘trap_init’ [-Wmissing-prototypes] arch/x86/kernel/irq.c:270:28: warning: no previous prototype for ‘smp_x86_platform_ipi’ [-Wmissing-prototypes] arch/x86/kernel/irq.c:301:16: warning: no previous prototype for ‘smp_kvm_posted_intr_ipi’ [-Wmissing-prototypes] arch/x86/kernel/irq.c:314:16: warning: no previous prototype for ‘smp_kvm_posted_intr_wakeup_ipi’ [-Wmissing-prototypes] arch/x86/kernel/irq.c:328:16: warning: no previous prototype for ‘smp_kvm_posted_intr_nested_ipi’ [-Wmissing-prototypes] arch/x86/kernel/irq_work.c:16:28: warning: no previous prototype for ‘smp_irq_work_interrupt’ [-Wmissing-prototypes] arch/x86/kernel/irqinit.c:79:13: warning: no previous prototype for ‘init_IRQ’ [-Wmissing-prototypes] 
arch/x86/kernel/quirks.c:672:13: warning: no previous prototype for ‘early_platform_quirks’ [-Wmissing-prototypes] arch/x86/kernel/tsc.c:1499:15: warning: no previous prototype for ‘calibrate_delay_is_known’ [-Wmissing-prototypes] arch/x86/kernel/process.c:653:13: warning: no previous prototype for ‘arch_post_acpi_subsys_init’ [-Wmissing-prototypes] arch/x86/kernel/process.c:717:15: warning: no previous prototype for ‘arch_randomize_brk’ [-Wmissing-prototypes] arch/x86/kernel/process.c:784:6: warning: no previous prototype for ‘do_arch_prctl_common’ [-Wmissing-prototypes] arch/x86/kernel/reboot.c:869:6: warning: no previous prototype for ‘nmi_panic_self_stop’ [-Wmissing-prototypes] arch/x86/kernel/smp.c:176:27: warning: no previous prototype for ‘smp_reboot_interrupt’ [-Wmissing-prototypes] arch/x86/kernel/smp.c:260:28: warning: no previous prototype for ‘smp_reschedule_interrupt’ [-Wmissing-prototypes] arch/x86/kernel/smp.c:281:28: warning: no previous prototype for ‘smp_call_function_interrupt’ [-Wmissing-prototypes] arch/x86/kernel/smp.c:291:28: warning: no previous prototype for ‘smp_call_function_single_interrupt’ [-Wmissing-prototypes] arch/x86/kernel/ftrace.c:840:6: warning: no previous prototype for ‘arch_ftrace_update_trampoline’ [-Wmissing-prototypes] arch/x86/kernel/ftrace.c:934:7: warning: no previous prototype for ‘arch_ftrace_trampoline_func’ [-Wmissing-prototypes] arch/x86/kernel/ftrace.c:946:6: warning: no previous prototype for ‘arch_ftrace_trampoline_free’ [-Wmissing-prototypes] arch/x86/kernel/crash.c:114:6: warning: no previous prototype for ‘crash_smp_send_stop’ [-Wmissing-prototypes] arch/x86/kernel/crash.c:351:5: warning: no previous prototype for ‘crash_setup_memmap_entries’ [-Wmissing-prototypes] arch/x86/kernel/crash.c:424:5: warning: no previous prototype for ‘crash_load_segments’ [-Wmissing-prototypes] arch/x86/kernel/machine_kexec_64.c:372:7: warning: no previous prototype for ‘arch_kexec_kernel_image_load’ [-Wmissing-prototypes] 
arch/x86/kernel/paravirt-spinlocks.c:12:16: warning: no previous prototype for ‘__native_queued_spin_unlock’ [-Wmissing-prototypes] arch/x86/kernel/paravirt-spinlocks.c:18:6: warning: no previous prototype for ‘pv_is_native_spin_unlock’ [-Wmissing-prototypes] arch/x86/kernel/paravirt-spinlocks.c:24:16: warning: no previous prototype for ‘__native_vcpu_is_preempted’ [-Wmissing-prototypes] arch/x86/kernel/paravirt-spinlocks.c:30:6: warning: no previous prototype for ‘pv_is_native_vcpu_is_preempted’ [-Wmissing-prototypes] arch/x86/kernel/kvm.c:258:1: warning: no previous prototype for ‘do_async_page_fault’ [-Wmissing-prototypes] arch/x86/kernel/jailhouse.c:200:6: warning: no previous prototype for ‘jailhouse_paravirt’ [-Wmissing-prototypes] arch/x86/kernel/check.c:91:13: warning: no previous prototype for ‘setup_bios_corruption_check’ [-Wmissing-prototypes] arch/x86/kernel/check.c:139:6: warning: no previous prototype for ‘check_for_bios_corruption’ [-Wmissing-prototypes] arch/x86/kernel/devicetree.c:32:13: warning: no previous prototype for ‘early_init_dt_scan_chosen_arch’ [-Wmissing-prototypes] arch/x86/kernel/devicetree.c:42:13: warning: no previous prototype for ‘add_dtb’ [-Wmissing-prototypes] arch/x86/kernel/devicetree.c:108:6: warning: no previous prototype for ‘x86_of_pci_init’ [-Wmissing-prototypes] arch/x86/kernel/devicetree.c:314:13: warning: no previous prototype for ‘x86_dtb_init’ [-Wmissing-prototypes] arch/x86/kernel/tracepoint.c:16:5: warning: no previous prototype for ‘trace_pagefault_reg’ [-Wmissing-prototypes] arch/x86/kernel/tracepoint.c:22:6: warning: no previous prototype for ‘trace_pagefault_unreg’ [-Wmissing-prototypes] arch/x86/kernel/head64.c:113:22: warning: no previous prototype for ‘__startup_64’ [-Wmissing-prototypes] arch/x86/kernel/head64.c:262:15: warning: no previous prototype for ‘__startup_secondary_64’ [-Wmissing-prototypes] arch/x86/kernel/head64.c:350:12: warning: no previous prototype for ‘early_make_pgtable’ 
[-Wmissing-prototypes] [ mingo: rewrote the changelog, fixed build errors. ] Signed-off-by: Yi Wang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andy.shevchenko@gmail.com Cc: anton@enomsg.org Cc: ard.biesheuvel@linaro.org Cc: bhe@redhat.com Cc: bhelgaas@google.com Cc: bp@alien8.de Cc: ccross@android.com Cc: devicetree@vger.kernel.org Cc: douly.fnst@cn.fujitsu.com Cc: dwmw@amazon.co.uk Cc: dyoung@redhat.com Cc: ebiederm@xmission.com Cc: frank.rowand@sony.com Cc: frowand.list@gmail.com Cc: ivan.gorinov@intel.com Cc: jailhouse-dev@googlegroups.com Cc: jan.kiszka@siemens.com Cc: jgross@suse.com Cc: jroedel@suse.de Cc: keescook@chromium.org Cc: kexec@lists.infradead.org Cc: konrad.wilk@oracle.com Cc: kvm@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: luto@kernel.org Cc: m.mizuma@jp.fujitsu.com Cc: namit@vmware.com Cc: oleg@redhat.com Cc: pasha.tatashin@oracle.com Cc: pbonzini@redhat.com Cc: prarit@redhat.com Cc: pravin.shedge4linux@gmail.com Cc: rajvi.jingar@intel.com Cc: rkrcmar@redhat.com Cc: robh+dt@kernel.org Cc: robh@kernel.org Cc: rostedt@goodmis.org Cc: takahiro.akashi@linaro.org Cc: thomas.lendacky@amd.com Cc: tony.luck@intel.com Cc: up2wing@gmail.com Cc: virtualization@lists.linux-foundation.org Cc: zhe.he@windriver.com Cc: zhong.weidong@zte.com.cn Link: http://lkml.kernel.org/r/1542852249-19820-1-git-send-email-wang.yi59@zte.com.cn Signed-off-by: Ingo Molnar --- arch/x86/include/asm/crash.h | 1 + arch/x86/include/asm/irq.h | 7 +++++++ arch/x86/include/asm/irq_work.h | 1 + arch/x86/include/asm/kvm_para.h | 1 + arch/x86/include/asm/paravirt.h | 5 +++++ arch/x86/include/asm/reboot.h | 1 + arch/x86/include/asm/sighandling.h | 5 +++++ arch/x86/include/asm/smp.h | 6 ++++++ arch/x86/include/asm/traps.h | 4 ++++ arch/x86/include/asm/tsc.h | 1 + arch/x86/kernel/check.c | 3 ++- arch/x86/kernel/crash.c | 1 + arch/x86/kernel/devicetree.c | 1 + 
arch/x86/kernel/jailhouse.c | 1 + arch/x86/kernel/process.c | 3 +++ arch/x86/kernel/quirks.c | 1 + arch/x86/kernel/tracepoint.c | 1 + include/linux/ftrace.h | 3 +++ include/linux/kexec.h | 1 + include/linux/of_fdt.h | 1 + include/linux/ptrace.h | 1 + 21 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index a7adb2bfbf0b..0acf5ee45a21 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -6,5 +6,6 @@ int crash_load_segments(struct kimage *image); int crash_copy_backup_region(struct kimage *image); int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params); +void crash_smp_send_stop(void); #endif /* _ASM_X86_CRASH_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 2395bb794c7b..fbb16e6b6c18 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -30,6 +30,9 @@ extern void fixup_irqs(void); #ifdef CONFIG_HAVE_KVM extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); +extern __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs); +extern __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs); +extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs); #endif extern void (*x86_platform_ipi_callback)(void); @@ -41,9 +44,13 @@ extern __visible unsigned int do_IRQ(struct pt_regs *regs); extern void init_ISA_irqs(void); +extern void __init init_IRQ(void); + #ifdef CONFIG_X86_LOCAL_APIC void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self); + +extern __visible void smp_x86_platform_ipi(struct pt_regs *regs); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 800ffce0db29..80b35e3adf03 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -10,6 +10,7 @@ static 
inline bool arch_irq_work_has_interrupt(void) return boot_cpu_has(X86_FEATURE_APIC); } extern void arch_irq_work_raise(void); +extern __visible void smp_irq_work_interrupt(struct pt_regs *regs); #else static inline bool arch_irq_work_has_interrupt(void) { diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 4c723632c036..5ed3cf1c3934 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -92,6 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); +void do_async_page_fault(struct pt_regs *regs, unsigned long error_code); #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init kvm_spinlock_init(void); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4bf42f9e4eea..a97f28d914d5 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -26,6 +26,11 @@ struct static_key; extern struct static_key paravirt_steal_enabled; extern struct static_key paravirt_steal_rq_enabled; +__visible void __native_queued_spin_unlock(struct qspinlock *lock); +bool pv_is_native_spin_unlock(void); +__visible bool __native_vcpu_is_preempted(long cpu); +bool pv_is_native_vcpu_is_preempted(void); + static inline u64 paravirt_steal_clock(int cpu) { return PVOP_CALL1(u64, time.steal_clock, cpu); diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index a671a1145906..04c17be9b5fd 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -26,6 +26,7 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_APM 1 typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); +void nmi_panic_self_stop(struct pt_regs *regs); void nmi_shootdown_cpus(nmi_shootdown_cb callback); void run_crash_ipi_callback(struct pt_regs *regs); diff --git a/arch/x86/include/asm/sighandling.h 
b/arch/x86/include/asm/sighandling.h index bd26834724e5..2fcbd6f33ef7 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -17,4 +17,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where); int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask); + +#ifdef CONFIG_X86_X32_ABI +asmlinkage long sys32_x32_rt_sigreturn(void); +#endif + #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 547c4fe50711..2e95b6c1bca3 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -148,6 +148,12 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); void smp_store_boot_cpu_info(void); void smp_store_cpu_info(int id); + +asmlinkage __visible void smp_reboot_interrupt(void); +__visible void smp_reschedule_interrupt(struct pt_regs *regs); +__visible void smp_call_function_interrupt(struct pt_regs *regs); +__visible void smp_call_function_single_interrupt(struct pt_regs *r); + #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) #define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 877afdedbbc5..5fcdf5687406 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -75,6 +75,10 @@ dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code) dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_64 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code); +asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); +asmlinkage __visible notrace +struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); +void __init trap_init(void); #endif dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); dotraplinkage void do_page_fault(struct 
pt_regs *regs, unsigned long error_code); diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index eb5bbfeccb66..8a0c25c6bf09 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -35,6 +35,7 @@ extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); extern void tsc_early_init(void); extern void tsc_init(void); +extern unsigned long calibrate_delay_is_known(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 1979a76bfadd..5136e6818da8 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -9,6 +9,7 @@ #include #include +#include /* * Some BIOSes seem to corrupt the low 64k of memory during events @@ -136,7 +137,7 @@ void __init setup_bios_corruption_check(void) } -void check_for_bios_corruption(void) +static void check_for_bios_corruption(void) { int i; int corruption = 0; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index f631a3f15587..c8b07d8ea5a2 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -37,6 +37,7 @@ #include #include #include +#include /* Used while preparing memory map entries for second kernel */ struct crash_memmap_data { diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 7299dcbf8e85..8d85e00bb40a 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -23,6 +23,7 @@ #include #include #include +#include __initdata u64 initial_dtb; char __initdata cmd_line[COMMAND_LINE_SIZE]; diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 108c48d0d40e..1b2ee55a2dfb 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -19,6 +19,7 @@ #include #include #include +#include static __initdata struct jailhouse_setup_data setup_data; static unsigned int precalibrated_tsc_khz; diff --git a/arch/x86/kernel/process.c 
b/arch/x86/kernel/process.c index 3c3ee8982577..b7cb5348f37f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -39,6 +41,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 736348ead421..8451f38ad399 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -7,6 +7,7 @@ #include #include +#include #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c index 5bd30c442794..2e85f4dcf77b 100644 --- a/arch/x86/kernel/tracepoint.c +++ b/arch/x86/kernel/tracepoint.c @@ -10,6 +10,7 @@ #include #include +#include DEFINE_STATIC_KEY_FALSE(trace_pagefault_key); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a397907e8d72..182d669cc918 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -420,6 +420,9 @@ enum { }; void arch_ftrace_update_code(int command); +void arch_ftrace_update_trampoline(struct ftrace_ops *ops); +void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec); +void arch_ftrace_trampoline_free(struct ftrace_ops *ops); struct ftrace_rec_iter; diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 9e4e638fb505..53efedae3d5b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -174,6 +174,7 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, bool get_value); void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); +void * __weak arch_kexec_kernel_image_load(struct kimage *image); int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, const Elf_Shdr *relsec, diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index b9cd9ebdf9b9..a713e5d156d8 100644 
--- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -76,6 +76,7 @@ extern int early_init_dt_scan_memory(unsigned long node, const char *uname, extern int early_init_dt_scan_chosen_stdout(void); extern void early_init_fdt_scan_reserved_mem(void); extern void early_init_fdt_reserve_self(void); +extern void __init early_init_dt_scan_chosen_arch(unsigned long node); extern void early_init_dt_add_memory_arch(u64 base, u64 size); extern int early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size); extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 6c2ffed907f5..a37d7c00da65 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -428,4 +428,5 @@ extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); +extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); #endif -- cgit v1.2.3 From 58c5fc2b96e4ae65068d815a1c3ca81da92fa1c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 31 Oct 2018 19:21:08 +0100 Subject: time: Remove useless filenames in top level comments Remove the pointless filenames in the top level comments. They have no value at all and just occupy space. While at it tidy up some of the comments and remove a stale one. Signed-off-by: Thomas Gleixner Acked-by: Nicolas Pitre Acked-by: Kees Cook Acked-by: Ingo Molnar Acked-by: John Stultz Acked-by: Corey Minyard Cc: Peter Zijlstra Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Peter Anvin Cc: Russell King Cc: Richard Cochran Cc: "Paul E. 
McKenney" Cc: David Riley Cc: Colin Cross Cc: Mark Brown Link: https://lkml.kernel.org/r/20181031182252.794898238@linutronix.de --- include/linux/hrtimer.h | 2 -- kernel/time/clockevents.c | 2 -- kernel/time/clocksource.c | 5 ----- kernel/time/hrtimer.c | 16 ++++------------ kernel/time/itimer.c | 2 -- kernel/time/jiffies.c | 2 -- kernel/time/posix-clock.c | 2 +- kernel/time/posix-timers.c | 4 ---- kernel/time/sched_clock.c | 4 ++-- kernel/time/tick-broadcast-hrtimer.c | 4 +--- kernel/time/tick-broadcast.c | 2 -- kernel/time/tick-common.c | 2 -- kernel/time/tick-oneshot.c | 2 -- kernel/time/tick-sched.c | 2 -- kernel/time/time.c | 12 ++++-------- kernel/time/timecounter.c | 6 +----- kernel/time/timekeeping.c | 10 ++-------- kernel/time/timer.c | 2 -- kernel/time/timer_list.c | 2 -- 19 files changed, 15 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 3892e9c8b2de..50ebe2ad43e0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -1,6 +1,4 @@ /* - * include/linux/hrtimer.h - * * hrtimers - High-resolution kernel timers * * Copyright(C) 2005, Thomas Gleixner diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index af58898d9ebf..9b8c7c0fd113 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -1,6 +1,4 @@ /* - * linux/kernel/time/clockevents.c - * * This file contains functions which manage clock event devices. * * Copyright(C) 2005-2006, Thomas Gleixner diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index ffe081623aec..1c5273fbd500 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -1,6 +1,4 @@ /* - * linux/kernel/time/clocksource.c - * * This file contains the functions which manage clocksource drivers. 
* * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) @@ -18,9 +16,6 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * TODO WishList: - * o Allow clocksource drivers to be unregistered */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 9cdd74bd2d27..223548bb81c6 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1,26 +1,18 @@ /* - * linux/kernel/hrtimer.c - * * Copyright(C) 2005-2006, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner * * High-resolution kernel timers * - * In contrast to the low-resolution timeout API implemented in - * kernel/timer.c, hrtimers provide finer resolution and accuracy - * depending on system configuration and capabilities. - * - * These timers are currently used for: - * - itimers - * - POSIX timers - * - nanosleep - * - precise in-kernel timing + * In contrast to the low-resolution timeout API, aka timer wheel, + * hrtimers provide finer resolution and accuracy depending on system + * configuration and capabilities. 
* * Started by: Thomas Gleixner and Ingo Molnar * * Credits: - * based on kernel/timer.c + * Based on the original timer wheel code * * Help, testing, suggestions, bugfixes, improvements were * provided by: diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 9a65713c8309..02068b2d5862 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* - * linux/kernel/itimer.c - * * Copyright (C) 1992 Darren Senn */ diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 497719127bf9..9c3957fe9317 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -1,6 +1,4 @@ /*********************************************************************** -* linux/kernel/time/jiffies.c -* * This file contains the jiffies based clocksource. * * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index fe56c4e06c51..4959815f4fd7 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -1,5 +1,5 @@ /* - * posix-clock.c - support for dynamic clock devices + * Support for dynamic clock devices * * Copyright (C) 2010 OMICRON electronics GmbH * diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index bd62b5eeb5a0..c72307c119d9 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1,10 +1,6 @@ /* - * linux/kernel/posix-timers.c - * - * * 2002-10-15 Posix Clocks & timers * by George Anzinger george@mvista.com - * * Copyright (C) 2002 2003 by MontaVista Software. * * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug. diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index cbc72c2c1fca..b38b6628f89b 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -1,6 +1,6 @@ /* - * sched_clock.c: Generic sched_clock() support, to extend low level - * hardware time counters to full 64-bit ns values. 
+ * Generic sched_clock() support, to extend low level hardware time + * counters to full 64-bit ns values. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index a59641fb88b6..5be6154e2fd2 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * linux/kernel/time/tick-broadcast-hrtimer.c - * This file emulates a local clock event device - * via a pseudo clock device. + * Emulate a local clock event device via a pseudo clock device. */ #include #include diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index be0aac2b4300..4f5abde2dfa7 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -1,6 +1,4 @@ /* - * linux/kernel/time/tick-broadcast.c - * * This file contains functions which emulate a local clock-event * device via a broadcast event source. * diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 14de3727b18e..7b5008039c2d 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -1,6 +1,4 @@ /* - * linux/kernel/time/tick-common.c - * * This file contains the base functions to manage periodic tick * related events. * diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 6fe615d57ebb..77989efe13d2 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -1,6 +1,4 @@ /* - * linux/kernel/time/tick-oneshot.c - * * This file contains functions which manage high resolution tick * related events. 
* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 69e673b88474..cb557e56a19f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1,6 +1,4 @@ /* - * linux/kernel/time/tick-sched.c - * * Copyright(C) 2005-2006, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner diff --git a/kernel/time/time.c b/kernel/time/time.c index ad204cf6d001..13ffa9950ffc 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -1,14 +1,10 @@ /* - * linux/kernel/time.c - * * Copyright (C) 1991, 1992 Linus Torvalds * - * This file contains the interface functions for the various - * time related system calls: time, stime, gettimeofday, settimeofday, - * adjtime - */ -/* - * Modification history kernel/time.c + * This file contains the interface functions for the various time related + * system calls: time, stime, gettimeofday, settimeofday, adjtime + * + * Modification history: * * 1993-09-02 Philip Gladstone * Created file with time related functions from sched/core.c and adjtimex() diff --git a/kernel/time/timecounter.c b/kernel/time/timecounter.c index 8afd78932bdf..400f3456d564 100644 --- a/kernel/time/timecounter.c +++ b/kernel/time/timecounter.c @@ -1,8 +1,5 @@ /* - * linux/kernel/time/timecounter.c - * - * based on code that migrated away from - * linux/kernel/time/clocksource.c + * Based on clocksource code. See commit 74d23cc704d1 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,7 +11,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
*/ - #include #include diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2d110c948805..30fdf48f50c2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1,13 +1,7 @@ /* - * linux/kernel/time/timekeeping.c - * - * Kernel timekeeping code and accessor functions - * - * This code was moved from linux/kernel/timer.c. - * Please see that file for copyright and history logs. - * + * Kernel timekeeping code and accessor functions. Based on code from + * timer.c, moved in commit 8524070b7982. */ - #include #include #include diff --git a/kernel/time/timer.c b/kernel/time/timer.c index fa49cd753dea..2f248bbedb4a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1,6 +1,4 @@ /* - * linux/kernel/timer.c - * * Kernel internal timers * * Copyright (C) 1991, 1992 Linus Torvalds diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index d647dabdac97..5d64fff384c8 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -1,6 +1,4 @@ /* - * kernel/time/timer_list.c - * * List pending timers * * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar -- cgit v1.2.3 From 35728b8209ee7d25b6241a56304ee926469bd154 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 31 Oct 2018 19:21:09 +0100 Subject: time: Add SPDX license identifiers Update the time(r) core files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Philippe Ombredanne, Kate Stewart and myself. The data has been created with two independent license scanners and manual inspection.
The following files do not contain any direct license information and have been omitted from the big initial SPDX changes: timeconst.bc: The .bc files were not touched time.c, timer.c, timekeeping.c: Licence was deduced from EXPORT_SYMBOL_GPL As those files do not contain direct license references they fall under the project license, i.e. GPL V2 only. Signed-off-by: Thomas Gleixner Acked-by: Kees Cook Acked-by: Ingo Molnar Acked-by: John Stultz Acked-by: Corey Minyard Cc: Peter Zijlstra Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Russell King Cc: Richard Cochran Cc: Nicolas Pitre Cc: David Riley Cc: Colin Cross Cc: Mark Brown Cc: H. Peter Anvin Cc: Paul E. McKenney Link: https://lkml.kernel.org/r/20181031182252.879109557@linutronix.de --- include/linux/hrtimer.h | 1 + kernel/time/alarmtimer.c | 1 + kernel/time/clockevents.c | 1 + kernel/time/clocksource.c | 1 + kernel/time/hrtimer.c | 1 + kernel/time/jiffies.c | 1 + kernel/time/posix-clock.c | 1 + kernel/time/posix-stubs.c | 1 + kernel/time/posix-timers.c | 1 + kernel/time/sched_clock.c | 1 + kernel/time/test_udelay.c | 1 + kernel/time/tick-broadcast.c | 1 + kernel/time/tick-common.c | 1 + kernel/time/tick-oneshot.c | 1 + kernel/time/tick-sched.c | 1 + kernel/time/time.c | 1 + kernel/time/timeconst.bc | 2 ++ kernel/time/timeconv.c | 1 + kernel/time/timecounter.c | 1 + kernel/time/timekeeping.c | 1 + kernel/time/timekeeping_debug.c | 1 + kernel/time/timer.c | 1 + kernel/time/timer_list.c | 1 + 23 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 50ebe2ad43e0..851e4231d3ab 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * hrtimers - High-resolution kernel timers * diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index fa5de5e8de61..69070d399d70 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -1,3 +1,4 @@ +// 
SPDX-License-Identifier: GPL-2.0 /* * Alarmtimer interface * diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 9b8c7c0fd113..0fdbdf17f8a2 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file contains functions which manage clock event devices. * diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1c5273fbd500..b1abeac5f3f7 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * This file contains the functions which manage clocksource drivers. * diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 223548bb81c6..16dacc8d3ca2 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2005-2006, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 9c3957fe9317..0deb0be2c445 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /*********************************************************************** * This file contains the jiffies based clocksource. 
* diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 4959815f4fd7..339e35e4605f 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Support for dynamic clock devices * diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index 989ccf028bde..b9f9f6f02e11 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Dummy stubs used when CONFIG_POSIX_TIMERS=n * diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index c72307c119d9..e8cd9aa6c9cf 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * 2002-10-15 Posix Clocks & timers * by George Anzinger george@mvista.com diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index b38b6628f89b..11570ba451cc 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Generic sched_clock() support, to extend low level hardware time * counters to full 64-bit ns values. diff --git a/kernel/time/test_udelay.c b/kernel/time/test_udelay.c index b0928ab3270f..d6a87bb2040f 100644 --- a/kernel/time/test_udelay.c +++ b/kernel/time/test_udelay.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * udelay() test kernel module * diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 4f5abde2dfa7..f4725f53d852 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file contains functions which emulate a local clock-event * device via a broadcast event source. 
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 7b5008039c2d..455b8d65a2b7 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file contains the base functions to manage periodic tick * related events. diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 77989efe13d2..1c8ad0fb33c0 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file contains functions which manage high resolution tick * related events. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index cb557e56a19f..62ecb2a802ca 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2005-2006, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar diff --git a/kernel/time/time.c b/kernel/time/time.c index 13ffa9950ffc..5aa0a156e331 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc index f83bbb81600b..7ed0e0fb5831 100644 --- a/kernel/time/timeconst.bc +++ b/kernel/time/timeconst.bc @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + scale=0 define gcd(a,b) { diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c index 7142580ad94f..589e0a552129 100644 --- a/kernel/time/timeconv.c +++ b/kernel/time/timeconv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: LGPL-2.0+ /* * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. * This file is part of the GNU C Library. 
diff --git a/kernel/time/timecounter.c b/kernel/time/timecounter.c index 400f3456d564..933462326489 100644 --- a/kernel/time/timecounter.c +++ b/kernel/time/timecounter.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Based on clocksource code. See commit 74d23cc704d1 * diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 30fdf48f50c2..cd02bd38cf2d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Kernel timekeeping code and accessor functions. Based on code from * timer.c, moved in commit 8524070b7982. diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index 238e4be60229..d06f09209fb7 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * debugfs file to track time spent in suspend * diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 2f248bbedb4a..444156debfa0 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Kernel internal timers * diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 5d64fff384c8..f81693cdf981 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * List pending timers * -- cgit v1.2.3 From f49c174b5f431db9fa17315269e288d4548b651c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 31 Oct 2018 19:21:10 +0100 Subject: hrtimers/tick/clockevents: Remove sloppy license references "For licencing details see kernel-base/COPYING" and similar license references have no value over the SPDX identifier. Remove them. Signed-off-by: Thomas Gleixner Acked-by: Kees Cook Acked-by: Ingo Molnar Acked-by: John Stultz Acked-by: Corey Minyard Cc: Peter Zijlstra Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Peter Anvin Cc: Russell King Cc: Richard Cochran Cc: "Paul E. 
McKenney" Cc: Nicolas Pitre Cc: David Riley Cc: Colin Cross Cc: Mark Brown Link: https://lkml.kernel.org/r/20181031182252.963632760@linutronix.de --- include/linux/hrtimer.h | 2 -- kernel/time/clockevents.c | 3 --- kernel/time/hrtimer.c | 2 -- kernel/time/tick-broadcast.c | 3 --- kernel/time/tick-common.c | 3 --- kernel/time/tick-oneshot.c | 3 --- kernel/time/tick-sched.c | 2 -- kernel/time/timer_list.c | 4 ---- 8 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 851e4231d3ab..2e8957eac4d4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -8,8 +8,6 @@ * data type definitions, declarations, prototypes * * Started by: Thomas Gleixner and Ingo Molnar - * - * For licencing details see kernel-base/COPYING */ #ifndef _LINUX_HRTIMER_H #define _LINUX_HRTIMER_H diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 0fdbdf17f8a2..5e77662dd2d9 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -5,9 +5,6 @@ * Copyright(C) 2005-2006, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner - * - * This code is licenced under the GPL version 2. For details see - * kernel-base/COPYING. */ #include diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 16dacc8d3ca2..f5cfa1b73d6f 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -20,8 +20,6 @@ * * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel * et. al. 
- * - * For licencing details see kernel-base/COPYING */ #include diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f4725f53d852..803fa67aace9 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -6,9 +6,6 @@ * Copyright(C) 2005-2006, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner - * - * This code is licenced under the GPL version 2. For details see - * kernel-base/COPYING. */ #include #include diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 455b8d65a2b7..529143b4c8d2 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -6,9 +6,6 @@ * Copyright(C) 2005-2006, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner - * - * This code is licenced under the GPL version 2. For details see - * kernel-base/COPYING. */ #include #include diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 1c8ad0fb33c0..f9745d47425a 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -6,9 +6,6 @@ * Copyright(C) 2005-2006, Thomas Gleixner * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner - * - * This code is licenced under the GPL version 2. For details see - * kernel-base/COPYING. */ #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 62ecb2a802ca..6fa52cd6df0b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -7,8 +7,6 @@ * No idle tick implementation for low and high resolution timers * * Started by: Thomas Gleixner and Ingo Molnar - * - * Distribute under GPLv2. 
*/ #include #include diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index f81693cdf981..98ba50dcb1b2 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -3,10 +3,6 @@ * List pending timers * * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include -- cgit v1.2.3 From 7b0c03ecc42fb223baf015877fee9d517c2c8af1 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 17 Nov 2018 17:17:21 +0100 Subject: dmaengine: dw-dmac: implement dma protection control setting This patch adds a new device-tree property that allows to specify the dma protection control bits for the all of the DMA controller's channel uniformly. Setting the "correct" bits can have a huge impact on the PPC460EX and APM82181 that use this DMA engine in combination with a DesignWare' SATA-II core (sata_dwc_460ex driver). In the OpenWrt Forum, the user takimata reported that: |It seems your patch unleashed the full power of the SATA port. |Where I was previously hitting a really hard limit at around |82 MB/s for reading and 27 MB/s for writing, I am now getting this: | |root@OpenWrt:/mnt# time dd if=/dev/zero of=tempfile bs=1M count=1024 |1024+0 records in |1024+0 records out |real 0m 13.65s |user 0m 0.01s |sys 0m 11.89s | |root@OpenWrt:/mnt# time dd if=tempfile of=/dev/null bs=1M count=1024 |1024+0 records in |1024+0 records out |real 0m 8.41s |user 0m 0.01s |sys 0m 4.70s | |This means: 121 MB/s reading and 75 MB/s writing! | |The drive is a WD Green WD10EARX taken from an older MBL Single. |I repeated the test a few times with even larger files to rule out |any caching, I'm still seeing the same great performance. OpenWrt is |now completely on par with the original MBL firmware's performance. Another user And.short reported: |I can report that your fix worked! 
Boots up fine with two |drives even with more partitions, and no more reboot on |concurrent disk access! A closer look into the sata_dwc_460ex code revealed that the driver did initially set the correct protection control bits. However, this feature was lost when the sata_dwc_460ex driver was converted to the generic DMA driver framework. BugLink: https://forum.openwrt.org/t/wd-mybook-live-duo-two-disks/16195/55 BugLink: https://forum.openwrt.org/t/wd-mybook-live-duo-two-disks/16195/50 Fixes: 8b3444852a2b ("sata_dwc_460ex: move to generic DMA driver") Reviewed-by: Andy Shevchenko Signed-off-by: Christian Lamparter Signed-off-by: Vinod Koul --- drivers/dma/dw/core.c | 2 ++ drivers/dma/dw/platform.c | 6 ++++++ drivers/dma/dw/regs.h | 4 ++++ include/linux/platform_data/dma-dw.h | 6 ++++++ 4 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index d0c3e50b39fb..2c5ca1961256 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -160,12 +160,14 @@ static void dwc_initialize_chan_idma32(struct dw_dma_chan *dwc) static void dwc_initialize_chan_dw(struct dw_dma_chan *dwc) { + struct dw_dma *dw = to_dw_dma(dwc->chan.device); u32 cfghi = DWC_CFGH_FIFO_MODE; u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); bool hs_polarity = dwc->dws.hs_polarity; cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id); cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id); + cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl); /* Set polarity of handshake interface */ cfglo |= hs_polarity ?
DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0; diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index f01b2c173fa6..31ff8113c3de 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -162,6 +162,12 @@ dw_dma_parse_dt(struct platform_device *pdev) pdata->multi_block[tmp] = 1; } + if (!of_property_read_u32(np, "snps,dma-protection-control", &tmp)) { + if (tmp > CHAN_PROTCTL_MASK) + return NULL; + pdata->protctl = tmp; + } + return pdata; } #else diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 09e7dfdbb790..646c9c960c07 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -200,6 +200,10 @@ enum dw_dma_msize { #define DWC_CFGH_FCMODE (1 << 0) #define DWC_CFGH_FIFO_MODE (1 << 1) #define DWC_CFGH_PROTCTL(x) ((x) << 2) +#define DWC_CFGH_PROTCTL_DATA (0 << 2) /* data access - always set */ +#define DWC_CFGH_PROTCTL_PRIV (1 << 2) /* privileged -> AHB HPROT[1] */ +#define DWC_CFGH_PROTCTL_BUFFER (2 << 2) /* bufferable -> AHB HPROT[2] */ +#define DWC_CFGH_PROTCTL_CACHE (4 << 2) /* cacheable -> AHB HPROT[3] */ #define DWC_CFGH_DS_UPD_EN (1 << 5) #define DWC_CFGH_SS_UPD_EN (1 << 6) #define DWC_CFGH_SRC_PER(x) ((x) << 7) diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 896cb71a382c..1a1d58ebffbf 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -49,6 +49,7 @@ struct dw_dma_slave { * @data_width: Maximum data width supported by hardware per AHB master * (in bytes, power of 2) * @multi_block: Multi block transfers supported by hardware per channel. + * @protctl: Protection control signals setting per channel. 
*/ struct dw_dma_platform_data { unsigned int nr_channels; @@ -65,6 +66,11 @@ struct dw_dma_platform_data { unsigned char nr_masters; unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; unsigned char multi_block[DW_DMA_MAX_NR_CHANNELS]; +#define CHAN_PROTCTL_PRIVILEGED BIT(0) +#define CHAN_PROTCTL_BUFFERABLE BIT(1) +#define CHAN_PROTCTL_CACHEABLE BIT(2) +#define CHAN_PROTCTL_MASK GENMASK(2, 0) + unsigned char protctl; }; #endif /* _PLATFORM_DATA_DMA_DW_H */ -- cgit v1.2.3 From 2183435c251e09df11e1b431c84416424b5fd2ac Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 24 Nov 2018 12:01:41 +0300 Subject: net: fixup type in netdev_start_xmit() Return code should be formally "netdev_tx_t". Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4b4207ebd5c0..1eeb019d85a3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4373,7 +4373,7 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi struct netdev_queue *txq, bool more) { const struct net_device_ops *ops = dev->netdev_ops; - int rc; + netdev_tx_t rc; rc = __netdev_start_xmit(ops, skb, dev, more); if (rc == NETDEV_TX_OK) -- cgit v1.2.3 From 620344c43edfa020bbadfd81a144ebe5181fc94f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 25 Nov 2018 14:30:29 +0100 Subject: net: core: add __netdev_sent_queue as variant of __netdev_tx_sent_queue Similar to netdev_sent_queue add helper __netdev_sent_queue as variant of __netdev_tx_sent_queue. Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1eeb019d85a3..9b00043effa3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3226,6 +3226,14 @@ static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes) netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes); } +static inline bool __netdev_sent_queue(struct net_device *dev, + unsigned int bytes, + bool xmit_more) +{ + return __netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes, + xmit_more); +} + static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, unsigned int pkts, unsigned int bytes) { -- cgit v1.2.3 From 4bffc669d6248d655aeb985a0e51bfaaf21c8b40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 25 Nov 2018 08:26:23 -0800 Subject: net: remove unsafe skb_insert() I do not see how one can effectively use skb_insert() without holding some kind of lock. Otherwise other cpus could have changed the list right before we have a chance of acquiring list->lock. Only existing user is in drivers/infiniband/hw/nes/nes_mgt.c and this one probably meant to use __skb_insert() since it appears nesqp->pau_list is protected by nesqp->pau_lock. This looks like nesqp->pau_lock could be removed, since nesqp->pau_list.lock could be used instead. Signed-off-by: Eric Dumazet Cc: Faisal Latif Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/nes/nes_mgt.c | 4 ++-- include/linux/skbuff.h | 2 -- net/core/skbuff.c | 22 ---------------------- 3 files changed, 2 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index fc0c191014e9..cc4dce5c3e5f 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -551,14 +551,14 @@ static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct ne /* Queue skb by sequence number */ if (skb_queue_len(&nesqp->pau_list) == 0) { - skb_queue_head(&nesqp->pau_list, skb); + __skb_queue_head(&nesqp->pau_list, skb); } else { skb_queue_walk(&nesqp->pau_list, tmpskb) { cb = (struct nes_rskb_cb *)&tmpskb->cb[0]; if (before(seqnum, cb->seqnum)) break; } - skb_insert(tmpskb, skb, &nesqp->pau_list); + __skb_insert(skb, tmpskb->prev, tmpskb, &nesqp->pau_list); } if (nesqp->pau_state == PAU_READY) process_it = true; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f17a7452ac7b..73902acf2b71 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1749,8 +1749,6 @@ static inline void skb_queue_head_init_class(struct sk_buff_head *list, * The "__skb_xxxx()" functions are the non-atomic ones that * can only be called with interrupts disabled. 
*/ -void skb_insert(struct sk_buff *old, struct sk_buff *newsk, - struct sk_buff_head *list); static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9a8a72cefe9b..02cd7ae3d0fb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2990,28 +2990,6 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head } EXPORT_SYMBOL(skb_append); -/** - * skb_insert - insert a buffer - * @old: buffer to insert before - * @newsk: buffer to insert - * @list: list to use - * - * Place a packet before a given packet in a list. The list locks are - * taken and this function is atomic with respect to other list locked - * calls. - * - * A buffer cannot be placed on two lists at the same time. - */ -void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_insert(newsk, old->prev, old, list); - spin_unlock_irqrestore(&list->lock, flags); -} -EXPORT_SYMBOL(skb_insert); - static inline void skb_split_inside_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, const int pos) -- cgit v1.2.3 From 7f7c548c5f652375a61c1072bac3db11f7a48326 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 9 Oct 2018 14:43:18 +0000 Subject: usb: gadget: f_fs: Add support for CCID descriptors. Nothing to remap, only check length. Define a minimal structure for CCID descriptor only used to check length. As this descriptor shares the same value as HID descriptors, keep track and compare current interface's class to expected HID and CCID standard values. 
Signed-off-by: Vincent Pelletier Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 29 ++++++++++++++++------ include/linux/usb/ccid.h | 51 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 include/linux/usb/ccid.h (limited to 'include/linux') diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 31e8bf3578c8..65b72e5c4605 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -1926,7 +1927,7 @@ typedef int (*ffs_os_desc_callback)(enum ffs_os_desc_type entity, static int __must_check ffs_do_single_desc(char *data, unsigned len, ffs_entity_callback entity, - void *priv) + void *priv, int *current_class) { struct usb_descriptor_header *_ds = (void *)data; u8 length; @@ -1984,6 +1985,7 @@ static int __must_check ffs_do_single_desc(char *data, unsigned len, __entity(INTERFACE, ds->bInterfaceNumber); if (ds->iInterface) __entity(STRING, ds->iInterface); + *current_class = ds->bInterfaceClass; } break; @@ -1997,11 +1999,22 @@ static int __must_check ffs_do_single_desc(char *data, unsigned len, } break; - case HID_DT_HID: - pr_vdebug("hid descriptor\n"); - if (length != sizeof(struct hid_descriptor)) - goto inv_length; - break; + case USB_TYPE_CLASS | 0x01: + if (*current_class == USB_INTERFACE_CLASS_HID) { + pr_vdebug("hid descriptor\n"); + if (length != sizeof(struct hid_descriptor)) + goto inv_length; + break; + } else if (*current_class == USB_INTERFACE_CLASS_CCID) { + pr_vdebug("ccid descriptor\n"); + if (length != sizeof(struct ccid_descriptor)) + goto inv_length; + break; + } else { + pr_vdebug("unknown descriptor: %d for class %d\n", + _ds->bDescriptorType, *current_class); + return -EINVAL; + } case USB_DT_OTG: if (length != sizeof(struct usb_otg_descriptor)) @@ -2058,6 +2071,7 @@ static int __must_check ffs_do_descs(unsigned count, char *data, unsigned 
len, { const unsigned _len = len; unsigned long num = 0; + int current_class = -1; ENTER(); @@ -2078,7 +2092,8 @@ static int __must_check ffs_do_descs(unsigned count, char *data, unsigned len, if (!data) return _len - len; - ret = ffs_do_single_desc(data, len, entity, priv); + ret = ffs_do_single_desc(data, len, entity, priv, + &current_class); if (unlikely(ret < 0)) { pr_debug("%s returns %d\n", __func__, ret); return ret; diff --git a/include/linux/usb/ccid.h b/include/linux/usb/ccid.h new file mode 100644 index 000000000000..3431446d6864 --- /dev/null +++ b/include/linux/usb/ccid.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Vincent Pelletier + */ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef __CCID_H +#define __CCID_H + +#include + +#define USB_INTERFACE_CLASS_CCID 0x0b + +struct ccid_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __le16 bcdCCID; + __u8 bMaxSlotIndex; + __u8 bVoltageSupport; + __le32 dwProtocols; + __le32 dwDefaultClock; + __le32 dwMaximumClock; + __u8 bNumClockSupported; + __le32 dwDataRate; + __le32 dwMaxDataRate; + __u8 bNumDataRatesSupported; + __le32 dwMaxIFSD; + __le32 dwSynchProtocols; + __le32 dwMechanical; + __le32 dwFeatures; + __le32 dwMaxCCIDMessageLength; + __u8 bClassGetResponse; + __u8 bClassEnvelope; + __le16 wLcdLayout; + __u8 bPINSupport; + __u8 bMaxCCIDBusySlots; +} __attribute__ ((packed)); + +#endif /* __CCID_H */ -- cgit v1.2.3 From 1052b8ac5282daf35df331edcbdb645839d17e6a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Nov 2018 08:21:49 -0700 Subject: blk-mq: when polling for IO, look for any completion If we want to support async IO polling, then we have to allow finding completions that aren't just for the one we are looking for. Always pass in -1 to the mq_ops->poll() helper, and have that return how many events were found in this poll loop. 
Signed-off-by: Jens Axboe --- block/blk-core.c | 13 +++++++-- block/blk-mq.c | 71 ++++++++++++++++++++++++------------------------ drivers/nvme/host/pci.c | 14 +++++----- drivers/nvme/host/rdma.c | 39 ++++++++++---------------- include/linux/blkdev.h | 2 +- 5 files changed, 70 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 04f5be473638..03c4202b69bf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1273,10 +1273,19 @@ blk_qc_t submit_bio(struct bio *bio) } EXPORT_SYMBOL(submit_bio); -bool blk_poll(struct request_queue *q, blk_qc_t cookie) +/** + * blk_poll - poll for IO completions + * @q: the queue + * @cookie: cookie passed back at IO submission time + * + * Description: + * Poll for completions on the passed in queue. Returns number of + * completed entries found. + */ +int blk_poll(struct request_queue *q, blk_qc_t cookie) { if (!q->poll_fn || !blk_qc_t_valid(cookie)) - return false; + return 0; if (current->plug) blk_flush_plug_list(current->plug, false); diff --git a/block/blk-mq.c b/block/blk-mq.c index b16204df65d1..ec6c79578332 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3285,15 +3285,12 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, return false; /* - * poll_nsec can be: + * If we get here, hybrid polling is enabled. 
Hence poll_nsec can be: * - * -1: don't ever hybrid sleep * 0: use half of prev avg * >0: use this specific value */ - if (q->poll_nsec == -1) - return false; - else if (q->poll_nsec > 0) + if (q->poll_nsec > 0) nsecs = q->poll_nsec; else nsecs = blk_mq_poll_nsecs(q, hctx, rq); @@ -3330,11 +3327,41 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, return true; } -static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) +static bool blk_mq_poll_hybrid(struct request_queue *q, + struct blk_mq_hw_ctx *hctx, blk_qc_t cookie) { - struct request_queue *q = hctx->queue; + struct request *rq; + + if (q->poll_nsec == -1) + return false; + + if (!blk_qc_t_is_internal(cookie)) + rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); + else { + rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie)); + /* + * With scheduling, if the request has completed, we'll + * get a NULL return here, as we clear the sched tag when + * that happens. The request still remains valid, like always, + * so we should be safe with just the NULL check. + */ + if (!rq) + return false; + } + + return blk_mq_poll_hybrid_sleep(q, hctx, rq); +} + +static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie) +{ + struct blk_mq_hw_ctx *hctx; long state; + if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return 0; + + hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; + /* * If we sleep, have the caller restart the poll loop to reset * the state. Like for the other success return cases, the @@ -3342,7 +3369,7 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) * the IO isn't complete, we'll get called again and will go * straight to the busy poll loop. 
*/ - if (blk_mq_poll_hybrid_sleep(q, hctx, rq)) + if (blk_mq_poll_hybrid(q, hctx, cookie)) return 1; hctx->poll_considered++; @@ -3353,7 +3380,7 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) hctx->poll_invoked++; - ret = q->mq_ops->poll(hctx, rq->tag); + ret = q->mq_ops->poll(hctx, -1U); if (ret > 0) { hctx->poll_success++; __set_current_state(TASK_RUNNING); @@ -3374,32 +3401,6 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) return 0; } -static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie) -{ - struct blk_mq_hw_ctx *hctx; - struct request *rq; - - if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - return 0; - - hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; - if (!blk_qc_t_is_internal(cookie)) - rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); - else { - rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie)); - /* - * With scheduling, if the request has completed, we'll - * get a NULL return here, as we clear the sched tag when - * that happens. The request still remains valid, like always, - * so we should be safe with just the NULL check. 
- */ - if (!rq) - return 0; - } - - return __blk_mq_poll(hctx, rq); -} - unsigned int blk_mq_rq_cpu(struct request *rq) { return rq->mq_ctx->cpu; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 57e790391b82..de50d80ecc84 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1012,15 +1012,15 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) } } -static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, - u16 *end, int tag) +static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, + u16 *end, unsigned int tag) { - bool found = false; + int found = 0; *start = nvmeq->cq_head; - while (!found && nvme_cqe_pending(nvmeq)) { - if (nvmeq->cqes[nvmeq->cq_head].command_id == tag) - found = true; + while (nvme_cqe_pending(nvmeq)) { + if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag) + found++; nvme_update_cq_head(nvmeq); } *end = nvmeq->cq_head; @@ -1062,7 +1062,7 @@ static irqreturn_t nvme_irq_check(int irq, void *data) static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) { u16 start, end; - bool found; + int found; if (!nvme_cqe_pending(nvmeq)) return 0; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d181cafedc58..c2c3e1a5b7af 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1409,12 +1409,11 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) WARN_ON_ONCE(ret); } -static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, - struct nvme_completion *cqe, struct ib_wc *wc, int tag) +static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, + struct nvme_completion *cqe, struct ib_wc *wc) { struct request *rq; struct nvme_rdma_request *req; - int ret = 0; rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id); if (!rq) { @@ -1422,7 +1421,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, "tag 0x%x on QP %#x not found\n", cqe->command_id, 
queue->qp->qp_num); nvme_rdma_error_recovery(queue->ctrl); - return ret; + return; } req = blk_mq_rq_to_pdu(rq); @@ -1437,6 +1436,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, nvme_rdma_error_recovery(queue->ctrl); } } else if (req->mr) { + int ret; + ret = nvme_rdma_inv_rkey(queue, req); if (unlikely(ret < 0)) { dev_err(queue->ctrl->ctrl.device, @@ -1445,19 +1446,14 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, nvme_rdma_error_recovery(queue->ctrl); } /* the local invalidation completion will end the request */ - return 0; + return; } - if (refcount_dec_and_test(&req->ref)) { - if (rq->tag == tag) - ret = 1; + if (refcount_dec_and_test(&req->ref)) nvme_end_request(rq, req->status, req->result); - } - - return ret; } -static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) +static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvme_rdma_qe *qe = container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe); @@ -1465,11 +1461,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) struct ib_device *ibdev = queue->device->dev; struct nvme_completion *cqe = qe->data; const size_t len = sizeof(struct nvme_completion); - int ret = 0; if (unlikely(wc->status != IB_WC_SUCCESS)) { nvme_rdma_wr_error(cq, wc, "RECV"); - return 0; + return; } ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE); @@ -1484,16 +1479,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); else - ret = nvme_rdma_process_nvme_rsp(queue, cqe, wc, tag); + nvme_rdma_process_nvme_rsp(queue, cqe, wc); ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE); nvme_rdma_post_recv(queue, qe); - return ret; -} - -static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) -{ - __nvme_rdma_recv_done(cq, wc, -1); } static int nvme_rdma_conn_established(struct 
nvme_rdma_queue *queue) @@ -1758,10 +1747,12 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) struct ib_cqe *cqe = wc.wr_cqe; if (cqe) { - if (cqe->done == nvme_rdma_recv_done) - found |= __nvme_rdma_recv_done(cq, &wc, tag); - else + if (cqe->done == nvme_rdma_recv_done) { + nvme_rdma_recv_done(cq, &wc); + found++; + } else { cqe->done(cq, &wc); + } } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9b53db06ad08..f3015e9b5ae3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -867,7 +867,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -bool blk_poll(struct request_queue *q, blk_qc_t cookie); +int blk_poll(struct request_queue *q, blk_qc_t cookie); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { -- cgit v1.2.3 From 9743139c5d11ab170f70a308dcb88c342390adfb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 16 Nov 2018 09:48:21 -0700 Subject: blk-mq: remove 'tag' parameter from mq_ops->poll() We always pass in -1 now and none of the callers use the tag value, remove the parameter. 
Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- drivers/nvme/host/pci.c | 8 ++++---- drivers/nvme/host/rdma.c | 2 +- include/linux/blk-mq.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index ec6c79578332..b66cca3ce1e5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3380,7 +3380,7 @@ static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie) hctx->poll_invoked++; - ret = q->mq_ops->poll(hctx, -1U); + ret = q->mq_ops->poll(hctx); if (ret > 0) { hctx->poll_success++; __set_current_state(TASK_RUNNING); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index de50d80ecc84..73effe586e5f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1075,14 +1075,14 @@ static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) return found; } -static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int nvme_poll(struct blk_mq_hw_ctx *hctx) { struct nvme_queue *nvmeq = hctx->driver_data; - return __nvme_poll(nvmeq, tag); + return __nvme_poll(nvmeq, -1); } -static int nvme_poll_noirq(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int nvme_poll_noirq(struct blk_mq_hw_ctx *hctx) { struct nvme_queue *nvmeq = hctx->driver_data; u16 start, end; @@ -1092,7 +1092,7 @@ static int nvme_poll_noirq(struct blk_mq_hw_ctx *hctx, unsigned int tag) return 0; spin_lock(&nvmeq->cq_lock); - found = nvme_process_cq(nvmeq, &start, &end, tag); + found = nvme_process_cq(nvmeq, &start, &end, -1); spin_unlock(&nvmeq->cq_lock); nvme_complete_cqes(nvmeq, start, end); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c2c3e1a5b7af..ccfde6c7c0a5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1736,7 +1736,7 @@ err: return BLK_STS_IOERR; } -static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) { struct 
nvme_rdma_queue *queue = hctx->driver_data; struct ib_cq *cq = queue->ib_cq; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 929e8abc5535..ca0520ca6437 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -132,7 +132,7 @@ typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); -typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); +typedef int (poll_fn)(struct blk_mq_hw_ctx *); typedef int (map_queues_fn)(struct blk_mq_tag_set *set); typedef bool (busy_fn)(struct request_queue *); typedef void (complete_fn)(struct request *); -- cgit v1.2.3 From 0a1b8b87d064a47fad9ec475316002da28559207 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Nov 2018 08:24:43 -0700 Subject: block: make blk_poll() take a parameter on whether to spin or not blk_poll() has always kept spinning until it found an IO. This is fine for SYNC polling, since we need to find one request we have pending, but in preparation for ASYNC polling it can be beneficial to just check if we have any entries available or not. Existing callers are converted to pass in 'spin == true', to retain the old behavior. 
Signed-off-by: Jens Axboe --- block/blk-core.c | 9 ++++++--- block/blk-mq.c | 6 +++--- drivers/nvme/host/multipath.c | 4 ++-- fs/block_dev.c | 4 ++-- fs/direct-io.c | 2 +- fs/iomap.c | 2 +- include/linux/blkdev.h | 4 ++-- mm/page_io.c | 2 +- 8 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 03c4202b69bf..9af56dbb84f1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1277,19 +1277,22 @@ EXPORT_SYMBOL(submit_bio); * blk_poll - poll for IO completions * @q: the queue * @cookie: cookie passed back at IO submission time + * @spin: whether to spin for completions * * Description: * Poll for completions on the passed in queue. Returns number of - * completed entries found. + * completed entries found. If @spin is true, then blk_poll will continue + * looping until at least one completion is found, unless the task is + * otherwise marked running (or we need to reschedule). */ -int blk_poll(struct request_queue *q, blk_qc_t cookie) +int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) { if (!q->poll_fn || !blk_qc_t_valid(cookie)) return 0; if (current->plug) blk_flush_plug_list(current->plug, false); - return q->poll_fn(q, cookie); + return q->poll_fn(q, cookie, spin); } EXPORT_SYMBOL_GPL(blk_poll); diff --git a/block/blk-mq.c b/block/blk-mq.c index b66cca3ce1e5..c2751f0a3ccc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -38,7 +38,7 @@ #include "blk-mq-sched.h" #include "blk-rq-qos.h" -static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie); +static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, bool spin); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); @@ -3352,7 +3352,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, return blk_mq_poll_hybrid_sleep(q, hctx, rq); } -static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie) +static int 
blk_mq_poll(struct request_queue *q, blk_qc_t cookie, bool spin) { struct blk_mq_hw_ctx *hctx; long state; @@ -3392,7 +3392,7 @@ static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie) if (current->state == TASK_RUNNING) return 1; - if (ret < 0) + if (ret < 0 || !spin) break; cpu_relax(); } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index f9eeb3b58632..ffebdd0ae34b 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -220,7 +220,7 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, return ret; } -static int nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) +static int nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc, bool spin) { struct nvme_ns_head *head = q->queuedata; struct nvme_ns *ns; @@ -230,7 +230,7 @@ static int nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) srcu_idx = srcu_read_lock(&head->srcu); ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu); if (likely(ns && nvme_path_is_optimized(ns))) - found = ns->queue->poll_fn(q, qc); + found = ns->queue->poll_fn(q, qc, spin); srcu_read_unlock(&head->srcu, srcu_idx); return found; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 64ba27b8b754..d233a59ea364 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -243,7 +243,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, break; if (!(iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(bdev_get_queue(bdev), qc)) + !blk_poll(bdev_get_queue(bdev), qc, true)) io_schedule(); } __set_current_state(TASK_RUNNING); @@ -423,7 +423,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) break; if (!(iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(bdev_get_queue(bdev), qc)) + !blk_poll(bdev_get_queue(bdev), qc, true)) io_schedule(); } __set_current_state(TASK_RUNNING); diff --git a/fs/direct-io.c b/fs/direct-io.c index ea07d5a34317..a5a4e5a1423e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -518,7 +518,7 
@@ static struct bio *dio_await_one(struct dio *dio) dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); if (!(dio->iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(dio->bio_disk->queue, dio->bio_cookie)) + !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true)) io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); diff --git a/fs/iomap.c b/fs/iomap.c index c5df035ace6f..74c1f37f0fd6 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1896,7 +1896,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (!(iocb->ki_flags & IOCB_HIPRI) || !dio->submit.last_queue || !blk_poll(dio->submit.last_queue, - dio->submit.cookie)) + dio->submit.cookie, true)) io_schedule(); } __set_current_state(TASK_RUNNING); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3015e9b5ae3..e3c0a8ec16a7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -283,7 +283,7 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); -typedef int (poll_q_fn) (struct request_queue *q, blk_qc_t); +typedef int (poll_q_fn) (struct request_queue *q, blk_qc_t, bool spin); struct bio_vec; typedef int (dma_drain_needed_fn)(struct request *); @@ -867,7 +867,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -int blk_poll(struct request_queue *q, blk_qc_t cookie); +int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { diff --git a/mm/page_io.c b/mm/page_io.c index a7271fa481f6..5bdfd21c1bd9 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -410,7 +410,7 @@ int swap_readpage(struct page *page, bool synchronous) if (!READ_ONCE(bio->bi_private)) break; - if (!blk_poll(disk->queue, qc)) + if (!blk_poll(disk->queue, 
qc, true)) break; } __set_current_state(TASK_RUNNING); -- cgit v1.2.3 From 7847a1455fc4574d53e349d60feb1e1106cdc012 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 9 Nov 2018 17:21:35 +0300 Subject: ACPI / glue: Add acpi_platform_notify() function Instead of relying on the "platform_notify" callback hook, introducing separate notification function acpi_platform_notify() and calling that directly from drivers core when device entries are added and removed. Signed-off-by: Heikki Krogerus Acked-by: Linus Walleij Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 1 - drivers/acpi/glue.c | 21 +++++++++++++-------- drivers/acpi/internal.h | 1 - drivers/base/core.c | 7 +++++++ include/linux/acpi.h | 10 ++++++++++ 5 files changed, 30 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index bb3d96dea6db..99d820a693a8 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1237,7 +1237,6 @@ static int __init acpi_init(void) acpi_kobj = NULL; } - init_acpi_device_notify(); result = acpi_bus_init(); if (result) { disable_acpi(); diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 12ba2bee8789..edd10b3c7ec8 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -296,7 +296,7 @@ int acpi_unbind_one(struct device *dev) } EXPORT_SYMBOL_GPL(acpi_unbind_one); -static int acpi_platform_notify(struct device *dev) +static int acpi_device_notify(struct device *dev) { struct acpi_bus_type *type = acpi_get_bus_type(dev); struct acpi_device *adev; @@ -343,7 +343,7 @@ static int acpi_platform_notify(struct device *dev) return ret; } -static int acpi_platform_notify_remove(struct device *dev) +static int acpi_device_notify_remove(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); struct acpi_bus_type *type; @@ -361,12 +361,17 @@ static int acpi_platform_notify_remove(struct device *dev) return 0; } -void __init init_acpi_device_notify(void) +int 
acpi_platform_notify(struct device *dev, enum kobject_action action) { - if (platform_notify || platform_notify_remove) { - printk(KERN_ERR PREFIX "Can't use platform_notify\n"); - return; + switch (action) { + case KOBJ_ADD: + acpi_device_notify(dev); + break; + case KOBJ_REMOVE: + acpi_device_notify_remove(dev); + break; + default: + break; } - platform_notify = acpi_platform_notify; - platform_notify_remove = acpi_platform_notify_remove; + return 0; } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 530a3f675490..83a7dfb7d1cf 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -23,7 +23,6 @@ int early_acpi_osi_init(void); int acpi_osi_init(void); acpi_status acpi_os_initialize1(void); -void init_acpi_device_notify(void); int acpi_scan_init(void); void acpi_pci_root_init(void); void acpi_pci_link_init(void); diff --git a/drivers/base/core.c b/drivers/base/core.c index 3972ef3f080b..260cbdf44f1d 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -8,6 +8,7 @@ * Copyright (c) 2006 Novell, Inc. 
*/ +#include #include #include #include @@ -731,6 +732,12 @@ static inline int device_is_not_partition(struct device *dev) static int device_platform_notify(struct device *dev, enum kobject_action action) { + int ret; + + ret = acpi_platform_notify(dev, action); + if (ret) + return ret; + if (platform_notify && action == KOBJ_ADD) platform_notify(dev); else if (platform_notify_remove && action == KOBJ_REMOVE) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ed80f147bd50..4ba2e2d24676 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1313,4 +1313,14 @@ static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level) } #endif +#ifdef CONFIG_ACPI +extern int acpi_platform_notify(struct device *dev, enum kobject_action action); +#else +static inline int +acpi_platform_notify(struct device *dev, enum kobject_action action) +{ + return 0; +} +#endif + #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From 59abd83672f70cac4b6bf9b237506c5bc6837606 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 9 Nov 2018 17:21:36 +0300 Subject: drivers: base: Introducing software nodes to the firmware node framework Software node is a new struct fwnode_handle type that can be used to describe devices in kernel (software). It is meant to complement fwnodes representing real firmware nodes when they are incomplete (for example missing device properties) and to supply the primary fwnode when the firmware lacks hardware description for a device completely. The software node type is really meant to replace the currently used "property_set" struct fwnode_handle type. The handling of struct property_set is glued to the generic device property handling code, and it is not possible to create a struct property_set independently from the device that it is bind to. struct property_set is only created when device properties are added to already initialized struct device, and control of it is only possible from the generic property handling code. 
Software nodes are instead designed to be created independently from the device entries (struct device). It makes them much more flexible, as then the device meant to be bound to the node can be created at a later time, and from another location. It is also possible to bind multiple devices to a single software node if needed. The software node implementation also includes support for node hierarchy, which was the main motivation for this commit. The node hierarchy was something that was requested for the struct property_set, but it did not seem reasonable to try to extend the property_set support for that purpose. struct property_set was really meant only for device property handling like the name suggests. Support for struct property_set is not yet removed in this commit, but it will be in the following one. Signed-off-by: Heikki Krogerus Acked-by: Linus Walleij Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- .../ABI/testing/sysfs-devices-software_node | 10 + drivers/base/Makefile | 2 +- drivers/base/core.c | 4 + drivers/base/swnode.c | 494 +++++++++++++++++++++ include/linux/property.h | 12 + 5 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/sysfs-devices-software_node create mode 100644 drivers/base/swnode.c (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-devices-software_node b/Documentation/ABI/testing/sysfs-devices-software_node new file mode 100644 index 000000000000..85df37de359f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-software_node @@ -0,0 +1,10 @@ +What: /sys/devices/.../software_node/ +Date: January 2019 +Contact: Heikki Krogerus +Description: + This directory contains the details about the device that are + assigned in kernel (i.e. software), as opposed to the + firmware_node directory which contains the details that are + assigned for the device in firmware.
The main attributes in the + directory will show the properties the device has, and the + relationship it has to some of the other devices. diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 704f44295810..157452080f3d 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -6,7 +6,7 @@ obj-y := component.o core.o bus.o dd.o syscore.o \ cpu.o firmware.o init.o map.o devres.o \ attribute_container.o transport_class.o \ topology.o container.o property.o cacheinfo.o \ - devcon.o + devcon.o swnode.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o obj-y += power/ obj-$(CONFIG_ISA_BUS_API) += isa.o diff --git a/drivers/base/core.c b/drivers/base/core.c index 260cbdf44f1d..a2f14098663f 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -738,6 +738,10 @@ device_platform_notify(struct device *dev, enum kobject_action action) if (ret) return ret; + ret = software_node_notify(dev, action); + if (ret) + return ret; + if (platform_notify && action == KOBJ_ADD) platform_notify(dev); else if (platform_notify_remove && action == KOBJ_REMOVE) diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c new file mode 100644 index 000000000000..95423b72a3f4 --- /dev/null +++ b/drivers/base/swnode.c @@ -0,0 +1,494 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Software nodes for the firmware node framework. 
+ * + * Copyright (C) 2018, Intel Corporation + * Author: Heikki Krogerus + */ + +#include +#include +#include +#include + +struct software_node { + int id; + struct kobject kobj; + struct fwnode_handle fwnode; + + /* hierarchy */ + struct ida child_ids; + struct list_head entry; + struct list_head children; + struct software_node *parent; + + /* properties */ + const struct property_entry *properties; +}; + +static DEFINE_IDA(swnode_root_ids); +static struct kset *swnode_kset; + +#define kobj_to_swnode(_kobj_) container_of(_kobj_, struct software_node, kobj) + +static const struct fwnode_operations software_node_ops; + +bool is_software_node(const struct fwnode_handle *fwnode) +{ + return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &software_node_ops; +} + +#define to_software_node(__fwnode) \ + ({ \ + typeof(__fwnode) __to_software_node_fwnode = __fwnode; \ + \ + is_software_node(__to_software_node_fwnode) ? \ + container_of(__to_software_node_fwnode, \ + struct software_node, fwnode) : \ + NULL; \ + }) + +/* -------------------------------------------------------------------------- */ +/* property_entry processing */ + +static const struct property_entry * +property_entry_get(const struct property_entry *prop, const char *name) +{ + if (!prop) + return NULL; + + for (; prop->name; prop++) + if (!strcmp(name, prop->name)) + return prop; + + return NULL; +} + +static const void *property_get_pointer(const struct property_entry *prop) +{ + switch (prop->type) { + case DEV_PROP_U8: + if (prop->is_array) + return prop->pointer.u8_data; + return &prop->value.u8_data; + case DEV_PROP_U16: + if (prop->is_array) + return prop->pointer.u16_data; + return &prop->value.u16_data; + case DEV_PROP_U32: + if (prop->is_array) + return prop->pointer.u32_data; + return &prop->value.u32_data; + case DEV_PROP_U64: + if (prop->is_array) + return prop->pointer.u64_data; + return &prop->value.u64_data; + case DEV_PROP_STRING: + if (prop->is_array) + return prop->pointer.str; + return 
&prop->value.str; + default: + return NULL; + } +} + +static const void *property_entry_find(const struct property_entry *props, + const char *propname, size_t length) +{ + const struct property_entry *prop; + const void *pointer; + + prop = property_entry_get(props, propname); + if (!prop) + return ERR_PTR(-EINVAL); + pointer = property_get_pointer(prop); + if (!pointer) + return ERR_PTR(-ENODATA); + if (length > prop->length) + return ERR_PTR(-EOVERFLOW); + return pointer; +} + +static int property_entry_read_u8_array(const struct property_entry *props, + const char *propname, + u8 *values, size_t nval) +{ + const void *pointer; + size_t length = nval * sizeof(*values); + + pointer = property_entry_find(props, propname, length); + if (IS_ERR(pointer)) + return PTR_ERR(pointer); + + memcpy(values, pointer, length); + return 0; +} + +static int property_entry_read_u16_array(const struct property_entry *props, + const char *propname, + u16 *values, size_t nval) +{ + const void *pointer; + size_t length = nval * sizeof(*values); + + pointer = property_entry_find(props, propname, length); + if (IS_ERR(pointer)) + return PTR_ERR(pointer); + + memcpy(values, pointer, length); + return 0; +} + +static int property_entry_read_u32_array(const struct property_entry *props, + const char *propname, + u32 *values, size_t nval) +{ + const void *pointer; + size_t length = nval * sizeof(*values); + + pointer = property_entry_find(props, propname, length); + if (IS_ERR(pointer)) + return PTR_ERR(pointer); + + memcpy(values, pointer, length); + return 0; +} + +static int property_entry_read_u64_array(const struct property_entry *props, + const char *propname, + u64 *values, size_t nval) +{ + const void *pointer; + size_t length = nval * sizeof(*values); + + pointer = property_entry_find(props, propname, length); + if (IS_ERR(pointer)) + return PTR_ERR(pointer); + + memcpy(values, pointer, length); + return 0; +} + +static int +property_entry_count_elems_of_size(const struct 
property_entry *props, + const char *propname, size_t length) +{ + const struct property_entry *prop; + + prop = property_entry_get(props, propname); + if (!prop) + return -EINVAL; + + return prop->length / length; +} + +static int property_entry_read_int_array(const struct property_entry *props, + const char *name, + unsigned int elem_size, void *val, + size_t nval) +{ + if (!val) + return property_entry_count_elems_of_size(props, name, + elem_size); + switch (elem_size) { + case sizeof(u8): + return property_entry_read_u8_array(props, name, val, nval); + case sizeof(u16): + return property_entry_read_u16_array(props, name, val, nval); + case sizeof(u32): + return property_entry_read_u32_array(props, name, val, nval); + case sizeof(u64): + return property_entry_read_u64_array(props, name, val, nval); + } + + return -ENXIO; +} + +static int property_entry_read_string_array(const struct property_entry *props, + const char *propname, + const char **strings, size_t nval) +{ + const struct property_entry *prop; + const void *pointer; + size_t array_len, length; + + /* Find out the array length. */ + prop = property_entry_get(props, propname); + if (!prop) + return -EINVAL; + + if (prop->is_array) + /* Find the length of an array. */ + array_len = property_entry_count_elems_of_size(props, propname, + sizeof(const char *)); + else + /* The array length for a non-array string property is 1. */ + array_len = 1; + + /* Return how many there are if strings is NULL. 
*/ + if (!strings) + return array_len; + + array_len = min(nval, array_len); + length = array_len * sizeof(*strings); + + pointer = property_entry_find(props, propname, length); + if (IS_ERR(pointer)) + return PTR_ERR(pointer); + + memcpy(strings, pointer, length); + + return array_len; +} + +/* -------------------------------------------------------------------------- */ +/* fwnode operations */ + +static struct fwnode_handle *software_node_get(struct fwnode_handle *fwnode) +{ + struct software_node *swnode = to_software_node(fwnode); + + kobject_get(&swnode->kobj); + + return &swnode->fwnode; +} + +static void software_node_put(struct fwnode_handle *fwnode) +{ + struct software_node *swnode = to_software_node(fwnode); + + kobject_put(&swnode->kobj); +} + +static bool software_node_property_present(const struct fwnode_handle *fwnode, + const char *propname) +{ + return !!property_entry_get(to_software_node(fwnode)->properties, + propname); +} + +static int software_node_read_int_array(const struct fwnode_handle *fwnode, + const char *propname, + unsigned int elem_size, void *val, + size_t nval) +{ + struct software_node *swnode = to_software_node(fwnode); + + return property_entry_read_int_array(swnode->properties, propname, + elem_size, val, nval); +} + +static int software_node_read_string_array(const struct fwnode_handle *fwnode, + const char *propname, + const char **val, size_t nval) +{ + struct software_node *swnode = to_software_node(fwnode); + + return property_entry_read_string_array(swnode->properties, propname, + val, nval); +} + +struct fwnode_handle * +software_node_get_parent(const struct fwnode_handle *fwnode) +{ + struct software_node *swnode = to_software_node(fwnode); + + return swnode->parent ? 
&swnode->parent->fwnode : NULL; +} + +struct fwnode_handle * +software_node_get_next_child(const struct fwnode_handle *fwnode, + struct fwnode_handle *child) +{ + struct software_node *p = to_software_node(fwnode); + struct software_node *c = to_software_node(child); + + if (list_empty(&p->children) || + (c && list_is_last(&c->entry, &p->children))) + return NULL; + + if (c) + c = list_next_entry(c, entry); + else + c = list_first_entry(&p->children, struct software_node, entry); + return &c->fwnode; +} + + +static const struct fwnode_operations software_node_ops = { + .get = software_node_get, + .put = software_node_put, + .property_present = software_node_property_present, + .property_read_int_array = software_node_read_int_array, + .property_read_string_array = software_node_read_string_array, + .get_parent = software_node_get_parent, + .get_next_child_node = software_node_get_next_child, +}; + +/* -------------------------------------------------------------------------- */ + +static int +software_node_register_properties(struct software_node *swnode, + const struct property_entry *properties) +{ + struct property_entry *props; + + props = property_entries_dup(properties); + if (IS_ERR(props)) + return PTR_ERR(props); + + swnode->properties = props; + + return 0; +} + +static void software_node_release(struct kobject *kobj) +{ + struct software_node *swnode = kobj_to_swnode(kobj); + + if (swnode->parent) { + ida_simple_remove(&swnode->parent->child_ids, swnode->id); + list_del(&swnode->entry); + } else { + ida_simple_remove(&swnode_root_ids, swnode->id); + } + + ida_destroy(&swnode->child_ids); + property_entries_free(swnode->properties); + kfree(swnode); +} + +static struct kobj_type software_node_type = { + .release = software_node_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +struct fwnode_handle * +fwnode_create_software_node(const struct property_entry *properties, + const struct fwnode_handle *parent) +{ + struct software_node *p = NULL; + struct 
software_node *swnode; + char node_name[20]; + int ret; + + if (parent) { + if (IS_ERR(parent)) + return ERR_CAST(parent); + if (!is_software_node(parent)) + return ERR_PTR(-EINVAL); + p = to_software_node(parent); + } + + swnode = kzalloc(sizeof(*swnode), GFP_KERNEL); + if (!swnode) + return ERR_PTR(-ENOMEM); + + ret = ida_simple_get(p ? &p->child_ids : &swnode_root_ids, 0, 0, + GFP_KERNEL); + if (ret < 0) { + kfree(swnode); + return ERR_PTR(ret); + } + + swnode->id = ret; + sprintf(node_name, "node%d", swnode->id); + + swnode->kobj.kset = swnode_kset; + swnode->fwnode.ops = &software_node_ops; + + ida_init(&swnode->child_ids); + INIT_LIST_HEAD(&swnode->entry); + INIT_LIST_HEAD(&swnode->children); + swnode->parent = p; + + if (p) + list_add_tail(&swnode->entry, &p->children); + + ret = kobject_init_and_add(&swnode->kobj, &software_node_type, + p ? &p->kobj : NULL, node_name); + if (ret) { + kobject_put(&swnode->kobj); + return ERR_PTR(ret); + } + + ret = software_node_register_properties(swnode, properties); + if (ret) { + kobject_put(&swnode->kobj); + return ERR_PTR(ret); + } + + kobject_uevent(&swnode->kobj, KOBJ_ADD); + return &swnode->fwnode; +} +EXPORT_SYMBOL_GPL(fwnode_create_software_node); + +void fwnode_remove_software_node(struct fwnode_handle *fwnode) +{ + struct software_node *swnode = to_software_node(fwnode); + + if (!swnode) + return; + + kobject_put(&swnode->kobj); +} +EXPORT_SYMBOL_GPL(fwnode_remove_software_node); + +int software_node_notify(struct device *dev, unsigned long action) +{ + struct fwnode_handle *fwnode = dev_fwnode(dev); + struct software_node *swnode; + int ret; + + if (!fwnode) + return 0; + + if (!is_software_node(fwnode)) + fwnode = fwnode->secondary; + if (!is_software_node(fwnode)) + return 0; + + swnode = to_software_node(fwnode); + + switch (action) { + case KOBJ_ADD: + ret = sysfs_create_link(&dev->kobj, &swnode->kobj, + "software_node"); + if (ret) + break; + + ret = sysfs_create_link(&swnode->kobj, &dev->kobj, + 
dev_name(dev)); + if (ret) { + sysfs_remove_link(&dev->kobj, "software_node"); + break; + } + kobject_get(&swnode->kobj); + break; + case KOBJ_REMOVE: + sysfs_remove_link(&swnode->kobj, dev_name(dev)); + sysfs_remove_link(&dev->kobj, "software_node"); + kobject_put(&swnode->kobj); + break; + default: + break; + } + + return 0; +} + +static int __init software_node_init(void) +{ + swnode_kset = kset_create_and_add("software_nodes", NULL, kernel_kobj); + if (!swnode_kset) + return -ENOMEM; + return 0; +} +postcore_initcall(software_node_init); + +static void __exit software_node_exit(void) +{ + ida_destroy(&swnode_root_ids); + kset_unregister(swnode_kset); +} +__exitcall(software_node_exit); diff --git a/include/linux/property.h b/include/linux/property.h index ac8a1ebc4c1b..3789ec755fb6 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -311,4 +311,16 @@ fwnode_graph_get_remote_node(const struct fwnode_handle *fwnode, u32 port, int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); +/* -------------------------------------------------------------------------- */ +/* Software fwnode support - when HW description is incomplete or missing */ + +bool is_software_node(const struct fwnode_handle *fwnode); + +int software_node_notify(struct device *dev, unsigned long action); + +struct fwnode_handle * +fwnode_create_software_node(const struct property_entry *properties, + const struct fwnode_handle *parent); +void fwnode_remove_software_node(struct fwnode_handle *fwnode); + #endif /* _LINUX_PROPERTY_H_ */ -- cgit v1.2.3 From f8c6d1402b89f22a3647705d63cbd171aa19a77e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 23 Nov 2018 23:07:14 +0300 Subject: ACPI: fix acpi_find_child_device() invocation in acpi_preset_companion() acpi_find_child_device() accepts boolean not pointer as last argument. Signed-off-by: Alexey Dobriyan [ rjw: Subject ] Signed-off-by: Rafael J. 
Wysocki --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ed80f147bd50..f788cdbbd1b0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -101,7 +101,7 @@ static inline bool has_acpi_companion(struct device *dev) static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { - ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, NULL)); + ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, false)); } static inline const char *acpi_dev_name(struct acpi_device *adev) -- cgit v1.2.3 From 16c15eb16a793f2d81ae52f41f43fb6831b34212 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 26 Nov 2018 09:54:28 -0700 Subject: blk-mq: Return true if request was completed A driver may have internal state to cleanup if we're pretending a request didn't complete. Return 'false' if the command wasn't actually completed due to the timeout error injection, and true otherwise. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 +++-- include/linux/blk-mq.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 37674c1766a7..7c8cfa0cd420 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -638,11 +638,12 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx) * Ends all I/O on a request. It does not handle partial completions. * The actual completion happens out-of-order, through a IPI handler. 
**/ -void blk_mq_complete_request(struct request *rq) +bool blk_mq_complete_request(struct request *rq) { if (unlikely(blk_should_fake_timeout(rq->q))) - return; + return false; __blk_mq_complete_request(rq); + return true; } EXPORT_SYMBOL(blk_mq_complete_request); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ca0520ca6437..6e3da356a8eb 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -298,7 +298,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -void blk_mq_complete_request(struct request *rq); +bool blk_mq_complete_request(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio); bool blk_mq_queue_stopped(struct request_queue *q); -- cgit v1.2.3 From af78ff7c6e66832afcdf5418f67b11c409f9e7a1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 26 Nov 2018 09:54:30 -0700 Subject: blk-mq: Simplify request completion state There are no more users relying on blk-mq request states to prevent double completions, so replace the relatively expensive cmpxchg operation with WRITE_ONCE. 
Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 +--- include/linux/blk-mq.h | 14 -------------- 2 files changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 7c8cfa0cd420..cda698804422 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -568,9 +568,7 @@ static void __blk_mq_complete_request(struct request *rq) bool shared = false; int cpu; - if (!blk_mq_mark_complete(rq)) - return; - + WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); /* * Most of single queue controllers, there is only one irq vector * for handling IO completion, and the only irq's affinity is set diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 6e3da356a8eb..b8de11e0603b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -329,20 +329,6 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q); unsigned int blk_mq_rq_cpu(struct request *rq); -/** - * blk_mq_mark_complete() - Set request state to complete - * @rq: request to set to complete state - * - * Returns true if request state was successfully set to complete. If - * successful, the caller is responsibile for seeing this request is ended, as - * blk_mq_complete_request will not work again. - */ -static inline bool blk_mq_mark_complete(struct request *rq) -{ - return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) == - MQ_RQ_IN_FLIGHT; -} - /* * Driver command data is immediately after the request. So subtract request * size to get back to the original request, add request size to get the PDU. -- cgit v1.2.3 From 5f0ed774ed2914decfd397569fface997532e94d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Nov 2018 22:04:33 -0700 Subject: block: sum requests in the plug structure This isn't exactly the same as the previous count, as it includes requests for all devices. But that really doesn't matter, if we have more than the threshold (16) queued up, flush it. 
It's not worth it to have an expensive list loop for this. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 30 ++++-------------------------- block/blk-mq.c | 16 +++++----------- block/blk.h | 2 -- include/linux/blkdev.h | 1 + 4 files changed, 10 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 9af56dbb84f1..be9233400314 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -736,7 +736,6 @@ no_merge: * Caller must ensure !blk_queue_nomerges(q) beforehand. */ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int *request_count, struct request **same_queue_rq) { struct blk_plug *plug; @@ -746,22 +745,19 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, plug = current->plug; if (!plug) return false; - *request_count = 0; plug_list = &plug->mq_list; list_for_each_entry_reverse(rq, plug_list, queuelist) { bool merged = false; - if (rq->q == q) { - (*request_count)++; + if (rq->q == q && same_queue_rq) { /* * Only blk-mq multiple hardware queues case checks the * rq in the same queue, there should be only one such * rq in a queue **/ - if (same_queue_rq) - *same_queue_rq = rq; + *same_queue_rq = rq; } if (rq->q != q || !blk_rq_merge_ok(rq, bio)) @@ -788,26 +784,6 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, return false; } -unsigned int blk_plug_queued_count(struct request_queue *q) -{ - struct blk_plug *plug; - struct request *rq; - struct list_head *plug_list; - unsigned int ret = 0; - - plug = current->plug; - if (!plug) - goto out; - - plug_list = &plug->mq_list; - list_for_each_entry(rq, plug_list, queuelist) { - if (rq->q == q) - ret++; - } -out: - return ret; -} - void blk_init_request_from_bio(struct request *req, struct bio *bio) { if (bio->bi_opf & REQ_RAHEAD) @@ -1803,6 +1779,8 @@ void blk_start_plug(struct blk_plug *plug) INIT_LIST_HEAD(&plug->mq_list); 
INIT_LIST_HEAD(&plug->cb_list); + plug->rq_count = 0; + /* * Store ordering should not be needed here, since a potential * preempt will imply a full memory barrier diff --git a/block/blk-mq.c b/block/blk-mq.c index cda698804422..7b7dff85cf6c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1675,6 +1675,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) unsigned int depth; list_splice_init(&plug->mq_list, &list); + plug->rq_count = 0; list_sort(NULL, &list, plug_rq_cmp); @@ -1871,7 +1872,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) const int is_flush_fua = op_is_flush(bio->bi_opf); struct blk_mq_alloc_data data = { .flags = 0, .cmd_flags = bio->bi_opf }; struct request *rq; - unsigned int request_count = 0; struct blk_plug *plug; struct request *same_queue_rq = NULL; blk_qc_t cookie; @@ -1884,7 +1884,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; if (!is_flush_fua && !blk_queue_nomerges(q) && - blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) + blk_attempt_plug_merge(q, bio, &same_queue_rq)) return BLK_QC_T_NONE; if (blk_mq_sched_bio_merge(q, bio)) @@ -1915,20 +1915,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_insert_flush(rq); blk_mq_run_hw_queue(data.hctx, true); } else if (plug && q->nr_hw_queues == 1) { + unsigned int request_count = plug->rq_count; struct request *last = NULL; blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); - /* - * @request_count may become stale because of schedule - * out, so check the list again. 
- */ - if (list_empty(&plug->mq_list)) - request_count = 0; - else if (blk_queue_nomerges(q)) - request_count = blk_plug_queued_count(q); - if (!request_count) trace_block_plug(q); else @@ -1941,6 +1933,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } list_add_tail(&rq->queuelist, &plug->mq_list); + plug->rq_count++; } else if (plug && !blk_queue_nomerges(q)) { blk_mq_bio_to_request(rq, bio); @@ -1956,6 +1949,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (same_queue_rq) list_del_init(&same_queue_rq->queuelist); list_add_tail(&rq->queuelist, &plug->mq_list); + plug->rq_count++; blk_mq_put_ctx(data.ctx); diff --git a/block/blk.h b/block/blk.h index 610948157a5b..848278c52030 100644 --- a/block/blk.h +++ b/block/blk.h @@ -161,9 +161,7 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req, bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, struct bio *bio); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int *request_count, struct request **same_queue_rq); -unsigned int blk_plug_queued_count(struct request_queue *q); void blk_account_io_start(struct request *req, bool new_io); void blk_account_io_completion(struct request *req, unsigned int bytes); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e3c0a8ec16a7..02732cae6080 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1130,6 +1130,7 @@ extern void blk_set_queue_dying(struct request_queue *); struct blk_plug { struct list_head mq_list; /* blk-mq requests */ struct list_head cb_list; /* md requires an unplug callback */ + unsigned short rq_count; }; #define BLK_MAX_REQUEST_COUNT 16 #define BLK_PLUG_FLUSH_SIZE (128 * 1024) -- cgit v1.2.3 From 7ca5ce896524f5292e610b27d168269e5ab74951 Mon Sep 17 00:00:00 2001 From: Richard Gong Date: Tue, 13 Nov 2018 12:14:01 -0600 Subject: firmware: add Intel Stratix10 service layer driver 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some features of the Intel Stratix10 SoC require a level of privilege higher than the kernel is granted. Such secure features include FPGA programming. In terms of the ARMv8 architecture, the kernel runs at Exception Level 1 (EL1), access to the features requires Exception Level 3 (EL3). The Intel Stratix10 SoC service layer provides an in kernel API for drivers to request access to the secure features. The requests are queued and processed one by one. ARM’s SMCCC is used to pass the execution of the requests on to a secure monitor (EL3). The header file stratix10-sve-client.h defines the interface between service providers (FPGA manager is one of them) and service layer. The header file stratix10-smc.h defines the secure monitor call (SMC) message protocols used for service layer driver in normal world (EL1) to communicate with secure monitor SW in secure monitor exception level 3 (EL3). Signed-off-by: Richard Gong Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/Kconfig | 12 + drivers/firmware/Makefile | 1 + drivers/firmware/stratix10-svc.c | 1013 ++++++++++++++++++++ include/linux/firmware/intel/stratix10-smc.h | 265 +++++ .../linux/firmware/intel/stratix10-svc-client.h | 201 ++++ 5 files changed, 1492 insertions(+) create mode 100644 drivers/firmware/stratix10-svc.c create mode 100644 include/linux/firmware/intel/stratix10-smc.h create mode 100644 include/linux/firmware/intel/stratix10-svc-client.h (limited to 'include/linux') diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 7273e5082b41..f754578414f0 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -216,6 +216,18 @@ config FW_CFG_SYSFS_CMDLINE WARNING: Using incorrect parameters (base address in particular) may crash your system. 
+config INTEL_STRATIX10_SERVICE + tristate "Intel Stratix10 Service Layer" + depends on HAVE_ARM_SMCCC + default n + help + Intel Stratix10 service layer runs at privileged exception level, + interfaces with the service providers (FPGA manager is one of them) + and manages secure monitor call to communicate with secure monitor + software at secure monitor exception level. + + Say Y here if you want Stratix10 service layer support. + config QCOM_SCM bool depends on ARM || ARM64 diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 3158dffd9914..80feb635120f 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_DMI_SYSFS) += dmi-sysfs.o obj-$(CONFIG_EDD) += edd.o obj-$(CONFIG_EFI_PCDP) += pcdp.o obj-$(CONFIG_DMIID) += dmi-id.o +obj-$(CONFIG_INTEL_STRATIX10_SERVICE) += stratix10-svc.o obj-$(CONFIG_ISCSI_IBFT_FIND) += iscsi_ibft_find.o obj-$(CONFIG_ISCSI_IBFT) += iscsi_ibft.o obj-$(CONFIG_FIRMWARE_MEMMAP) += memmap.o diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c new file mode 100644 index 000000000000..168f52314963 --- /dev/null +++ b/drivers/firmware/stratix10-svc.c @@ -0,0 +1,1013 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017-2018, Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * SVC_NUM_DATA_IN_FIFO - number of struct stratix10_svc_data in the FIFO + * + * SVC_NUM_CHANNEL - number of channel supported by service layer driver + * + * FPGA_CONFIG_DATA_CLAIM_TIMEOUT_MS - claim back the submitted buffer(s) + * from the secure world for FPGA manager to reuse, or to free the buffer(s) + * when all bit-stream data had be send. 
+ * + * FPGA_CONFIG_STATUS_TIMEOUT_SEC - poll the FPGA configuration status, + * service layer will return error to FPGA manager when timeout occurs, + * timeout is set to 30 seconds (30 * 1000) at Intel Stratix10 SoC. + */ +#define SVC_NUM_DATA_IN_FIFO 32 +#define SVC_NUM_CHANNEL 1 +#define FPGA_CONFIG_DATA_CLAIM_TIMEOUT_MS 200 +#define FPGA_CONFIG_STATUS_TIMEOUT_SEC 30 + +typedef void (svc_invoke_fn)(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, + struct arm_smccc_res *); +struct stratix10_svc_chan; + +/** + * struct stratix10_svc_sh_memory - service shared memory structure + * @sync_complete: state for a completion + * @addr: physical address of shared memory block + * @size: size of shared memory block + * @invoke_fn: function to issue secure monitor or hypervisor call + * + * This struct is used to save physical address and size of shared memory + * block. The shared memory block is allocated by secure monitor software + * at secure world. + * + * Service layer driver uses the physical address and size to create a memory + * pool, then allocates data buffer from that memory pool for service client. + */ +struct stratix10_svc_sh_memory { + struct completion sync_complete; + unsigned long addr; + unsigned long size; + svc_invoke_fn *invoke_fn; +}; + +/** + * struct stratix10_svc_data_mem - service memory structure + * @vaddr: virtual address + * @paddr: physical address + * @size: size of memory + * @node: link list head node + * + * This struct is used in a list that keeps track of buffers which have + * been allocated or freed from the memory pool. Service layer driver also + * uses this struct to transfer physical address to virtual address.
+ */ +struct stratix10_svc_data_mem { + void *vaddr; + phys_addr_t paddr; + size_t size; + struct list_head node; +}; + +/** + * struct stratix10_svc_data - service data structure + * @chan: service channel + * @paddr: payload physical address + * @size: payload size + * @command: service command requested by client + * @flag: configuration type (full or partial) + * @arg: args to be passed via registers and not physically mapped buffers + * + * This struct is used in service FIFO for inter-process communication. + */ +struct stratix10_svc_data { + struct stratix10_svc_chan *chan; + phys_addr_t paddr; + size_t size; + u32 command; + u32 flag; + u64 arg[3]; +}; + +/** + * struct stratix10_svc_controller - service controller + * @dev: device + * @chans: array of service channels + * @num_chans: number of channels in 'chans' array + * @num_active_client: number of active service client + * @node: list management + * @genpool: memory pool pointing to the memory region + * @task: pointer to the thread task which handles SMC or HVC call + * @svc_fifo: a queue for storing service message data + * @complete_status: state for completion + * @svc_fifo_lock: protect access to service message data queue + * @invoke_fn: function to issue secure monitor call or hypervisor call + * + * This struct is used to create communication channels for service clients, to + * handle secure monitor or hypervisor call.
+ */ +struct stratix10_svc_controller { + struct device *dev; + struct stratix10_svc_chan *chans; + int num_chans; + int num_active_client; + struct list_head node; + struct gen_pool *genpool; + struct task_struct *task; + struct kfifo svc_fifo; + struct completion complete_status; + spinlock_t svc_fifo_lock; + svc_invoke_fn *invoke_fn; +}; + +/** + * struct stratix10_svc_chan - service communication channel + * @ctrl: pointer to service controller which is the provider of this channel + * @scl: pointer to service client which owns the channel + * @name: service client name associated with the channel + * @lock: protect access to the channel + * + * This struct is used by service client to communicate with service layer, each + * service client has its own channel created by service controller. + */ +struct stratix10_svc_chan { + struct stratix10_svc_controller *ctrl; + struct stratix10_svc_client *scl; + char *name; + spinlock_t lock; +}; + +static LIST_HEAD(svc_ctrl); +static LIST_HEAD(svc_data_mem); + +/** + * svc_pa_to_va() - translate physical address to virtual address + * @addr: to be translated physical address + * + * Return: valid virtual address or NULL if the provided physical + * address doesn't exist. + */ +static void *svc_pa_to_va(unsigned long addr) +{ + struct stratix10_svc_data_mem *pmem; + + pr_debug("claim back P-addr=0x%016x\n", (unsigned int)addr); + list_for_each_entry(pmem, &svc_data_mem, node) + if (pmem->paddr == addr) + return pmem->vaddr; + + /* physical address is not found */ + return NULL; +} + +/** + * svc_thread_cmd_data_claim() - claim back buffer from the secure world + * @ctrl: pointer to service layer controller + * @p_data: pointer to service data structure + * @cb_data: pointer to callback data structure to service client + * + * Claim back the submitted buffers from the secure world and pass buffer + * back to service client (FPGA manager, etc) for reuse. 
+ */ +static void svc_thread_cmd_data_claim(struct stratix10_svc_controller *ctrl, + struct stratix10_svc_data *p_data, + struct stratix10_svc_cb_data *cb_data) +{ + struct arm_smccc_res res; + unsigned long timeout; + + reinit_completion(&ctrl->complete_status); + timeout = msecs_to_jiffies(FPGA_CONFIG_DATA_CLAIM_TIMEOUT_MS); + + pr_debug("%s: claim back the submitted buffer\n", __func__); + do { + ctrl->invoke_fn(INTEL_SIP_SMC_FPGA_CONFIG_COMPLETED_WRITE, + 0, 0, 0, 0, 0, 0, 0, &res); + + if (res.a0 == INTEL_SIP_SMC_STATUS_OK) { + if (!res.a1) { + complete(&ctrl->complete_status); + break; + } + cb_data->status = BIT(SVC_STATUS_RECONFIG_BUFFER_DONE); + cb_data->kaddr1 = svc_pa_to_va(res.a1); + cb_data->kaddr2 = (res.a2) ? + svc_pa_to_va(res.a2) : NULL; + cb_data->kaddr3 = (res.a3) ? + svc_pa_to_va(res.a3) : NULL; + p_data->chan->scl->receive_cb(p_data->chan->scl, + cb_data); + } else { + pr_debug("%s: secure world busy, polling again\n", + __func__); + } + } while (res.a0 == INTEL_SIP_SMC_STATUS_OK || + res.a0 == INTEL_SIP_SMC_FPGA_CONFIG_STATUS_BUSY || + wait_for_completion_timeout(&ctrl->complete_status, timeout)); +} + +/** + * svc_thread_cmd_config_status() - check configuration status + * @ctrl: pointer to service layer controller + * @p_data: pointer to service data structure + * @cb_data: pointer to callback data structure to service client + * + * Check whether the secure firmware at secure world has finished the FPGA + * configuration, and then inform FPGA manager the configuration status. 
+ */ +static void svc_thread_cmd_config_status(struct stratix10_svc_controller *ctrl, + struct stratix10_svc_data *p_data, + struct stratix10_svc_cb_data *cb_data) +{ + struct arm_smccc_res res; + int count_in_sec; + + cb_data->kaddr1 = NULL; + cb_data->kaddr2 = NULL; + cb_data->kaddr3 = NULL; + cb_data->status = BIT(SVC_STATUS_RECONFIG_ERROR); + + pr_debug("%s: polling config status\n", __func__); + + count_in_sec = FPGA_CONFIG_STATUS_TIMEOUT_SEC; + while (count_in_sec) { + ctrl->invoke_fn(INTEL_SIP_SMC_FPGA_CONFIG_ISDONE, + 0, 0, 0, 0, 0, 0, 0, &res); + if ((res.a0 == INTEL_SIP_SMC_STATUS_OK) || + (res.a0 == INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR)) + break; + + /* + * configuration is still in progress, wait one second then + * poll again + */ + msleep(1000); + count_in_sec--; + }; + + if (res.a0 == INTEL_SIP_SMC_STATUS_OK && count_in_sec) + cb_data->status = BIT(SVC_STATUS_RECONFIG_COMPLETED); + + p_data->chan->scl->receive_cb(p_data->chan->scl, cb_data); +} + +/** + * svc_thread_recv_status_ok() - handle the successful status + * @p_data: pointer to service data structure + * @cb_data: pointer to callback data structure to service client + * @res: result from SMC or HVC call + * + * Send back the corresponding status to the service client (FPGA manager etc).
+ */ +static void svc_thread_recv_status_ok(struct stratix10_svc_data *p_data, + struct stratix10_svc_cb_data *cb_data, + struct arm_smccc_res res) +{ + cb_data->kaddr1 = NULL; + cb_data->kaddr2 = NULL; + cb_data->kaddr3 = NULL; + + switch (p_data->command) { + case COMMAND_RECONFIG: + cb_data->status = BIT(SVC_STATUS_RECONFIG_REQUEST_OK); + break; + case COMMAND_RECONFIG_DATA_SUBMIT: + cb_data->status = BIT(SVC_STATUS_RECONFIG_BUFFER_SUBMITTED); + break; + case COMMAND_NOOP: + cb_data->status = BIT(SVC_STATUS_RECONFIG_BUFFER_SUBMITTED); + cb_data->kaddr1 = svc_pa_to_va(res.a1); + break; + case COMMAND_RECONFIG_STATUS: + cb_data->status = BIT(SVC_STATUS_RECONFIG_COMPLETED); + break; + default: + pr_warn("it shouldn't happen\n"); + break; + } + + pr_debug("%s: call receive_cb\n", __func__); + p_data->chan->scl->receive_cb(p_data->chan->scl, cb_data); +} + +/** + * svc_normal_to_secure_thread() - the function to run in the kthread + * @data: data pointer for kthread function + * + * Service layer driver creates stratix10_svc_smc_hvc_call kthread on CPU + * node 0, its function stratix10_svc_secure_call_thread is used to handle + * SMC or HVC calls between kernel driver and secure monitor software. + * + * Return: 0 for success or -ENOMEM on error. 
+ */ +static int svc_normal_to_secure_thread(void *data) +{ + struct stratix10_svc_controller + *ctrl = (struct stratix10_svc_controller *)data; + struct stratix10_svc_data *pdata; + struct stratix10_svc_cb_data *cbdata; + struct arm_smccc_res res; + unsigned long a0, a1, a2; + int ret_fifo = 0; + + pdata = kmalloc(sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + cbdata = kmalloc(sizeof(*cbdata), GFP_KERNEL); + if (!cbdata) { + kfree(pdata); + return -ENOMEM; + } + + /* default set, to remove build warning */ + a0 = INTEL_SIP_SMC_FPGA_CONFIG_LOOPBACK; + a1 = 0; + a2 = 0; + + pr_debug("smc_hvc_shm_thread is running\n"); + + while (!kthread_should_stop()) { + ret_fifo = kfifo_out_spinlocked(&ctrl->svc_fifo, + pdata, sizeof(*pdata), + &ctrl->svc_fifo_lock); + + if (!ret_fifo) + continue; + + pr_debug("get from FIFO pa=0x%016x, command=%u, size=%u\n", + (unsigned int)pdata->paddr, pdata->command, + (unsigned int)pdata->size); + + switch (pdata->command) { + case COMMAND_RECONFIG_DATA_CLAIM: + svc_thread_cmd_data_claim(ctrl, pdata, cbdata); + continue; + case COMMAND_RECONFIG: + a0 = INTEL_SIP_SMC_FPGA_CONFIG_START; + pr_debug("conf_type=%u\n", (unsigned int)pdata->flag); + a1 = pdata->flag; + a2 = 0; + break; + case COMMAND_RECONFIG_DATA_SUBMIT: + a0 = INTEL_SIP_SMC_FPGA_CONFIG_WRITE; + a1 = (unsigned long)pdata->paddr; + a2 = (unsigned long)pdata->size; + break; + case COMMAND_RECONFIG_STATUS: + a0 = INTEL_SIP_SMC_FPGA_CONFIG_ISDONE; + a1 = 0; + a2 = 0; + break; + default: + pr_warn("it shouldn't happen\n"); + break; + } + pr_debug("%s: before SMC call -- a0=0x%016x a1=0x%016x", + __func__, (unsigned int)a0, (unsigned int)a1); + pr_debug(" a2=0x%016x\n", (unsigned int)a2); + + ctrl->invoke_fn(a0, a1, a2, 0, 0, 0, 0, 0, &res); + + pr_debug("%s: after SMC call -- res.a0=0x%016x", + __func__, (unsigned int)res.a0); + pr_debug(" res.a1=0x%016x, res.a2=0x%016x", + (unsigned int)res.a1, (unsigned int)res.a2); + pr_debug(" res.a3=0x%016x\n", (unsigned 
int)res.a3); + + switch (res.a0) { + case INTEL_SIP_SMC_STATUS_OK: + svc_thread_recv_status_ok(pdata, cbdata, res); + break; + case INTEL_SIP_SMC_FPGA_CONFIG_STATUS_BUSY: + switch (pdata->command) { + case COMMAND_RECONFIG_DATA_SUBMIT: + svc_thread_cmd_data_claim(ctrl, + pdata, cbdata); + break; + case COMMAND_RECONFIG_STATUS: + svc_thread_cmd_config_status(ctrl, + pdata, cbdata); + break; + default: + pr_warn("it shouldn't happen\n"); + break; + } + break; + case INTEL_SIP_SMC_FPGA_CONFIG_STATUS_REJECTED: + pr_debug("%s: STATUS_REJECTED\n", __func__); + break; + case INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR: + pr_err("%s: STATUS_ERROR\n", __func__); + cbdata->status = BIT(SVC_STATUS_RECONFIG_ERROR); + cbdata->kaddr1 = NULL; + cbdata->kaddr2 = NULL; + cbdata->kaddr3 = NULL; + pdata->chan->scl->receive_cb(pdata->chan->scl, cbdata); + break; + default: + pr_warn("it shouldn't happen\n"); + break; + } + }; + + kfree(cbdata); + kfree(pdata); + + return 0; +} + +/** + * svc_normal_to_secure_shm_thread() - the function to run in the kthread + * @data: data pointer for kthread function + * + * Service layer driver creates stratix10_svc_smc_hvc_shm kthread on CPU + * node 0, its function stratix10_svc_secure_shm_thread is used to query the + * physical address of memory block reserved by secure monitor software at + * secure world. + * + * svc_normal_to_secure_shm_thread() calls do_exit() directly since it is a + * standalone thread for which no one will call kthread_stop() or return when + * 'kthread_should_stop()' is true.
+ */ +static int svc_normal_to_secure_shm_thread(void *data) +{ + struct stratix10_svc_sh_memory + *sh_mem = (struct stratix10_svc_sh_memory *)data; + struct arm_smccc_res res; + + /* SMC or HVC call to get shared memory info from secure world */ + sh_mem->invoke_fn(INTEL_SIP_SMC_FPGA_CONFIG_GET_MEM, + 0, 0, 0, 0, 0, 0, 0, &res); + if (res.a0 == INTEL_SIP_SMC_STATUS_OK) { + sh_mem->addr = res.a1; + sh_mem->size = res.a2; + } else { + pr_err("%s: after SMC call -- res.a0=0x%016x", __func__, + (unsigned int)res.a0); + sh_mem->addr = 0; + sh_mem->size = 0; + } + + complete(&sh_mem->sync_complete); + do_exit(0); +} + +/** + * svc_get_sh_memory() - get memory block reserved by secure monitor SW + * @pdev: pointer to service layer device + * @sh_memory: pointer to service shared memory structure + * + * Return: zero for successfully getting the physical address of memory block + * reserved by secure monitor software, or negative value on error. + */ +static int svc_get_sh_memory(struct platform_device *pdev, + struct stratix10_svc_sh_memory *sh_memory) +{ + struct device *dev = &pdev->dev; + struct task_struct *sh_memory_task; + unsigned int cpu = 0; + + init_completion(&sh_memory->sync_complete); + + /* smc or hvc call happens on cpu 0 bound kthread */ + sh_memory_task = kthread_create_on_node(svc_normal_to_secure_shm_thread, + (void *)sh_memory, + cpu_to_node(cpu), + "svc_smc_hvc_shm_thread"); + if (IS_ERR(sh_memory_task)) { + dev_err(dev, "fail to create stratix10_svc_smc_shm_thread\n"); + return -EINVAL; + } + + wake_up_process(sh_memory_task); + + if (!wait_for_completion_timeout(&sh_memory->sync_complete, 10 * HZ)) { + dev_err(dev, + "timeout to get sh-memory paras from secure world\n"); + return -ETIMEDOUT; + } + + if (!sh_memory->addr || !sh_memory->size) { + dev_err(dev, + "fails to get shared memory info from secure world\n"); + return -ENOMEM; + } + + dev_dbg(dev, "SM software provides paddr: 0x%016x, size: 0x%08x\n", + (unsigned int)sh_memory->addr, + 
(unsigned int)sh_memory->size); + + return 0; +} + +/** + * svc_create_memory_pool() - create a memory pool from reserved memory block + * @pdev: pointer to service layer device + * @sh_memory: pointer to service shared memory structure + * + * Return: pool allocated from reserved memory block or ERR_PTR() on error. + */ +static struct gen_pool * +svc_create_memory_pool(struct platform_device *pdev, + struct stratix10_svc_sh_memory *sh_memory) +{ + struct device *dev = &pdev->dev; + struct gen_pool *genpool; + unsigned long vaddr; + phys_addr_t paddr; + size_t size; + phys_addr_t begin; + phys_addr_t end; + void *va; + size_t page_mask = PAGE_SIZE - 1; + int min_alloc_order = 3; + int ret; + + begin = roundup(sh_memory->addr, PAGE_SIZE); + end = rounddown(sh_memory->addr + sh_memory->size, PAGE_SIZE); + paddr = begin; + size = end - begin; + va = memremap(paddr, size, MEMREMAP_WC); + if (!va) { + dev_err(dev, "fail to remap shared memory\n"); + return ERR_PTR(-EINVAL); + } + vaddr = (unsigned long)va; + dev_dbg(dev, + "reserved memory vaddr: %p, paddr: 0x%16x size: 0x%8x\n", + va, (unsigned int)paddr, (unsigned int)size); + if ((vaddr & page_mask) || (paddr & page_mask) || + (size & page_mask)) { + dev_err(dev, "page is not aligned\n"); + return ERR_PTR(-EINVAL); + } + genpool = gen_pool_create(min_alloc_order, -1); + if (!genpool) { + dev_err(dev, "fail to create genpool\n"); + return ERR_PTR(-ENOMEM); + } + gen_pool_set_algo(genpool, gen_pool_best_fit, NULL); + ret = gen_pool_add_virt(genpool, vaddr, paddr, size, -1); + if (ret) { + dev_err(dev, "fail to add memory chunk to the pool\n"); + gen_pool_destroy(genpool); + return ERR_PTR(ret); + } + + return genpool; +} + +/** + * svc_smccc_smc() - secure monitor call between normal and secure world + * @a0: argument passed in registers 0 + * @a1: argument passed in registers 1 + * @a2: argument passed in registers 2 + * @a3: argument passed in registers 3 + * @a4: argument passed in registers 4 + * @a5: argument 
passed in registers 5 + * @a6: argument passed in registers 6 + * @a7: argument passed in registers 7 + * @res: result values from register 0 to 3 + */ +static void svc_smccc_smc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + unsigned long a4, unsigned long a5, + unsigned long a6, unsigned long a7, + struct arm_smccc_res *res) +{ + arm_smccc_smc(a0, a1, a2, a3, a4, a5, a6, a7, res); +} + +/** + * svc_smccc_hvc() - hypervisor call between normal and secure world + * @a0: argument passed in registers 0 + * @a1: argument passed in registers 1 + * @a2: argument passed in registers 2 + * @a3: argument passed in registers 3 + * @a4: argument passed in registers 4 + * @a5: argument passed in registers 5 + * @a6: argument passed in registers 6 + * @a7: argument passed in registers 7 + * @res: result values from register 0 to 3 + */ +static void svc_smccc_hvc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + unsigned long a4, unsigned long a5, + unsigned long a6, unsigned long a7, + struct arm_smccc_res *res) +{ + arm_smccc_hvc(a0, a1, a2, a3, a4, a5, a6, a7, res); +} + +/** + * get_invoke_func() - invoke SMC or HVC call + * @dev: pointer to device + * + * Return: function pointer to svc_smccc_smc or svc_smccc_hvc. + */ +static svc_invoke_fn *get_invoke_func(struct device *dev) +{ + const char *method; + + if (of_property_read_string(dev->of_node, "method", &method)) { + dev_warn(dev, "missing \"method\" property\n"); + return ERR_PTR(-ENXIO); + } + + if (!strcmp(method, "smc")) + return svc_smccc_smc; + if (!strcmp(method, "hvc")) + return svc_smccc_hvc; + + dev_warn(dev, "invalid \"method\" property: %s\n", method); + + return ERR_PTR(-EINVAL); +} + +/** + * stratix10_svc_request_channel_byname() - request a service channel + * @client: pointer to service client + * @name: service client name + * + * This function is used by service client to request a service channel. 
+ * + * Return: a pointer to channel assigned to the client on success, + * or ERR_PTR() on error. + */ +struct stratix10_svc_chan *stratix10_svc_request_channel_byname( + struct stratix10_svc_client *client, const char *name) +{ + struct device *dev = client->dev; + struct stratix10_svc_controller *controller; + struct stratix10_svc_chan *chan = NULL; + unsigned long flag; + int i; + + /* if probe was called after client's, or error on probe */ + if (list_empty(&svc_ctrl)) + return ERR_PTR(-EPROBE_DEFER); + + controller = list_first_entry(&svc_ctrl, + struct stratix10_svc_controller, node); + for (i = 0; i < SVC_NUM_CHANNEL; i++) { + if (!strcmp(controller->chans[i].name, name)) { + chan = &controller->chans[i]; + break; + } + } + + /* if there was no channel match */ + if (i == SVC_NUM_CHANNEL) { + dev_err(dev, "%s: channel not allocated\n", __func__); + return ERR_PTR(-EINVAL); + } + + if (chan->scl || !try_module_get(controller->dev->driver->owner)) { + dev_dbg(dev, "%s: svc not free\n", __func__); + return ERR_PTR(-EBUSY); + } + + spin_lock_irqsave(&chan->lock, flag); + chan->scl = client; + chan->ctrl->num_active_client++; + spin_unlock_irqrestore(&chan->lock, flag); + + return chan; +} +EXPORT_SYMBOL_GPL(stratix10_svc_request_channel_byname); + +/** + * stratix10_svc_free_channel() - free service channel + * @chan: service channel to be freed + * + * This function is used by service client to free a service channel. 
+ */ +void stratix10_svc_free_channel(struct stratix10_svc_chan *chan) +{ + unsigned long flag; + + spin_lock_irqsave(&chan->lock, flag); + chan->scl = NULL; + chan->ctrl->num_active_client--; + module_put(chan->ctrl->dev->driver->owner); + spin_unlock_irqrestore(&chan->lock, flag); +} +EXPORT_SYMBOL_GPL(stratix10_svc_free_channel); + +/** + * stratix10_svc_send() - send a message data to the remote + * @chan: service channel assigned to the client + * @msg: message data to be sent, in the format of + * "struct stratix10_svc_client_msg" + * + * This function is used by service client to add a message to the service + * layer driver's queue for being sent to the secure world. + * + * Return: 0 for success, -ENOMEM or -ENOBUFS on error. + */ +int stratix10_svc_send(struct stratix10_svc_chan *chan, void *msg) +{ + struct stratix10_svc_client_msg + *p_msg = (struct stratix10_svc_client_msg *)msg; + struct stratix10_svc_data_mem *p_mem; + struct stratix10_svc_data *p_data; + int ret = 0; + unsigned int cpu = 0; + + p_data = kzalloc(sizeof(*p_data), GFP_KERNEL); + if (!p_data) + return -ENOMEM; + + /* first client will create kernel thread */ + if (!chan->ctrl->task) { + chan->ctrl->task = + kthread_create_on_node(svc_normal_to_secure_thread, + (void *)chan->ctrl, + cpu_to_node(cpu), + "svc_smc_hvc_thread"); + if (IS_ERR(chan->ctrl->task)) { + dev_err(chan->ctrl->dev, + "fails to create svc_smc_hvc_thread\n"); + kfree(p_data); + return -EINVAL; + } + kthread_bind(chan->ctrl->task, cpu); + wake_up_process(chan->ctrl->task); + } + + pr_debug("%s: sent P-va=%p, P-com=%x, P-size=%u\n", __func__, + p_msg->payload, p_msg->command, + (unsigned int)p_msg->payload_length); + + if (list_empty(&svc_data_mem)) { + if (p_msg->command == COMMAND_RECONFIG) { + struct stratix10_svc_command_config_type *ct = + (struct stratix10_svc_command_config_type *) + p_msg->payload; + p_data->flag = ct->flags; + } + } else { + list_for_each_entry(p_mem, &svc_data_mem, node) + if (p_mem->vaddr == 
p_msg->payload) { + p_data->paddr = p_mem->paddr; + break; + } + } + + p_data->command = p_msg->command; + p_data->arg[0] = p_msg->arg[0]; + p_data->arg[1] = p_msg->arg[1]; + p_data->arg[2] = p_msg->arg[2]; + p_data->size = p_msg->payload_length; + p_data->chan = chan; + pr_debug("%s: put to FIFO pa=0x%016x, cmd=%x, size=%u\n", __func__, + (unsigned int)p_data->paddr, p_data->command, + (unsigned int)p_data->size); + ret = kfifo_in_spinlocked(&chan->ctrl->svc_fifo, p_data, + sizeof(*p_data), + &chan->ctrl->svc_fifo_lock); + + kfree(p_data); + + if (!ret) + return -ENOBUFS; + + return 0; +} +EXPORT_SYMBOL_GPL(stratix10_svc_send); + +/** + * stratix10_svc_done() - complete service request transactions + * @chan: service channel assigned to the client + * + * This function should be called when client has finished its request + * or there is an error in the request process. It allows the service layer + * to stop the running thread to have maximize savings in kernel resources. + */ +void stratix10_svc_done(struct stratix10_svc_chan *chan) +{ + /* stop thread when thread is running AND only one active client */ + if (chan->ctrl->task && chan->ctrl->num_active_client <= 1) { + pr_debug("svc_smc_hvc_shm_thread is stopped\n"); + kthread_stop(chan->ctrl->task); + chan->ctrl->task = NULL; + } +} +EXPORT_SYMBOL_GPL(stratix10_svc_done); + +/** + * stratix10_svc_allocate_memory() - allocate memory + * @chan: service channel assigned to the client + * @size: memory size requested by a specific service client + * + * Service layer allocates the requested number of bytes buffer from the + * memory pool, service client uses this function to get allocated buffers. + * + * Return: address of allocated memory on success, or ERR_PTR() on error. 
+ */ +void *stratix10_svc_allocate_memory(struct stratix10_svc_chan *chan, + size_t size) +{ + struct stratix10_svc_data_mem *pmem; + unsigned long va; + phys_addr_t pa; + struct gen_pool *genpool = chan->ctrl->genpool; + size_t s = roundup(size, 1 << genpool->min_alloc_order); + + pmem = devm_kzalloc(chan->ctrl->dev, sizeof(*pmem), GFP_KERNEL); + if (!pmem) + return ERR_PTR(-ENOMEM); + + va = gen_pool_alloc(genpool, s); + if (!va) + return ERR_PTR(-ENOMEM); + + memset((void *)va, 0, s); + pa = gen_pool_virt_to_phys(genpool, va); + + pmem->vaddr = (void *)va; + pmem->paddr = pa; + pmem->size = s; + list_add_tail(&pmem->node, &svc_data_mem); + pr_debug("%s: va=%p, pa=0x%016x\n", __func__, + pmem->vaddr, (unsigned int)pmem->paddr); + + return (void *)va; +} +EXPORT_SYMBOL_GPL(stratix10_svc_allocate_memory); + +/** + * stratix10_svc_free_memory() - free allocated memory + * @chan: service channel assigned to the client + * @kaddr: memory to be freed + * + * This function is used by service client to free allocated buffers. 
+ */ +void stratix10_svc_free_memory(struct stratix10_svc_chan *chan, void *kaddr) +{ + struct stratix10_svc_data_mem *pmem; + size_t size = 0; + + list_for_each_entry(pmem, &svc_data_mem, node) + if (pmem->vaddr == kaddr) { + size = pmem->size; + break; + } + + gen_pool_free(chan->ctrl->genpool, (unsigned long)kaddr, size); + pmem->vaddr = NULL; + list_del(&pmem->node); +} +EXPORT_SYMBOL_GPL(stratix10_svc_free_memory); + +static const struct of_device_id stratix10_svc_drv_match[] = { + {.compatible = "intel,stratix10-svc"}, + {}, +}; + +static int stratix10_svc_drv_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct stratix10_svc_controller *controller; + struct stratix10_svc_chan *chans; + struct gen_pool *genpool; + struct stratix10_svc_sh_memory *sh_memory; + svc_invoke_fn *invoke_fn; + size_t fifo_size; + int ret; + + /* get SMC or HVC function */ + invoke_fn = get_invoke_func(dev); + if (IS_ERR(invoke_fn)) + return -EINVAL; + + sh_memory = devm_kzalloc(dev, sizeof(*sh_memory), GFP_KERNEL); + if (!sh_memory) + return -ENOMEM; + + sh_memory->invoke_fn = invoke_fn; + ret = svc_get_sh_memory(pdev, sh_memory); + if (ret) + return ret; + + genpool = svc_create_memory_pool(pdev, sh_memory); + if (!genpool) + return -ENOMEM; + + /* allocate service controller and supporting channel */ + controller = devm_kzalloc(dev, sizeof(*controller), GFP_KERNEL); + if (!controller) + return -ENOMEM; + + chans = devm_kmalloc_array(dev, SVC_NUM_CHANNEL, + sizeof(*chans), GFP_KERNEL | __GFP_ZERO); + if (!chans) + return -ENOMEM; + + controller->dev = dev; + controller->num_chans = SVC_NUM_CHANNEL; + controller->num_active_client = 0; + controller->chans = chans; + controller->genpool = genpool; + controller->task = NULL; + controller->invoke_fn = invoke_fn; + init_completion(&controller->complete_status); + + fifo_size = sizeof(struct stratix10_svc_data) * SVC_NUM_DATA_IN_FIFO; + ret = kfifo_alloc(&controller->svc_fifo, fifo_size, GFP_KERNEL); + if 
(ret) { + dev_err(dev, "fails to allocate FIFO\n"); + return ret; + } + spin_lock_init(&controller->svc_fifo_lock); + + chans[0].scl = NULL; + chans[0].ctrl = controller; + chans[0].name = SVC_CLIENT_FPGA; + spin_lock_init(&chans[0].lock); + + list_add_tail(&controller->node, &svc_ctrl); + platform_set_drvdata(pdev, controller); + + pr_info("Intel Service Layer Driver Initialized\n"); + + return ret; +} + +static int stratix10_svc_drv_remove(struct platform_device *pdev) +{ + struct stratix10_svc_controller *ctrl = platform_get_drvdata(pdev); + + kfifo_free(&ctrl->svc_fifo); + if (ctrl->task) { + kthread_stop(ctrl->task); + ctrl->task = NULL; + } + if (ctrl->genpool) + gen_pool_destroy(ctrl->genpool); + list_del(&ctrl->node); + + return 0; +} + +static struct platform_driver stratix10_svc_driver = { + .probe = stratix10_svc_drv_probe, + .remove = stratix10_svc_drv_remove, + .driver = { + .name = "stratix10-svc", + .of_match_table = stratix10_svc_drv_match, + }, +}; + +static int __init stratix10_svc_init(void) +{ + struct device_node *fw_np; + struct device_node *np; + int ret; + + fw_np = of_find_node_by_name(NULL, "firmware"); + if (!fw_np) + return -ENODEV; + + np = of_find_matching_node(fw_np, stratix10_svc_drv_match); + if (!np) { + of_node_put(fw_np); + return -ENODEV; + } + + of_node_put(np); + ret = of_platform_populate(fw_np, stratix10_svc_drv_match, NULL, NULL); + of_node_put(fw_np); + if (ret) + return ret; + + return platform_driver_register(&stratix10_svc_driver); +} + +static void __exit stratix10_svc_exit(void) +{ + return platform_driver_unregister(&stratix10_svc_driver); +} + +subsys_initcall(stratix10_svc_init); +module_exit(stratix10_svc_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Intel Stratix10 Service Layer Driver"); +MODULE_AUTHOR("Richard Gong "); +MODULE_ALIAS("platform:stratix10-svc"); diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h new file mode 100644 index 
000000000000..a109e4ccbc7e --- /dev/null +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2017-2018, Intel Corporation + */ + +#ifndef __STRATIX10_SMC_H +#define __STRATIX10_SMC_H + +#include +#include + +/** + * This file defines the Secure Monitor Call (SMC) message protocol used for + * service layer driver in normal world (EL1) to communicate with secure + * monitor software in Secure Monitor Exception Level 3 (EL3). + * + * This file is shared with secure firmware (FW) which is out of kernel tree. + * + * An ARM SMC instruction takes a function identifier and up to 6 64-bit + * register values as arguments, and can return up to 4 64-bit register + * value. The operation of the secure monitor is determined by the parameter + * values passed in through registers. + * + * EL1 and EL3 communicates pointer as physical address rather than the + * virtual address. + * + * Functions specified by ARM SMC Calling convention: + * + * FAST call executes atomic operations, returns when the requested operation + * has completed. + * STD call starts a operation which can be preempted by a non-secure + * interrupt. The call can return before the requested operation has + * completed. + * + * a0..a7 is used as register names in the descriptions below, on arm32 + * that translates to r0..r7 and on arm64 to w0..w7. + */ + +/** + * @func_num: function ID + */ +#define INTEL_SIP_SMC_STD_CALL_VAL(func_num) \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_SIP, (func_num)) + +#define INTEL_SIP_SMC_FAST_CALL_VAL(func_num) \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_SIP, (func_num)) + +/** + * Return values in INTEL_SIP_SMC_* call + * + * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION: + * Secure monitor software doesn't recognize the request. 
+ * + * INTEL_SIP_SMC_STATUS_OK: + * FPGA configuration completed successfully, + * In case of FPGA configuration write operation, it means secure monitor + * software can accept the next chunk of FPGA configuration data. + * + * INTEL_SIP_SMC_FPGA_CONFIG_STATUS_BUSY: + * In case of FPGA configuration write operation, it means secure monitor + * software is still processing previous data & can't accept the next chunk + * of data. Service driver needs to issue + * INTEL_SIP_SMC_FPGA_CONFIG_COMPLETED_WRITE call to query the + * completed block(s). + * + * INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR: + * There is error during the FPGA configuration process. + */ +#define INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF +#define INTEL_SIP_SMC_STATUS_OK 0x0 +#define INTEL_SIP_SMC_FPGA_CONFIG_STATUS_BUSY 0x1 +#define INTEL_SIP_SMC_FPGA_CONFIG_STATUS_REJECTED 0x2 +#define INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR 0x4 +#define INTEL_SIP_SMC_REG_ERROR 0x5 + +/** + * Request INTEL_SIP_SMC_FPGA_CONFIG_START + * + * Sync call used by service driver at EL1 to request the FPGA in EL3 to + * be prepare to receive a new configuration. + * + * Call register usage: + * a0: INTEL_SIP_SMC_FPGA_CONFIG_START. + * a1: flag for full or partial configuration. 0 for full and 1 for partial + * configuration. + * a2-7: not used. + * + * Return status: + * a0: INTEL_SIP_SMC_STATUS_OK, or INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR. + * a1-3: not used. + */ +#define INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_START 1 +#define INTEL_SIP_SMC_FPGA_CONFIG_START \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_START) + +/** + * Request INTEL_SIP_SMC_FPGA_CONFIG_WRITE + * + * Async call used by service driver at EL1 to provide FPGA configuration data + * to secure world. + * + * Call register usage: + * a0: INTEL_SIP_SMC_FPGA_CONFIG_WRITE. + * a1: 64bit physical address of the configuration data memory block + * a2: Size of configuration data block. + * a3-7: not used. 
+ * + * Return status: + * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_FPGA_CONFIG_STATUS_BUSY or + * INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR. + * a1: 64bit physical address of 1st completed memory block if any completed + * block, otherwise zero value. + * a2: 64bit physical address of 2nd completed memory block if any completed + * block, otherwise zero value. + * a3: 64bit physical address of 3rd completed memory block if any completed + * block, otherwise zero value. + */ +#define INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_WRITE 2 +#define INTEL_SIP_SMC_FPGA_CONFIG_WRITE \ + INTEL_SIP_SMC_STD_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_WRITE) + +/** + * Request INTEL_SIP_SMC_FPGA_CONFIG_COMPLETED_WRITE + * + * Sync call used by service driver at EL1 to track the completed write + * transactions. This request is called after INTEL_SIP_SMC_FPGA_CONFIG_WRITE + * call returns INTEL_SIP_SMC_FPGA_CONFIG_STATUS_BUSY. + * + * Call register usage: + * a0: INTEL_SIP_SMC_FPGA_CONFIG_COMPLETED_WRITE. + * a1-7: not used. + * + * Return status: + * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_FPGA_CONFIG_STATUS_BUSY or + * INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR. + * a1: 64bit physical address of 1st completed memory block. + * a2: 64bit physical address of 2nd completed memory block if + * any completed block, otherwise zero value. + * a3: 64bit physical address of 3rd completed memory block if + * any completed block, otherwise zero value. + */ +#define INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE 3 +#define INTEL_SIP_SMC_FPGA_CONFIG_COMPLETED_WRITE \ +INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) + +/** + * Request INTEL_SIP_SMC_FPGA_CONFIG_ISDONE + * + * Sync call used by service driver at EL1 to inform secure world that all + * data are sent, to check whether or not the secure world had completed + * the FPGA configuration process. + * + * Call register usage: + * a0: INTEL_SIP_SMC_FPGA_CONFIG_ISDONE. + * a1-7: not used. 
+ * + * Return status: + * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_FPGA_CONFIG_STATUS_BUSY or + * INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR. + * a1-3: not used. + */ +#define INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_ISDONE 4 +#define INTEL_SIP_SMC_FPGA_CONFIG_ISDONE \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_ISDONE) + +/** + * Request INTEL_SIP_SMC_FPGA_CONFIG_GET_MEM + * + * Sync call used by service driver at EL1 to query the physical address of + * memory block reserved by secure monitor software. + * + * Call register usage: + * a0:INTEL_SIP_SMC_FPGA_CONFIG_GET_MEM. + * a1-7: not used. + * + * Return status: + * a0: INTEL_SIP_SMC_STATUS_OK or INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR. + * a1: start of physical address of reserved memory block. + * a2: size of reserved memory block. + * a3: not used. + */ +#define INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_GET_MEM 5 +#define INTEL_SIP_SMC_FPGA_CONFIG_GET_MEM \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_GET_MEM) + +/** + * Request INTEL_SIP_SMC_FPGA_CONFIG_LOOPBACK + * + * For SMC loop-back mode only, used for internal integration, debugging + * or troubleshooting. + * + * Call register usage: + * a0: INTEL_SIP_SMC_FPGA_CONFIG_LOOPBACK. + * a1-7: not used. + * + * Return status: + * a0: INTEL_SIP_SMC_STATUS_OK or INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR. + * a1-3: not used. + */ +#define INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_LOOPBACK 6 +#define INTEL_SIP_SMC_FPGA_CONFIG_LOOPBACK \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_LOOPBACK) + +/* + * Request INTEL_SIP_SMC_REG_READ + * + * Read a protected register at EL3 + * + * Call register usage: + * a0: INTEL_SIP_SMC_REG_READ. + * a1: register address. + * a2-7: not used. + * + * Return status: + * a0: INTEL_SIP_SMC_STATUS_OK or INTEL_SIP_SMC_REG_ERROR. + * a1: value in the register + * a2-3: not used. 
+ */ +#define INTEL_SIP_SMC_FUNCID_REG_READ 7 +#define INTEL_SIP_SMC_REG_READ \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_READ) + +/* + * Request INTEL_SIP_SMC_REG_WRITE + * + * Write a protected register at EL3 + * + * Call register usage: + * a0: INTEL_SIP_SMC_REG_WRITE. + * a1: register address + * a2: value to program into register. + * a3-7: not used. + * + * Return status: + * a0: INTEL_SIP_SMC_STATUS_OK or INTEL_SIP_SMC_REG_ERROR. + * a1-3: not used. + */ +#define INTEL_SIP_SMC_FUNCID_REG_WRITE 8 +#define INTEL_SIP_SMC_REG_WRITE \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE) + +/* + * Request INTEL_SIP_SMC_FUNCID_REG_UPDATE + * + * Update one or more bits in a protected register at EL3 using a + * read-modify-write operation. + * + * Call register usage: + * a0: INTEL_SIP_SMC_REG_UPDATE. + * a1: register address + * a2: write Mask. + * a3: value to write. + * a4-7: not used. + * + * Return status: + * a0: INTEL_SIP_SMC_STATUS_OK or INTEL_SIP_SMC_REG_ERROR. + * a1-3: Not used. + */ +#define INTEL_SIP_SMC_FUNCID_REG_UPDATE 9 +#define INTEL_SIP_SMC_REG_UPDATE \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_UPDATE) + +#endif diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h new file mode 100644 index 000000000000..f2fda7e1ca52 --- /dev/null +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2017-2018, Intel Corporation + */ + +#ifndef __STRATIX10_SVC_CLIENT_H +#define __STRATIX10_SVC_CLIENT_H + +/** + * Service layer driver supports client names + * + * fpga: for FPGA configuration + */ +#define SVC_CLIENT_FPGA "fpga" + +/** + * Status of the sent command, in bit number + * + * SVC_COMMAND_STATUS_RECONFIG_REQUEST_OK: + * Secure firmware accepts the request of FPGA reconfiguration. 
+ * + * SVC_STATUS_RECONFIG_BUFFER_SUBMITTED: + * Service client successfully submits FPGA configuration + * data buffer to secure firmware. + * + * SVC_COMMAND_STATUS_RECONFIG_BUFFER_DONE: + * Secure firmware completes data process, ready to accept the + * next WRITE transaction. + * + * SVC_COMMAND_STATUS_RECONFIG_COMPLETED: + * Secure firmware completes FPGA configuration successfully, FPGA should + * be in user mode. + * + * SVC_COMMAND_STATUS_RECONFIG_BUSY: + * FPGA configuration is still in process. + * + * SVC_COMMAND_STATUS_RECONFIG_ERROR: + * Error encountered during FPGA configuration. + */ +#define SVC_STATUS_RECONFIG_REQUEST_OK 0 +#define SVC_STATUS_RECONFIG_BUFFER_SUBMITTED 1 +#define SVC_STATUS_RECONFIG_BUFFER_DONE 2 +#define SVC_STATUS_RECONFIG_COMPLETED 3 +#define SVC_STATUS_RECONFIG_BUSY 4 +#define SVC_STATUS_RECONFIG_ERROR 5 + +/** + * Flag bit for COMMAND_RECONFIG + * + * COMMAND_RECONFIG_FLAG_PARTIAL: + * Set to FPGA configuration type (full or partial), the default + * is full reconfig. + */ +#define COMMAND_RECONFIG_FLAG_PARTIAL 0 + +/** + * Timeout settings for service clients: + * timeout value used in Stratix10 FPGA manager driver. 
+ */ +#define SVC_RECONFIG_REQUEST_TIMEOUT_MS 100 +#define SVC_RECONFIG_BUFFER_TIMEOUT_MS 240 + +struct stratix10_svc_chan; + +/** + * enum stratix10_svc_command_code - supported service commands + * + * @COMMAND_NOOP: do 'dummy' request for integration/debug/trouble-shooting + * + * @COMMAND_RECONFIG: ask for FPGA configuration preparation, return status + * is SVC_STATUS_RECONFIG_REQUEST_OK + * + * @COMMAND_RECONFIG_DATA_SUBMIT: submit buffer(s) of bit-stream data for the + * FPGA configuration, return status is SVC_STATUS_RECONFIG_BUFFER_SUBMITTED, + * or SVC_STATUS_RECONFIG_ERROR + * + * @COMMAND_RECONFIG_DATA_CLAIM: check the status of the configuration, return + * status is SVC_STATUS_RECONFIG_COMPLETED, or SVC_STATUS_RECONFIG_BUSY, or + * SVC_STATUS_RECONFIG_ERROR + * + * @COMMAND_RECONFIG_STATUS: check the status of the configuration, return + * status is SVC_STATUS_RECONFIG_COMPLETED, or SVC_STATUS_RECONFIG_BUSY, or + * SVC_STATUS_RECONFIG_ERROR + */ +enum stratix10_svc_command_code { + COMMAND_NOOP = 0, + COMMAND_RECONFIG, + COMMAND_RECONFIG_DATA_SUBMIT, + COMMAND_RECONFIG_DATA_CLAIM, + COMMAND_RECONFIG_STATUS +}; + +/** + * struct stratix10_svc_client_msg - message sent by client to service + * @payload: starting address of data need be processed + * @payload_length: data size in bytes + * @command: service command + * @arg: args to be passed via registers and not physically mapped buffers + */ +struct stratix10_svc_client_msg { + void *payload; + size_t payload_length; + enum stratix10_svc_command_code command; + u64 arg[3]; +}; + +/** + * struct stratix10_svc_command_config_type - config type + * @flags: flag bit for the type of FPGA configuration + */ +struct stratix10_svc_command_config_type { + u32 flags; +}; + +/** + * struct stratix10_svc_cb_data - callback data structure from service layer + * @status: the status of sent command + * @kaddr1: address of 1st completed data block + * @kaddr2: address of 2nd completed data block + * @kaddr3: address 
of 3rd completed data block + */ +struct stratix10_svc_cb_data { + u32 status; + void *kaddr1; + void *kaddr2; + void *kaddr3; +}; + +/** + * struct stratix10_svc_client - service client structure + * @dev: the client device + * @receive_cb: callback to provide service client the received data + * @priv: client private data + */ +struct stratix10_svc_client { + struct device *dev; + void (*receive_cb)(struct stratix10_svc_client *client, + struct stratix10_svc_cb_data *cb_data); + void *priv; +}; + +/** + * stratix10_svc_request_channel_byname() - request service channel + * @client: identity of the client requesting the channel + * @name: supporting client name defined above + * + * Return: a pointer to channel assigned to the client on success, + * or ERR_PTR() on error. + */ +struct stratix10_svc_chan +*stratix10_svc_request_channel_byname(struct stratix10_svc_client *client, + const char *name); + +/** + * stratix10_svc_free_channel() - free service channel. + * @chan: service channel to be freed + */ +void stratix10_svc_free_channel(struct stratix10_svc_chan *chan); + +/** + * stratix10_svc_allocate_memory() - allocate the momory + * @chan: service channel assigned to the client + * @size: number of bytes client requests + * + * Service layer allocates the requested number of bytes from the memory + * pool for the client. + * + * Return: the starting address of allocated memory on success, or + * ERR_PTR() on error. 
+ */ +void *stratix10_svc_allocate_memory(struct stratix10_svc_chan *chan, + size_t size); + +/** + * stratix10_svc_free_memory() - free allocated memory + * @chan: service channel assigned to the client + * @kaddr: starting address of memory to be free back to pool + */ +void stratix10_svc_free_memory(struct stratix10_svc_chan *chan, void *kaddr); + +/** + * stratix10_svc_send() - send a message to the remote + * @chan: service channel assigned to the client + * @msg: message data to be sent, in the format of + * struct stratix10_svc_client_msg + * + * Return: 0 for success, -ENOMEM or -ENOBUFS on error. + */ +int stratix10_svc_send(struct stratix10_svc_chan *chan, void *msg); + +/** + * intel_svc_done() - complete service request + * @chan: service channel assigned to the client + * + * This function is used by service client to inform service layer that + * client's service requests are completed, or there is an error in the + * request process. + */ +void stratix10_svc_done(struct stratix10_svc_chan *chan); +#endif + -- cgit v1.2.3 From 6b50d882d38d5a1e4c0c476712384067c19c744b Mon Sep 17 00:00:00 2001 From: Richard Gong Date: Tue, 13 Nov 2018 12:14:06 -0600 Subject: firmware: add remote status update client support Extend Intel Stratix10 service layer to support the second service layer client, Remote Status Update (RSU). RSU is used to provide our customers with protection against loading bad bitstreams onto their devices when those devices are booting from flash. 
Signed-off-by: Richard Gong Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/stratix10-svc.c | 35 +++++++++++++++- include/linux/firmware/intel/stratix10-smc.h | 47 ++++++++++++++++++++++ .../linux/firmware/intel/stratix10-svc-client.h | 20 ++++++++- 3 files changed, 98 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 168f52314963..81f3182e290d 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -34,7 +34,7 @@ * timeout is set to 30 seconds (30 * 1000) at Intel Stratix10 SoC. */ #define SVC_NUM_DATA_IN_FIFO 32 -#define SVC_NUM_CHANNEL 1 +#define SVC_NUM_CHANNEL 2 #define FPGA_CONFIG_DATA_CLAIM_TIMEOUT_MS 200 #define FPGA_CONFIG_STATUS_TIMEOUT_SEC 30 @@ -271,7 +271,7 @@ static void svc_thread_cmd_config_status(struct stratix10_svc_controller *ctrl, * @cb_data: pointer to callback data structure to service client * @res: result from SMC or HVC call * - * Send back the correspond status to the service client (FPGA manager etc). + * Send back the correspond status to the service clients. 
*/ static void svc_thread_recv_status_ok(struct stratix10_svc_data *p_data, struct stratix10_svc_cb_data *cb_data, @@ -295,6 +295,9 @@ static void svc_thread_recv_status_ok(struct stratix10_svc_data *p_data, case COMMAND_RECONFIG_STATUS: cb_data->status = BIT(SVC_STATUS_RECONFIG_COMPLETED); break; + case COMMAND_RSU_UPDATE: + cb_data->status = BIT(SVC_STATUS_RSU_OK); + break; default: pr_warn("it shouldn't happen\n"); break; @@ -373,6 +376,16 @@ static int svc_normal_to_secure_thread(void *data) a1 = 0; a2 = 0; break; + case COMMAND_RSU_STATUS: + a0 = INTEL_SIP_SMC_RSU_STATUS; + a1 = 0; + a2 = 0; + break; + case COMMAND_RSU_UPDATE: + a0 = INTEL_SIP_SMC_RSU_UPDATE; + a1 = pdata->arg[0]; + a2 = 0; + break; default: pr_warn("it shouldn't happen\n"); break; @@ -389,6 +402,19 @@ static int svc_normal_to_secure_thread(void *data) (unsigned int)res.a1, (unsigned int)res.a2); pr_debug(" res.a3=0x%016x\n", (unsigned int)res.a3); + if (pdata->command == COMMAND_RSU_STATUS) { + if (res.a0 == INTEL_SIP_SMC_RSU_ERROR) + cbdata->status = BIT(SVC_STATUS_RSU_ERROR); + else + cbdata->status = BIT(SVC_STATUS_RSU_OK); + + cbdata->kaddr1 = &res; + cbdata->kaddr2 = NULL; + cbdata->kaddr3 = NULL; + pdata->chan->scl->receive_cb(pdata->chan->scl, cbdata); + continue; + } + switch (res.a0) { case INTEL_SIP_SMC_STATUS_OK: svc_thread_recv_status_ok(pdata, cbdata, res); @@ -941,6 +967,11 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) chans[0].name = SVC_CLIENT_FPGA; spin_lock_init(&chans[0].lock); + chans[1].scl = NULL; + chans[1].ctrl = controller; + chans[1].name = SVC_CLIENT_RSU; + spin_lock_init(&chans[1].lock); + list_add_tail(&controller->node, &svc_ctrl); platform_set_drvdata(pdev, controller); diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h index a109e4ccbc7e..5be5dab50b13 100644 --- a/include/linux/firmware/intel/stratix10-smc.h +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -67,6 +67,12 @@ * * 
INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR: * There is error during the FPGA configuration process. + * + * INTEL_SIP_SMC_REG_ERROR: + * There is error during a read or write operation of the protected registers. + * + * INTEL_SIP_SMC_RSU_ERROR: + * There is error during a remote status update. */ #define INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF #define INTEL_SIP_SMC_STATUS_OK 0x0 @@ -74,6 +80,7 @@ #define INTEL_SIP_SMC_FPGA_CONFIG_STATUS_REJECTED 0x2 #define INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR 0x4 #define INTEL_SIP_SMC_REG_ERROR 0x5 +#define INTEL_SIP_SMC_RSU_ERROR 0x7 /** * Request INTEL_SIP_SMC_FPGA_CONFIG_START @@ -262,4 +269,44 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) #define INTEL_SIP_SMC_REG_UPDATE \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_UPDATE) +/* + * Request INTEL_SIP_SMC_RSU_STATUS + * + * Request remote status update boot log, call is synchronous. + * + * Call register usage: + * a0 INTEL_SIP_SMC_RSU_STATUS + * a1-7 not used + * + * Return status + * a0: Current Image + * a1: Last Failing Image + * a2: Version | State + * a3: Error details | Error location + * + * Or + * + * a0: INTEL_SIP_SMC_RSU_ERROR + */ +#define INTEL_SIP_SMC_FUNCID_RSU_STATUS 11 +#define INTEL_SIP_SMC_RSU_STATUS \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_STATUS) + +/* + * Request INTEL_SIP_SMC_RSU_UPDATE + * + * Request to set the offset of the bitstream to boot after reboot, call + * is synchronous. 
+ * + * Call register usage: + * a0 INTEL_SIP_SMC_RSU_UPDATE + * a1 64bit physical address of the configuration data memory in flash + * a2-7 not used + * + * Return status + * a0 INTEL_SIP_SMC_STATUS_OK + */ +#define INTEL_SIP_SMC_FUNCID_RSU_UPDATE 12 +#define INTEL_SIP_SMC_RSU_UPDATE \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_UPDATE) #endif diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index f2fda7e1ca52..e521f172a47a 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -10,8 +10,10 @@ * Service layer driver supports client names * * fpga: for FPGA configuration + * rsu: for remote status update */ #define SVC_CLIENT_FPGA "fpga" +#define SVC_CLIENT_RSU "rsu" /** * Status of the sent command, in bit number @@ -36,6 +38,9 @@ * * SVC_COMMAND_STATUS_RECONFIG_ERROR: * Error encountered during FPGA configuration. + * + * SVC_STATUS_RSU_OK: + * Secure firmware accepts the request of remote status update (RSU). */ #define SVC_STATUS_RECONFIG_REQUEST_OK 0 #define SVC_STATUS_RECONFIG_BUFFER_SUBMITTED 1 @@ -43,7 +48,8 @@ #define SVC_STATUS_RECONFIG_COMPLETED 3 #define SVC_STATUS_RECONFIG_BUSY 4 #define SVC_STATUS_RECONFIG_ERROR 5 - +#define SVC_STATUS_RSU_OK 6 +#define SVC_STATUS_RSU_ERROR 7 /** * Flag bit for COMMAND_RECONFIG * @@ -56,9 +62,11 @@ /** * Timeout settings for service clients: * timeout value used in Stratix10 FPGA manager driver. 
+ * timeout value used in RSU driver */ #define SVC_RECONFIG_REQUEST_TIMEOUT_MS 100 #define SVC_RECONFIG_BUFFER_TIMEOUT_MS 240 +#define SVC_RSU_REQUEST_TIMEOUT_MS 300 struct stratix10_svc_chan; @@ -81,13 +89,21 @@ struct stratix10_svc_chan; * @COMMAND_RECONFIG_STATUS: check the status of the configuration, return * status is SVC_STATUS_RECONFIG_COMPLETED, or SVC_STATUS_RECONFIG_BUSY, or * SVC_STATUS_RECONFIG_ERROR + * + * @COMMAND_RSU_STATUS: request remote system update boot log, return status + * is log data or SVC_STATUS_RSU_ERROR + * + * @COMMAND_RSU_UPDATE: set the offset of the bitstream to boot after reboot, + * return status is SVC_STATUS_RSU_OK or SVC_STATUS_RSU_ERROR */ enum stratix10_svc_command_code { COMMAND_NOOP = 0, COMMAND_RECONFIG, COMMAND_RECONFIG_DATA_SUBMIT, COMMAND_RECONFIG_DATA_CLAIM, - COMMAND_RECONFIG_STATUS + COMMAND_RECONFIG_STATUS, + COMMAND_RSU_STATUS, + COMMAND_RSU_UPDATE }; /** -- cgit v1.2.3 From 4d3c5c69191f98c7f7e699ff08d2fd96d7070ddb Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 26 Nov 2018 02:17:56 +0000 Subject: Drivers: hv: vmbus: Remove the useless API vmbus_get_outgoing_channel() Commit d86adf482b84 ("scsi: storvsc: Enable multi-queue support") removed the usage of the API in Jan 2017, and the API is not used since then. netvsc and storvsc have their own algorithms to determine the outgoing channel, so this API is useless. And the API is potentially unsafe, because it reads primary->num_sc without any lock held. This can be risky considering the RESCIND-OFFER message. Let's remove the API. Cc: Long Li Cc: Stephen Hemminger Cc: K. Y. Srinivasan Cc: Haiyang Zhang Signed-off-by: Dexuan Cui Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 1 - drivers/hv/channel_mgmt.c | 45 --------------------------------------------- include/linux/hyperv.h | 17 ----------------- 3 files changed, 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index de8193f3b838..f96a77b18bb9 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -703,7 +703,6 @@ int vmbus_disconnect_ring(struct vmbus_channel *channel) /* Snapshot the list of subchannels */ spin_lock_irqsave(&channel->lock, flags); list_splice_init(&channel->sc_list, &list); - channel->num_sc = 0; spin_unlock_irqrestore(&channel->lock, flags); list_for_each_entry_safe(cur_channel, tmp, &list, sc_list) { diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 6277597d3d58..82e673671087 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -405,7 +405,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel) primary_channel = channel->primary_channel; spin_lock_irqsave(&primary_channel->lock, flags); list_del(&channel->sc_list); - primary_channel->num_sc--; spin_unlock_irqrestore(&primary_channel->lock, flags); } @@ -483,7 +482,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) newchannel->primary_channel = channel; spin_lock_irqsave(&channel->lock, flags); list_add_tail(&newchannel->sc_list, &channel->sc_list); - channel->num_sc++; spin_unlock_irqrestore(&channel->lock, flags); } else { goto err_free_chan; @@ -1239,49 +1237,6 @@ cleanup: return ret; } -/* - * Retrieve the (sub) channel on which to send an outgoing request. - * When a primary channel has multiple sub-channels, we try to - * distribute the load equally amongst all available channels. 
- */ -struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) -{ - struct list_head *cur, *tmp; - int cur_cpu; - struct vmbus_channel *cur_channel; - struct vmbus_channel *outgoing_channel = primary; - int next_channel; - int i = 1; - - if (list_empty(&primary->sc_list)) - return outgoing_channel; - - next_channel = primary->next_oc++; - - if (next_channel > (primary->num_sc)) { - primary->next_oc = 0; - return outgoing_channel; - } - - cur_cpu = hv_cpu_number_to_vp_number(smp_processor_id()); - list_for_each_safe(cur, tmp, &primary->sc_list) { - cur_channel = list_entry(cur, struct vmbus_channel, sc_list); - if (cur_channel->state != CHANNEL_OPENED_STATE) - continue; - - if (cur_channel->target_vp == cur_cpu) - return cur_channel; - - if (i == next_channel) - return cur_channel; - - i++; - } - - return outgoing_channel; -} -EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel); - static void invoke_sc_cb(struct vmbus_channel *primary_channel) { struct list_head *cur, *tmp; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b3e24368930a..07a367f5e22f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -830,15 +830,6 @@ struct vmbus_channel { * All Sub-channels of a primary channel are linked here. */ struct list_head sc_list; - /* - * Current number of sub-channels. - */ - int num_sc; - /* - * Number of a sub-channel (position within sc_list) which is supposed - * to be used as the next outgoing channel. - */ - int next_oc; /* * The primary channel this sub-channel belongs to. * This will be NULL for the primary channel. @@ -965,14 +956,6 @@ void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, void (*chn_rescind_cb)(struct vmbus_channel *)); -/* - * Retrieve the (sub) channel on which to send an outgoing request. 
- * When a primary channel has multiple sub-channels, we choose a - * channel whose VCPU binding is closest to the VCPU on which - * this call is being made. - */ -struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary); - /* * Check if sub-channels have already been offerred. This API will be useful * when the driver is unloaded after establishing sub-channels. In this case, -- cgit v1.2.3 From 0f597ed435b9ea1296e25474b762bedceba97a50 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:18 -0800 Subject: net/mlx5: EQ, Introduce atomic notifier chain subscription API Use atomic_notifier_chain to fire firmware events at internal mlx5 core components such as eswitch/fpga/clock/FW tracer/etc. This is to avoid explicit calls from low level mlx5_core to upper components and to simplify the mlx5_core API for future developments. Simply provide register/unregister notifiers API and call the notifier chain on firmware async events. Example: to subscribe to a FW event: struct mlx5_nb port_event; MLX5_NB_INIT(&port_event, port_event_handler, PORT_CHANGE); mlx5_eq_notifier_register(mdev, &port_event); where: - port_event_handler is the notifier block callback. - PORT_CHANGE is the suffix of MLX5_EVENT_TYPE_PORT_CHANGE. The above will guarantee that port_event_handler will receive all FW events of the type MLX5_EVENT_TYPE_PORT_CHANGE. To receive all FW/HW events one can subscribe to MLX5_EVENT_TYPE_NOTIFY_ANY. The next few patches will start moving all mlx5 core components to use this new API and clean up the mlx5_eq_async_int MSI-X handler from component explicit calls and specific logic.
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 42 ++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 5 +++ .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 +++ include/linux/mlx5/device.h | 10 +++++- include/linux/mlx5/eq.h | 16 +++++++-- 5 files changed, 72 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 6ba8e401a0c7..34e4b2c246ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include @@ -68,8 +69,10 @@ struct mlx5_irq_info { struct mlx5_eq_table { struct list_head comp_eqs_list; struct mlx5_eq pages_eq; - struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; + struct mlx5_eq async_eq; + + struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; struct mutex lock; /* sync async eqs creations */ int num_comp_vectors; @@ -316,13 +319,17 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; - struct mlx5_core_dev *dev = eq->dev; + struct mlx5_eq_table *eqt; + struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; int set_ci = 0; u32 cqn = -1; u32 rsn; u8 port; + dev = eq->dev; + eqt = dev->priv.eq_table; + while ((eqe = next_eqe_sw(eq))) { /* * Make sure we read EQ entry contents after we've @@ -437,6 +444,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) break; } + if (likely(eqe->type < MLX5_EVENT_TYPE_MAX)) + atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); + else + mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type); + + atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); + ++eq->cons_index; ++set_ci; @@ -625,7 +639,7 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) int 
mlx5_eq_table_init(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table; - int err; + int i, err; eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL); if (!eq_table) @@ -638,6 +652,8 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) goto kvfree_eq_table; mutex_init(&eq_table->lock); + for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) + ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); return 0; @@ -1202,3 +1218,23 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) destroy_async_eqs(dev); free_irq_vectors(dev); } + +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + if (nb->event_type >= MLX5_EVENT_TYPE_MAX) + return -EINVAL; + + return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); +} + +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + if (nb->event_type >= MLX5_EVENT_TYPE_MAX) + return -EINVAL; + + return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 6d8c8a57d52b..c0fb6d72b695 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -4,6 +4,8 @@ #ifndef __LIB_MLX5_EQ_H__ #define __LIB_MLX5_EQ_H__ #include +#include +#include #define MLX5_MAX_IRQ_NAME (32) #define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) @@ -90,4 +92,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 21727d9eeb84..e06c6e16ffc9 100644 
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -78,6 +78,11 @@ do { \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +#define mlx5_core_warn_once(__dev, format, ...) \ + dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + #define mlx5_core_info(__dev, format, ...) \ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e326524bafcc..f7c8bebfe472 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -301,9 +301,15 @@ enum { MLX5_EVENT_QUEUE_TYPE_DCT = 6, }; +/* mlx5 components can subscribe to any one of these events via + * mlx5_eq_notifier_register API. + */ enum mlx5_event { + /* Special value to subscribe to any event */ + MLX5_EVENT_TYPE_NOTIFY_ANY = 0x0, + /* HW events enum start: comp events are not subscribable */ MLX5_EVENT_TYPE_COMP = 0x0, - + /* HW Async events enum start: subscribable events */ MLX5_EVENT_TYPE_PATH_MIG = 0x01, MLX5_EVENT_TYPE_COMM_EST = 0x02, MLX5_EVENT_TYPE_SQ_DRAINED = 0x03, @@ -341,6 +347,8 @@ enum mlx5_event { MLX5_EVENT_TYPE_FPGA_QP_ERROR = 0x21, MLX5_EVENT_TYPE_DEVICE_TRACER = 0x26, + + MLX5_EVENT_TYPE_MAX = MLX5_EVENT_TYPE_DEVICE_TRACER + 1, }; enum { diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 71d82c5a1a02..00045cc4ea11 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -4,8 +4,6 @@ #ifndef MLX5_CORE_EQ_H #define MLX5_CORE_EQ_H -#include - enum { MLX5_EQ_PAGEREQ_IDX = 0, MLX5_EQ_CMD_IDX = 1, @@ -22,6 +20,7 @@ enum { #define MLX5_NUM_SPARE_EQE (0x80) struct mlx5_eq; +struct mlx5_core_dev; struct mlx5_eq_param { u8 index; @@ -57,4 +56,17 @@ static inline u32 mlx5_eq_update_cc(struct mlx5_eq *eq, u32 cc) return cc; } +struct mlx5_nb { + struct notifier_block nb; + u8 event_type; +}; + +#define mlx5_nb_cof(ptr, type, member) \ + 
(container_of(container_of(ptr, struct mlx5_nb, nb), type, member)) + +#define MLX5_NB_INIT(name, handler, event) do { \ + (name)->nb.notifier_call = handler; \ + (name)->event_type = MLX5_EVENT_TYPE_##event; \ +} while (0) + #endif /* MLX5_CORE_EQ_H */ -- cgit v1.2.3 From 41069256e93045a45a2c359c9715439be0b47bf4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:21 -0800 Subject: net/mlx5: Clock, Use async events chain Remove the explicit call to mlx5_pps_event on MLX5_EVENT_TYPE_PPS_EVENT and let clock logic to register its own handler when its ready. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 ---- .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 24 +++++++++++++++------- .../net/ethernet/mellanox/mlx5/core/lib/clock.h | 3 --- include/linux/mlx5/driver.h | 4 +++- 4 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 8aabd23d2166..e5fcce9ca107 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -417,10 +417,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) mlx5_port_module_event(dev, eqe); break; - case MLX5_EVENT_TYPE_PPS_EVENT: - mlx5_pps_event(dev, eqe); - break; - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: mlx5_temp_warning_event(dev, eqe); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 0d90b1b4a3d3..d27c239e7d6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -33,6 +33,7 @@ #include #include #include +#include "lib/eq.h" #include "en.h" #include "clock.h" @@ -439,16 +440,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); } -void 
mlx5_pps_event(struct mlx5_core_dev *mdev, - struct mlx5_eqe *eqe) +static int mlx5_pps_event(struct notifier_block *nb, + unsigned long type, void *data) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct mlx5_core_dev *mdev = clock->mdev; struct ptp_clock_event ptp_event; - struct timespec64 ts; - u64 nsec_now, nsec_delta; u64 cycles_now, cycles_delta; + u64 nsec_now, nsec_delta, ns; + struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; - s64 ns; + struct timespec64 ts; unsigned long flags; switch (clock->ptp_info.pin_config[pin].func) { @@ -463,6 +465,7 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, } else { ptp_event.type = PTP_CLOCK_EXTTS; } + /* TODOL clock->ptp can be NULL if ptp_clock_register failes */ ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: @@ -481,8 +484,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, write_sequnlock_irqrestore(&clock->lock, flags); break; default: - mlx5_core_err(mdev, " Unhandled event\n"); + mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", + clock->ptp_info.pin_config[pin].func); } + + return NOTIFY_OK; } void mlx5_init_clock(struct mlx5_core_dev *mdev) @@ -567,6 +573,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) PTR_ERR(clock->ptp)); clock->ptp = NULL; } + + MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &clock->pps_nb); } void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) @@ -576,6 +585,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) return; + mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); if (clock->ptp) { ptp_clock_unregister(clock->ptp); clock->ptp = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index 263cb6e2aeee..31600924bdc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -36,7 +36,6 @@ #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) void mlx5_init_clock(struct mlx5_core_dev *mdev); void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); -void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { @@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, #else static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} -static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {} - static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { return -1; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f41e6713df10..99a23db9a929 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -671,6 +672,8 @@ struct mlx5_pps { }; struct mlx5_clock { + struct mlx5_core_dev *mdev; + struct mlx5_nb pps_nb; seqlock_t lock; struct cyclecounter cycles; struct timecounter tc; @@ -678,7 +681,6 @@ struct mlx5_clock { u32 nominal_c_mult; unsigned long overflow_period; struct delayed_work overflow_work; - struct mlx5_core_dev *mdev; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; struct mlx5_pps pps_info; -- cgit v1.2.3 From 0cf53c1247565b339a23d82a1853a0c41e9a2a34 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:23 -0800 Subject: net/mlx5: FWPage, Use async events chain Remove the explicit call to mlx5_core_req_pages_handler on MLX5_EVENT_TYPE_PAGE_REQUEST and let the FW page logic register its own handler when it is ready.
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 11 ------ drivers/net/ethernet/mellanox/mlx5/core/main.c | 27 +++++++------ .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 44 +++++++++++++++------- include/linux/mlx5/driver.h | 5 ++- 4 files changed, 47 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 7c8b2d89645b..7f6a644700eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -398,17 +398,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) mlx5_eq_cq_event(eq, cqn, eqe->type); break; - case MLX5_EVENT_TYPE_PAGE_REQUEST: - { - u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id); - s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages); - - mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", - func_id, npages); - mlx5_core_req_pages_handler(dev, func_id, npages); - } - break; - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: mlx5_port_module_event(dev, eqe); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 91022f141855..9e4cd2757ea8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -916,16 +916,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } - err = mlx5_pagealloc_start(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n"); - goto reclaim_boot_pages; - } - err = mlx5_cmd_init_hca(dev, sw_owner_id); if (err) { dev_err(&pdev->dev, "init hca failed\n"); - goto err_pagealloc_stop; + goto reclaim_boot_pages; } mlx5_set_driver_version(dev); @@ -953,6 +947,8 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_get_uars; } + mlx5_pagealloc_start(dev); + err = mlx5_eq_table_create(dev); if 
(err) { dev_err(&pdev->dev, "Failed to create EQs\n"); @@ -1039,6 +1035,7 @@ err_fw_tracer: mlx5_eq_table_destroy(dev); err_eq_table: + mlx5_pagealloc_stop(dev); mlx5_put_uars_page(dev, priv->uar); err_get_uars: @@ -1052,9 +1049,6 @@ err_stop_poll: goto out_err; } -err_pagealloc_stop: - mlx5_pagealloc_stop(dev); - reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); @@ -1100,16 +1094,18 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_fpga_device_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); + mlx5_pagealloc_stop(dev); mlx5_put_uars_page(dev, priv->uar); + if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev, cleanup); + err = mlx5_cmd_teardown_hca(dev); if (err) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); goto out; } - mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); mlx5_cmd_cleanup(dev); @@ -1186,12 +1182,14 @@ static int init_one(struct pci_dev *pdev, goto close_pci; } - mlx5_pagealloc_init(dev); + err = mlx5_pagealloc_init(dev); + if (err) + goto err_pagealloc_init; err = mlx5_load_one(dev, priv, true); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); - goto clean_health; + goto err_load_one; } request_module_nowait(MLX5_IB_MOD); @@ -1205,8 +1203,9 @@ static int init_one(struct pci_dev *pdev, clean_load: mlx5_unload_one(dev, priv, true); -clean_health: +err_load_one: mlx5_pagealloc_cleanup(dev); +err_pagealloc_init: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index e36d3e3675f9..a83b517b0714 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -37,6 +37,7 @@ #include #include #include "mlx5_core.h" +#include "lib/eq.h" enum { MLX5_PAGES_CANT_GIVE = 0, @@ -433,15 +434,28 @@ static 
void pages_work_handler(struct work_struct *work) kfree(req); } -void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages) +static int req_pages_handler(struct notifier_block *nb, + unsigned long type, void *data) { struct mlx5_pages_req *req; - + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + u16 func_id; + s32 npages; + + priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb); + dev = container_of(priv, struct mlx5_core_dev, priv); + eqe = data; + + func_id = be16_to_cpu(eqe->data.req_pages.func_id); + npages = be32_to_cpu(eqe->data.req_pages.num_pages); + mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", + func_id, npages); req = kzalloc(sizeof(*req), GFP_ATOMIC); if (!req) { mlx5_core_warn(dev, "failed to allocate pages request\n"); - return; + return NOTIFY_DONE; } req->dev = dev; @@ -449,6 +463,7 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, req->npages = npages; INIT_WORK(&req->work, pages_work_handler); queue_work(dev->priv.pg_wq, &req->work); + return NOTIFY_OK; } int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) @@ -524,29 +539,32 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) return 0; } -void mlx5_pagealloc_init(struct mlx5_core_dev *dev) +int mlx5_pagealloc_init(struct mlx5_core_dev *dev) { dev->priv.page_root = RB_ROOT; INIT_LIST_HEAD(&dev->priv.free_list); + dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); + if (!dev->priv.pg_wq) + return -ENOMEM; + + return 0; } void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) { - /* nothing */ + destroy_workqueue(dev->priv.pg_wq); } -int mlx5_pagealloc_start(struct mlx5_core_dev *dev) +void mlx5_pagealloc_start(struct mlx5_core_dev *dev) { - dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); - if (!dev->priv.pg_wq) - return -ENOMEM; - - return 0; + MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST); + 
mlx5_eq_notifier_register(dev, &dev->priv.pg_nb); } void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { - destroy_workqueue(dev->priv.pg_wq); + mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb); + flush_workqueue(dev->priv.pg_wq); } int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 99a23db9a929..61088ad33500 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -564,6 +564,7 @@ struct mlx5_priv { struct mlx5_eq_table *eq_table; /* pages stuff */ + struct mlx5_nb pg_nb; struct workqueue_struct *pg_wq; struct rb_root page_root; int fw_pages; @@ -962,9 +963,9 @@ int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); -void mlx5_pagealloc_init(struct mlx5_core_dev *dev); +int mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); -int mlx5_pagealloc_start(struct mlx5_core_dev *dev); +void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages); -- cgit v1.2.3 From 71edc69ca1a78ce18411a540c550a4ef1eb017cd Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:24 -0800 Subject: net/mlx5: CmdIF, Use async events chain Remove the explicit call to mlx5_cmd_comp_handler on MLX5_EVENT_TYPE_CMD and let the command interface register its own handler when it is ready.
Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 48 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 -- drivers/net/ethernet/mellanox/mlx5/core/health.c | 25 +---------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- include/linux/mlx5/driver.h | 2 + 5 files changed, 50 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 7b18aff955f1..8ab636d59edb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -40,9 +40,11 @@ #include #include #include +#include #include #include "mlx5_core.h" +#include "lib/eq.h" enum { CMD_IF_REV = 5, @@ -805,6 +807,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in) return MLX5_GET(mbox_in, in->first.data, opcode); } +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); + static void cb_timeout_handler(struct work_struct *work) { struct delayed_work *dwork = container_of(work, struct delayed_work, @@ -1412,14 +1416,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) up(&cmd->sem); } +static int cmd_comp_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_core_dev *dev; + struct mlx5_cmd *cmd; + struct mlx5_eqe *eqe; + + cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb); + dev = container_of(cmd, struct mlx5_core_dev, cmd); + eqe = data; + + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); + + return NOTIFY_OK; +} void mlx5_cmd_use_events(struct mlx5_core_dev *dev) { + MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD); + mlx5_eq_notifier_register(dev, &dev->cmd.nb); mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); } void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) { mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); + mlx5_eq_notifier_unregister(dev, &dev->cmd.nb); } static void free_msg(struct 
mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) @@ -1435,7 +1457,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1533,7 +1555,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) } } } -EXPORT_SYMBOL(mlx5_cmd_comp_handler); + +void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) +{ + unsigned long flags; + u64 vector; + + /* wait for pending handlers to complete */ + mlx5_eq_synchronize_cmd_irq(dev); + spin_lock_irqsave(&dev->cmd.alloc_lock, flags); + vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); + if (!vector) + goto no_trig; + + vector |= MLX5_TRIGGERED_CMD_COMP; + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector 0x%llx\n", vector); + mlx5_cmd_comp_handler(dev, vector, true); + return; + +no_trig: + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); +} static int status_to_err(u8 status) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 7f6a644700eb..b28869aa1a4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -368,10 +368,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) mlx5_srq_event(dev, rsn, eqe->type); break; - case MLX5_EVENT_TYPE_CMD: - mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); - break; - case MLX5_EVENT_TYPE_PORT_CHANGE: port = (eqe->data.port.port >> 4) & 0xf; switch (eqe->sub_type) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 066883003aea..4e42bd290959 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -79,29 +79,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) &dev->iseg->cmdq_addr_l_sz); } -static void trigger_cmd_completions(struct mlx5_core_dev *dev) -{ - unsigned long flags; - u64 vector; - - /* wait for pending handlers to complete */ - mlx5_eq_synchronize_cmd_irq(dev); - spin_lock_irqsave(&dev->cmd.alloc_lock, flags); - vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); - if (!vector) - goto no_trig; - - vector |= MLX5_TRIGGERED_CMD_COMP; - spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); - - mlx5_core_dbg(dev, "vector 0x%llx\n", vector); - mlx5_cmd_comp_handler(dev, vector, true); - return; - -no_trig: - spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); -} - static int in_fatal(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -125,7 +102,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mlx5_core_err(dev, "start\n"); if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; - trigger_cmd_completions(dev); + mlx5_cmd_trigger_completions(dev); } mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index e06c6e16ffc9..5dd453e47a04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -127,7 +127,7 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); +void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); diff --git 
a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 61088ad33500..a8d638134fc8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -278,6 +278,8 @@ struct mlx5_cmd_stats { }; struct mlx5_cmd { + struct mlx5_nb nb; + void *cmd_alloc_buf; dma_addr_t alloc_dma; int alloc_size; -- cgit v1.2.3 From 221c14f3d12489ced0f2ca8b31b2221c5dbbf145 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:25 -0800 Subject: net/mlx5: Resource tables, Use async events chain Remove the explicit call to QP/SRQ resources events handlers on several FW events and let resources logic register resources events notifiers via the new API. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 29 ------------ drivers/net/ethernet/mellanox/mlx5/core/qp.c | 68 ++++++++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 55 +++++++++++++++++++--- include/linux/mlx5/driver.h | 6 ++- 4 files changed, 108 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index b28869aa1a4e..0cf448575ebd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -324,7 +324,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) struct mlx5_eqe *eqe; int set_ci = 0; u32 cqn = -1; - u32 rsn; u8 port; dev = eq->dev; @@ -340,34 +339,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type)); switch (eqe->type) { - case MLX5_EVENT_TYPE_DCT_DRAINED: - rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; - rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); - mlx5_rsc_event(dev, rsn, eqe->type); - break; - case MLX5_EVENT_TYPE_PATH_MIG: - case MLX5_EVENT_TYPE_COMM_EST: - case MLX5_EVENT_TYPE_SQ_DRAINED: - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - case 
MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); - mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", - eqe_type_str(eqe->type), eqe->type, rsn); - mlx5_rsc_event(dev, rsn, eqe->type); - break; - - case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: - case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: - rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n", - eqe_type_str(eqe->type), eqe->type, rsn); - mlx5_srq_event(dev, rsn, eqe->type); - break; - case MLX5_EVENT_TYPE_PORT_CHANGE: port = (eqe->data.port.port >> 4) & 0xf; switch (eqe->sub_type) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index cba4a435043a..28726c63101f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -38,11 +38,11 @@ #include #include "mlx5_core.h" +#include "lib/eq.h" -static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, - u32 rsn) +static struct mlx5_core_rsc_common * +mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) { - struct mlx5_qp_table *table = &dev->priv.qp_table; struct mlx5_core_rsc_common *common; spin_lock(&table->lock); @@ -53,11 +53,6 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, spin_unlock(&table->lock); - if (!common) { - mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", - rsn); - return NULL; - } return common; } @@ -120,14 +115,52 @@ static bool is_event_type_allowed(int rsc_type, int event_type) } } -void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) +static int rsc_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) { - struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn); + 
struct mlx5_core_rsc_common *common; + struct mlx5_qp_table *table; + struct mlx5_core_dev *dev; struct mlx5_core_dct *dct; + u8 event_type = (u8)type; struct mlx5_core_qp *qp; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + u32 rsn; + + switch (event_type) { + case MLX5_EVENT_TYPE_DCT_DRAINED: + eqe = data; + rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; + rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); + break; + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + eqe = data; + rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); + break; + default: + return NOTIFY_DONE; + } + + table = mlx5_nb_cof(nb, struct mlx5_qp_table, nb); + priv = container_of(table, struct mlx5_priv, qp_table); + dev = container_of(priv, struct mlx5_core_dev, priv); - if (!common) - return; + mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn); + + common = mlx5_get_rsc(table, rsn); + if (!common) { + mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn); + return NOTIFY_OK; + } if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) { mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n", @@ -152,6 +185,8 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) } out: mlx5_core_put_rsc(common); + + return NOTIFY_OK; } static int create_resource_common(struct mlx5_core_dev *dev, @@ -487,10 +522,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev) spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); mlx5_qp_debugfs_init(dev); + + MLX5_NB_INIT(&table->nb, rsc_event_notifier, NOTIFY_ANY); + mlx5_eq_notifier_register(dev, &table->nb); } void mlx5_cleanup_qp_table(struct 
mlx5_core_dev *dev) { + struct mlx5_qp_table *table = &dev->priv.qp_table; + + mlx5_eq_notifier_unregister(dev, &table->nb); mlx5_qp_debugfs_cleanup(dev); } @@ -676,8 +717,9 @@ struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, enum mlx5_res_type res_type) { u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN); + struct mlx5_qp_table *table = &dev->priv.qp_table; - return mlx5_get_rsc(dev, rsn); + return mlx5_get_rsc(table, rsn); } EXPORT_SYMBOL_GPL(mlx5_core_res_hold); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 6a6fc9be01e6..0563866c13f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -36,13 +36,25 @@ #include #include #include -#include "mlx5_core.h" #include +#include "mlx5_core.h" +#include "lib/eq.h" -void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) +static int srq_event_notifier(struct mlx5_srq_table *table, + unsigned long type, void *data) { - struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_dev *dev; struct mlx5_core_srq *srq; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + u32 srqn; + + priv = container_of(table, struct mlx5_priv, srq_table); + dev = container_of(priv, struct mlx5_core_dev, priv); + + eqe = data; + srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + mlx5_core_dbg(dev, "SRQ event (%d): srqn 0x%x\n", eqe->type, srqn); spin_lock(&table->lock); @@ -54,13 +66,35 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) if (!srq) { mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn); - return; + return NOTIFY_OK; } - srq->event(srq, event_type); + srq->event(srq, eqe->type); if (atomic_dec_and_test(&srq->refcount)) complete(&srq->free); + + return NOTIFY_OK; +} + +static int catas_err_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_srq_table *table; + + table = 
mlx5_nb_cof(nb, struct mlx5_srq_table, catas_err_nb); + /* type == MLX5_EVENT_TYPE_SRQ_CATAS_ERROR */ + return srq_event_notifier(table, type, data); +} + +static int rq_limit_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_srq_table *table; + + table = mlx5_nb_cof(nb, struct mlx5_srq_table, rq_limit_nb); + /* type == MLX5_EVENT_TYPE_SRQ_RQ_LIMIT */ + return srq_event_notifier(table, type, data); } static int get_pas_size(struct mlx5_srq_attr *in) @@ -708,9 +742,18 @@ void mlx5_init_srq_table(struct mlx5_core_dev *dev) memset(table, 0, sizeof(*table)); spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + + MLX5_NB_INIT(&table->catas_err_nb, catas_err_notifier, SRQ_CATAS_ERROR); + mlx5_eq_notifier_register(dev, &table->catas_err_nb); + + MLX5_NB_INIT(&table->rq_limit_nb, rq_limit_notifier, SRQ_RQ_LIMIT); + mlx5_eq_notifier_register(dev, &table->rq_limit_nb); } void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) { - /* nothing */ + struct mlx5_srq_table *table = &dev->priv.srq_table; + + mlx5_eq_notifier_unregister(dev, &table->rq_limit_nb); + mlx5_eq_notifier_unregister(dev, &table->catas_err_nb); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a8d638134fc8..afba0864f45c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -464,6 +464,8 @@ struct mlx5_core_health { }; struct mlx5_qp_table { + struct mlx5_nb nb; + /* protect radix tree */ spinlock_t lock; @@ -471,6 +473,8 @@ struct mlx5_qp_table { }; struct mlx5_srq_table { + struct mlx5_nb catas_err_nb; + struct mlx5_nb rq_limit_nb; /* protect radix tree */ spinlock_t lock; @@ -978,8 +982,6 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); -void mlx5_srq_event(struct mlx5_core_dev *dev, u32 
srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn); -- cgit v1.2.3 From 69c1280b1f3b9123bc5154b2062507abcc14c3ef Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 20 Nov 2018 14:12:27 -0800 Subject: net/mlx5: Device events, Use async events chain Move all the generic async events handling into new specific events handling file events.c to keep eq.c file clean from concrete event logic handling. Use new API to register for NOTIFY_ANY to handle generic events and dispatch allowed events to mlx5_core consumers (mlx5_ib and mlx5e) Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 9 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 157 ------------ drivers/net/ethernet/mellanox/mlx5/core/events.c | 283 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 34 +++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 16 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 6 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 57 ----- include/linux/mlx5/driver.h | 29 +-- include/linux/mlx5/port.h | 3 - 10 files changed, 344 insertions(+), 252 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/events.c (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d324a3884462..26afe0779a0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ + fs_counters.o rl.o lag.o dev.o 
events.o wq.o lib/gid.o \ diag/fs_tracepoint.o diag/fw_tracer.o # diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1e55b9c27ffc..748d23806391 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include "lib/mlx5.h" #include "en.h" #include "en_accel/ipsec.h" #include "en_accel/tls.h" @@ -1120,15 +1121,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { - struct mlx5_priv *mlx5_priv = &priv->mdev->priv; + struct mlx5_pme_stats pme_stats; int i; + mlx5_get_pme_stats(priv->mdev, &pme_stats); + for (i = 0; i < NUM_PME_STATUS_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters, + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters, mlx5e_pme_status_desc, i); for (i = 0; i < NUM_PME_ERR_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters, mlx5e_pme_error_desc, i); return idx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 4e3febbf639d..4aa39a1fe23f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -108,121 +108,6 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -static const char *eqe_type_str(u8 type) -{ - switch (type) { - case MLX5_EVENT_TYPE_COMP: - return "MLX5_EVENT_TYPE_COMP"; - case MLX5_EVENT_TYPE_PATH_MIG: - return "MLX5_EVENT_TYPE_PATH_MIG"; - case MLX5_EVENT_TYPE_COMM_EST: - return "MLX5_EVENT_TYPE_COMM_EST"; - case MLX5_EVENT_TYPE_SQ_DRAINED: - return "MLX5_EVENT_TYPE_SQ_DRAINED"; - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - return 
"MLX5_EVENT_TYPE_SRQ_LAST_WQE"; - case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: - return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; - case MLX5_EVENT_TYPE_CQ_ERROR: - return "MLX5_EVENT_TYPE_CQ_ERROR"; - case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; - case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: - return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; - case MLX5_EVENT_TYPE_INTERNAL_ERROR: - return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; - case MLX5_EVENT_TYPE_PORT_CHANGE: - return "MLX5_EVENT_TYPE_PORT_CHANGE"; - case MLX5_EVENT_TYPE_GPIO_EVENT: - return "MLX5_EVENT_TYPE_GPIO_EVENT"; - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; - case MLX5_EVENT_TYPE_REMOTE_CONFIG: - return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; - case MLX5_EVENT_TYPE_DB_BF_CONGESTION: - return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; - case MLX5_EVENT_TYPE_STALL_EVENT: - return "MLX5_EVENT_TYPE_STALL_EVENT"; - case MLX5_EVENT_TYPE_CMD: - return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_PAGE_REQUEST: - return "MLX5_EVENT_TYPE_PAGE_REQUEST"; - case MLX5_EVENT_TYPE_PAGE_FAULT: - return "MLX5_EVENT_TYPE_PAGE_FAULT"; - case MLX5_EVENT_TYPE_PPS_EVENT: - return "MLX5_EVENT_TYPE_PPS_EVENT"; - case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: - return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; - case MLX5_EVENT_TYPE_FPGA_ERROR: - return "MLX5_EVENT_TYPE_FPGA_ERROR"; - case MLX5_EVENT_TYPE_FPGA_QP_ERROR: - return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; - case MLX5_EVENT_TYPE_GENERAL_EVENT: - return "MLX5_EVENT_TYPE_GENERAL_EVENT"; - case MLX5_EVENT_TYPE_DEVICE_TRACER: - return "MLX5_EVENT_TYPE_DEVICE_TRACER"; - default: - return "Unrecognized event"; - } -} - -static enum 
mlx5_dev_event port_subtype_event(u8 subtype) -{ - switch (subtype) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - return MLX5_DEV_EVENT_PORT_DOWN; - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - return MLX5_DEV_EVENT_PORT_UP; - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - return MLX5_DEV_EVENT_PORT_INITIALIZED; - case MLX5_PORT_CHANGE_SUBTYPE_LID: - return MLX5_DEV_EVENT_LID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - return MLX5_DEV_EVENT_PKEY_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - return MLX5_DEV_EVENT_GUID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - return MLX5_DEV_EVENT_CLIENT_REREG; - } - return -1; -} - -static void general_event_handler(struct mlx5_core_dev *dev, - struct mlx5_eqe *eqe) -{ - switch (eqe->sub_type) { - case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: - if (dev->event) - dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0); - break; - default: - mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", - eqe->sub_type); - } -} - -static void mlx5_temp_warning_event(struct mlx5_core_dev *dev, - struct mlx5_eqe *eqe) -{ - u64 value_lsb; - u64 value_msb; - - value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); - value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); - - mlx5_core_warn(dev, - "High temperature on sensors with bit set %llx %llx", - value_msb, value_lsb); -} - /* caller must eventually call mlx5_cq_put on the returned cq */ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) { @@ -312,7 +197,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; int set_ci = 0; - u8 port; dev = eq->dev; eqt = dev->priv.eq_table; @@ -324,47 +208,6 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) */ dma_rmb(); - mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", - eq->eqn, eqe_type_str(eqe->type)); - switch (eqe->type) { - case MLX5_EVENT_TYPE_PORT_CHANGE: - port = (eqe->data.port.port >> 4) & 0xf; - 
switch (eqe->sub_type) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - case MLX5_PORT_CHANGE_SUBTYPE_LID: - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - if (dev->event) - dev->event(dev, port_subtype_event(eqe->sub_type), - (unsigned long)port); - break; - default: - mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", - port, eqe->sub_type); - } - break; - - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - mlx5_port_module_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - mlx5_temp_warning_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_GENERAL_EVENT: - general_event_handler(dev, eqe); - break; - - default: - mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", - eqe->type, eq->eqn); - break; - } - if (likely(eqe->type < MLX5_EVENT_TYPE_MAX)) atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c new file mode 100644 index 000000000000..d3ab86bd394b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/mlx5.h" + +struct mlx5_events { + struct mlx5_nb nb; + struct mlx5_core_dev *dev; + + /* port module evetns stats */ + struct mlx5_pme_stats pme_stats; +}; + +static const char *eqe_type_str(u8 type) +{ + switch (type) { + case MLX5_EVENT_TYPE_COMP: + return "MLX5_EVENT_TYPE_COMP"; + case MLX5_EVENT_TYPE_PATH_MIG: + return "MLX5_EVENT_TYPE_PATH_MIG"; + case MLX5_EVENT_TYPE_COMM_EST: + return "MLX5_EVENT_TYPE_COMM_EST"; + case MLX5_EVENT_TYPE_SQ_DRAINED: + return "MLX5_EVENT_TYPE_SQ_DRAINED"; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + return 
"MLX5_EVENT_TYPE_SRQ_LAST_WQE"; + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; + case MLX5_EVENT_TYPE_CQ_ERROR: + return "MLX5_EVENT_TYPE_CQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_INTERNAL_ERROR: + return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; + case MLX5_EVENT_TYPE_PORT_CHANGE: + return "MLX5_EVENT_TYPE_PORT_CHANGE"; + case MLX5_EVENT_TYPE_GPIO_EVENT: + return "MLX5_EVENT_TYPE_GPIO_EVENT"; + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; + case MLX5_EVENT_TYPE_REMOTE_CONFIG: + return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; + case MLX5_EVENT_TYPE_DB_BF_CONGESTION: + return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; + case MLX5_EVENT_TYPE_STALL_EVENT: + return "MLX5_EVENT_TYPE_STALL_EVENT"; + case MLX5_EVENT_TYPE_CMD: + return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_PAGE_REQUEST: + return "MLX5_EVENT_TYPE_PAGE_REQUEST"; + case MLX5_EVENT_TYPE_PAGE_FAULT: + return "MLX5_EVENT_TYPE_PAGE_FAULT"; + case MLX5_EVENT_TYPE_PPS_EVENT: + return "MLX5_EVENT_TYPE_PPS_EVENT"; + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; + case MLX5_EVENT_TYPE_FPGA_ERROR: + return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + return "MLX5_EVENT_TYPE_GENERAL_EVENT"; + case MLX5_EVENT_TYPE_DEVICE_TRACER: + return "MLX5_EVENT_TYPE_DEVICE_TRACER"; + default: + return "Unrecognized event"; + } +} + +static enum 
mlx5_dev_event port_subtype2dev(u8 subtype) +{ + switch (subtype) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + return MLX5_DEV_EVENT_PORT_DOWN; + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + return MLX5_DEV_EVENT_PORT_UP; + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + return MLX5_DEV_EVENT_PORT_INITIALIZED; + case MLX5_PORT_CHANGE_SUBTYPE_LID: + return MLX5_DEV_EVENT_LID_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + return MLX5_DEV_EVENT_PKEY_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + return MLX5_DEV_EVENT_GUID_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + return MLX5_DEV_EVENT_CLIENT_REREG; + } + return -1; +} + +static void temp_warning_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) +{ + u64 value_lsb; + u64 value_msb; + + value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); + + mlx5_core_warn(dev, + "High temperature on sensors with bit set %llx %llx", + value_msb, value_lsb); +} + +static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = { + "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */ + "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */ + "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */ +}; + +static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = { + "Power budget exceeded", + "Long Range for non MLNX cable", + "Bus stuck(I2C or data shorted)", + "No EEPROM/retry timeout", + "Enforce part number list", + "Unknown identifier", + "High Temperature", + "Bad or shorted cable/module", + "Unknown status", +}; + +static void port_module_event(struct mlx5_events *events, struct mlx5_eqe *eqe) +{ + enum port_module_event_status_type module_status; + enum port_module_event_error_type error_type; + struct mlx5_eqe_port_module *module_event_eqe; + struct mlx5_core_dev *dev = events->dev; + u8 module_num; + + module_event_eqe = &eqe->data.port_module; + module_num = module_event_eqe->module; + module_status = 
module_event_eqe->module_status & + PORT_MODULE_EVENT_MODULE_STATUS_MASK; + error_type = module_event_eqe->error_type & + PORT_MODULE_EVENT_ERROR_TYPE_MASK; + + if (module_status < MLX5_MODULE_STATUS_ERROR) { + events->pme_stats.status_counters[module_status - 1]++; + } else if (module_status == MLX5_MODULE_STATUS_ERROR) { + if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN) + /* Unknown error type */ + error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN; + events->pme_stats.error_counters[error_type]++; + } + + if (!printk_ratelimit()) + return; + + if (module_status < MLX5_MODULE_STATUS_ERROR) + mlx5_core_info(dev, + "Port module event: module %u, %s\n", + module_num, mlx5_pme_status[module_status - 1]); + + else if (module_status == MLX5_MODULE_STATUS_ERROR) + mlx5_core_info(dev, + "Port module event[error]: module %u, %s, %s\n", + module_num, mlx5_pme_status[module_status - 1], + mlx5_pme_error[error_type]); +} + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) +{ + *stats = dev->priv.events->pme_stats; +} + +/* Event handler for the low level mlx5_core driver. + * This handler will process/filter _some_ events and sometimes dispatch + * the equivalent mlx5_dev_event to the HCA interfaces (mlx5_ib and mlx5e) + * + * Other Major feature specific events such as + * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with + * separate notifiers callbacks, specifically by those mlx5 components. 
+ */ +static int events_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + bool dev_event_dispatch = false; + enum mlx5_dev_event dev_event; + unsigned long dev_event_data; + + struct mlx5_eqe *eqe = data; + struct mlx5_events *events; + struct mlx5_core_dev *dev; + u8 port; + + events = mlx5_nb_cof(nb, struct mlx5_events, nb); + dev = events->dev; + + mlx5_core_dbg(dev, "Async eqe type %s, subtype (%d)\n", + eqe_type_str(eqe->type), eqe->sub_type); + switch (eqe->type) { + case MLX5_EVENT_TYPE_PORT_CHANGE: + port = (eqe->data.port.port >> 4) & 0xf; + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_LID: + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + dev_event = port_subtype2dev(eqe->sub_type); + dev_event_data = (unsigned long)port; + dev_event_dispatch = true; + break; + default: + mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", + port, eqe->sub_type); + } + break; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + dev_event = MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT; + dev_event_data = 0; + dev_event_dispatch = true; + break; + default: + mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", + eqe->sub_type); + } + break; + + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + port_module_event(events, eqe); + break; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + temp_warning_event(dev, eqe); + break; + default: + return NOTIFY_DONE; + } + + if (dev->event && dev_event_dispatch) + dev->event(dev, dev_event, dev_event_data); + + return NOTIFY_OK; +} + +int mlx5_events_init(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); + + if (!events) + return -ENOMEM; + + events->dev = dev; + 
dev->priv.events = events; + return 0; +} + +void mlx5_events_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.events); +} + +void mlx5_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + + MLX5_NB_INIT(&events->nb, events_notifier, NOTIFY_ANY); + mlx5_eq_notifier_register(dev, &events->nb); +} + +void mlx5_events_stop(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + + mlx5_eq_notifier_unregister(dev, &events->nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 7550b1cc8c6a..23317e328b0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -33,6 +33,8 @@ #ifndef __LIB_MLX5_H__ #define __LIB_MLX5_H__ +#include "mlx5_core.h" + void mlx5_init_reserved_gids(struct mlx5_core_dev *dev); void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev); int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); @@ -40,4 +42,36 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +/* TODO move to lib/events.h */ + +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_NUM = 0x3, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER, + 
MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE, + MLX5_MODULE_EVENT_ERROR_UNKNOWN, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 9e4cd2757ea8..e56278ead4eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -735,10 +735,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } + err = mlx5_events_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize events\n"); + goto err_eq_cleanup; + } + err = mlx5_cq_debugfs_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize cq debugfs\n"); - goto err_eq_cleanup; + goto err_events_cleanup; } mlx5_init_qp_table(dev); @@ -801,7 +807,8 @@ err_tables_cleanup: mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); - +err_events_cleanup: + mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); @@ -824,6 +831,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); + mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); } @@ -947,6 +955,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_get_uars; } + mlx5_events_start(dev); mlx5_pagealloc_start(dev); err = mlx5_eq_table_create(dev); @@ -1036,6 +1045,7 @@ err_fw_tracer: err_eq_table: mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); mlx5_put_uars_page(dev, priv->uar); err_get_uars: @@ -1095,8 +1105,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_fw_tracer_cleanup(dev->tracer); 
mlx5_eq_table_destroy(dev); mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); mlx5_put_uars_page(dev, priv->uar); - if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev, cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 5dd453e47a04..c70bd94e18d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -105,7 +105,6 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); -void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); @@ -141,6 +140,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_events_init(struct mlx5_core_dev *dev); +void mlx5_events_cleanup(struct mlx5_core_dev *dev); +void mlx5_events_start(struct mlx5_core_dev *dev); +void mlx5_events_stop(struct mlx5_core_dev *dev); + void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_attach_device(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 31a9cbd85689..2b82f35f4c35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -915,63 +915,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); } -static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = { - "Cable plugged", /* 
MLX5_MODULE_STATUS_PLUGGED = 0x1 */ - "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */ - "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */ -}; - -static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = { - "Power budget exceeded", - "Long Range for non MLNX cable", - "Bus stuck(I2C or data shorted)", - "No EEPROM/retry timeout", - "Enforce part number list", - "Unknown identifier", - "High Temperature", - "Bad or shorted cable/module", - "Unknown status", -}; - -void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) -{ - enum port_module_event_status_type module_status; - enum port_module_event_error_type error_type; - struct mlx5_eqe_port_module *module_event_eqe; - struct mlx5_priv *priv = &dev->priv; - u8 module_num; - - module_event_eqe = &eqe->data.port_module; - module_num = module_event_eqe->module; - module_status = module_event_eqe->module_status & - PORT_MODULE_EVENT_MODULE_STATUS_MASK; - error_type = module_event_eqe->error_type & - PORT_MODULE_EVENT_ERROR_TYPE_MASK; - - if (module_status < MLX5_MODULE_STATUS_ERROR) { - priv->pme_stats.status_counters[module_status - 1]++; - } else if (module_status == MLX5_MODULE_STATUS_ERROR) { - if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN) - /* Unknown error type */ - error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN; - priv->pme_stats.error_counters[error_type]++; - } - - if (!printk_ratelimit()) - return; - - if (module_status < MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, - "Port module event: module %u, %s\n", - module_num, mlx5_pme_status[module_status - 1]); - - else if (module_status == MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, - "Port module event[error]: module %u, %s, %s\n", - module_num, mlx5_pme_status[module_status - 1], - mlx5_pme_error[error_type]); -} - int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) { u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 
afba0864f45c..ba64ecf72478 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -514,6 +514,7 @@ struct mlx5_fc_stats { unsigned long sampling_interval; /* jiffies */ }; +struct mlx5_events; struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; @@ -540,31 +541,6 @@ struct mlx5_rl_table { struct mlx5_rl_entry *rl_entry; }; -enum port_module_event_status_type { - MLX5_MODULE_STATUS_PLUGGED = 0x1, - MLX5_MODULE_STATUS_UNPLUGGED = 0x2, - MLX5_MODULE_STATUS_ERROR = 0x3, - MLX5_MODULE_STATUS_NUM = 0x3, -}; - -enum port_module_event_error_type { - MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED, - MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE, - MLX5_MODULE_EVENT_ERROR_BUS_STUCK, - MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT, - MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST, - MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER, - MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE, - MLX5_MODULE_EVENT_ERROR_BAD_CABLE, - MLX5_MODULE_EVENT_ERROR_UNKNOWN, - MLX5_MODULE_EVENT_ERROR_NUM, -}; - -struct mlx5_port_module_event_stats { - u64 status_counters[MLX5_MODULE_STATUS_NUM]; - u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; -}; - struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table *eq_table; @@ -614,6 +590,7 @@ struct mlx5_priv { struct list_head waiting_events_list; bool is_accum_events; + struct mlx5_events *events; struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; @@ -624,8 +601,6 @@ struct mlx5_priv { struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; - struct mlx5_port_module_event_stats pme_stats; - struct mlx5_bfreg_data bfregs; struct mlx5_uars_page *uar; }; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 34aed6032f86..bf4bc01ffb0c 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -107,9 +107,6 @@ enum mlx5e_connector_type { #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) -#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF -#define 
PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF - int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -- cgit v1.2.3 From b1ab95c63622e9d9bd0ce685e149034d393afc2e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 5 Nov 2018 14:54:27 -0800 Subject: arch: Make phys_initrd_start and phys_initrd_size global variables Make phys_initrd_start and phys_initrd_size global variables declared in init/do_mounts_initrd.c such that we can later have generic code in drivers/of/fdt.c populate those variables for us. This requires both the ARM and unicore32 implementations to be properly guarded against CONFIG_BLK_DEV_INITRD, and also initialize the variables to the expected default values (unicore32). Signed-off-by: Florian Fainelli Reviewed-by: Mike Rapoport Signed-off-by: Rob Herring --- arch/arm/mm/init.c | 5 ++--- arch/unicore32/mm/init.c | 10 +++++++--- include/linux/initrd.h | 3 +++ init/do_mounts_initrd.c | 3 +++ 4 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 32e4845af2b6..438625764ccd 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -50,9 +50,7 @@ unsigned long __init __clear_cr(unsigned long mask) } #endif -static phys_addr_t phys_initrd_start __initdata = 0; -static unsigned long phys_initrd_size __initdata = 0; - +#ifdef CONFIG_BLK_DEV_INITRD static int __init early_initrd(char *p) { phys_addr_t start; @@ -89,6 +87,7 @@ static int __init parse_tag_initrd2(const struct tag *tag) } __tagtable(ATAG_INITRD2, parse_tag_initrd2); +#endif static void __init find_limits(unsigned long *min, unsigned long *max_low, unsigned long *max_high) diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index cf4eb9481fd6..02aa2c0b295e 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -30,9 +30,7 @@ #include "mm.h" -static 
unsigned long phys_initrd_start __initdata = 0x01000000; -static unsigned long phys_initrd_size __initdata = SZ_8M; - +#ifdef CONFIG_BLK_DEV_INITRD static int __init early_initrd(char *p) { unsigned long start, size; @@ -48,6 +46,7 @@ static int __init early_initrd(char *p) return 0; } early_param("initrd", early_initrd); +#endif /* * This keeps memory configuration data used by a couple memory @@ -156,6 +155,11 @@ void __init uc32_memblock_init(struct meminfo *mi) memblock_reserve(__pa(_text), _end - _text); #ifdef CONFIG_BLK_DEV_INITRD + if (!phys_initrd_size) { + phys_initrd_start = 0x01000000; + phys_initrd_size = SZ_8M; + } + if (phys_initrd_size) { memblock_reserve(phys_initrd_start, phys_initrd_size); diff --git a/include/linux/initrd.h b/include/linux/initrd.h index 84b423044088..14beaff9b445 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -21,4 +21,7 @@ extern int initrd_below_start_ok; extern unsigned long initrd_start, initrd_end; extern void free_initrd_mem(unsigned long, unsigned long); +extern phys_addr_t phys_initrd_start; +extern unsigned long phys_initrd_size; + extern unsigned int real_root_dev; diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index d1a5d885ce13..45865b72f4ea 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -16,6 +16,9 @@ int initrd_below_start_ok; unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ static int __initdata mount_initrd = 1; +phys_addr_t phys_initrd_start __initdata; +unsigned long phys_initrd_size __initdata; + static int __init no_initrd(char *str) { mount_initrd = 0; -- cgit v1.2.3 From 2a1fe215e7300c7ebd6a7a24afcab71db5107bb0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 26 Nov 2018 18:40:07 -0500 Subject: audit: use current whenever possible There are many places, notably audit_log_task_info() and audit_log_exit(), that take task_struct pointers but in reality they are always working on the current task. 
This patch eliminates the task_struct arguments and uses current directly which allows a number of cleanups as well. Acked-by: Richard Guy Briggs Signed-off-by: Paul Moore --- drivers/tty/tty_audit.c | 13 ++-- include/linux/audit.h | 6 +- kernel/audit.c | 34 +++++----- kernel/audit.h | 2 +- kernel/auditsc.c | 131 +++++++++++++++++++-------------------- security/integrity/ima/ima_api.c | 2 +- 6 files changed, 90 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 50f567b6a66e..28f87fd6a28e 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -61,20 +61,19 @@ static void tty_audit_log(const char *description, dev_t dev, unsigned char *data, size_t size) { struct audit_buffer *ab; - struct task_struct *tsk = current; - pid_t pid = task_pid_nr(tsk); - uid_t uid = from_kuid(&init_user_ns, task_uid(tsk)); - uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(tsk)); - unsigned int sessionid = audit_get_sessionid(tsk); + pid_t pid = task_pid_nr(current); + uid_t uid = from_kuid(&init_user_ns, task_uid(current)); + uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current)); + unsigned int sessionid = audit_get_sessionid(current); ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_TTY); if (ab) { - char name[sizeof(tsk->comm)]; + char name[sizeof(current->comm)]; audit_log_format(ab, "%s pid=%u uid=%u auid=%u ses=%u major=%d" " minor=%d comm=", description, pid, uid, loginuid, sessionid, MAJOR(dev), MINOR(dev)); - get_task_comm(name, tsk); + get_task_comm(name, current); audit_log_untrustedstring(ab, name); audit_log_format(ab, " data="); audit_log_n_hex(ab, data, size); diff --git a/include/linux/audit.h b/include/linux/audit.h index 58cf665f597e..a625c29a2ea2 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -151,8 +151,7 @@ extern void audit_log_link_denied(const char *operation); extern void audit_log_lost(const char *message); extern int 
audit_log_task_context(struct audit_buffer *ab); -extern void audit_log_task_info(struct audit_buffer *ab, - struct task_struct *tsk); +extern void audit_log_task_info(struct audit_buffer *ab); extern int audit_update_lsm_rules(void); @@ -200,8 +199,7 @@ static inline int audit_log_task_context(struct audit_buffer *ab) { return 0; } -static inline void audit_log_task_info(struct audit_buffer *ab, - struct task_struct *tsk) +static inline void audit_log_task_info(struct audit_buffer *ab) { } #define audit_enabled AUDIT_OFF #endif /* CONFIG_AUDIT */ diff --git a/kernel/audit.c b/kernel/audit.c index d09298d3c2d2..779671883349 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1096,10 +1096,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature if (audit_enabled == AUDIT_OFF) return; + ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_FEATURE_CHANGE); if (!ab) return; - audit_log_task_info(ab, current); + audit_log_task_info(ab); audit_log_format(ab, " feature=%s old=%u new=%u old_lock=%u new_lock=%u res=%d", audit_feature_names[which], !!old_feature, !!new_feature, !!old_lock, !!new_lock, res); @@ -2246,15 +2247,15 @@ out_null: audit_log_format(ab, " exe=(null)"); } -struct tty_struct *audit_get_tty(struct task_struct *tsk) +struct tty_struct *audit_get_tty(void) { struct tty_struct *tty = NULL; unsigned long flags; - spin_lock_irqsave(&tsk->sighand->siglock, flags); - if (tsk->signal) - tty = tty_kref_get(tsk->signal->tty); - spin_unlock_irqrestore(&tsk->sighand->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); + if (current->signal) + tty = tty_kref_get(current->signal->tty); + spin_unlock_irqrestore(&current->sighand->siglock, flags); return tty; } @@ -2263,25 +2264,24 @@ void audit_put_tty(struct tty_struct *tty) tty_kref_put(tty); } -void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) +void audit_log_task_info(struct audit_buffer *ab) { const struct cred *cred; - char
comm[sizeof(tsk->comm)]; + char comm[sizeof(current->comm)]; struct tty_struct *tty; if (!ab) return; - /* tsk == current */ cred = current_cred(); - tty = audit_get_tty(tsk); + tty = audit_get_tty(); audit_log_format(ab, " ppid=%d pid=%d auid=%u uid=%u gid=%u" " euid=%u suid=%u fsuid=%u" " egid=%u sgid=%u fsgid=%u tty=%s ses=%u", - task_ppid_nr(tsk), - task_tgid_nr(tsk), - from_kuid(&init_user_ns, audit_get_loginuid(tsk)), + task_ppid_nr(current), + task_tgid_nr(current), + from_kuid(&init_user_ns, audit_get_loginuid(current)), from_kuid(&init_user_ns, cred->uid), from_kgid(&init_user_ns, cred->gid), from_kuid(&init_user_ns, cred->euid), @@ -2291,11 +2291,11 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) from_kgid(&init_user_ns, cred->sgid), from_kgid(&init_user_ns, cred->fsgid), tty ? tty_name(tty) : "(none)", - audit_get_sessionid(tsk)); + audit_get_sessionid(current)); audit_put_tty(tty); audit_log_format(ab, " comm="); - audit_log_untrustedstring(ab, get_task_comm(comm, tsk)); - audit_log_d_path_exe(ab, tsk->mm); + audit_log_untrustedstring(ab, get_task_comm(comm, current)); + audit_log_d_path_exe(ab, current->mm); audit_log_task_context(ab); } EXPORT_SYMBOL(audit_log_task_info); @@ -2316,7 +2316,7 @@ void audit_log_link_denied(const char *operation) if (!ab) return; audit_log_format(ab, "op=%s", operation); - audit_log_task_info(ab, current); + audit_log_task_info(ab); audit_log_format(ab, " res=0"); audit_log_end(ab); } diff --git a/kernel/audit.h b/kernel/audit.h index 0b5295aeaebb..91421679a168 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -264,7 +264,7 @@ extern struct audit_entry *audit_dupe_rule(struct audit_krule *old); extern void audit_log_d_path_exe(struct audit_buffer *ab, struct mm_struct *mm); -extern struct tty_struct *audit_get_tty(struct task_struct *tsk); +extern struct tty_struct *audit_get_tty(void); extern void audit_put_tty(struct tty_struct *tty); /* audit watch functions */ diff --git 
a/kernel/auditsc.c b/kernel/auditsc.c index 51e735aedf58..6593a5207fb0 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -830,44 +830,6 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) rcu_read_unlock(); } -/* Transfer the audit context pointer to the caller, clearing it in the tsk's struct */ -static inline struct audit_context *audit_take_context(struct task_struct *tsk, - int return_valid, - long return_code) -{ - struct audit_context *context = tsk->audit_context; - - if (!context) - return NULL; - context->return_valid = return_valid; - - /* - * we need to fix up the return code in the audit logs if the actual - * return codes are later going to be fixed up by the arch specific - * signal handlers - * - * This is actually a test for: - * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) || - * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK) - * - * but is faster than a bunch of || - */ - if (unlikely(return_code <= -ERESTARTSYS) && - (return_code >= -ERESTART_RESTARTBLOCK) && - (return_code != -ENOIOCTLCMD)) - context->return_code = -EINTR; - else - context->return_code = return_code; - - if (context->in_syscall && !context->dummy) { - audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]); - audit_filter_inodes(tsk, context); - } - - audit_set_context(tsk, NULL); - return context; -} - static inline void audit_proctitle_free(struct audit_context *context) { kfree(context->proctitle.value); @@ -1296,15 +1258,18 @@ static inline int audit_proctitle_rtrim(char *proctitle, int len) return len; } -static void audit_log_proctitle(struct task_struct *tsk, - struct audit_context *context) +static void audit_log_proctitle(void) { int res; char *buf; char *msg = "(null)"; int len = strlen(msg); + struct audit_context *context = audit_context(); struct audit_buffer *ab; + if (!context || context->dummy) + return; + ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCTITLE); if (!ab) return; /* audit_panic or 
being filtered */ @@ -1317,7 +1282,7 @@ static void audit_log_proctitle(struct task_struct *tsk, if (!buf) goto out; /* Historically called this from procfs naming */ - res = get_cmdline(tsk, buf, MAX_PROCTITLE_AUDIT_LEN); + res = get_cmdline(current, buf, MAX_PROCTITLE_AUDIT_LEN); if (res == 0) { kfree(buf); goto out; @@ -1337,15 +1302,15 @@ out: audit_log_end(ab); } -static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) +static void audit_log_exit(void) { int i, call_panic = 0; + struct audit_context *context = audit_context(); struct audit_buffer *ab; struct audit_aux_data *aux; struct audit_names *n; - /* tsk == current */ - context->personality = tsk->personality; + context->personality = current->personality; ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); if (!ab) @@ -1367,7 +1332,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->argv[3], context->name_count); - audit_log_task_info(ab, tsk); + audit_log_task_info(ab); audit_log_key(ab, context->filterkey); audit_log_end(ab); @@ -1456,7 +1421,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_name(context, n, NULL, i++, &call_panic); } - audit_log_proctitle(tsk, context); + audit_log_proctitle(); /* Send end of event record to help user space know we are finished */ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); @@ -1474,22 +1439,31 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts */ void __audit_free(struct task_struct *tsk) { - struct audit_context *context; + struct audit_context *context = tsk->audit_context; - context = audit_take_context(tsk, 0, 0); if (!context) return; - /* Check for system calls that do not go through the exit - * function (e.g., exit_group), then free context block. 
- * We use GFP_ATOMIC here because we might be doing this - * in the context of the idle thread */ - /* that can happen only if we are called from do_exit() */ - if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT) - audit_log_exit(context, tsk); + /* We are called either by do_exit() or the fork() error handling code; + * in the former case tsk == current and in the latter tsk is a + * random task_struct that doesn't doesn't have any meaningful data we + * need to log via audit_log_exit(). + */ + if (tsk == current && !context->dummy && context->in_syscall) { + context->return_valid = 0; + context->return_code = 0; + + audit_filter_syscall(tsk, context, + &audit_filter_list[AUDIT_FILTER_EXIT]); + audit_filter_inodes(tsk, context); + if (context->current_state == AUDIT_RECORD_CONTEXT) + audit_log_exit(); + } + if (!list_empty(&context->killed_trees)) audit_kill_trees(&context->killed_trees); + audit_set_context(tsk, NULL); audit_free_context(context); } @@ -1559,17 +1533,40 @@ void __audit_syscall_exit(int success, long return_code) { struct audit_context *context; - if (success) - success = AUDITSC_SUCCESS; - else - success = AUDITSC_FAILURE; - - context = audit_take_context(current, success, return_code); + context = audit_context(); if (!context) return; - if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT) - audit_log_exit(context, current); + if (!context->dummy && context->in_syscall) { + if (success) + context->return_valid = AUDITSC_SUCCESS; + else + context->return_valid = AUDITSC_FAILURE; + + /* + * we need to fix up the return code in the audit logs if the + * actual return codes are later going to be fixed up by the + * arch specific signal handlers + * + * This is actually a test for: + * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) || + * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK) + * + * but is faster than a bunch of || + */ + if (unlikely(return_code <= -ERESTARTSYS) && + (return_code >= 
-ERESTART_RESTARTBLOCK) && + (return_code != -ENOIOCTLCMD)) + context->return_code = -EINTR; + else + context->return_code = return_code; + + audit_filter_syscall(current, context, + &audit_filter_list[AUDIT_FILTER_EXIT]); + audit_filter_inodes(current, context); + if (context->current_state == AUDIT_RECORD_CONTEXT) + audit_log_exit(); + } context->in_syscall = 0; context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; @@ -1591,7 +1588,6 @@ void __audit_syscall_exit(int success, long return_code) kfree(context->filterkey); context->filterkey = NULL; } - audit_set_context(current, context); } static inline void handle_one(const struct inode *inode) @@ -2025,7 +2021,7 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, uid = from_kuid(&init_user_ns, task_uid(current)); oldloginuid = from_kuid(&init_user_ns, koldloginuid); loginuid = from_kuid(&init_user_ns, kloginuid), - tty = audit_get_tty(current); + tty = audit_get_tty(); audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid); audit_log_task_context(ab); @@ -2046,7 +2042,6 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, */ int audit_set_loginuid(kuid_t loginuid) { - struct task_struct *task = current; unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET; kuid_t oldloginuid; int rc; @@ -2065,8 +2060,8 @@ int audit_set_loginuid(kuid_t loginuid) sessionid = (unsigned int)atomic_inc_return(&session_id); } - task->sessionid = sessionid; - task->loginuid = loginuid; + current->sessionid = sessionid; + current->loginuid = loginuid; out: audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc); return rc; diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 99dd1d53fc35..af134588ab4e 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -336,7 +336,7 @@ void ima_audit_measurement(struct integrity_iint_cache *iint, audit_log_untrustedstring(ab, filename); 
audit_log_format(ab, " hash=\"%s:%s\"", algo_name, hash); - audit_log_task_info(ab, current); + audit_log_task_info(ab); audit_log_end(ab); iint->flags |= IMA_AUDITED; -- cgit v1.2.3 From ba64e7d8525236aa56ab58ba3a3a71615c4ee289 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 24 Nov 2018 23:20:44 -0800 Subject: bpf: btf: support proper non-jit func info Commit 838e96904ff3 ("bpf: Introduce bpf_func_info") added bpf func info support. The userspace is able to get better ksym's for bpf programs with jit, and is able to print out func prototypes. For a program containing func-to-func calls, the existing implementation returns user specified number of function calls and BTF types if jit is enabled. If the jit is not enabled, it only returns the type for the main function. This is undesirable. Interpreter may still be used and we should keep feature identical regardless of whether jit is enabled or not. This patch fixed this discrepancy. Fixes: 838e96904ff3 ("bpf: Introduce bpf_func_info") Signed-off-by: Yonghong Song Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 +++-- include/linux/bpf_verifier.h | 1 - kernel/bpf/core.c | 3 ++- kernel/bpf/syscall.c | 33 +++++++------------------- kernel/bpf/verifier.c | 55 ++++++++++++++++++++++++++++++-------------- 5 files changed, 52 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7f0e225bf630..e82b7039fc66 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -299,7 +299,8 @@ struct bpf_prog_aux { u32 max_pkt_offset; u32 stack_depth; u32 id; - u32 func_cnt; + u32 func_cnt; /* used by non-func prog as the number of func progs */ + u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ bool offload_requested; struct bpf_prog **func; void *jit_data; /* JIT specific data. 
arch dependent */ @@ -317,7 +318,8 @@ struct bpf_prog_aux { #endif struct bpf_prog_offload *offload; struct btf *btf; - u32 type_id; /* type id for this prog/func */ + struct bpf_func_info *func_info; + u32 func_info_cnt; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 204382f46fd8..11f5df1092d9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -204,7 +204,6 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) struct bpf_subprog_info { u32 start; /* insn idx of function entry point */ u16 stack_depth; /* max. stack depth used by this function */ - u32 type_id; /* btf type_id for this subprog */ }; /* single container for all structs diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 16d77012ad3e..002d67c62c8b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -411,7 +411,8 @@ static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) /* prog->aux->name will be ignored if full btf name is available */ if (prog->aux->btf) { - type = btf_type_by_id(prog->aux->btf, prog->aux->type_id); + type = btf_type_by_id(prog->aux->btf, + prog->aux->func_info[prog->aux->func_idx].type_id); func_name = btf_name_by_offset(prog->aux->btf, type->name_off); snprintf(sym, (size_t)(end - sym), "_%s", func_name); return; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 998377808102..85cbeec06e50 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1214,6 +1214,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) bpf_prog_free_id(prog, do_idr_lock); bpf_prog_kallsyms_del_all(prog); btf_put(prog->aux->btf); + kvfree(prog->aux->func_info); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } @@ -2219,46 +2220,28 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } if (prog->aux->btf) { + u32 krec_size = sizeof(struct bpf_func_info); u32 ucnt, urec_size; info.btf_id = 
btf_id(prog->aux->btf); ucnt = info.func_info_cnt; - info.func_info_cnt = prog->aux->func_cnt ? : 1; + info.func_info_cnt = prog->aux->func_info_cnt; urec_size = info.func_info_rec_size; - info.func_info_rec_size = sizeof(struct bpf_func_info); + info.func_info_rec_size = krec_size; if (ucnt) { /* expect passed-in urec_size is what the kernel expects */ if (urec_size != info.func_info_rec_size) return -EINVAL; if (bpf_dump_raw_ok()) { - struct bpf_func_info kern_finfo; char __user *user_finfo; - u32 i, insn_offset; user_finfo = u64_to_user_ptr(info.func_info); - if (prog->aux->func_cnt) { - ucnt = min_t(u32, info.func_info_cnt, ucnt); - insn_offset = 0; - for (i = 0; i < ucnt; i++) { - kern_finfo.insn_offset = insn_offset; - kern_finfo.type_id = prog->aux->func[i]->aux->type_id; - if (copy_to_user(user_finfo, &kern_finfo, - sizeof(kern_finfo))) - return -EFAULT; - - /* func[i]->len holds the prog len */ - insn_offset += prog->aux->func[i]->len; - user_finfo += urec_size; - } - } else { - kern_finfo.insn_offset = 0; - kern_finfo.type_id = prog->aux->type_id; - if (copy_to_user(user_finfo, &kern_finfo, - sizeof(kern_finfo))) - return -EFAULT; - } + ucnt = min_t(u32, info.func_info_cnt, ucnt); + if (copy_to_user(user_finfo, prog->aux->func_info, + krec_size * ucnt)) + return -EFAULT; } else { info.func_info_cnt = 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f102c4fd0c5a..05d95c0e4a26 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4650,7 +4650,7 @@ static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env, { u32 i, nfuncs, urec_size, min_size, prev_offset; u32 krec_size = sizeof(struct bpf_func_info); - struct bpf_func_info krecord = {}; + struct bpf_func_info *krecord = NULL; const struct btf_type *type; void __user *urecord; struct btf *btf; @@ -4682,6 +4682,12 @@ static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env, urecord = u64_to_user_ptr(attr->func_info); min_size = 
min_t(u32, krec_size, urec_size); + krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); + if (!krecord) { + ret = -ENOMEM; + goto free_btf; + } + for (i = 0; i < nfuncs; i++) { ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); if (ret) { @@ -4696,59 +4702,69 @@ static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env, goto free_btf; } - if (copy_from_user(&krecord, urecord, min_size)) { + if (copy_from_user(&krecord[i], urecord, min_size)) { ret = -EFAULT; goto free_btf; } /* check insn_offset */ if (i == 0) { - if (krecord.insn_offset) { + if (krecord[i].insn_offset) { verbose(env, "nonzero insn_offset %u for the first func info record", - krecord.insn_offset); + krecord[i].insn_offset); ret = -EINVAL; goto free_btf; } - } else if (krecord.insn_offset <= prev_offset) { + } else if (krecord[i].insn_offset <= prev_offset) { verbose(env, "same or smaller insn offset (%u) than previous func info record (%u)", - krecord.insn_offset, prev_offset); + krecord[i].insn_offset, prev_offset); ret = -EINVAL; goto free_btf; } - if (env->subprog_info[i].start != krecord.insn_offset) { + if (env->subprog_info[i].start != krecord[i].insn_offset) { verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); ret = -EINVAL; goto free_btf; } /* check type_id */ - type = btf_type_by_id(btf, krecord.type_id); + type = btf_type_by_id(btf, krecord[i].type_id); if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) { verbose(env, "invalid type id %d in func info", - krecord.type_id); + krecord[i].type_id); ret = -EINVAL; goto free_btf; } - if (i == 0) - prog->aux->type_id = krecord.type_id; - env->subprog_info[i].type_id = krecord.type_id; - - prev_offset = krecord.insn_offset; + prev_offset = krecord[i].insn_offset; urecord += urec_size; } prog->aux->btf = btf; + prog->aux->func_info = krecord; + prog->aux->func_info_cnt = nfuncs; return 0; free_btf: btf_put(btf); + kvfree(krecord); return ret; } +static void 
adjust_btf_func(struct bpf_verifier_env *env) +{ + int i; + + if (!env->prog->aux->func_info) + return; + + for (i = 0; i < env->subprog_cnt; i++) + env->prog->aux->func_info[i].insn_offset = env->subprog_info[i].start; +} + /* check %cur's range satisfies %old's */ static bool range_within(struct bpf_reg_state *old, struct bpf_reg_state *cur) @@ -6043,15 +6059,17 @@ static int jit_subprogs(struct bpf_verifier_env *env) if (bpf_prog_calc_tag(func[i])) goto out_free; func[i]->is_func = 1; + func[i]->aux->func_idx = i; + /* the btf and func_info will be freed only at prog->aux */ + func[i]->aux->btf = prog->aux->btf; + func[i]->aux->func_info = prog->aux->func_info; + /* Use bpf_prog_F_tag to indicate functions in stack traces. * Long term would need debug info to populate names */ func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; - /* the btf will be freed only at prog->aux */ - func[i]->aux->btf = prog->aux->btf; - func[i]->aux->type_id = env->subprog_info[i].type_id; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { err = -ENOTSUPP; @@ -6572,6 +6590,9 @@ skip_full_check: convert_pseudo_ld_imm64(env); } + if (ret == 0) + adjust_btf_func(env); + err_release_maps: if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release -- cgit v1.2.3 From c9d76d0655c06b8c1f944e46c4fd9e9cf4b331c0 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Wed, 29 Aug 2018 23:29:21 +0200 Subject: dma-mapping: fix return type of dma_set_max_seg_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function dma_set_max_seg_size() can return either 0 on success or -EIO on error. Change its return type from unsigned int to int to capture this. 
Signed-off-by: Niklas Söderlund Reviewed-by: Geert Uytterhoeven Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 15bd41447025..0f81c713f6e9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -676,8 +676,7 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev) return SZ_64K; } -static inline unsigned int dma_set_max_seg_size(struct device *dev, - unsigned int size) +static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) { if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; -- cgit v1.2.3 From 7440172974e85b1828bdd84ac6b23b5bcad9c5eb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 6 Nov 2018 18:44:52 -0800 Subject: tracing: Replace synchronize_sched() and call_rcu_sched() Now that synchronize_rcu() waits for preempt-disable regions of code as well as RCU read-side critical sections, synchronize_sched() can be replaced by synchronize_rcu(). Similarly, call_rcu_sched() can be replaced by call_rcu(). This commit therefore makes these changes. Signed-off-by: Paul E. 
McKenney Cc: Ingo Molnar Cc: Acked-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 2 +- kernel/trace/ftrace.c | 24 ++++++++++++------------ kernel/trace/ring_buffer.c | 12 ++++++------ kernel/trace/trace.c | 10 +++++----- kernel/trace/trace_events_filter.c | 4 ++-- kernel/trace/trace_kprobe.c | 2 +- kernel/tracepoint.c | 4 ++-- 7 files changed, 29 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 538ba1a58f5b..432080b59c26 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -82,7 +82,7 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb) static inline void tracepoint_synchronize_unregister(void) { synchronize_srcu(&tracepoint_srcu); - synchronize_sched(); + synchronize_rcu(); } #else static inline void tracepoint_synchronize_unregister(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f536f601bd46..5b4f73e4fd56 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -173,7 +173,7 @@ static void ftrace_sync(struct work_struct *work) { /* * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing + * of synchronize_rcu(). This requires synchronizing * tasks even in userspace and idle. * * Yes, function tracing is rude. @@ -934,7 +934,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, ftrace_profile_enabled = 0; /* * unregister_ftrace_profiler calls stop_machine - * so this acts like an synchronize_sched. + * so this acts like an synchronize_rcu. */ unregister_ftrace_profiler(); } @@ -1086,7 +1086,7 @@ struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) /* * Some of the ops may be dynamically allocated, - * they are freed after a synchronize_sched(). + * they are freed after a synchronize_rcu(). 
*/ preempt_disable_notrace(); @@ -1286,7 +1286,7 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) { if (!hash || hash == EMPTY_HASH) return; - call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); + call_rcu(&hash->rcu, __free_ftrace_hash_rcu); } void ftrace_free_filter(struct ftrace_ops *ops) @@ -1501,7 +1501,7 @@ static bool hash_contains_ip(unsigned long ip, * the ip is not in the ops->notrace_hash. * * This needs to be called with preemption disabled as - * the hashes are freed with call_rcu_sched(). + * the hashes are freed with call_rcu(). */ static int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) @@ -4496,7 +4496,7 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, if (ftrace_enabled && !ftrace_hash_empty(hash)) ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS, &old_hash_ops); - synchronize_sched(); + synchronize_rcu(); hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) { hlist_del(&entry->hlist); @@ -5314,7 +5314,7 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); /* Wait till all users are no longer using the old hash */ - synchronize_sched(); + synchronize_rcu(); free_ftrace_hash(old_hash); } @@ -5707,7 +5707,7 @@ void ftrace_release_mod(struct module *mod) list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { if (mod_map->mod == mod) { list_del_rcu(&mod_map->list); - call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map); + call_rcu(&mod_map->rcu, ftrace_free_mod_map); break; } } @@ -5927,7 +5927,7 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, struct ftrace_mod_map *mod_map; const char *ret = NULL; - /* mod_map is freed via call_rcu_sched() */ + /* mod_map is freed via call_rcu() */ preempt_disable(); list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); @@ -6262,7 +6262,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, /* * 
Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_sched(). + * they must be freed after a synchronize_rcu(). */ preempt_disable_notrace(); @@ -6433,7 +6433,7 @@ static void clear_ftrace_pids(struct trace_array *tr) rcu_assign_pointer(tr->function_pids, NULL); /* Wait till all users are no longer using pid filtering */ - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(pid_list); } @@ -6580,7 +6580,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, rcu_assign_pointer(tr->function_pids, pid_list); if (filtered_pids) { - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(filtered_pids); } else if (pid_list) { /* Register a probe to set whether to ignore the tracing of a task */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 65bd4616220d..4f3247a53259 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1834,7 +1834,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, * There could have been a race between checking * record_disable and incrementing it. */ - synchronize_sched(); + synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; rb_check_pages(cpu_buffer); @@ -3151,7 +3151,7 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * - * The caller should call synchronize_sched() after this. + * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable(struct ring_buffer *buffer) { @@ -3253,7 +3253,7 @@ bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * - * The caller should call synchronize_sched() after this. + * The caller should call synchronize_rcu() after this. 
*/ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) { @@ -4191,7 +4191,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); void ring_buffer_read_prepare_sync(void) { - synchronize_sched(); + synchronize_rcu(); } EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); @@ -4363,7 +4363,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) atomic_inc(&cpu_buffer->record_disabled); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); @@ -4496,7 +4496,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, goto out; /* - * We can't do a synchronize_sched here because this + * We can't do a synchronize_rcu here because this * function can be called in atomic context. * Normally this will be called from the same CPU as cpu. * If not it's up to the caller to protect this. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ff1c4b20cd0a..51612b4a603f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1681,7 +1681,7 @@ void tracing_reset(struct trace_buffer *buf, int cpu) ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); ring_buffer_reset_cpu(buffer, cpu); ring_buffer_record_enable(buffer); @@ -1698,7 +1698,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf) ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); buf->time_start = buffer_ftrace_now(buf, buf->cpu); @@ -2250,7 +2250,7 @@ void trace_buffered_event_disable(void) preempt_enable(); /* Wait for all current users to finish */ - synchronize_sched(); + synchronize_rcu(); for_each_tracing_cpu(cpu) { free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); @@ -5398,7 +5398,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); - /* Current 
trace needs to be nop_trace before synchronize_sched */ + /* Current trace needs to be nop_trace before synchronize_rcu */ tr->current_trace = &nop_trace; #ifdef CONFIG_TRACER_MAX_TRACE @@ -5412,7 +5412,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) * The update_max_tr is called from interrupts disabled * so a synchronized_sched() is sufficient. */ - synchronize_sched(); + synchronize_rcu(); free_snapshot(tr); } #endif diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 84a65173b1e9..35f3aa55be85 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1614,7 +1614,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, /* * The calls can still be using the old filters. - * Do a synchronize_sched() and to ensure all calls are + * Do a synchronize_rcu() and to ensure all calls are * done with them before we free them. */ tracepoint_synchronize_unregister(); @@ -1845,7 +1845,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, if (filter) { /* * No event actually uses the system filter - * we can free it without synchronize_sched(). + * we can free it without synchronize_rcu(). */ __free_filter(system->filter); system->filter = filter; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index fec67188c4d2..adc153ab51c0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -333,7 +333,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) * event_call related objects, which will be accessed in * the kprobe_trace_func/kretprobe_trace_func. 
*/ - synchronize_sched(); + synchronize_rcu(); kfree(link); /* Ignored if link == NULL */ } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index a3be42304485..46f2ab1e08a9 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -92,7 +92,7 @@ static __init int release_early_probes(void) while (early_probes) { tmp = early_probes; early_probes = tmp->next; - call_rcu_sched(tmp, rcu_free_old_probes); + call_rcu(tmp, rcu_free_old_probes); } return 0; @@ -123,7 +123,7 @@ static inline void release_probes(struct tracepoint_func *old) * cover both cases. So let us chain the SRCU and sched RCU * callbacks to wait for both grace periods. */ - call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes); + call_rcu(&tp_probes->rcu, rcu_free_old_probes); } } -- cgit v1.2.3 From aacb5d91ab1bfbb0e8123da59a2e333d52ba7f60 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 28 Oct 2018 10:32:51 -0700 Subject: srcu: Use "ssp" instead of "sp" for srcu_struct pointer In RCU, the distinction between "rsp", "rnp", and "rdp" has served well for a great many years, but in SRCU, "sp" vs. "sdp" has proven confusing. This commit therefore renames SRCU's "sp" pointers to "ssp", so that there is "ssp" for srcu_struct pointer, "snp" for srcu_node pointer, and "sdp" for srcu_data pointer. Signed-off-by: Paul E. 
McKenney --- include/linux/srcu.h | 78 ++++---- include/linux/srcutiny.h | 24 +-- include/linux/srcutree.h | 8 +- kernel/rcu/srcutiny.c | 120 ++++++------ kernel/rcu/srcutree.c | 488 +++++++++++++++++++++++------------------------ 5 files changed, 359 insertions(+), 359 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index ebd5f1511690..c614375cd264 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -38,20 +38,20 @@ struct srcu_struct; #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key); -#define init_srcu_struct(sp) \ +#define init_srcu_struct(ssp) \ ({ \ static struct lock_class_key __srcu_key; \ \ - __init_srcu_struct((sp), #sp, &__srcu_key); \ + __init_srcu_struct((ssp), #ssp, &__srcu_key); \ }) #define __SRCU_DEP_MAP_INIT(srcu_name) .dep_map = { .name = #srcu_name }, #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -int init_srcu_struct(struct srcu_struct *sp); +int init_srcu_struct(struct srcu_struct *ssp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -67,28 +67,28 @@ int init_srcu_struct(struct srcu_struct *sp); struct srcu_struct { }; #endif -void call_srcu(struct srcu_struct *sp, struct rcu_head *head, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void (*func)(struct rcu_head *head)); -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced); -int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); -void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); -void synchronize_srcu(struct srcu_struct *sp); +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced); +int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); +void synchronize_srcu(struct srcu_struct *ssp); /** * 
cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. */ -static inline void cleanup_srcu_struct(struct srcu_struct *sp) +static inline void cleanup_srcu_struct(struct srcu_struct *ssp) { - _cleanup_srcu_struct(sp, false); + _cleanup_srcu_struct(ssp, false); } /** * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. Also, @@ -103,16 +103,16 @@ static inline void cleanup_srcu_struct(struct srcu_struct *sp) * (with high probability, anyway), and will also cause the srcu_struct * to be leaked. */ -static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp) +static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *ssp) { - _cleanup_srcu_struct(sp, true); + _cleanup_srcu_struct(ssp, true); } #ifdef CONFIG_DEBUG_LOCK_ALLOC /** * srcu_read_lock_held - might we be in SRCU read-side critical section? - * @sp: The srcu_struct structure to check + * @ssp: The srcu_struct structure to check * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, @@ -126,16 +126,16 @@ static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp) * relies on normal RCU, it can be called from the CPU which * is in the idle loop from an RCU point of view or offline. 
*/ -static inline int srcu_read_lock_held(const struct srcu_struct *sp) +static inline int srcu_read_lock_held(const struct srcu_struct *ssp) { if (!debug_lockdep_rcu_enabled()) return 1; - return lock_is_held(&sp->dep_map); + return lock_is_held(&ssp->dep_map); } #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -static inline int srcu_read_lock_held(const struct srcu_struct *sp) +static inline int srcu_read_lock_held(const struct srcu_struct *ssp) { return 1; } @@ -145,7 +145,7 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing - * @sp: pointer to the srcu_struct, which is used to check that we + * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. * @c: condition to check for update-side use * @@ -154,32 +154,32 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) * to 1. The @c argument will normally be a logical expression containing * lockdep_is_held() calls. */ -#define srcu_dereference_check(p, sp, c) \ - __rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu) +#define srcu_dereference_check(p, ssp, c) \ + __rcu_dereference_check((p), (c) || srcu_read_lock_held(ssp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing - * @sp: pointer to the srcu_struct, which is used to check that we + * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. * * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU * is enabled, invoking this outside of an RCU read-side critical * section will result in an RCU-lockdep splat. 
*/ -#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) +#define srcu_dereference(p, ssp) srcu_dereference_check((p), (ssp), 0) /** * srcu_dereference_notrace - no tracing and no lockdep calls from here * @p: the pointer to fetch and protect for later dereferencing - * @sp: pointer to the srcu_struct, which is used to check that we + * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. */ -#define srcu_dereference_notrace(p, sp) srcu_dereference_check((p), (sp), 1) +#define srcu_dereference_notrace(p, ssp) srcu_dereference_check((p), (ssp), 1) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. - * @sp: srcu_struct in which to register the new reader. + * @ssp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side * critical sections may be nested. However, it is illegal to @@ -194,44 +194,44 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) * srcu_read_unlock() in an irq handler if the matching srcu_read_lock() * was invoked in process context. */ -static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) +static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; - retval = __srcu_read_lock(sp); - rcu_lock_acquire(&(sp)->dep_map); + retval = __srcu_read_lock(ssp); + rcu_lock_acquire(&(ssp)->dep_map); return retval; } /* Used by tracing, cannot be traced and cannot invoke lockdep. */ static inline notrace int -srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp) +srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; - retval = __srcu_read_lock(sp); + retval = __srcu_read_lock(ssp); return retval; } /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. - * @sp: srcu_struct in which to unregister the old reader. 
+ * @ssp: srcu_struct in which to unregister the old reader. * @idx: return value from corresponding srcu_read_lock(). * * Exit an SRCU read-side critical section. */ -static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) - __releases(sp) +static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) + __releases(ssp) { - rcu_lock_release(&(sp)->dep_map); - __srcu_read_unlock(sp, idx); + rcu_lock_release(&(ssp)->dep_map); + __srcu_read_unlock(ssp, idx); } /* Used by tracing, cannot be traced and cannot call lockdep. */ static inline notrace void -srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp) +srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { - __srcu_read_unlock(sp, idx); + __srcu_read_unlock(ssp, idx); } /** diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index f41d2fb09f87..b19216aaaef2 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -60,7 +60,7 @@ void srcu_drive_gp(struct work_struct *wp); #define DEFINE_STATIC_SRCU(name) \ static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) -void synchronize_srcu(struct srcu_struct *sp); +void synchronize_srcu(struct srcu_struct *ssp); /* * Counts the new reader in the appropriate per-CPU element of the @@ -68,36 +68,36 @@ void synchronize_srcu(struct srcu_struct *sp); * __srcu_read_unlock() must be in the same handler instance. Returns an * index that must be passed to the matching srcu_read_unlock(). 
*/ -static inline int __srcu_read_lock(struct srcu_struct *sp) +static inline int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - idx = READ_ONCE(sp->srcu_idx); - WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1); + idx = READ_ONCE(ssp->srcu_idx); + WRITE_ONCE(ssp->srcu_lock_nesting[idx], ssp->srcu_lock_nesting[idx] + 1); return idx; } -static inline void synchronize_srcu_expedited(struct srcu_struct *sp) +static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) { - synchronize_srcu(sp); + synchronize_srcu(ssp); } -static inline void srcu_barrier(struct srcu_struct *sp) +static inline void srcu_barrier(struct srcu_struct *ssp) { - synchronize_srcu(sp); + synchronize_srcu(ssp); } /* Defined here to avoid size increase for non-torture kernels. */ -static inline void srcu_torture_stats_print(struct srcu_struct *sp, +static inline void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) { int idx; - idx = READ_ONCE(sp->srcu_idx) & 0x1; + idx = READ_ONCE(ssp->srcu_idx) & 0x1; pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", tt, tf, idx, - READ_ONCE(sp->srcu_lock_nesting[!idx]), - READ_ONCE(sp->srcu_lock_nesting[idx])); + READ_ONCE(ssp->srcu_lock_nesting[!idx]), + READ_ONCE(ssp->srcu_lock_nesting[idx])); } #endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 0ae91b3a7406..6f292bd3e7db 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -51,7 +51,7 @@ struct srcu_data { unsigned long grpmask; /* Mask for leaf srcu_node */ /* ->srcu_data_have_cbs[]. 
*/ int cpu; - struct srcu_struct *sp; + struct srcu_struct *ssp; }; /* @@ -138,8 +138,8 @@ struct srcu_struct { #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) -void synchronize_srcu_expedited(struct srcu_struct *sp); -void srcu_barrier(struct srcu_struct *sp); -void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf); +void synchronize_srcu_expedited(struct srcu_struct *ssp); +void srcu_barrier(struct srcu_struct *ssp); +void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); #endif diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index b46e6683f8c9..32dfd6522548 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -37,30 +37,30 @@ int rcu_scheduler_active __read_mostly; static LIST_HEAD(srcu_boot_list); static bool srcu_init_done; -static int init_srcu_struct_fields(struct srcu_struct *sp) +static int init_srcu_struct_fields(struct srcu_struct *ssp) { - sp->srcu_lock_nesting[0] = 0; - sp->srcu_lock_nesting[1] = 0; - init_swait_queue_head(&sp->srcu_wq); - sp->srcu_cb_head = NULL; - sp->srcu_cb_tail = &sp->srcu_cb_head; - sp->srcu_gp_running = false; - sp->srcu_gp_waiting = false; - sp->srcu_idx = 0; - INIT_WORK(&sp->srcu_work, srcu_drive_gp); - INIT_LIST_HEAD(&sp->srcu_work.entry); + ssp->srcu_lock_nesting[0] = 0; + ssp->srcu_lock_nesting[1] = 0; + init_swait_queue_head(&ssp->srcu_wq); + ssp->srcu_cb_head = NULL; + ssp->srcu_cb_tail = &ssp->srcu_cb_head; + ssp->srcu_gp_running = false; + ssp->srcu_gp_waiting = false; + ssp->srcu_idx = 0; + INIT_WORK(&ssp->srcu_work, srcu_drive_gp); + INIT_LIST_HEAD(&ssp->srcu_work.entry); return 0; } #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key) { /* Don't re-initialize a lock while it is held. 
*/ - debug_check_no_locks_freed((void *)sp, sizeof(*sp)); - lockdep_init_map(&sp->dep_map, name, key, 0); - return init_srcu_struct_fields(sp); + debug_check_no_locks_freed((void *)ssp, sizeof(*ssp)); + lockdep_init_map(&ssp->dep_map, name, key, 0); + return init_srcu_struct_fields(ssp); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -68,15 +68,15 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); /* * init_srcu_struct - initialize a sleep-RCU structure - * @sp: structure to initialize. + * @ssp: structure to initialize. * * Must invoke this on a given srcu_struct before passing that srcu_struct * to any other function. Each srcu_struct represents a separate domain * of SRCU protection. */ -int init_srcu_struct(struct srcu_struct *sp) +int init_srcu_struct(struct srcu_struct *ssp) { - return init_srcu_struct_fields(sp); + return init_srcu_struct_fields(ssp); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -84,22 +84,22 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); /* * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. 
*/ -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) { - WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); + WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]); if (quiesced) - WARN_ON(work_pending(&sp->srcu_work)); + WARN_ON(work_pending(&ssp->srcu_work)); else - flush_work(&sp->srcu_work); - WARN_ON(sp->srcu_gp_running); - WARN_ON(sp->srcu_gp_waiting); - WARN_ON(sp->srcu_cb_head); - WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail); + flush_work(&ssp->srcu_work); + WARN_ON(ssp->srcu_gp_running); + WARN_ON(ssp->srcu_gp_waiting); + WARN_ON(ssp->srcu_cb_head); + WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail); } EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); @@ -107,13 +107,13 @@ EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); * Removes the count for the old reader from the appropriate element of * the srcu_struct. */ -void __srcu_read_unlock(struct srcu_struct *sp, int idx) +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { - int newval = sp->srcu_lock_nesting[idx] - 1; + int newval = ssp->srcu_lock_nesting[idx] - 1; - WRITE_ONCE(sp->srcu_lock_nesting[idx], newval); - if (!newval && READ_ONCE(sp->srcu_gp_waiting)) - swake_up_one(&sp->srcu_wq); + WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); + if (!newval && READ_ONCE(ssp->srcu_gp_waiting)) + swake_up_one(&ssp->srcu_wq); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -127,24 +127,24 @@ void srcu_drive_gp(struct work_struct *wp) int idx; struct rcu_head *lh; struct rcu_head *rhp; - struct srcu_struct *sp; + struct srcu_struct *ssp; - sp = container_of(wp, struct srcu_struct, srcu_work); - if (sp->srcu_gp_running || !READ_ONCE(sp->srcu_cb_head)) + ssp = container_of(wp, struct srcu_struct, srcu_work); + if (ssp->srcu_gp_running || !READ_ONCE(ssp->srcu_cb_head)) return; /* Already running or nothing to do. */ /* Remove recently arrived callbacks and wait for readers. 
*/ - WRITE_ONCE(sp->srcu_gp_running, true); + WRITE_ONCE(ssp->srcu_gp_running, true); local_irq_disable(); - lh = sp->srcu_cb_head; - sp->srcu_cb_head = NULL; - sp->srcu_cb_tail = &sp->srcu_cb_head; + lh = ssp->srcu_cb_head; + ssp->srcu_cb_head = NULL; + ssp->srcu_cb_tail = &ssp->srcu_cb_head; local_irq_enable(); - idx = sp->srcu_idx; - WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); - WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ - swait_event_exclusive(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); - WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ + idx = ssp->srcu_idx; + WRITE_ONCE(ssp->srcu_idx, !ssp->srcu_idx); + WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ + swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx])); + WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ /* Invoke the callbacks we removed above. */ while (lh) { @@ -161,9 +161,9 @@ void srcu_drive_gp(struct work_struct *wp) * at interrupt level, but the ->srcu_gp_running checks will * straighten that out. */ - WRITE_ONCE(sp->srcu_gp_running, false); - if (READ_ONCE(sp->srcu_cb_head)) - schedule_work(&sp->srcu_work); + WRITE_ONCE(ssp->srcu_gp_running, false); + if (READ_ONCE(ssp->srcu_cb_head)) + schedule_work(&ssp->srcu_work); } EXPORT_SYMBOL_GPL(srcu_drive_gp); @@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(srcu_drive_gp); * Enqueue an SRCU callback on the specified srcu_struct structure, * initiating grace-period processing if it is not already running. 
*/ -void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func) { unsigned long flags; @@ -179,14 +179,14 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, rhp->func = func; rhp->next = NULL; local_irq_save(flags); - *sp->srcu_cb_tail = rhp; - sp->srcu_cb_tail = &rhp->next; + *ssp->srcu_cb_tail = rhp; + ssp->srcu_cb_tail = &rhp->next; local_irq_restore(flags); - if (!READ_ONCE(sp->srcu_gp_running)) { + if (!READ_ONCE(ssp->srcu_gp_running)) { if (likely(srcu_init_done)) - schedule_work(&sp->srcu_work); - else if (list_empty(&sp->srcu_work.entry)) - list_add(&sp->srcu_work.entry, &srcu_boot_list); + schedule_work(&ssp->srcu_work); + else if (list_empty(&ssp->srcu_work.entry)) + list_add(&ssp->srcu_work.entry, &srcu_boot_list); } } EXPORT_SYMBOL_GPL(call_srcu); @@ -194,13 +194,13 @@ EXPORT_SYMBOL_GPL(call_srcu); /* * synchronize_srcu - wait for prior SRCU read-side critical-section completion */ -void synchronize_srcu(struct srcu_struct *sp) +void synchronize_srcu(struct srcu_struct *ssp) { struct rcu_synchronize rs; init_rcu_head_on_stack(&rs.head); init_completion(&rs.completion); - call_srcu(sp, &rs.head, wakeme_after_rcu); + call_srcu(ssp, &rs.head, wakeme_after_rcu); wait_for_completion(&rs.completion); destroy_rcu_head_on_stack(&rs.head); } @@ -219,13 +219,13 @@ void __init rcu_scheduler_starting(void) */ void __init srcu_init(void) { - struct srcu_struct *sp; + struct srcu_struct *ssp; srcu_init_done = true; while (!list_empty(&srcu_boot_list)) { - sp = list_first_entry(&srcu_boot_list, + ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, srcu_work.entry); - list_del_init(&sp->srcu_work.entry); - schedule_work(&sp->srcu_work); + list_del_init(&ssp->srcu_work.entry); + schedule_work(&ssp->srcu_work); } } diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 697a2d7e8e8a..3600d88d8956 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c 
@@ -56,7 +56,7 @@ static LIST_HEAD(srcu_boot_list); static bool __read_mostly srcu_init_done; static void srcu_invoke_callbacks(struct work_struct *work); -static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); +static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); static void process_srcu(struct work_struct *work); /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ @@ -92,7 +92,7 @@ do { \ * srcu_read_unlock() running against them. So if the is_static parameter * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. */ -static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) +static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static) { int cpu; int i; @@ -103,13 +103,13 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) struct srcu_node *snp_first; /* Work out the overall tree geometry. */ - sp->level[0] = &sp->node[0]; + ssp->level[0] = &ssp->node[0]; for (i = 1; i < rcu_num_lvls; i++) - sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1]; + ssp->level[i] = ssp->level[i - 1] + num_rcu_lvl[i - 1]; rcu_init_levelspread(levelspread, num_rcu_lvl); /* Each pass through this loop initializes one srcu_node structure. */ - srcu_for_each_node_breadth_first(sp, snp) { + srcu_for_each_node_breadth_first(ssp, snp) { spin_lock_init(&ACCESS_PRIVATE(snp, lock)); WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != ARRAY_SIZE(snp->srcu_data_have_cbs)); @@ -120,17 +120,17 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) snp->srcu_gp_seq_needed_exp = 0; snp->grplo = -1; snp->grphi = -1; - if (snp == &sp->node[0]) { + if (snp == &ssp->node[0]) { /* Root node, special case. */ snp->srcu_parent = NULL; continue; } /* Non-root node. 
*/ - if (snp == sp->level[level + 1]) + if (snp == ssp->level[level + 1]) level++; - snp->srcu_parent = sp->level[level - 1] + - (snp - sp->level[level]) / + snp->srcu_parent = ssp->level[level - 1] + + (snp - ssp->level[level]) / levelspread[level - 1]; } @@ -141,14 +141,14 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != ARRAY_SIZE(sdp->srcu_unlock_count)); level = rcu_num_lvls - 1; - snp_first = sp->level[level]; + snp_first = ssp->level[level]; for_each_possible_cpu(cpu) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; - sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; - sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq; + sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq; + sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq; sdp->mynode = &snp_first[cpu / levelspread[level]]; for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { if (snp->grplo < 0) @@ -157,7 +157,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) } sdp->cpu = cpu; INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks); - sdp->sp = sp; + sdp->ssp = ssp; sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); if (is_static) continue; @@ -176,35 +176,35 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) * parameter is passed through to init_srcu_struct_nodes(), and * also tells us that ->sda has already been wired up to srcu_data. 
*/ -static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static) +static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) { - mutex_init(&sp->srcu_cb_mutex); - mutex_init(&sp->srcu_gp_mutex); - sp->srcu_idx = 0; - sp->srcu_gp_seq = 0; - sp->srcu_barrier_seq = 0; - mutex_init(&sp->srcu_barrier_mutex); - atomic_set(&sp->srcu_barrier_cpu_cnt, 0); - INIT_DELAYED_WORK(&sp->work, process_srcu); + mutex_init(&ssp->srcu_cb_mutex); + mutex_init(&ssp->srcu_gp_mutex); + ssp->srcu_idx = 0; + ssp->srcu_gp_seq = 0; + ssp->srcu_barrier_seq = 0; + mutex_init(&ssp->srcu_barrier_mutex); + atomic_set(&ssp->srcu_barrier_cpu_cnt, 0); + INIT_DELAYED_WORK(&ssp->work, process_srcu); if (!is_static) - sp->sda = alloc_percpu(struct srcu_data); - init_srcu_struct_nodes(sp, is_static); - sp->srcu_gp_seq_needed_exp = 0; - sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */ - return sp->sda ? 0 : -ENOMEM; + ssp->sda = alloc_percpu(struct srcu_data); + init_srcu_struct_nodes(ssp, is_static); + ssp->srcu_gp_seq_needed_exp = 0; + ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */ + return ssp->sda ? 0 : -ENOMEM; } #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key) { /* Don't re-initialize a lock while it is held. 
*/ - debug_check_no_locks_freed((void *)sp, sizeof(*sp)); - lockdep_init_map(&sp->dep_map, name, key, 0); - spin_lock_init(&ACCESS_PRIVATE(sp, lock)); - return init_srcu_struct_fields(sp, false); + debug_check_no_locks_freed((void *)ssp, sizeof(*ssp)); + lockdep_init_map(&ssp->dep_map, name, key, 0); + spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); + return init_srcu_struct_fields(ssp, false); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -212,16 +212,16 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); /** * init_srcu_struct - initialize a sleep-RCU structure - * @sp: structure to initialize. + * @ssp: structure to initialize. * * Must invoke this on a given srcu_struct before passing that srcu_struct * to any other function. Each srcu_struct represents a separate domain * of SRCU protection. */ -int init_srcu_struct(struct srcu_struct *sp) +int init_srcu_struct(struct srcu_struct *ssp) { - spin_lock_init(&ACCESS_PRIVATE(sp, lock)); - return init_srcu_struct_fields(sp, false); + spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); + return init_srcu_struct_fields(ssp, false); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -231,37 +231,37 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); * First-use initialization of statically allocated srcu_struct * structure. Wiring up the combining tree is more than can be * done with compile-time initialization, so this check is added - * to each update-side SRCU primitive. Use sp->lock, which -is- + * to each update-side SRCU primitive. Use ssp->lock, which -is- * compile-time initialized, to resolve races involving multiple * CPUs trying to garner first-use privileges. */ -static void check_init_srcu_struct(struct srcu_struct *sp) +static void check_init_srcu_struct(struct srcu_struct *ssp) { unsigned long flags; /* The smp_load_acquire() pairs with the smp_store_release(). */ - if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ + if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. 
*/ - spin_lock_irqsave_rcu_node(sp, flags); - if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(ssp, flags); + if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) { + spin_unlock_irqrestore_rcu_node(ssp, flags); return; } - init_srcu_struct_fields(sp, true); - spin_unlock_irqrestore_rcu_node(sp, flags); + init_srcu_struct_fields(ssp, true); + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* * Returns approximate total of the readers' ->srcu_lock_count[] values * for the rank of per-CPU counters specified by idx. */ -static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) +static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_lock_count[idx]); } @@ -272,13 +272,13 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) * Returns approximate total of the readers' ->srcu_unlock_count[] values * for the rank of per-CPU counters specified by idx. */ -static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) +static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_unlock_count[idx]); } @@ -289,11 +289,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) * Return true if the number of pre-existing readers is determined to * be zero. 
*/ -static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) +static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx) { unsigned long unlocks; - unlocks = srcu_readers_unlock_idx(sp, idx); + unlocks = srcu_readers_unlock_idx(ssp, idx); /* * Make sure that a lock is always counted if the corresponding @@ -329,25 +329,25 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient, * especially on 64-bit systems. */ - return srcu_readers_lock_idx(sp, idx) == unlocks; + return srcu_readers_lock_idx(ssp, idx) == unlocks; } /** * srcu_readers_active - returns true if there are readers. and false * otherwise - * @sp: which srcu_struct to count active readers (holding srcu_read_lock). + * @ssp: which srcu_struct to count active readers (holding srcu_read_lock). * * Note that this is not an atomic primitive, and can therefore suffer * severe errors when invoked on an active srcu_struct. That said, it * can be useful as an error check at cleanup time. */ -static bool srcu_readers_active(struct srcu_struct *sp) +static bool srcu_readers_active(struct srcu_struct *ssp) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_lock_count[0]); sum += READ_ONCE(cpuc->srcu_lock_count[1]); @@ -363,44 +363,44 @@ static bool srcu_readers_active(struct srcu_struct *sp) * Return grace-period delay, zero if there are expedited grace * periods pending, SRCU_INTERVAL otherwise. 
*/ -static unsigned long srcu_get_delay(struct srcu_struct *sp) +static unsigned long srcu_get_delay(struct srcu_struct *ssp) { - if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq), - READ_ONCE(sp->srcu_gp_seq_needed_exp))) + if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), + READ_ONCE(ssp->srcu_gp_seq_needed_exp))) return 0; return SRCU_INTERVAL; } /* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */ -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) { int cpu; - if (WARN_ON(!srcu_get_delay(sp))) + if (WARN_ON(!srcu_get_delay(ssp))) return; /* Just leak it! */ - if (WARN_ON(srcu_readers_active(sp))) + if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ if (quiesced) { - if (WARN_ON(delayed_work_pending(&sp->work))) + if (WARN_ON(delayed_work_pending(&ssp->work))) return; /* Just leak it! */ } else { - flush_delayed_work(&sp->work); + flush_delayed_work(&ssp->work); } for_each_possible_cpu(cpu) if (quiesced) { - if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work))) + if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work))) return; /* Just leak it! */ } else { - flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); + flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work); } - if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || - WARN_ON(srcu_readers_active(sp))) { + if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || + WARN_ON(srcu_readers_active(ssp))) { pr_info("%s: Active srcu_struct %p state: %d\n", - __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); + __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))); return; /* Caller forgot to stop doing call_srcu()? */ } - free_percpu(sp->sda); - sp->sda = NULL; + free_percpu(ssp->sda); + ssp->sda = NULL; } EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); @@ -409,12 +409,12 @@ EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); * srcu_struct. 
* Returns an index that must be passed to the matching srcu_read_unlock(). */ -int __srcu_read_lock(struct srcu_struct *sp) +int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - idx = READ_ONCE(sp->srcu_idx) & 0x1; - this_cpu_inc(sp->sda->srcu_lock_count[idx]); + idx = READ_ONCE(ssp->srcu_idx) & 0x1; + this_cpu_inc(ssp->sda->srcu_lock_count[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -425,10 +425,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). */ -void __srcu_read_unlock(struct srcu_struct *sp, int idx) +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { smp_mb(); /* C */ /* Avoid leaking the critical section. */ - this_cpu_inc(sp->sda->srcu_unlock_count[idx]); + this_cpu_inc(ssp->sda->srcu_unlock_count[idx]); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -444,22 +444,22 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); /* * Start an SRCU grace period. */ -static void srcu_gp_start(struct srcu_struct *sp) +static void srcu_gp_start(struct srcu_struct *ssp) { - struct srcu_data *sdp = this_cpu_ptr(sp->sda); + struct srcu_data *sdp = this_cpu_ptr(ssp->sda); int state; - lockdep_assert_held(&ACCESS_PRIVATE(sp, lock)); - WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); + lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_gp_seq)); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&sp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_gp_seq)); spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. 
*/ - rcu_seq_start(&sp->srcu_gp_seq); - state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + rcu_seq_start(&ssp->srcu_gp_seq); + state = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); WARN_ON_ONCE(state != SRCU_STATE_SCAN1); } @@ -513,7 +513,7 @@ static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) * just-completed grace period, the one corresponding to idx. If possible, * schedule this invocation on the corresponding CPUs. */ -static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, +static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp, unsigned long mask, unsigned long delay) { int cpu; @@ -521,7 +521,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { if (!(mask & (1 << (cpu - snp->grplo)))) continue; - srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay); + srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay); } } @@ -534,7 +534,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, * are initiating callback invocation. This allows the ->srcu_have_cbs[] * array to have a finite number of elements. */ -static void srcu_gp_end(struct srcu_struct *sp) +static void srcu_gp_end(struct srcu_struct *ssp) { unsigned long cbdelay; bool cbs; @@ -548,28 +548,28 @@ static void srcu_gp_end(struct srcu_struct *sp) struct srcu_node *snp; /* Prevent more than one additional grace period. */ - mutex_lock(&sp->srcu_cb_mutex); + mutex_lock(&ssp->srcu_cb_mutex); /* End the current grace period. 
*/ - spin_lock_irq_rcu_node(sp); - idx = rcu_seq_state(sp->srcu_gp_seq); + spin_lock_irq_rcu_node(ssp); + idx = rcu_seq_state(ssp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); - cbdelay = srcu_get_delay(sp); - sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - rcu_seq_end(&sp->srcu_gp_seq); - gpseq = rcu_seq_current(&sp->srcu_gp_seq); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) - sp->srcu_gp_seq_needed_exp = gpseq; - spin_unlock_irq_rcu_node(sp); - mutex_unlock(&sp->srcu_gp_mutex); + cbdelay = srcu_get_delay(ssp); + ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + rcu_seq_end(&ssp->srcu_gp_seq); + gpseq = rcu_seq_current(&ssp->srcu_gp_seq); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) + ssp->srcu_gp_seq_needed_exp = gpseq; + spin_unlock_irq_rcu_node(ssp); + mutex_unlock(&ssp->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ /* Initiate callback invocation as needed. */ idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); - srcu_for_each_node_breadth_first(sp, snp) { + srcu_for_each_node_breadth_first(ssp, snp) { spin_lock_irq_rcu_node(snp); cbs = false; - last_lvl = snp >= sp->level[rcu_num_lvls - 1]; + last_lvl = snp >= ssp->level[rcu_num_lvls - 1]; if (last_lvl) cbs = snp->srcu_have_cbs[idx] == gpseq; snp->srcu_have_cbs[idx] = gpseq; @@ -580,12 +580,12 @@ static void srcu_gp_end(struct srcu_struct *sp) snp->srcu_data_have_cbs[idx] = 0; spin_unlock_irq_rcu_node(snp); if (cbs) - srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); + srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay); /* Occasionally prevent srcu_data counter wrap. 
*/ if (!(gpseq & counter_wrap_check) && last_lvl) for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_irqsave_rcu_node(sdp, flags); if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) @@ -598,18 +598,18 @@ static void srcu_gp_end(struct srcu_struct *sp) } /* Callback initiation done, allow grace periods after next. */ - mutex_unlock(&sp->srcu_cb_mutex); + mutex_unlock(&ssp->srcu_cb_mutex); /* Start a new grace period if needed. */ - spin_lock_irq_rcu_node(sp); - gpseq = rcu_seq_current(&sp->srcu_gp_seq); + spin_lock_irq_rcu_node(ssp); + gpseq = rcu_seq_current(&ssp->srcu_gp_seq); if (!rcu_seq_state(gpseq) && - ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { - srcu_gp_start(sp); - spin_unlock_irq_rcu_node(sp); - srcu_reschedule(sp, 0); + ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) { + srcu_gp_start(ssp); + spin_unlock_irq_rcu_node(ssp); + srcu_reschedule(ssp, 0); } else { - spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(ssp); } } @@ -620,13 +620,13 @@ static void srcu_gp_end(struct srcu_struct *sp) * but without expediting. To start a completely new grace period, * whether expedited or not, use srcu_funnel_gp_start() instead. 
*/ -static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, +static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp, unsigned long s) { unsigned long flags; for (; snp != NULL; snp = snp->srcu_parent) { - if (rcu_seq_done(&sp->srcu_gp_seq, s) || + if (rcu_seq_done(&ssp->srcu_gp_seq, s) || ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) return; spin_lock_irqsave_rcu_node(snp, flags); @@ -637,10 +637,10 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); spin_unlock_irqrestore_rcu_node(snp, flags); } - spin_lock_irqsave_rcu_node(sp, flags); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) - sp->srcu_gp_seq_needed_exp = s; - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(ssp, flags); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) + ssp->srcu_gp_seq_needed_exp = s; + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* @@ -653,7 +653,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, * Note that this function also does the work of srcu_funnel_exp_start(), * in some cases by directly invoking it. */ -static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, +static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, unsigned long s, bool do_norm) { unsigned long flags; @@ -663,7 +663,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, /* Each pass through the loop does one level of the srcu_node tree. */ for (; snp != NULL; snp = snp->srcu_parent) { - if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) + if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != sdp->mynode) return; /* GP already done and CBs recorded. 
*/ spin_lock_irqsave_rcu_node(snp, flags); if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { @@ -678,7 +678,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, return; } if (!do_norm) - srcu_funnel_exp_start(sp, snp, s); + srcu_funnel_exp_start(ssp, snp, s); return; } snp->srcu_have_cbs[idx] = s; @@ -690,29 +690,29 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, } /* Top of tree, must ensure the grace period will be started. */ - spin_lock_irqsave_rcu_node(sp, flags); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { + spin_lock_irqsave_rcu_node(ssp, flags); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load * acquire setting up for initialization. */ - smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/ + smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/ } - if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) - sp->srcu_gp_seq_needed_exp = s; + if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) + ssp->srcu_gp_seq_needed_exp = s; /* If grace period not already done and none in progress, start it. 
*/ - if (!rcu_seq_done(&sp->srcu_gp_seq, s) && - rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) { - WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); - srcu_gp_start(sp); + if (!rcu_seq_done(&ssp->srcu_gp_seq, s) && + rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) { + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); + srcu_gp_start(ssp); if (likely(srcu_init_done)) - queue_delayed_work(rcu_gp_wq, &sp->work, - srcu_get_delay(sp)); - else if (list_empty(&sp->work.work.entry)) - list_add(&sp->work.work.entry, &srcu_boot_list); + queue_delayed_work(rcu_gp_wq, &ssp->work, + srcu_get_delay(ssp)); + else if (list_empty(&ssp->work.work.entry)) + list_add(&ssp->work.work.entry, &srcu_boot_list); } - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* @@ -720,12 +720,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, * loop an additional time if there is an expedited grace period pending. * The caller must ensure that ->srcu_idx is not changed while checking. */ -static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) +static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { for (;;) { - if (srcu_readers_active_idx_check(sp, idx)) + if (srcu_readers_active_idx_check(ssp, idx)) return true; - if (--trycount + !srcu_get_delay(sp) <= 0) + if (--trycount + !srcu_get_delay(ssp) <= 0) return false; udelay(SRCU_RETRY_CHECK_DELAY); } @@ -736,7 +736,7 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows * us to wait for pre-existing readers in a starvation-free manner. 
*/ -static void srcu_flip(struct srcu_struct *sp) +static void srcu_flip(struct srcu_struct *ssp) { /* * Ensure that if this updater saw a given reader's increment @@ -748,7 +748,7 @@ static void srcu_flip(struct srcu_struct *sp) */ smp_mb(); /* E */ /* Pairs with B and C. */ - WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1); + WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); /* * Ensure that if the updater misses an __srcu_read_unlock() @@ -781,7 +781,7 @@ static void srcu_flip(struct srcu_struct *sp) * negligible when amoritized over that time period, and the extra latency * of a needlessly non-expedited grace period is similarly negligible. */ -static bool srcu_might_be_idle(struct srcu_struct *sp) +static bool srcu_might_be_idle(struct srcu_struct *ssp) { unsigned long curseq; unsigned long flags; @@ -790,7 +790,7 @@ static bool srcu_might_be_idle(struct srcu_struct *sp) /* If the local srcu_data structure has callbacks, not idle. */ local_irq_save(flags); - sdp = this_cpu_ptr(sp->sda); + sdp = this_cpu_ptr(ssp->sda); if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { local_irq_restore(flags); return false; /* Callbacks already present, so not idle. */ @@ -806,17 +806,17 @@ static bool srcu_might_be_idle(struct srcu_struct *sp) /* First, see if enough time has passed since the last GP. */ t = ktime_get_mono_fast_ns(); if (exp_holdoff == 0 || - time_in_range_open(t, sp->srcu_last_gp_end, - sp->srcu_last_gp_end + exp_holdoff)) + time_in_range_open(t, ssp->srcu_last_gp_end, + ssp->srcu_last_gp_end + exp_holdoff)) return false; /* Too soon after last GP. */ /* Next, check for probable idleness. */ - curseq = rcu_seq_current(&sp->srcu_gp_seq); + curseq = rcu_seq_current(&ssp->srcu_gp_seq); smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */ - if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed))) + if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_gp_seq_needed))) return false; /* Grace period in progress, so not idle. 
*/ smp_mb(); /* Order ->srcu_gp_seq with prior access. */ - if (curseq != rcu_seq_current(&sp->srcu_gp_seq)) + if (curseq != rcu_seq_current(&ssp->srcu_gp_seq)) return false; /* GP # changed, so not idle. */ return true; /* With reasonable probability, idle! */ } @@ -856,7 +856,7 @@ static void srcu_leak_callback(struct rcu_head *rhp) * srcu_read_lock(), and srcu_read_unlock() that are all passed the same * srcu_struct structure. */ -void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func, bool do_norm) { unsigned long flags; @@ -866,7 +866,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, unsigned long s; struct srcu_data *sdp; - check_init_srcu_struct(sp); + check_init_srcu_struct(ssp); if (debug_rcu_head_queue(rhp)) { /* Probable double call_srcu(), so leak the callback. */ WRITE_ONCE(rhp->func, srcu_leak_callback); @@ -874,14 +874,14 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, return; } rhp->func = func; - idx = srcu_read_lock(sp); + idx = srcu_read_lock(ssp); local_irq_save(flags); - sdp = this_cpu_ptr(sp->sda); + sdp = this_cpu_ptr(ssp->sda); spin_lock_rcu_node(sdp); rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); - s = rcu_seq_snap(&sp->srcu_gp_seq); + rcu_seq_current(&ssp->srcu_gp_seq)); + s = rcu_seq_snap(&ssp->srcu_gp_seq); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { sdp->srcu_gp_seq_needed = s; @@ -893,15 +893,15 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, } spin_unlock_irqrestore_rcu_node(sdp, flags); if (needgp) - srcu_funnel_gp_start(sp, sdp, s, do_norm); + srcu_funnel_gp_start(ssp, sdp, s, do_norm); else if (needexp) - srcu_funnel_exp_start(sp, sdp->mynode, s); - srcu_read_unlock(sp, idx); + srcu_funnel_exp_start(ssp, sdp->mynode, s); + srcu_read_unlock(ssp, 
idx); } /** * call_srcu() - Queue a callback for invocation after an SRCU grace period - * @sp: srcu_struct in queue the callback + * @ssp: srcu_struct in queue the callback * @rhp: structure to be used for queueing the SRCU callback. * @func: function to be invoked after the SRCU grace period * @@ -916,21 +916,21 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, * The callback will be invoked from process context, but must nevertheless * be fast and must not block. */ -void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func) { - __call_srcu(sp, rhp, func, true); + __call_srcu(ssp, rhp, func, true); } EXPORT_SYMBOL_GPL(call_srcu); /* * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). */ -static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) +static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm) { struct rcu_synchronize rcu; - RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) || + RCU_LOCKDEP_WARN(lock_is_held(&ssp->dep_map) || lock_is_held(&rcu_bh_lock_map) || lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), @@ -939,10 +939,10 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return; might_sleep(); - check_init_srcu_struct(sp); + check_init_srcu_struct(ssp); init_completion(&rcu.completion); init_rcu_head_on_stack(&rcu.head); - __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm); + __call_srcu(ssp, &rcu.head, wakeme_after_rcu, do_norm); wait_for_completion(&rcu.completion); destroy_rcu_head_on_stack(&rcu.head); @@ -958,7 +958,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) /** * synchronize_srcu_expedited - Brute-force SRCU grace period - * @sp: srcu_struct with which to synchronize. + * @ssp: srcu_struct with which to synchronize. 
* * Wait for an SRCU grace period to elapse, but be more aggressive about * spinning rather than blocking when waiting. @@ -966,15 +966,15 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) * Note that synchronize_srcu_expedited() has the same deadlock and * memory-ordering properties as does synchronize_srcu(). */ -void synchronize_srcu_expedited(struct srcu_struct *sp) +void synchronize_srcu_expedited(struct srcu_struct *ssp) { - __synchronize_srcu(sp, rcu_gp_is_normal()); + __synchronize_srcu(ssp, rcu_gp_is_normal()); } EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); /** * synchronize_srcu - wait for prior SRCU read-side critical-section completion - * @sp: srcu_struct with which to synchronize. + * @ssp: srcu_struct with which to synchronize. * * Wait for the count to drain to zero of both indexes. To avoid the * possible starvation of synchronize_srcu(), it waits for the count of @@ -1016,12 +1016,12 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); * SRCU must also provide it. Note that detecting idleness is heuristic * and subject to both false positives and negatives. 
*/ -void synchronize_srcu(struct srcu_struct *sp) +void synchronize_srcu(struct srcu_struct *ssp) { - if (srcu_might_be_idle(sp) || rcu_gp_is_expedited()) - synchronize_srcu_expedited(sp); + if (srcu_might_be_idle(ssp) || rcu_gp_is_expedited()) + synchronize_srcu_expedited(ssp); else - __synchronize_srcu(sp, true); + __synchronize_srcu(ssp, true); } EXPORT_SYMBOL_GPL(synchronize_srcu); @@ -1031,36 +1031,36 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); static void srcu_barrier_cb(struct rcu_head *rhp) { struct srcu_data *sdp; - struct srcu_struct *sp; + struct srcu_struct *ssp; sdp = container_of(rhp, struct srcu_data, srcu_barrier_head); - sp = sdp->sp; - if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) - complete(&sp->srcu_barrier_completion); + ssp = sdp->ssp; + if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) + complete(&ssp->srcu_barrier_completion); } /** * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. - * @sp: srcu_struct on which to wait for in-flight callbacks. + * @ssp: srcu_struct on which to wait for in-flight callbacks. */ -void srcu_barrier(struct srcu_struct *sp) +void srcu_barrier(struct srcu_struct *ssp) { int cpu; struct srcu_data *sdp; - unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq); + unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq); - check_init_srcu_struct(sp); - mutex_lock(&sp->srcu_barrier_mutex); - if (rcu_seq_done(&sp->srcu_barrier_seq, s)) { + check_init_srcu_struct(ssp); + mutex_lock(&ssp->srcu_barrier_mutex); + if (rcu_seq_done(&ssp->srcu_barrier_seq, s)) { smp_mb(); /* Force ordering following return. */ - mutex_unlock(&sp->srcu_barrier_mutex); + mutex_unlock(&ssp->srcu_barrier_mutex); return; /* Someone else did our work for us. */ } - rcu_seq_start(&sp->srcu_barrier_seq); - init_completion(&sp->srcu_barrier_completion); + rcu_seq_start(&ssp->srcu_barrier_seq); + init_completion(&ssp->srcu_barrier_completion); /* Initial count prevents reaching zero until all CBs are posted. 
*/ - atomic_set(&sp->srcu_barrier_cpu_cnt, 1); + atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); /* * Each pass through this loop enqueues a callback, but only @@ -1071,39 +1071,39 @@ void srcu_barrier(struct srcu_struct *sp) * grace period as the last callback already in the queue. */ for_each_possible_cpu(cpu) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_irq_rcu_node(sdp); - atomic_inc(&sp->srcu_barrier_cpu_cnt); + atomic_inc(&ssp->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); if (!rcu_segcblist_entrain(&sdp->srcu_cblist, &sdp->srcu_barrier_head, 0)) { debug_rcu_head_unqueue(&sdp->srcu_barrier_head); - atomic_dec(&sp->srcu_barrier_cpu_cnt); + atomic_dec(&ssp->srcu_barrier_cpu_cnt); } spin_unlock_irq_rcu_node(sdp); } /* Remove the initial count, at which point reaching zero can happen. */ - if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) - complete(&sp->srcu_barrier_completion); - wait_for_completion(&sp->srcu_barrier_completion); + if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) + complete(&ssp->srcu_barrier_completion); + wait_for_completion(&ssp->srcu_barrier_completion); - rcu_seq_end(&sp->srcu_barrier_seq); - mutex_unlock(&sp->srcu_barrier_mutex); + rcu_seq_end(&ssp->srcu_barrier_seq); + mutex_unlock(&ssp->srcu_barrier_mutex); } EXPORT_SYMBOL_GPL(srcu_barrier); /** * srcu_batches_completed - return batches completed. - * @sp: srcu_struct on which to report batch completion. + * @ssp: srcu_struct on which to report batch completion. * * Report the number of batches, correlated with, but not necessarily * precisely the same as, the number of grace periods that have elapsed. 
*/ -unsigned long srcu_batches_completed(struct srcu_struct *sp) +unsigned long srcu_batches_completed(struct srcu_struct *ssp) { - return sp->srcu_idx; + return ssp->srcu_idx; } EXPORT_SYMBOL_GPL(srcu_batches_completed); @@ -1112,11 +1112,11 @@ EXPORT_SYMBOL_GPL(srcu_batches_completed); * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has * completed in that state. */ -static void srcu_advance_state(struct srcu_struct *sp) +static void srcu_advance_state(struct srcu_struct *ssp) { int idx; - mutex_lock(&sp->srcu_gp_mutex); + mutex_lock(&ssp->srcu_gp_mutex); /* * Because readers might be delayed for an extended period after @@ -1128,47 +1128,47 @@ static void srcu_advance_state(struct srcu_struct *sp) * The load-acquire ensures that we see the accesses performed * by the prior grace period. */ - idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ + idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - spin_lock_irq_rcu_node(sp); - if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { - WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); - spin_unlock_irq_rcu_node(sp); - mutex_unlock(&sp->srcu_gp_mutex); + spin_lock_irq_rcu_node(ssp); + if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { + WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq)); + spin_unlock_irq_rcu_node(ssp); + mutex_unlock(&ssp->srcu_gp_mutex); return; } - idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) - srcu_gp_start(sp); - spin_unlock_irq_rcu_node(sp); + srcu_gp_start(ssp); + spin_unlock_irq_rcu_node(ssp); if (idx != SRCU_STATE_IDLE) { - mutex_unlock(&sp->srcu_gp_mutex); + mutex_unlock(&ssp->srcu_gp_mutex); return; /* Someone else started the grace period. 
*/ } } - if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { - idx = 1 ^ (sp->srcu_idx & 1); - if (!try_check_zero(sp, idx, 1)) { - mutex_unlock(&sp->srcu_gp_mutex); + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { + idx = 1 ^ (ssp->srcu_idx & 1); + if (!try_check_zero(ssp, idx, 1)) { + mutex_unlock(&ssp->srcu_gp_mutex); return; /* readers present, retry later. */ } - srcu_flip(sp); - rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2); + srcu_flip(ssp); + rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2); } - if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { /* * SRCU read-side critical sections are normally short, * so check at least twice in quick succession after a flip. */ - idx = 1 ^ (sp->srcu_idx & 1); - if (!try_check_zero(sp, idx, 2)) { - mutex_unlock(&sp->srcu_gp_mutex); + idx = 1 ^ (ssp->srcu_idx & 1); + if (!try_check_zero(ssp, idx, 2)) { + mutex_unlock(&ssp->srcu_gp_mutex); return; /* readers present, retry later. */ } - srcu_gp_end(sp); /* Releases ->srcu_gp_mutex. */ + srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. 
*/ } } @@ -1184,14 +1184,14 @@ static void srcu_invoke_callbacks(struct work_struct *work) struct rcu_cblist ready_cbs; struct rcu_head *rhp; struct srcu_data *sdp; - struct srcu_struct *sp; + struct srcu_struct *ssp; sdp = container_of(work, struct srcu_data, work.work); - sp = sdp->sp; + ssp = sdp->ssp; rcu_cblist_init(&ready_cbs); spin_lock_irq_rcu_node(sdp); rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_gp_seq)); if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { spin_unlock_irq_rcu_node(sdp); @@ -1217,7 +1217,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) spin_lock_irq_rcu_node(sdp); rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&sp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_gp_seq)); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); spin_unlock_irq_rcu_node(sdp); @@ -1229,24 +1229,24 @@ static void srcu_invoke_callbacks(struct work_struct *work) * Finished one round of SRCU grace period. Start another if there are * more SRCU callbacks queued, otherwise put SRCU into not-running state. */ -static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) +static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) { bool pushgp = true; - spin_lock_irq_rcu_node(sp); - if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { - if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { + spin_lock_irq_rcu_node(ssp); + if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { + if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ pushgp = false; } - } else if (!rcu_seq_state(sp->srcu_gp_seq)) { + } else if (!rcu_seq_state(ssp->srcu_gp_seq)) { /* Outstanding request and no GP. Start one. 
*/ - srcu_gp_start(sp); + srcu_gp_start(ssp); } - spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(ssp); if (pushgp) - queue_delayed_work(rcu_gp_wq, &sp->work, delay); + queue_delayed_work(rcu_gp_wq, &ssp->work, delay); } /* @@ -1254,41 +1254,41 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) */ static void process_srcu(struct work_struct *work) { - struct srcu_struct *sp; + struct srcu_struct *ssp; - sp = container_of(work, struct srcu_struct, work.work); + ssp = container_of(work, struct srcu_struct, work.work); - srcu_advance_state(sp); - srcu_reschedule(sp, srcu_get_delay(sp)); + srcu_advance_state(ssp); + srcu_reschedule(ssp, srcu_get_delay(ssp)); } void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, + struct srcu_struct *ssp, int *flags, unsigned long *gp_seq) { if (test_type != SRCU_FLAVOR) return; *flags = 0; - *gp_seq = rcu_seq_current(&sp->srcu_gp_seq); + *gp_seq = rcu_seq_current(&ssp->srcu_gp_seq); } EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); -void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf) +void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) { int cpu; int idx; unsigned long s0 = 0, s1 = 0; - idx = sp->srcu_idx & 0x1; + idx = ssp->srcu_idx & 0x1; pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):", - tt, tf, rcu_seq_current(&sp->srcu_gp_seq), idx); + tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), idx); for_each_possible_cpu(cpu) { unsigned long l0, l1; unsigned long u0, u1; long c0, c1; struct srcu_data *sdp; - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); u0 = sdp->srcu_unlock_count[!idx]; u1 = sdp->srcu_unlock_count[idx]; @@ -1323,14 +1323,14 @@ early_initcall(srcu_bootup_announce); void __init srcu_init(void) { - struct srcu_struct *sp; + struct srcu_struct *ssp; srcu_init_done = true; while (!list_empty(&srcu_boot_list)) { - sp = list_first_entry(&srcu_boot_list, struct srcu_struct, + ssp = 
list_first_entry(&srcu_boot_list, struct srcu_struct, work.work.entry); - check_init_srcu_struct(sp); - list_del_init(&sp->work.work.entry); - queue_work(rcu_gp_wq, &sp->work.work); + check_init_srcu_struct(ssp); + list_del_init(&ssp->work.work.entry); + queue_work(rcu_gp_wq, &ssp->work.work); } } -- cgit v1.2.3 From 70d188041e6f1f92004f1d5d7ddfd5013273b7a5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 14 Nov 2018 16:09:03 +0100 Subject: serdev: make synchronous write helper interruptible Allow the synchronous serdev_device_write() helper to be interrupted. This is useful for cases where I/O is performed on behalf of user space and we don't want to block indefinitely when using flow control. Signed-off-by: Johan Hovold Reviewed-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/core.c | 20 ++++++++++++++------ include/linux/serdev.h | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index ee4c40336633..c7006bbb793a 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(serdev_device_write_buf); int serdev_device_write(struct serdev_device *serdev, const unsigned char *buf, size_t count, - unsigned long timeout) + long timeout) { struct serdev_controller *ctrl = serdev->ctrl; int written = 0; @@ -254,16 +254,24 @@ int serdev_device_write(struct serdev_device *serdev, written += ret; buf += ret; count -= ret; - } while (count && - (timeout = wait_for_completion_timeout(&serdev->write_comp, - timeout))); + + if (count == 0) + break; + + timeout = wait_for_completion_interruptible_timeout(&serdev->write_comp, + timeout); + } while (timeout > 0); mutex_unlock(&serdev->write_lock); if (ret < 0) return ret; - if (timeout == 0 && written == 0) - return -ETIMEDOUT; + if (timeout <= 0 && written == 0) { + if (timeout == -ERESTARTSYS) + return -ERESTARTSYS; + else + return -ETIMEDOUT; + } 
return written; } diff --git a/include/linux/serdev.h b/include/linux/serdev.h index f153b2c7f0cd..070bf4e92df7 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -210,7 +210,7 @@ void serdev_device_wait_until_sent(struct serdev_device *, long); int serdev_device_get_tiocm(struct serdev_device *); int serdev_device_set_tiocm(struct serdev_device *, int, int); void serdev_device_write_wakeup(struct serdev_device *); -int serdev_device_write(struct serdev_device *, const unsigned char *, size_t, unsigned long); +int serdev_device_write(struct serdev_device *, const unsigned char *, size_t, long); void serdev_device_write_flush(struct serdev_device *); int serdev_device_write_room(struct serdev_device *); -- cgit v1.2.3 From faa2541f5b1afa8b6d777a73bc2f27d5c8c98695 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 26 Nov 2018 17:47:44 +0100 Subject: leds: trigger: Introduce audio mute LED trigger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new LED trigger for coupling the audio mixer change with the LED on laptops or other devices. Currently there are two trigger types, "audio-mute" and "audio-micmute". The audio driver triggers the LED brightness change via ledtrig_audio_set() call with the proper type (either mute or mic-mute). OTOH, the consumers may call ledtrig_audio_get() for the initial brightness value that may have been set by the audio driver beforehand. This new stuff will be used by HD-audio codec driver and some platform drivers (thinkpad_acpi and dell-laptop, also upcoming huawei-wmi). 
Acked-by: Jacek Anaszewski Acked-by: Pavel Machek Acked-by: Pali Rohár Signed-off-by: Takashi Iwai --- drivers/leds/trigger/Kconfig | 7 ++++++ drivers/leds/trigger/Makefile | 1 + drivers/leds/trigger/ledtrig-audio.c | 44 ++++++++++++++++++++++++++++++++++++ include/linux/leds.h | 20 ++++++++++++++++ 4 files changed, 72 insertions(+) create mode 100644 drivers/leds/trigger/ledtrig-audio.c (limited to 'include/linux') diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index b76fc3cdc8f8..23cc85e2e0e5 100644 --- a/drivers/leds/trigger/Kconfig +++ b/drivers/leds/trigger/Kconfig @@ -136,4 +136,11 @@ config LEDS_TRIGGER_PATTERN which is a series of tuples, of brightness and duration (ms). If unsure, say N +config LEDS_TRIGGER_AUDIO + tristate "Audio Mute LED Trigger" + help + This allows LEDs to be controlled by audio drivers for following + the audio mute and mic-mute changes. + If unsure, say N + endif # LEDS_TRIGGERS diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile index 9bcb64ee8123..733a83e2a718 100644 --- a/drivers/leds/trigger/Makefile +++ b/drivers/leds/trigger/Makefile @@ -14,3 +14,4 @@ obj-$(CONFIG_LEDS_TRIGGER_CAMERA) += ledtrig-camera.o obj-$(CONFIG_LEDS_TRIGGER_PANIC) += ledtrig-panic.o obj-$(CONFIG_LEDS_TRIGGER_NETDEV) += ledtrig-netdev.o obj-$(CONFIG_LEDS_TRIGGER_PATTERN) += ledtrig-pattern.o +obj-$(CONFIG_LEDS_TRIGGER_AUDIO) += ledtrig-audio.o diff --git a/drivers/leds/trigger/ledtrig-audio.c b/drivers/leds/trigger/ledtrig-audio.c new file mode 100644 index 000000000000..f76621e88482 --- /dev/null +++ b/drivers/leds/trigger/ledtrig-audio.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Audio Mute LED trigger +// + +#include +#include +#include + +static struct led_trigger *ledtrig_audio[NUM_AUDIO_LEDS]; +static enum led_brightness audio_state[NUM_AUDIO_LEDS]; + +enum led_brightness ledtrig_audio_get(enum led_audio type) +{ + return audio_state[type]; +} +EXPORT_SYMBOL_GPL(ledtrig_audio_get); + 
+void ledtrig_audio_set(enum led_audio type, enum led_brightness state) +{ + audio_state[type] = state; + led_trigger_event(ledtrig_audio[type], state); +} +EXPORT_SYMBOL_GPL(ledtrig_audio_set); + +static int __init ledtrig_audio_init(void) +{ + led_trigger_register_simple("audio-mute", + &ledtrig_audio[LED_AUDIO_MUTE]); + led_trigger_register_simple("audio-micmute", + &ledtrig_audio[LED_AUDIO_MICMUTE]); + return 0; +} +module_init(ledtrig_audio_init); + +static void __exit ledtrig_audio_exit(void) +{ + led_trigger_unregister_simple(ledtrig_audio[LED_AUDIO_MUTE]); + led_trigger_unregister_simple(ledtrig_audio[LED_AUDIO_MICMUTE]); +} +module_exit(ledtrig_audio_exit); + +MODULE_DESCRIPTION("LED trigger for audio mute control"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/leds.h b/include/linux/leds.h index 7393a316d9fa..580cbaef789a 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -487,4 +487,24 @@ struct led_pattern { int brightness; }; +enum led_audio { + LED_AUDIO_MUTE, /* master mute LED */ + LED_AUDIO_MICMUTE, /* mic mute LED */ + NUM_AUDIO_LEDS +}; + +#if IS_ENABLED(CONFIG_LEDS_TRIGGER_AUDIO) +enum led_brightness ledtrig_audio_get(enum led_audio type); +void ledtrig_audio_set(enum led_audio type, enum led_brightness state); +#else +static inline enum led_brightness ledtrig_audio_get(enum led_audio type) +{ + return LED_OFF; +} +static inline void ledtrig_audio_set(enum led_audio type, + enum led_brightness state) +{ +} +#endif + #endif /* __LINUX_LEDS_H_INCLUDED */ -- cgit v1.2.3 From bc184549853133303cf08d1f19477f9c87ef39fb Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 16 Nov 2018 15:41:41 +0200 Subject: ASoC: davinci-mcasp: Implement configurable dismod handling If the dismod is specified in the DT node, use the specified custom value to configure the drive on state of the inactive TX slots. If the dismod is not present or booted in legacy mode, the dismod is set to low as it was the original behavior. 
Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown --- include/linux/platform_data/davinci_asp.h | 1 + sound/soc/davinci/davinci-mcasp.c | 19 ++++++++++++++++++- sound/soc/davinci/davinci-mcasp.h | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/davinci_asp.h b/include/linux/platform_data/davinci_asp.h index 85ad68f9206a..7fe80f1c7e08 100644 --- a/include/linux/platform_data/davinci_asp.h +++ b/include/linux/platform_data/davinci_asp.h @@ -79,6 +79,7 @@ struct davinci_mcasp_pdata { /* McASP specific fields */ int tdm_slots; u8 op_mode; + u8 dismod; u8 num_serializer; u8 *serial_dir; u8 version; diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 0f3911be1c8e..40d3a916fb74 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -85,6 +85,7 @@ struct davinci_mcasp { u32 tdm_mask[2]; int slot_width; u8 op_mode; + u8 dismod; u8 num_serializer; u8 *serial_dir; u8 version; @@ -834,7 +835,7 @@ static int mcasp_common_hw_param(struct davinci_mcasp *mcasp, int stream, if (mcasp->serial_dir[i] == TX_MODE && tx_ser < max_active_serializers) { mcasp_mod_bits(mcasp, DAVINCI_MCASP_XRSRCTL_REG(i), - DISMOD_LOW, DISMOD_MASK); + mcasp->dismod, DISMOD_MASK); set_bit(PIN_BIT_AXR(i), &mcasp->pdir); tx_ser++; } else if (mcasp->serial_dir[i] == RX_MODE && @@ -847,6 +848,8 @@ static int mcasp_common_hw_param(struct davinci_mcasp *mcasp, int stream, clear_bit(PIN_BIT_AXR(i), &mcasp->pdir); } else if (mcasp->serial_dir[i] == TX_MODE) { /* Unused TX pins, clear PDIR */ + mcasp_mod_bits(mcasp, DAVINCI_MCASP_XRSRCTL_REG(i), + mcasp->dismod, DISMOD_MASK); clear_bit(PIN_BIT_AXR(i), &mcasp->pdir); } } @@ -1709,6 +1712,7 @@ static struct davinci_mcasp_pdata *davinci_mcasp_set_pdata_from_of( if (pdev->dev.platform_data) { pdata = pdev->dev.platform_data; + pdata->dismod = DISMOD_LOW; return pdata; } else if (match) { pdata = devm_kmemdup(&pdev->dev, 
match->data, sizeof(*pdata), @@ -1798,6 +1802,18 @@ static struct davinci_mcasp_pdata *davinci_mcasp_set_pdata_from_of( if (ret >= 0) pdata->sram_size_capture = val; + ret = of_property_read_u32(np, "dismod", &val); + if (ret >= 0) { + if (val == 0 || val == 2 || val == 3) { + pdata->dismod = DISMOD_VAL(val); + } else { + dev_warn(&pdev->dev, "Invalid dismod value: %u\n", val); + pdata->dismod = DISMOD_LOW; + } + } else { + pdata->dismod = DISMOD_LOW; + } + return pdata; nodata: @@ -1973,6 +1989,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev) mcasp->version = pdata->version; mcasp->txnumevt = pdata->txnumevt; mcasp->rxnumevt = pdata->rxnumevt; + mcasp->dismod = pdata->dismod; mcasp->dev = &pdev->dev; diff --git a/sound/soc/davinci/davinci-mcasp.h b/sound/soc/davinci/davinci-mcasp.h index acb024ab6a9d..5e4060d8fe56 100644 --- a/sound/soc/davinci/davinci-mcasp.h +++ b/sound/soc/davinci/davinci-mcasp.h @@ -209,6 +209,7 @@ #define DISMOD_3STATE (0x0) #define DISMOD_LOW (0x2 << 2) #define DISMOD_HIGH (0x3 << 2) +#define DISMOD_VAL(x) ((x) << 2) #define DISMOD_MASK DISMOD_HIGH #define TXSTATE BIT(4) #define RXSTATE BIT(5) -- cgit v1.2.3 From 94a2c3a32b62e868dc1e3d854326745a7f1b8c7a Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Wed, 28 Nov 2018 16:42:01 +0800 Subject: block: use rcu_work instead of call_rcu to avoid sleep in softirq We recently got a stack by syzkaller like this: BUG: sleeping function called from invalid context at mm/slab.h:361 in_atomic(): 1, irqs_disabled(): 0, pid: 6644, name: blkid INFO: lockdep is turned off. 
CPU: 1 PID: 6644 Comm: blkid Not tainted 4.4.163-514.55.6.9.x86_64+ #76 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 0000000000000000 5ba6a6b879e50c00 ffff8801f6b07b10 ffffffff81cb2194 0000000041b58ab3 ffffffff833c7745 ffffffff81cb2080 5ba6a6b879e50c00 0000000000000000 0000000000000001 0000000000000004 0000000000000000 Call Trace: [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0x114/0x1a0 lib/dump_stack.c:51 [] ___might_sleep+0x291/0x490 kernel/sched/core.c:7675 [] __might_sleep+0xb3/0x270 kernel/sched/core.c:7637 [] slab_pre_alloc_hook mm/slab.h:361 [inline] [] slab_alloc_node mm/slub.c:2610 [inline] [] slab_alloc mm/slub.c:2692 [inline] [] kmem_cache_alloc_trace+0x2c3/0x5c0 mm/slub.c:2709 [] kmalloc include/linux/slab.h:479 [inline] [] kzalloc include/linux/slab.h:623 [inline] [] kobject_uevent_env+0x2c7/0x1150 lib/kobject_uevent.c:227 [] kobject_uevent+0x1f/0x30 lib/kobject_uevent.c:374 [] kobject_cleanup lib/kobject.c:633 [inline] [] kobject_release+0x229/0x440 lib/kobject.c:675 [] kref_sub include/linux/kref.h:73 [inline] [] kref_put include/linux/kref.h:98 [inline] [] kobject_put+0x72/0xd0 lib/kobject.c:692 [] put_device+0x25/0x30 drivers/base/core.c:1237 [] delete_partition_rcu_cb+0x1d4/0x2f0 block/partition-generic.c:232 [] __rcu_reclaim kernel/rcu/rcu.h:118 [inline] [] rcu_do_batch kernel/rcu/tree.c:2705 [inline] [] invoke_rcu_callbacks kernel/rcu/tree.c:2973 [inline] [] __rcu_process_callbacks kernel/rcu/tree.c:2940 [inline] [] rcu_process_callbacks+0x59c/0x1c70 kernel/rcu/tree.c:2957 [] __do_softirq+0x299/0xe20 kernel/softirq.c:273 [] invoke_softirq kernel/softirq.c:350 [inline] [] irq_exit+0x216/0x2c0 kernel/softirq.c:391 [] exiting_irq arch/x86/include/asm/apic.h:652 [inline] [] smp_apic_timer_interrupt+0x8b/0xc0 arch/x86/kernel/apic/apic.c:926 [] apic_timer_interrupt+0xa5/0xb0 arch/x86/entry/entry_64.S:746 [] ? 
audit_kill_trees+0x180/0x180 [] fd_install+0x57/0x80 fs/file.c:626 [] do_sys_open+0x45e/0x550 fs/open.c:1043 [] SYSC_open fs/open.c:1055 [inline] [] SyS_open+0x32/0x40 fs/open.c:1050 [] entry_SYSCALL_64_fastpath+0x1e/0x9a In softirq context, we call rcu callback function delete_partition_rcu_cb(), which may allocate memory by kzalloc with GFP_KERNEL flag. If the allocation cannot be satisfied, it may sleep. However, that is not allowed in softirq context. Although we found this problem on linux 4.4, the latest kernel version seems to have this problem as well. And it is very similar to the previous one: https://lkml.org/lkml/2018/7/9/391 Fix it by using RCU workqueue, which allows sleep. Reviewed-by: Paul E. McKenney Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- block/partition-generic.c | 8 +++++--- include/linux/genhd.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/partition-generic.c b/block/partition-generic.c index d3d14e81fb12..5f8db5c5140f 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -249,9 +249,10 @@ struct device_type part_type = { .uevent = part_uevent, }; -static void delete_partition_rcu_cb(struct rcu_head *head) +static void delete_partition_work_fn(struct work_struct *work) { - struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); + struct hd_struct *part = container_of(to_rcu_work(work), struct hd_struct, + rcu_work); part->start_sect = 0; part->nr_sects = 0; @@ -262,7 +263,8 @@ static void delete_partition_rcu_cb(struct rcu_head *head) void __delete_partition(struct percpu_ref *ref) { struct hd_struct *part = container_of(ref, struct hd_struct, ref); - call_rcu(&part->rcu_head, delete_partition_rcu_cb); + INIT_RCU_WORK(&part->rcu_work, delete_partition_work_fn); + queue_rcu_work(system_wq, &part->rcu_work); } /* diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 70fc838e6773..0c5ee17b4d88 100644 --- a/include/linux/genhd.h
+++ b/include/linux/genhd.h @@ -129,7 +129,7 @@ struct hd_struct { struct disk_stats dkstats; #endif struct percpu_ref ref; - struct rcu_head rcu_head; + struct rcu_work rcu_work; }; #define GENHD_FL_REMOVABLE 1 -- cgit v1.2.3 From f783e128a6f1484d72ceab06d483ea32df0ce333 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 26 Nov 2018 17:47:46 +0100 Subject: platform/x86: dell-laptop: Drop superfluous exported function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we've switched to the LED trigger for binding with HD-audio, we can drop the exported function as well as the whole linux/dell-led.h. Acked-by: Jacek Anaszewski Acked-by: Pavel Machek Acked-by: Andy Shevchenko Acked-by: Pali Rohár Signed-off-by: Takashi Iwai --- drivers/platform/x86/dell-laptop.c | 22 +++++----------------- include/linux/dell-led.h | 7 ------- 2 files changed, 5 insertions(+), 24 deletions(-) delete mode 100644 include/linux/dell-led.h (limited to 'include/linux') diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 0db2dbf7b0d1..fb071e6a5058 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include "dell-rbtn.h" @@ -2109,17 +2108,17 @@ static struct notifier_block dell_laptop_notifier = { .notifier_call = dell_laptop_notifier_call, }; -int dell_micmute_led_set(int state) +static int micmute_led_set(struct led_classdev *led_cdev, + enum led_brightness brightness) { struct calling_interface_buffer buffer; struct calling_interface_token *token; + int state = brightness != LED_OFF; if (state == 0) token = dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE); - else if (state == 1) - token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE); else - return -EINVAL; + token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE); if (!token) return -ENODEV; @@ -2127,18 +2126,7 @@ int dell_micmute_led_set(int 
state) dell_fill_request(&buffer, token->location, token->value, 0, 0); dell_send_request(&buffer, CLASS_TOKEN_WRITE, SELECT_TOKEN_STD); - return state; -} -EXPORT_SYMBOL_GPL(dell_micmute_led_set); - -static int micmute_led_set(struct led_classdev *led_cdev, - enum led_brightness brightness) -{ - int state = brightness != LED_OFF; - int err; - - err = dell_micmute_led_set(state); - return err < 0 ? err : 0; + return 0; } static struct led_classdev micmute_led_cdev = { diff --git a/include/linux/dell-led.h b/include/linux/dell-led.h deleted file mode 100644 index 92521471517f..000000000000 --- a/include/linux/dell-led.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __DELL_LED_H__ -#define __DELL_LED_H__ - -int dell_micmute_led_set(int on); - -#endif -- cgit v1.2.3 From 9e908a180e6a90fa102d5d3f96ca86825f43e4fb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 26 Nov 2018 17:47:47 +0100 Subject: platform/x86: thinkpad_acpi: Drop superfluous exported function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we've switched to the LED trigger for binding with HD-audio, we can drop the exported function as well as the whole linux/thinkpad_acpi.h. The own TPACPI_LED_MUTE and TPACPI_LED_MICMUTE definitions are replaced with the identical ones for LEDS, i.e. LED_AUDIO_MUTE and LED_AUDIO_MICMUTE, respectively. They are no longer needed as referred only locally. 
Acked-by: Jacek Anaszewski Acked-by: Pavel Machek Acked-by: Andy Shevchenko Acked-by: Henrique de Moraes Holschuh Acked-by: Pali Rohár Signed-off-by: Takashi Iwai --- drivers/platform/x86/thinkpad_acpi.c | 30 +++++++++++------------------- include/linux/thinkpad_acpi.h | 16 ---------------- 2 files changed, 11 insertions(+), 35 deletions(-) delete mode 100644 include/linux/thinkpad_acpi.h (limited to 'include/linux') diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 3d2c1f5f22e2..21ffb961585a 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -81,7 +81,6 @@ #include #include #include -#include #include #include #include @@ -9150,6 +9149,7 @@ static struct ibm_struct fan_driver_data = { * Mute LED subdriver */ +#define TPACPI_LED_MAX 2 struct tp_led_table { acpi_string name; @@ -9158,13 +9158,13 @@ struct tp_led_table { int state; }; -static struct tp_led_table led_tables[] = { - [TPACPI_LED_MUTE] = { +static struct tp_led_table led_tables[TPACPI_LED_MAX] = { + [LED_AUDIO_MUTE] = { .name = "SSMS", .on_value = 1, .off_value = 0, }, - [TPACPI_LED_MICMUTE] = { + [LED_AUDIO_MICMUTE] = { .name = "MMTS", .on_value = 2, .off_value = 0, @@ -9189,40 +9189,36 @@ static int mute_led_on_off(struct tp_led_table *t, bool state) return state; } -int tpacpi_led_set(int whichled, bool on) +static int tpacpi_led_set(int whichled, bool on) { struct tp_led_table *t; - if (whichled < 0 || whichled >= TPACPI_LED_MAX) - return -EINVAL; - t = &led_tables[whichled]; if (t->state < 0 || t->state == on) return t->state; return mute_led_on_off(t, on); } -EXPORT_SYMBOL_GPL(tpacpi_led_set); static int tpacpi_led_mute_set(struct led_classdev *led_cdev, enum led_brightness brightness) { - return tpacpi_led_set(TPACPI_LED_MUTE, brightness != LED_OFF); + return tpacpi_led_set(LED_AUDIO_MUTE, brightness != LED_OFF); } static int tpacpi_led_micmute_set(struct led_classdev *led_cdev, enum led_brightness 
brightness) { - return tpacpi_led_set(TPACPI_LED_MICMUTE, brightness != LED_OFF); + return tpacpi_led_set(LED_AUDIO_MICMUTE, brightness != LED_OFF); } -static struct led_classdev mute_led_cdev[] = { - [TPACPI_LED_MUTE] = { +static struct led_classdev mute_led_cdev[TPACPI_LED_MAX] = { + [LED_AUDIO_MUTE] = { .name = "platform::mute", .max_brightness = 1, .brightness_set_blocking = tpacpi_led_mute_set, .default_trigger = "audio-mute", }, - [TPACPI_LED_MICMUTE] = { + [LED_AUDIO_MICMUTE] = { .name = "platform::micmute", .max_brightness = 1, .brightness_set_blocking = tpacpi_led_micmute_set, @@ -9232,10 +9228,6 @@ static struct led_classdev mute_led_cdev[] = { static int mute_led_init(struct ibm_init_struct *iibm) { - static enum led_audio types[] = { - [TPACPI_LED_MUTE] = LED_AUDIO_MUTE, - [TPACPI_LED_MICMUTE] = LED_AUDIO_MICMUTE, - }; acpi_handle temp; int i, err; @@ -9246,7 +9238,7 @@ static int mute_led_init(struct ibm_init_struct *iibm) continue; } - mute_led_cdev[i].brightness = ledtrig_audio_get(types[i]); + mute_led_cdev[i].brightness = ledtrig_audio_get(i); err = led_classdev_register(&tpacpi_pdev->dev, &mute_led_cdev[i]); if (err < 0) { while (i--) { diff --git a/include/linux/thinkpad_acpi.h b/include/linux/thinkpad_acpi.h deleted file mode 100644 index 9fb317970c01..000000000000 --- a/include/linux/thinkpad_acpi.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __THINKPAD_ACPI_H__ -#define __THINKPAD_ACPI_H__ - -/* These two functions return 0 if success, or negative error code - (e g -ENODEV if no led present) */ - -enum { - TPACPI_LED_MUTE, - TPACPI_LED_MICMUTE, - TPACPI_LED_MAX, -}; - -int tpacpi_led_set(int whichled, bool on); - -#endif -- cgit v1.2.3 From 97bce63408f192712574a4d9d6dcab794eed3a79 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Nov 2018 11:11:35 -0500 Subject: svcrdma: Optimize the logic that selects the R_key to invalidate o Select the R_key to invalidate while the CPU cache still contains the 
received RPC Call transport header, rather than waiting until we're about to send the RPC Reply. o Choose Send With Invalidate if there is exactly one distinct R_key in the received transport header. If there's more than one, the client will have to perform local invalidation after it has already waited for remote invalidation. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 63 +++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_sendto.c | 53 +++++++-------------------- 3 files changed, 77 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index e6e26918504c..7e22681333d0 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -135,6 +135,7 @@ struct svc_rdma_recv_ctxt { u32 rc_byte_len; unsigned int rc_page_count; unsigned int rc_hdr_count; + u32 rc_inv_rkey; struct page *rc_pages[RPCSVC_MAXPAGES]; }; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index b24d5b8f2fee..828b149eaaef 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -485,6 +485,68 @@ static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end) return p; } +/* RPC-over-RDMA Version One private extension: Remote Invalidation. + * Responder's choice: requester signals it can handle Send With + * Invalidate, and responder chooses one R_key to invalidate. + * + * If there is exactly one distinct R_key in the received transport + * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero. + * + * Perform this operation while the received transport header is + * still in the CPU cache. 
+ */ +static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma, + struct svc_rdma_recv_ctxt *ctxt) +{ + __be32 inv_rkey, *p; + u32 i, segcount; + + ctxt->rc_inv_rkey = 0; + + if (!rdma->sc_snd_w_inv) + return; + + inv_rkey = xdr_zero; + p = ctxt->rc_recv_buf; + p += rpcrdma_fixed_maxsz; + + /* Read list */ + while (*p++ != xdr_zero) { + p++; /* position */ + if (inv_rkey == xdr_zero) + inv_rkey = *p; + else if (inv_rkey != *p) + return; + p += 4; + } + + /* Write list */ + while (*p++ != xdr_zero) { + segcount = be32_to_cpup(p++); + for (i = 0; i < segcount; i++) { + if (inv_rkey == xdr_zero) + inv_rkey = *p; + else if (inv_rkey != *p) + return; + p += 4; + } + } + + /* Reply chunk */ + if (*p++ != xdr_zero) { + segcount = be32_to_cpup(p++); + for (i = 0; i < segcount; i++) { + if (inv_rkey == xdr_zero) + inv_rkey = *p; + else if (inv_rkey != *p) + return; + p += 4; + } + } + + ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey); +} + /* On entry, xdr->head[0].iov_base points to first byte in the * RPC-over-RDMA header. * @@ -746,6 +808,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return ret; } + svc_rdma_get_inv_rkey(rdma_xprt, ctxt); p += rpcrdma_fixed_maxsz; if (*p != xdr_zero) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 8602a5f1b515..d48bc6dd7b96 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -484,32 +484,6 @@ static void svc_rdma_get_write_arrays(__be32 *rdma_argp, *reply = NULL; } -/* RPC-over-RDMA Version One private extension: Remote Invalidation. - * Responder's choice: requester signals it can handle Send With - * Invalidate, and responder chooses one rkey to invalidate. - * - * Find a candidate rkey to invalidate when sending a reply. Picks the - * first R_key it finds in the chunk lists. - * - * Returns zero if RPC's chunk lists are empty. 
- */ -static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp, - __be32 *wr_lst, __be32 *rp_ch) -{ - __be32 *p; - - p = rdma_argp + rpcrdma_fixed_maxsz; - if (*p != xdr_zero) - p += 2; - else if (wr_lst && be32_to_cpup(wr_lst + 1)) - p = wr_lst + 2; - else if (rp_ch && be32_to_cpup(rp_ch + 1)) - p = rp_ch + 2; - else - return 0; - return be32_to_cpup(p); -} - static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt, struct page *page, @@ -672,7 +646,7 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, * * RDMA Send is the last step of transmitting an RPC reply. Pages * involved in the earlier RDMA Writes are here transferred out - * of the rqstp and into the ctxt's page array. These pages are + * of the rqstp and into the sctxt's page array. These pages are * DMA unmapped by each Write completion, but the subsequent Send * completion finally releases these pages. * @@ -680,32 +654,31 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, * - The Reply's transport header will never be larger than a page. 
*/ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt, - __be32 *rdma_argp, + struct svc_rdma_send_ctxt *sctxt, + struct svc_rdma_recv_ctxt *rctxt, struct svc_rqst *rqstp, __be32 *wr_lst, __be32 *rp_ch) { int ret; if (!rp_ch) { - ret = svc_rdma_map_reply_msg(rdma, ctxt, + ret = svc_rdma_map_reply_msg(rdma, sctxt, &rqstp->rq_res, wr_lst); if (ret < 0) return ret; } - svc_rdma_save_io_pages(rqstp, ctxt); + svc_rdma_save_io_pages(rqstp, sctxt); - ctxt->sc_send_wr.opcode = IB_WR_SEND; - if (rdma->sc_snd_w_inv) { - ctxt->sc_send_wr.ex.invalidate_rkey = - svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch); - if (ctxt->sc_send_wr.ex.invalidate_rkey) - ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; + if (rctxt->rc_inv_rkey) { + sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; + sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey; + } else { + sctxt->sc_send_wr.opcode = IB_WR_SEND; } dprintk("svcrdma: posting Send WR with %u sge(s)\n", - ctxt->sc_send_wr.num_sge); - return svc_rdma_send(rdma, &ctxt->sc_send_wr); + sctxt->sc_send_wr.num_sge); + return svc_rdma_send(rdma, &sctxt->sc_send_wr); } /* Given the client-provided Write and Reply chunks, the server was not @@ -809,7 +782,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) } svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp)); - ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp, + ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp, wr_lst, rp_ch); if (ret < 0) goto err1; -- cgit v1.2.3 From 9adcfaffc34d53e498637237fb3701560359d50b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 24 Nov 2018 13:10:25 +0900 Subject: printk: Make printk_emit() local function. printk_emit() is called from only devkmsg_write() in the same file. Save object size by making it a local function. 
Link: http://lkml.kernel.org/r/5cc99d2c-c408-34f7-d1fc-e1cd2a9e31da@i-love.sakura.ne.jp Cc: Steven Rostedt Signed-off-by: Tetsuo Handa Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/printk.h | 5 ----- kernel/printk/printk.c | 30 ++++++++++++++---------------- 2 files changed, 14 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index cf3eccfe1543..55aa96975fa2 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -166,11 +166,6 @@ int vprintk_emit(int facility, int level, asmlinkage __printf(1, 0) int vprintk(const char *fmt, va_list args); -asmlinkage __printf(5, 6) __cold -int printk_emit(int facility, int level, - const char *dict, size_t dictlen, - const char *fmt, ...); - asmlinkage __printf(1, 2) __cold int printk(const char *fmt, ...); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b77150ad1965..a1d88212a5d2 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -753,6 +753,19 @@ struct devkmsg_user { char buf[CONSOLE_EXT_LOG_MAX]; }; +static __printf(3, 4) __cold +int devkmsg_emit(int facility, int level, const char *fmt, ...) +{ + va_list args; + int r; + + va_start(args, fmt); + r = vprintk_emit(facility, level, NULL, 0, fmt, args); + va_end(args); + + return r; +} + static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) { char *buf, *line; @@ -811,7 +824,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) } } - printk_emit(facility, level, NULL, 0, "%s", line); + devkmsg_emit(facility, level, "%s", line); kfree(buf); return ret; } @@ -1936,21 +1949,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) } EXPORT_SYMBOL(vprintk); -asmlinkage int printk_emit(int facility, int level, - const char *dict, size_t dictlen, - const char *fmt, ...) 
-{ - va_list args; - int r; - - va_start(args, fmt); - r = vprintk_emit(facility, level, dict, dictlen, fmt, args); - va_end(args); - - return r; -} -EXPORT_SYMBOL(printk_emit); - int vprintk_default(const char *fmt, va_list args) { int r; -- cgit v1.2.3 From 58d81d64e06ffaea6bddc85ae2b7295c371bcc55 Mon Sep 17 00:00:00 2001 From: Priit Laes Date: Mon, 19 Nov 2018 20:01:22 +0200 Subject: lib: cordic: Move cordic macros and defines to header file Now that these macros are in header file, we can eventually clean up the duplicate macros present in the drivers that utilize the same cordic algorithm implementation. Also add CORDIC_ prefix to nonprefixed macros. Reviewed-by: Arend van Spriel Signed-off-by: Priit Laes Acked-by: Larry Finger Signed-off-by: Kalle Valo --- include/linux/cordic.h | 9 +++++++++ lib/cordic.c | 23 +++++++---------------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cordic.h b/include/linux/cordic.h index cf68ca4a508c..3d656f54d64f 100644 --- a/include/linux/cordic.h +++ b/include/linux/cordic.h @@ -18,6 +18,15 @@ #include +#define CORDIC_ANGLE_GEN 39797 +#define CORDIC_PRECISION_SHIFT 16 +#define CORDIC_NUM_ITER (CORDIC_PRECISION_SHIFT + 2) + +#define CORDIC_FIXED(X) ((s32)((X) << CORDIC_PRECISION_SHIFT)) +#define CORDIC_FLOAT(X) (((X) >= 0) \ + ? ((((X) >> (CORDIC_PRECISION_SHIFT - 1)) + 1) >> 1) \ + : -((((-(X)) >> (CORDIC_PRECISION_SHIFT - 1)) + 1) >> 1)) + /** * struct cordic_iq - i/q coordinate. * diff --git a/lib/cordic.c b/lib/cordic.c index 6cf477839ebd..8ef27c12956f 100644 --- a/lib/cordic.c +++ b/lib/cordic.c @@ -16,15 +16,6 @@ #include #include -#define CORDIC_ANGLE_GEN 39797 -#define CORDIC_PRECISION_SHIFT 16 -#define CORDIC_NUM_ITER (CORDIC_PRECISION_SHIFT + 2) - -#define FIXED(X) ((s32)((X) << CORDIC_PRECISION_SHIFT)) -#define FLOAT(X) (((X) >= 0) \ - ? 
((((X) >> (CORDIC_PRECISION_SHIFT - 1)) + 1) >> 1) \ - : -((((-(X)) >> (CORDIC_PRECISION_SHIFT - 1)) + 1) >> 1)) - static const s32 arctan_table[] = { 2949120, 1740967, @@ -64,16 +55,16 @@ struct cordic_iq cordic_calc_iq(s32 theta) coord.q = 0; angle = 0; - theta = FIXED(theta); + theta = CORDIC_FIXED(theta); signtheta = (theta < 0) ? -1 : 1; - theta = ((theta + FIXED(180) * signtheta) % FIXED(360)) - - FIXED(180) * signtheta; + theta = ((theta + CORDIC_FIXED(180) * signtheta) % CORDIC_FIXED(360)) - + CORDIC_FIXED(180) * signtheta; - if (FLOAT(theta) > 90) { - theta -= FIXED(180); + if (CORDIC_FLOAT(theta) > 90) { + theta -= CORDIC_FIXED(180); signx = -1; - } else if (FLOAT(theta) < -90) { - theta += FIXED(180); + } else if (CORDIC_FLOAT(theta) < -90) { + theta += CORDIC_FIXED(180); signx = -1; } -- cgit v1.2.3 From ce5b009cff1961137127edf91f44effd0eec8ffd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Nov 2018 17:13:56 -0700 Subject: block: improve logic around when to sort a plug list Only do it if we have requests for multiple queues in the same plug. 
Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + block/blk-mq.c | 23 ++++++++++++++++++----- include/linux/blkdev.h | 1 + 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index be9233400314..d107d016b92b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1780,6 +1780,7 @@ void blk_start_plug(struct blk_plug *plug) INIT_LIST_HEAD(&plug->mq_list); INIT_LIST_HEAD(&plug->cb_list); plug->rq_count = 0; + plug->multiple_queues = false; /* * Store ordering should not be needed here, since a potential diff --git a/block/blk-mq.c b/block/blk-mq.c index 5f4b93f424b4..2a1a653a8054 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1677,7 +1677,8 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) list_splice_init(&plug->mq_list, &list); plug->rq_count = 0; - list_sort(NULL, &list, plug_rq_cmp); + if (plug->rq_count > 2 && plug->multiple_queues) + list_sort(NULL, &list, plug_rq_cmp); this_q = NULL; this_hctx = NULL; @@ -1866,6 +1867,20 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, } } +static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) +{ + list_add_tail(&rq->queuelist, &plug->mq_list); + plug->rq_count++; + if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) { + struct request *tmp; + + tmp = list_first_entry(&plug->mq_list, struct request, + queuelist); + if (tmp->q != rq->q) + plug->multiple_queues = true; + } +} + static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = op_is_sync(bio->bi_opf); @@ -1932,8 +1947,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) trace_block_plug(q); } - list_add_tail(&rq->queuelist, &plug->mq_list); - plug->rq_count++; + blk_add_rq_to_plug(plug, rq); } else if (plug && !blk_queue_nomerges(q)) { blk_mq_bio_to_request(rq, bio); @@ -1950,8 +1964,7 @@ static blk_qc_t 
blk_mq_make_request(struct request_queue *q, struct bio *bio) list_del_init(&same_queue_rq->queuelist); plug->rq_count--; } - list_add_tail(&rq->queuelist, &plug->mq_list); - plug->rq_count++; + blk_add_rq_to_plug(plug, rq); blk_mq_put_ctx(data.ctx); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 02732cae6080..08d940f85fa0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1131,6 +1131,7 @@ struct blk_plug { struct list_head mq_list; /* blk-mq requests */ struct list_head cb_list; /* md requires an unplug callback */ unsigned short rq_count; + bool multiple_queues; }; #define BLK_MAX_REQUEST_COUNT 16 #define BLK_PLUG_FLUSH_SIZE (128 * 1024) -- cgit v1.2.3 From d666ba98f849ad44c4405ecc2180390ebe80f4f9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Nov 2018 17:02:25 -0700 Subject: blk-mq: add mq_ops->commit_rqs() blk-mq passes information to the hardware about any given request being the last that we will issue in this sequence. The point is that hardware can defer costly doorbell type writes to the last request. But if we run into errors issuing a sequence of requests, we may never send the request with bd->last == true set. For that case, we need a hook that tells the hardware that nothing else is coming right now. For failures returned by the drivers ->queue_rq() hook, the driver is responsible for flushing pending requests, if it uses bd->last to optimize that part. This works like before, no changes there. 
Reviewed-by: Omar Sandoval Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 16 ++++++++++++++++ include/linux/blk-mq.h | 10 ++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 2a1a653a8054..d8534107bb6f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1259,6 +1259,14 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, if (!list_empty(list)) { bool needs_restart; + /* + * If we didn't flush the entire list, we could have told + * the driver there was more coming, but that turned out to + * be a lie. + */ + if (q->mq_ops->commit_rqs) + q->mq_ops->commit_rqs(hctx); + spin_lock(&hctx->lock); list_splice_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); @@ -1865,6 +1873,14 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, blk_mq_end_request(rq, ret); } } + + /* + * If we didn't flush the entire list, we could have told + * the driver there was more coming, but that turned out to + * be a lie. + */ + if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs) + hctx->queue->mq_ops->commit_rqs(hctx); } static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b8de11e0603b..467f1dd21ccf 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -117,6 +117,7 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *); /* takes rq->cmd_flags as input, returns a hardware type index */ typedef int (rq_flags_to_type_fn)(struct request_queue *, unsigned int); typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); @@ -144,6 +145,15 @@ struct blk_mq_ops { */ queue_rq_fn *queue_rq; + /* + * If a driver uses bd->last to judge when to submit requests to + * hardware, it must define this function. 
In case of errors that + * make us stop issuing further requests, this hook serves the + * purpose of kicking the hardware (which the last request otherwise + * would have done). + */ + commit_rqs_fn *commit_rqs; + /* * Return a queue map type for the given request/bio flags */ -- cgit v1.2.3 From 20902be46c4da59b1891d238801146134e0e06b5 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:38:56 -0800 Subject: net/mlx5: Driver events notifier API Use atomic notifier chain to fire events to mlx5 core driver consumers (mlx5e/mlx5_ib) and provide mlx5 register/unregister notifier API. This API will replace the current mlx5_interface->event callback and all the logic around it, especially the delayed events logic introduced by commit 97834eba7c19 ("net/mlx5: Delay events till ib registration ends") Which is not needed anymore with this new API where the mlx5 interface can dynamically register/unregister its notifier. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 25 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 1 + include/linux/mlx5/driver.h | 4 ++++ 3 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 3ad004af37d7..560cc14c55f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -35,7 +35,8 @@ static struct mlx5_nb events_nbs_ref[] = { struct mlx5_events { struct mlx5_core_dev *dev; struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; - + /* driver notifier chain */ + struct atomic_notifier_head nh; /* port module events stats */ struct mlx5_pme_stats pme_stats; }; @@ -300,6 +301,7 @@ int mlx5_events_init(struct mlx5_core_dev *dev) if (!events) return -ENOMEM; + ATOMIC_INIT_NOTIFIER_HEAD(&events->nh); events->dev = dev; dev->priv.events = events; return 0; @@ -330,3 
+332,24 @@ void mlx5_events_stop(struct mlx5_core_dev *dev) for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); } + +int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_register(&events->nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_register); + +int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_unregister(&events->nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_unregister); + +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) +{ + return atomic_notifier_call_chain(&events->nh, event, data); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 23317e328b0b..4d78a459676e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -73,5 +73,6 @@ struct mlx5_pme_stats { }; void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); #endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ba64ecf72478..b96929d0cc9c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -1062,6 +1063,9 @@ struct mlx5_interface { void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); +int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); +int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); + int 
mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 58d180b34e98698fec178a469b700f1bb5a32c1f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:38:59 -0800 Subject: net/mlx5: Forward all mlx5 events to mlx5 notifiers chain This is to allow seamless migration to the new notifier chain API, and to eventually deprecate interfaces dev->event callback. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 3 +++ include/linux/mlx5/driver.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 7eedbea38a78..d63ba8813829 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -32,6 +32,7 @@ #include #include "mlx5_core.h" +#include "lib/mlx5.h" static LIST_HEAD(intf_list); static LIST_HEAD(mlx5_dev_list); @@ -425,6 +426,8 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, dev_ctx->intf->event(dev, dev_ctx->context, event, param); spin_unlock_irqrestore(&priv->ctx_lock, flags); + + mlx5_notifier_call_chain(dev->priv.events, event, (void *)param); } void mlx5_dev_list_lock(void) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b96929d0cc9c..14ca74707275 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -195,7 +195,7 @@ struct mlx5_rsc_debug { }; enum mlx5_dev_event { - MLX5_DEV_EVENT_SYS_ERROR, + MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */ MLX5_DEV_EVENT_PORT_UP, MLX5_DEV_EVENT_PORT_DOWN, MLX5_DEV_EVENT_PORT_INITIALIZED, -- cgit v1.2.3 From 02039fb659b366011f55b15890136754f3d82e2d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:01 -0800 Subject: net/mlx5: Remove unused events callback and logic The
mlx5_interface->event callback is not used by mlx5e/mlx5_ib anymore. We totally remove the delayed events logic work around, since with the dynamic notifier registration API it is not needed anymore, mlx5_ib can register its notifier and start receiving events exactly at the moment it is ready to handle them. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 109 +-------------------- drivers/net/ethernet/mellanox/mlx5/core/events.c | 8 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 3 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 10 -- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 3 - include/linux/mlx5/driver.h | 10 +- 6 files changed, 11 insertions(+), 132 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index d63ba8813829..d2ed14bc37c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -32,7 +32,6 @@ #include #include "mlx5_core.h" -#include "lib/mlx5.h" static LIST_HEAD(intf_list); static LIST_HEAD(mlx5_dev_list); @@ -46,75 +45,11 @@ struct mlx5_device_context { unsigned long state; }; -struct mlx5_delayed_event { - struct list_head list; - struct mlx5_core_dev *dev; - enum mlx5_dev_event event; - unsigned long param; -}; - enum { MLX5_INTERFACE_ADDED, MLX5_INTERFACE_ATTACHED, }; -static void add_delayed_event(struct mlx5_priv *priv, - struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_delayed_event *delayed_event; - - delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC); - if (!delayed_event) { - mlx5_core_err(dev, "event %d is missed\n", event); - return; - } - - mlx5_core_dbg(dev, "Accumulating event %d\n", event); - delayed_event->dev = dev; - delayed_event->event = event; - delayed_event->param = param; - list_add_tail(&delayed_event->list, &priv->waiting_events_list); -} - -static void 
delayed_event_release(struct mlx5_device_context *dev_ctx, - struct mlx5_priv *priv) -{ - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - struct mlx5_delayed_event *de; - struct mlx5_delayed_event *n; - struct list_head temp; - - INIT_LIST_HEAD(&temp); - - spin_lock_irq(&priv->ctx_lock); - - priv->is_accum_events = false; - list_splice_init(&priv->waiting_events_list, &temp); - if (!dev_ctx->context) - goto out; - list_for_each_entry_safe(de, n, &temp, list) - dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); - -out: - spin_unlock_irq(&priv->ctx_lock); - - list_for_each_entry_safe(de, n, &temp, list) { - list_del(&de->list); - kfree(de); - } -} - -/* accumulating events that can come after mlx5_ib calls to - * ib_register_device, till adding that interface to the events list. - */ -static void delayed_event_start(struct mlx5_priv *priv) -{ - spin_lock_irq(&priv->ctx_lock); - priv->is_accum_events = true; - spin_unlock_irq(&priv->ctx_lock); -} void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { @@ -130,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) dev_ctx->intf = intf; - delayed_event_start(priv); - dev_ctx->context = intf->add(dev); if (dev_ctx->context) { set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); @@ -143,8 +76,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_unlock_irq(&priv->ctx_lock); } - delayed_event_release(dev_ctx, priv); - if (!dev_ctx->context) kfree(dev_ctx); } @@ -188,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv if (!dev_ctx) return; - delayed_event_start(priv); if (intf->attach) { if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - goto out; + return; if (intf->attach(dev, dev_ctx->context)) - goto out; - + return; set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); } else { if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - goto out; + return; 
dev_ctx->context = intf->add(dev); if (!dev_ctx->context) - goto out; - + return; set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); } - -out: - delayed_event_release(dev_ctx, priv); } void mlx5_attach_device(struct mlx5_core_dev *dev) @@ -403,32 +328,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) return res; } -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - if (priv->is_accum_events) - add_delayed_event(priv, dev, event, param); - - /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is - * still in priv->ctx_list. In this case, only notify the dev_ctx if its - * ADDED or ATTACHED bit are set. - */ - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event && - (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) || - test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))) - dev_ctx->intf->event(dev, dev_ctx->context, event, param); - - spin_unlock_irqrestore(&priv->ctx_lock, flags); - - mlx5_notifier_call_chain(dev->priv.events, event, (void *)param); -} void mlx5_dev_list_lock(void) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index adab66eb726c..ab66f5d65a04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -178,8 +178,8 @@ static int port_change(struct notifier_block *nb, port, eqe->sub_type); } - if (dev->event && dev_event_dispatch) - dev->event(dev, dev_event, dev_event_data); + if (dev_event_dispatch) + mlx5_notifier_call_chain(events, dev_event, (void *)dev_event_data); return NOTIFY_OK; } @@ -207,8 +207,8 @@ static int general_event(struct notifier_block *nb, unsigned long type, void *da eqe->sub_type); } - if (dev->event && dev_event_dispatch) - 
dev->event(dev, dev_event, dev_event_data); + if (dev_event_dispatch) + mlx5_notifier_call_chain(events, dev_event, (void *)dev_event_data); return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 4e42bd290959..196c07383082 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -39,6 +39,7 @@ #include #include "mlx5_core.h" #include "lib/eq.h" +#include "lib/mlx5.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, @@ -105,7 +106,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mlx5_cmd_trigger_completions(dev); } - mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); mlx5_core_err(dev, "end\n"); unlock: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e56278ead4eb..4bc27a073dc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1125,12 +1125,6 @@ out: return err; } -struct mlx5_core_event_handler { - void (*event)(struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - void *data); -}; - static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, @@ -1164,7 +1158,6 @@ static int init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); dev->pdev = pdev; - dev->event = mlx5_core_event; dev->profile = &profile[prof_sel]; INIT_LIST_HEAD(&priv->ctx_list); @@ -1172,9 +1165,6 @@ static int init_one(struct pci_dev *pdev, mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); - INIT_LIST_HEAD(&priv->waiting_events_list); - priv->is_accum_events = false; - mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); INIT_LIST_HEAD(&priv->bfregs.reg_head.list); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index c70bd94e18d6..fd3141a4b3f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -102,9 +102,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); - -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 14ca74707275..d3ffc64f9a75 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -588,10 +588,7 @@ struct mlx5_priv { struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; - - struct list_head waiting_events_list; - bool is_accum_events; - struct mlx5_events *events; + struct mlx5_events *events; struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; @@ -696,9 +693,6 @@ struct mlx5_core_dev { /* sync interface state */ struct mutex intf_state_mutex; unsigned long intf_state; - void (*event) (struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - unsigned long param); struct mlx5_priv priv; struct mlx5_profile *profile; atomic_t num_qps; @@ -1053,8 +1047,6 @@ struct mlx5_interface { void (*remove)(struct mlx5_core_dev *dev, void *context); int (*attach)(struct mlx5_core_dev *dev, void *context); void (*detach)(struct mlx5_core_dev *dev, void *context); - void (*event)(struct mlx5_core_dev *dev, void *context, - enum mlx5_dev_event event, unsigned long param); void * (*get_dev)(void *context); int protocol; struct list_head list; -- cgit v1.2.3 
From b8267cd765b333673e05696b517d38a1a7eb5b2e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:05 -0800 Subject: net/mlx5: Remove all deprecated software versions of FW events Before the new mlx5 event notification infrastructure and API, mlx5_core used to process all events before forwarding them to mlx5 interfaces (mlx5e/mlx5_ib) and used to translate the event type enum to a software defined enum, this is not needed anymore since it is ok for mlx5e and mlx5_ib to receive FW events as is, at least the few ones mlx5 core allows. mlx5e and mlx5_ib already moved to use the new API and they only handle FW events types, it is now safe to remove all equivalent software defined events and the logic around them. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 92 +----------------------- include/linux/mlx5/driver.h | 9 --- 2 files changed, 1 insertion(+), 100 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 735a9b038a73..3708b42c1d6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -19,8 +19,6 @@ struct mlx5_event_nb { * separate notifiers callbacks, specifically by those mlx5 components. 
*/ static int any_notifier(struct notifier_block *, unsigned long, void *); -static int port_change(struct notifier_block *, unsigned long, void *); -static int general_event(struct notifier_block *, unsigned long, void *); static int temp_warn(struct notifier_block *, unsigned long, void *); static int port_module(struct notifier_block *, unsigned long, void *); @@ -28,9 +26,8 @@ static int port_module(struct notifier_block *, unsigned long, void *); static int forward_event(struct notifier_block *, unsigned long, void *); static struct mlx5_nb events_nbs_ref[] = { + /* Events to be proccessed by mlx5_core */ {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, - {.nb.notifier_call = port_change, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, - {.nb.notifier_call = general_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, @@ -127,93 +124,6 @@ static int any_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static enum mlx5_dev_event port_subtype2dev(u8 subtype) -{ - switch (subtype) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - return MLX5_DEV_EVENT_PORT_DOWN; - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - return MLX5_DEV_EVENT_PORT_UP; - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - return MLX5_DEV_EVENT_PORT_INITIALIZED; - case MLX5_PORT_CHANGE_SUBTYPE_LID: - return MLX5_DEV_EVENT_LID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - return MLX5_DEV_EVENT_PKEY_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - return MLX5_DEV_EVENT_GUID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - return MLX5_DEV_EVENT_CLIENT_REREG; - } - return -1; -} - -/* type == MLX5_EVENT_TYPE_PORT_CHANGE */ -static int port_change(struct notifier_block *nb, - unsigned long type, void *data) -{ - struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); - struct mlx5_events 
*events = event_nb->ctx; - struct mlx5_core_dev *dev = events->dev; - - bool dev_event_dispatch = false; - enum mlx5_dev_event dev_event; - unsigned long dev_event_data; - struct mlx5_eqe *eqe = data; - u8 port = (eqe->data.port.port >> 4) & 0xf; - - switch (eqe->sub_type) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - case MLX5_PORT_CHANGE_SUBTYPE_LID: - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - dev_event = port_subtype2dev(eqe->sub_type); - dev_event_data = (unsigned long)port; - dev_event_dispatch = true; - break; - default: - mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", - port, eqe->sub_type); - } - - if (dev_event_dispatch) - mlx5_notifier_call_chain(events, dev_event, (void *)dev_event_data); - - return NOTIFY_OK; -} - -/* type == MLX5_EVENT_TYPE_GENERAL_EVENT */ -static int general_event(struct notifier_block *nb, unsigned long type, void *data) -{ - struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); - struct mlx5_events *events = event_nb->ctx; - struct mlx5_core_dev *dev = events->dev; - - bool dev_event_dispatch = false; - enum mlx5_dev_event dev_event; - unsigned long dev_event_data; - struct mlx5_eqe *eqe = data; - - switch (eqe->sub_type) { - case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: - dev_event = MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT; - dev_event_data = 0; - dev_event_dispatch = true; - break; - default: - mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", - eqe->sub_type); - } - - if (dev_event_dispatch) - mlx5_notifier_call_chain(events, dev_event, (void *)dev_event_data); - - return NOTIFY_OK; -} - /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 
d3ffc64f9a75..a77bedb8a556 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -196,15 +196,6 @@ struct mlx5_rsc_debug { enum mlx5_dev_event { MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */ - MLX5_DEV_EVENT_PORT_UP, - MLX5_DEV_EVENT_PORT_DOWN, - MLX5_DEV_EVENT_PORT_INITIALIZED, - MLX5_DEV_EVENT_LID_CHANGE, - MLX5_DEV_EVENT_PKEY_CHANGE, - MLX5_DEV_EVENT_GUID_CHANGE, - MLX5_DEV_EVENT_CLIENT_REREG, - MLX5_DEV_EVENT_PPS, - MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, }; enum mlx5_port_status { -- cgit v1.2.3 From 451be51c0b474f790e9833cd575fd9a6fbd679df Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:06 -0800 Subject: net/mlx5: Forward QP/WorkQueues resource events Allow forwarding QP and WQ events to mlx5_core interfaces, e.g. mlx5_ib Use mlx5_notifier_register/unregister in qp.c in order to allow seamless transition of qp.c to infiniband subsystem. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/qp.c | 8 ++++---- include/linux/mlx5/driver.h | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 3708b42c1d6b..201c5f6091ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -34,6 +34,16 @@ static struct mlx5_nb events_nbs_ref[] = { /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + /* QP/WQ resource events to forward */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, + {.nb.notifier_call = forward_event, 
.event_type = MLX5_EVENT_TYPE_COMM_EST }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, }; struct mlx5_events { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 28726c63101f..388f205a497f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -150,7 +150,7 @@ static int rsc_event_notifier(struct notifier_block *nb, return NOTIFY_DONE; } - table = mlx5_nb_cof(nb, struct mlx5_qp_table, nb); + table = container_of(nb, struct mlx5_qp_table, nb); priv = container_of(table, struct mlx5_priv, qp_table); dev = container_of(priv, struct mlx5_core_dev, priv); @@ -523,15 +523,15 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev) INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); mlx5_qp_debugfs_init(dev); - MLX5_NB_INIT(&table->nb, rsc_event_notifier, NOTIFY_ANY); - mlx5_eq_notifier_register(dev, &table->nb); + table->nb.notifier_call = rsc_event_notifier; + mlx5_notifier_register(dev, &table->nb); } void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) { struct mlx5_qp_table *table = &dev->priv.qp_table; - mlx5_eq_notifier_unregister(dev, &table->nb); + mlx5_notifier_unregister(dev, &table->nb); mlx5_qp_debugfs_cleanup(dev); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a77bedb8a556..4f078b7f6620 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -456,7 +456,7 @@ struct mlx5_core_health { }; struct mlx5_qp_table { - struct mlx5_nb nb; + struct 
notifier_block nb; /* protect radix tree */ -- cgit v1.2.3 From 4e2df04ad25ab8e627878817e56d6a27645ca4a8 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Nov 2018 14:39:07 -0800 Subject: net/mlx5: Forward SRQ resource events Allow forwarding of SRQ events to mlx5_core interfaces, e.g. mlx5_ib. Use mlx5_notifier_register/unregister in srq.c in order to allow seamless transition of srq.c to infiniband subsystem. Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 3 ++ drivers/net/ethernet/mellanox/mlx5/core/srq.c | 38 +++++++----------------- include/linux/mlx5/driver.h | 3 +- 3 files changed, 14 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 201c5f6091ea..9e6e216faac3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -44,6 +44,9 @@ static struct mlx5_nb events_nbs_ref[] = { {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, + /* SRQ events */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, }; struct mlx5_events { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 0563866c13f2..79c5f0d57956 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -40,15 +40,21 @@ #include "mlx5_core.h" #include "lib/eq.h" -static int srq_event_notifier(struct mlx5_srq_table *table, +static int srq_event_notifier(struct notifier_block *nb, unsigned long type, void *data) { + struct mlx5_srq_table 
*table; struct mlx5_core_dev *dev; struct mlx5_core_srq *srq; struct mlx5_priv *priv; struct mlx5_eqe *eqe; u32 srqn; + if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR && + type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT) + return NOTIFY_DONE; + + table = container_of(nb, struct mlx5_srq_table, nb); priv = container_of(table, struct mlx5_priv, srq_table); dev = container_of(priv, struct mlx5_core_dev, priv); @@ -77,26 +83,6 @@ static int srq_event_notifier(struct mlx5_srq_table *table, return NOTIFY_OK; } -static int catas_err_notifier(struct notifier_block *nb, - unsigned long type, void *data) -{ - struct mlx5_srq_table *table; - - table = mlx5_nb_cof(nb, struct mlx5_srq_table, catas_err_nb); - /* type == MLX5_EVENT_TYPE_SRQ_CATAS_ERROR */ - return srq_event_notifier(table, type, data); -} - -static int rq_limit_notifier(struct notifier_block *nb, - unsigned long type, void *data) -{ - struct mlx5_srq_table *table; - - table = mlx5_nb_cof(nb, struct mlx5_srq_table, rq_limit_nb); - /* type == MLX5_EVENT_TYPE_SRQ_RQ_LIMIT */ - return srq_event_notifier(table, type, data); -} - static int get_pas_size(struct mlx5_srq_attr *in) { u32 log_page_size = in->log_page_size + 12; @@ -743,17 +729,13 @@ void mlx5_init_srq_table(struct mlx5_core_dev *dev) spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); - MLX5_NB_INIT(&table->catas_err_nb, catas_err_notifier, SRQ_CATAS_ERROR); - mlx5_eq_notifier_register(dev, &table->catas_err_nb); - - MLX5_NB_INIT(&table->rq_limit_nb, rq_limit_notifier, SRQ_RQ_LIMIT); - mlx5_eq_notifier_register(dev, &table->rq_limit_nb); + table->nb.notifier_call = srq_event_notifier; + mlx5_notifier_register(dev, &table->nb); } void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) { struct mlx5_srq_table *table = &dev->priv.srq_table; - mlx5_eq_notifier_unregister(dev, &table->rq_limit_nb); - mlx5_eq_notifier_unregister(dev, &table->catas_err_nb); + mlx5_notifier_unregister(dev, &table->nb); } diff --git a/include/linux/mlx5/driver.h 
b/include/linux/mlx5/driver.h index 4f078b7f6620..27a481b159ed 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -465,8 +465,7 @@ struct mlx5_qp_table { }; struct mlx5_srq_table { - struct mlx5_nb catas_err_nb; - struct mlx5_nb rq_limit_nb; + struct notifier_block nb; /* protect radix tree */ spinlock_t lock; -- cgit v1.2.3 From 23621fac32ec9dbc4afada344cbf82b0f6281be3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 18:32:40 -0500 Subject: function_graph: Remove unused task_curr_ret_stack() The static inline function task_curr_ret_stack() is unused, remove it. Reviewed-by: Joel Fernandes (Google) Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index dd16e8218db3..10bd46434908 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -809,11 +809,6 @@ extern void ftrace_graph_init_task(struct task_struct *t); extern void ftrace_graph_exit_task(struct task_struct *t); extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu); -static inline int task_curr_ret_stack(struct task_struct *t) -{ - return t->curr_ret_stack; -} - static inline void pause_graph_tracing(void) { atomic_inc(¤t->tracing_graph_pause); @@ -838,11 +833,6 @@ static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc, } static inline void unregister_ftrace_graph(void) { } -static inline int task_curr_ret_stack(struct task_struct *tsk) -{ - return -1; -} - static inline unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp) -- cgit v1.2.3 From 47c33a095e1fae376d74b4160a0d73c1a4e73969 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Prakhya Date: Thu, 29 Nov 2018 18:12:25 +0100 Subject: x86/efi: Move efi__boot_services() to arch/x86 efi__boot_services() are x86 specific 
quirks and as such should be in asm/efi.h, so move them from linux/efi.h. Also, call efi_free_boot_services() from __efi_enter_virtual_mode() as it is x86 specific call and ideally shouldn't be part of init/main.c Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Ard Biesheuvel Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sedat Dilek Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-7-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 2 ++ arch/x86/platform/efi/efi.c | 2 ++ include/linux/efi.h | 3 --- init/main.c | 4 ---- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index eea40d52ca78..d1e64ac80b9c 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -141,6 +141,8 @@ extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); extern void efi_switch_mm(struct mm_struct *mm); extern void efi_recover_from_page_fault(unsigned long phys_addr); +extern void efi_free_boot_services(void); +extern void efi_reserve_boot_services(void); struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 7ae939e353cd..e1cb01a22fa8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -993,6 +993,8 @@ static void __init __efi_enter_virtual_mode(void) panic("EFI call to SetVirtualAddressMap() failed!"); } + efi_free_boot_services(); + /* * Now that EFI is in virtual mode, update the function * pointers in the runtime service table to the new virtual addresses. 
diff --git a/include/linux/efi.h b/include/linux/efi.h index 100ce4a4aff6..2b3b33c83b05 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1000,13 +1000,11 @@ extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timespec64 *ts); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ #ifdef CONFIG_X86 -extern void efi_free_boot_services(void); extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, bool nonblocking); extern void efi_find_mirror(void); #else -static inline void efi_free_boot_services(void) {} static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, @@ -1046,7 +1044,6 @@ extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params); extern struct kobject *efi_kobj; diff --git a/init/main.c b/init/main.c index ee147103ba1b..ccefcd8e855f 100644 --- a/init/main.c +++ b/init/main.c @@ -737,10 +737,6 @@ asmlinkage __visible void __init start_kernel(void) arch_post_acpi_subsys_init(); sfi_init_late(); - if (efi_enabled(EFI_RUNTIME_SERVICES)) { - efi_free_boot_services(); - } - /* Do the rest non-__init'ed, we're now alive */ arch_call_rest_init(); } -- cgit v1.2.3 From 5f0b0ecf043a5319e729c11a53bc8294df12dab3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 29 Nov 2018 18:12:28 +0100 Subject: efi: Permit multiple entries in persistent memreserve data structure In preparation of updating efi_mem_reserve_persistent() to cause less fragmentation when dealing with many persistent reservations, update the struct definition and the code that handles it currently so it can describe an arbitrary 
number of reservations using a single linked list entry. The actual optimization will be implemented in a subsequent patch. Tested-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-10-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 39 +++++++++++++++++++++++---------- drivers/firmware/efi/libstub/arm-stub.c | 2 +- include/linux/efi.h | 13 ++++++++--- 3 files changed, 38 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 415849bab233..80b11521627a 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -602,21 +602,33 @@ int __init efi_apply_persistent_mem_reservations(void) while (prsv) { struct linux_efi_memreserve *rsv; - - /* reserve the entry itself */ - memblock_reserve(prsv, sizeof(*rsv)); - - rsv = early_memremap(prsv, sizeof(*rsv)); - if (rsv == NULL) { + u8 *p; + int i; + + /* + * Just map a full page: that is what we will get + * anyway, and it permits us to map the entire entry + * before knowing its size. 
+ */ + p = early_memremap(ALIGN_DOWN(prsv, PAGE_SIZE), + PAGE_SIZE); + if (p == NULL) { pr_err("Could not map UEFI memreserve entry!\n"); return -ENOMEM; } - if (rsv->size) - memblock_reserve(rsv->base, rsv->size); + rsv = (void *)(p + prsv % PAGE_SIZE); + + /* reserve the entry itself */ + memblock_reserve(prsv, EFI_MEMRESERVE_SIZE(rsv->size)); + + for (i = 0; i < atomic_read(&rsv->count); i++) { + memblock_reserve(rsv->entry[i].base, + rsv->entry[i].size); + } prsv = rsv->next; - early_memunmap(rsv, sizeof(*rsv)); + early_memunmap(p, PAGE_SIZE); } } @@ -985,6 +997,7 @@ static int __init efi_memreserve_map_root(void) int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) { struct linux_efi_memreserve *rsv; + int rsvsize = EFI_MEMRESERVE_SIZE(1); int rc; if (efi_memreserve_root == (void *)ULONG_MAX) @@ -996,12 +1009,14 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) return rc; } - rsv = kmalloc(sizeof(*rsv), GFP_ATOMIC); + rsv = kmalloc(rsvsize, GFP_ATOMIC); if (!rsv) return -ENOMEM; - rsv->base = addr; - rsv->size = size; + rsv->size = 1; + atomic_set(&rsv->count, 1); + rsv->entry[0].base = addr; + rsv->entry[0].size = size; spin_lock(&efi_mem_reserve_persistent_lock); rsv->next = efi_memreserve_root->next; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 3d36142cf812..9e20159ea5f5 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -86,8 +86,8 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg) } rsv->next = 0; - rsv->base = 0; rsv->size = 0; + atomic_set(&rsv->count, 0); status = efi_call_early(install_configuration_table, &memreserve_table_guid, diff --git a/include/linux/efi.h b/include/linux/efi.h index 2b3b33c83b05..4f27640fdcdc 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1712,9 +1712,16 @@ extern struct efi_runtime_work efi_rts_work; extern struct workqueue_struct *efi_rts_wq; struct 
linux_efi_memreserve { - phys_addr_t next; - phys_addr_t base; - phys_addr_t size; + int size; // allocated size of the array + atomic_t count; // number of entries used + phys_addr_t next; // pa of next struct instance + struct { + phys_addr_t base; + phys_addr_t size; + } entry[0]; }; +#define EFI_MEMRESERVE_SIZE(count) (sizeof(struct linux_efi_memreserve) + \ + (count) * sizeof(((struct linux_efi_memreserve *)0)->entry[0])) + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From 80424b02d42bb22f8ff8839cb93a84ade53b39c0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 29 Nov 2018 18:12:29 +0100 Subject: efi: Reduce the amount of memblock reservations for persistent allocations The current implementation of efi_mem_reserve_persistent() is rather naive, in the sense that for each invocation, it creates a separate linked list entry to describe the reservation. Since the linked list entries themselves need to persist across subsequent kexec reboots, every reservation created this way results in two memblock_reserve() calls at the next boot. On arm64 systems with 100s of CPUs, this may result in an excessive number of memblock reservations, and needless fragmentation. So instead, make use of the newly updated struct linux_efi_memreserve layout to put multiple reservations into a single linked list entry. This should get rid of the numerous tiny memblock reservations, and effectively cut the total number of reservations in half on arm64 systems with many CPUs. [ mingo: build warning fix. 
] Tested-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-11-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 21 +++++++++++++++++---- include/linux/efi.h | 3 +++ 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 80b11521627a..4c46ff6f2242 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -997,8 +997,8 @@ static int __init efi_memreserve_map_root(void) int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) { struct linux_efi_memreserve *rsv; - int rsvsize = EFI_MEMRESERVE_SIZE(1); - int rc; + unsigned long prsv; + int rc, index; if (efi_memreserve_root == (void *)ULONG_MAX) return -ENODEV; @@ -1009,11 +1009,24 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) return rc; } - rsv = kmalloc(rsvsize, GFP_ATOMIC); + /* first try to find a slot in an existing linked list entry */ + for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { + rsv = __va(prsv); + index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); + if (index < rsv->size) { + rsv->entry[index].base = addr; + rsv->entry[index].size = size; + + return 0; + } + } + + /* no slot found - allocate a new linked list entry */ + rsv = (struct linux_efi_memreserve *)__get_free_page(GFP_ATOMIC); if (!rsv) return -ENOMEM; - rsv->size = 1; + rsv->size = EFI_MEMRESERVE_COUNT(PAGE_SIZE); atomic_set(&rsv->count, 1); rsv->entry[0].base = addr; rsv->entry[0].size = size; diff --git a/include/linux/efi.h 
b/include/linux/efi.h index 4f27640fdcdc..becd5d76a207 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1724,4 +1724,7 @@ struct linux_efi_memreserve { #define EFI_MEMRESERVE_SIZE(count) (sizeof(struct linux_efi_memreserve) + \ (count) * sizeof(((struct linux_efi_memreserve *)0)->entry[0])) +#define EFI_MEMRESERVE_COUNT(size) (((size) - sizeof(struct linux_efi_memreserve)) \ + / sizeof(((struct linux_efi_memreserve *)0)->entry[0])) + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From ad697a1aecac19ec351063b5d8e6fc9d4bca7ee5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 15 Nov 2018 22:41:58 +0000 Subject: linkage: add generic GLOBAL() macro Declaring a global symbol in assembly is tedious, error-prone, and painful to read. While ENTRY() exists, this is supposed to be used for function entry points, and this affects alignment in a potentially undesirable manner. Instead, let's add a generic GLOBAL() macro for this, as x86 added locally in commit: 95695547a7db44b8 ("x86: asm linkage - introduce GLOBAL macro") ... thus allowing us to use this more freely in the kernel. Signed-off-by: Mark Rutland Cc: AKASHI Takahiro Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Torsten Duwe Cc: Will Deacon Signed-off-by: Will Deacon --- include/linux/linkage.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 7c47b1a471d4..7e020782ade2 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -79,6 +79,12 @@ #define ALIGN __ALIGN #define ALIGN_STR __ALIGN_STR +#ifndef GLOBAL +#define GLOBAL(name) \ + .globl name ASM_NL \ + name: +#endif + #ifndef ENTRY #define ENTRY(name) \ .globl name ASM_NL \ -- cgit v1.2.3 From ada5c1da8660ecae24b3e75c18ee77d79e099fee Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 30 Nov 2018 10:04:08 +1100 Subject: fs/locks: rename some lists and pointers. 
struct file lock contains an 'fl_next' pointer which is used to point to the lock that this request is blocked waiting for. So rename it to fl_blocker. The fl_blocked list_head in an active lock is the head of a list of blocked requests. In a request it is a node in that list. These are two distinct uses, so replace with two list_heads with different names. fl_blocked_requests is the head of a list of blocked requests fl_blocked_member is a node in a member of that list. The two different list_heads are never used at the same time, but that will change in a future patch. Note that a tracepoint is changed to report fl_blocker instead of fl_next. Signed-off-by: NeilBrown Reviewed-by: J. Bruce Fields Signed-off-by: Jeff Layton --- fs/cifs/file.c | 2 +- fs/locks.c | 59 ++++++++++++++++++++++------------------- include/linux/fs.h | 9 +++++-- include/trace/events/filelock.h | 16 +++++------ 4 files changed, 47 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 74c33d5fafc8..d7ed895e05d1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1103,7 +1103,7 @@ try_again: rc = posix_lock_file(file, flock, NULL); up_write(&cinode->lock_sem); if (rc == FILE_LOCK_DEFERRED) { - rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); + rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker); if (!rc) goto try_again; posix_unblock_lock(flock); diff --git a/fs/locks.c b/fs/locks.c index 2ecb4db8c840..c6df0c8b3d13 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -189,9 +189,9 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * This lock protects the blocked_hash. Generally, if you're accessing it, you * want to be holding this lock. * - * In addition, it also protects the fl->fl_block list, and the fl->fl_next - * pointer for file_lock structures that are acting as lock requests (in - * contrast to those that are acting as records of acquired locks). 
+ * In addition, it also protects the fl->fl_blocked_requests list, and the + * fl->fl_blocker pointer for file_lock structures that are acting as lock + * requests (in contrast to those that are acting as records of acquired locks). * * Note that when we acquire this lock in order to change the above fields, * we often hold the flc_lock as well. In certain cases, when reading the fields @@ -293,7 +293,8 @@ static void locks_init_lock_heads(struct file_lock *fl) { INIT_HLIST_NODE(&fl->fl_link); INIT_LIST_HEAD(&fl->fl_list); - INIT_LIST_HEAD(&fl->fl_block); + INIT_LIST_HEAD(&fl->fl_blocked_requests); + INIT_LIST_HEAD(&fl->fl_blocked_member); init_waitqueue_head(&fl->fl_wait); } @@ -332,7 +333,8 @@ void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(&fl->fl_wait)); BUG_ON(!list_empty(&fl->fl_list)); - BUG_ON(!list_empty(&fl->fl_block)); + BUG_ON(!list_empty(&fl->fl_blocked_requests)); + BUG_ON(!list_empty(&fl->fl_blocked_member)); BUG_ON(!hlist_unhashed(&fl->fl_link)); locks_release_private(fl); @@ -666,8 +668,8 @@ static void locks_delete_global_blocked(struct file_lock *waiter) static void __locks_delete_block(struct file_lock *waiter) { locks_delete_global_blocked(waiter); - list_del_init(&waiter->fl_block); - waiter->fl_next = NULL; + list_del_init(&waiter->fl_blocked_member); + waiter->fl_blocker = NULL; } static void locks_delete_block(struct file_lock *waiter) @@ -683,16 +685,17 @@ static void locks_delete_block(struct file_lock *waiter) * it seems like the reasonable thing to do. * * Must be called with both the flc_lock and blocked_lock_lock held. The - * fl_block list itself is protected by the blocked_lock_lock, but by ensuring - * that the flc_lock is also held on insertions we can avoid taking the - * blocked_lock_lock in some cases when we see that the fl_block list is empty. 
+ * fl_blocked_requests list itself is protected by the blocked_lock_lock, + * but by ensuring that the flc_lock is also held on insertions we can avoid + * taking the blocked_lock_lock in some cases when we see that the + * fl_blocked_requests list is empty. */ static void __locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) { - BUG_ON(!list_empty(&waiter->fl_block)); - waiter->fl_next = blocker; - list_add_tail(&waiter->fl_block, &blocker->fl_block); + BUG_ON(!list_empty(&waiter->fl_blocked_member)); + waiter->fl_blocker = blocker; + list_add_tail(&waiter->fl_blocked_member, &blocker->fl_blocked_requests); if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) locks_insert_global_blocked(waiter); } @@ -716,19 +719,19 @@ static void locks_wake_up_blocks(struct file_lock *blocker) /* * Avoid taking global lock if list is empty. This is safe since new * blocked requests are only added to the list under the flc_lock, and - * the flc_lock is always held here. Note that removal from the fl_block - * list does not require the flc_lock, so we must recheck list_empty() - * after acquiring the blocked_lock_lock. + * the flc_lock is always held here. Note that removal from the + * fl_blocked_requests list does not require the flc_lock, so we must + * recheck list_empty() after acquiring the blocked_lock_lock. 
*/ - if (list_empty(&blocker->fl_block)) + if (list_empty(&blocker->fl_blocked_requests)) return; spin_lock(&blocked_lock_lock); - while (!list_empty(&blocker->fl_block)) { + while (!list_empty(&blocker->fl_blocked_requests)) { struct file_lock *waiter; - waiter = list_first_entry(&blocker->fl_block, - struct file_lock, fl_block); + waiter = list_first_entry(&blocker->fl_blocked_requests, + struct file_lock, fl_blocked_member); __locks_delete_block(waiter); if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) waiter->fl_lmops->lm_notify(waiter); @@ -878,7 +881,7 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { if (posix_same_owner(fl, block_fl)) - return fl->fl_next; + return fl->fl_blocker; } return NULL; } @@ -1237,7 +1240,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); + error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); if (!error) continue; @@ -1324,7 +1327,7 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start, error = posix_lock_inode(inode, &fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); + error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker); if (!error) { /* * If we've been sleeping someone might have @@ -1518,7 +1521,7 @@ restart: locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->fl_wait, - !new_fl->fl_next, break_time); + !new_fl->fl_blocker, break_time); percpu_down_read_preempt_disable(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -1931,7 +1934,7 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; - error = 
wait_event_interruptible(fl->fl_wait, !fl->fl_next); + error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); if (!error) continue; @@ -2210,7 +2213,7 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd, error = vfs_lock_file(filp, cmd, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); + error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); if (!error) continue; @@ -2581,7 +2584,7 @@ posix_unblock_lock(struct file_lock *waiter) int status = 0; spin_lock(&blocked_lock_lock); - if (waiter->fl_next) + if (waiter->fl_blocker) __locks_delete_block(waiter); else status = -ENOENT; @@ -2707,7 +2710,7 @@ static int locks_show(struct seq_file *f, void *v) lock_get_status(f, fl, iter->li_pos, ""); - list_for_each_entry(bfl, &fl->fl_block, fl_block) + list_for_each_entry(bfl, &fl->fl_blocked_requests, fl_blocked_member) lock_get_status(f, bfl, iter->li_pos, " ->"); return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index c95c0807471f..16df3a7df378 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1044,10 +1044,15 @@ bool opens_in_grace(struct net *); * Obviously, the last two criteria only matter for POSIX locks. 
*/ struct file_lock { - struct file_lock *fl_next; /* singly linked list for this inode */ + struct file_lock *fl_blocker; /* The lock, that is blocking us */ struct list_head fl_list; /* link into file_lock_context */ struct hlist_node fl_link; /* node in global lists */ - struct list_head fl_block; /* circular list of blocked processes */ + struct list_head fl_blocked_requests; /* list of requests with + * ->fl_blocker pointing here + */ + struct list_head fl_blocked_member; /* node in + * ->fl_blocker->fl_blocked_requests + */ fl_owner_t fl_owner; unsigned int fl_flags; unsigned char fl_type; diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index 68b17c116907..fad7befa612d 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(filelock_lock, __field(struct file_lock *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) - __field(struct file_lock *, fl_next) + __field(struct file_lock *, fl_blocker) __field(fl_owner_t, fl_owner) __field(unsigned int, fl_pid) __field(unsigned int, fl_flags) @@ -82,7 +82,7 @@ DECLARE_EVENT_CLASS(filelock_lock, __entry->fl = fl ? fl : NULL; __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; - __entry->fl_next = fl ? fl->fl_next : NULL; + __entry->fl_blocker = fl ? fl->fl_blocker : NULL; __entry->fl_owner = fl ? fl->fl_owner : NULL; __entry->fl_pid = fl ? fl->fl_pid : 0; __entry->fl_flags = fl ? 
fl->fl_flags : 0; @@ -92,9 +92,9 @@ DECLARE_EVENT_CLASS(filelock_lock, __entry->ret = ret; ), - TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d", + TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_blocker=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d", __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), - __entry->i_ino, __entry->fl_next, __entry->fl_owner, + __entry->i_ino, __entry->fl_blocker, __entry->fl_owner, __entry->fl_pid, show_fl_flags(__entry->fl_flags), show_fl_type(__entry->fl_type), __entry->fl_start, __entry->fl_end, __entry->ret) @@ -125,7 +125,7 @@ DECLARE_EVENT_CLASS(filelock_lease, __field(struct file_lock *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) - __field(struct file_lock *, fl_next) + __field(struct file_lock *, fl_blocker) __field(fl_owner_t, fl_owner) __field(unsigned int, fl_flags) __field(unsigned char, fl_type) @@ -137,7 +137,7 @@ DECLARE_EVENT_CLASS(filelock_lease, __entry->fl = fl ? fl : NULL; __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; - __entry->fl_next = fl ? fl->fl_next : NULL; + __entry->fl_blocker = fl ? fl->fl_blocker : NULL; __entry->fl_owner = fl ? fl->fl_owner : NULL; __entry->fl_flags = fl ? fl->fl_flags : 0; __entry->fl_type = fl ? fl->fl_type : 0; @@ -145,9 +145,9 @@ DECLARE_EVENT_CLASS(filelock_lease, __entry->fl_downgrade_time = fl ? 
fl->fl_downgrade_time : 0; ), - TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_flags=%s fl_type=%s fl_break_time=%lu fl_downgrade_time=%lu", + TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_blocker=0x%p fl_owner=0x%p fl_flags=%s fl_type=%s fl_break_time=%lu fl_downgrade_time=%lu", __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), - __entry->i_ino, __entry->fl_next, __entry->fl_owner, + __entry->i_ino, __entry->fl_blocker, __entry->fl_owner, show_fl_flags(__entry->fl_flags), show_fl_type(__entry->fl_type), __entry->fl_break_time, __entry->fl_downgrade_time) -- cgit v1.2.3 From 36907cd5cd720c5a6d36670b49eba3b1f7f4d8fe Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 28 Nov 2018 18:16:02 +0200 Subject: qed: Add doorbell overflow recovery mechanism Add the database used to register doorbelling entities, and APIs for adding and deleting entries, and logic for traversing the database and doorbelling once on behalf of all entities. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Tomer Tayar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 17 ++ drivers/net/ethernet/qlogic/qed/qed_dev.c | 320 ++++++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 28 +++ include/linux/qed/qed_if.h | 14 ++ 4 files changed, 379 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index d9a03aba0e02..fb399ee681d3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -296,6 +296,12 @@ enum qed_wol_support { QED_WOL_SUPPORT_PME, }; +enum qed_db_rec_exec { + DB_REC_DRY_RUN, + DB_REC_REAL_DEAL, + DB_REC_ONCE, +}; + struct qed_hw_info { /* PCI personality */ enum qed_pci_personality personality; @@ -425,6 +431,14 @@ struct qed_qm_info { u8 num_pf_rls; }; +struct qed_db_recovery_info { + struct list_head list; + + /* Lock to protect the doorbell recovery mechanism list */ + spinlock_t lock; + u32 db_recovery_counter; +}; + struct storm_stats { u32 address; u32 len; @@ -640,6 +654,9 @@ struct qed_hwfn { /* L2-related */ struct qed_l2_info *p_l2_info; + /* Mechanism for recovering from doorbell drop */ + struct qed_db_recovery_info db_recovery_info; + /* Nvm images number and attributes */ struct qed_nvm_image_info nvm_info; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 88a8576ca9ce..19b8a6d72832 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -66,6 +66,318 @@ static DEFINE_SPINLOCK(qm_lock); +/******************** Doorbell Recovery *******************/ +/* The doorbell recovery mechanism consists of a list of entries which represent + * doorbelling entities (l2 queues, roce sq/rq/cqs, the slowpath spq, etc). Each + * entity needs to register with the mechanism and provide the parameters + * describing its doorbell, including a location where last used doorbell data + * can be found.
The doorbell execute function will traverse the list and + * doorbell all of the registered entries. + */ +struct qed_db_recovery_entry { + struct list_head list_entry; + void __iomem *db_addr; + void *db_data; + enum qed_db_rec_width db_width; + enum qed_db_rec_space db_space; + u8 hwfn_idx; +}; + +/* Display a single doorbell recovery entry */ +static void qed_db_recovery_dp_entry(struct qed_hwfn *p_hwfn, + struct qed_db_recovery_entry *db_entry, + char *action) +{ + DP_VERBOSE(p_hwfn, + QED_MSG_SPQ, + "(%s: db_entry %p, addr %p, data %p, width %s, %s space, hwfn %d)\n", + action, + db_entry, + db_entry->db_addr, + db_entry->db_data, + db_entry->db_width == DB_REC_WIDTH_32B ? "32b" : "64b", + db_entry->db_space == DB_REC_USER ? "user" : "kernel", + db_entry->hwfn_idx); +} + +/* Doorbell address sanity (address within doorbell bar range) */ +static bool qed_db_rec_sanity(struct qed_dev *cdev, + void __iomem *db_addr, void *db_data) +{ + /* Make sure doorbell address is within the doorbell bar */ + if (db_addr < cdev->doorbells || + (u8 __iomem *)db_addr > + (u8 __iomem *)cdev->doorbells + cdev->db_size) { + WARN(true, + "Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p]\n", + db_addr, + cdev->doorbells, + (u8 __iomem *)cdev->doorbells + cdev->db_size); + return false; + } + + /* Make sure doorbell data pointer is not null */ + if (!db_data) { + WARN(true, "Illegal doorbell data pointer: %p", db_data); + return false; + } + + return true; +} + +/* Find hwfn according to the doorbell address */ +static struct qed_hwfn *qed_db_rec_find_hwfn(struct qed_dev *cdev, + void __iomem *db_addr) +{ + struct qed_hwfn *p_hwfn; + + /* In CMT doorbell bar is split down the middle between engine 0 and engine 1 */ + if (cdev->num_hwfns > 1) + p_hwfn = db_addr < cdev->hwfns[1].doorbells ?
+ &cdev->hwfns[0] : &cdev->hwfns[1]; + else + p_hwfn = QED_LEADING_HWFN(cdev); + + return p_hwfn; +} + +/* Add a new entry to the doorbell recovery mechanism */ +int qed_db_recovery_add(struct qed_dev *cdev, + void __iomem *db_addr, + void *db_data, + enum qed_db_rec_width db_width, + enum qed_db_rec_space db_space) +{ + struct qed_db_recovery_entry *db_entry; + struct qed_hwfn *p_hwfn; + + /* Shortcircuit VFs, for now */ + if (IS_VF(cdev)) { + DP_VERBOSE(cdev, + QED_MSG_IOV, "db recovery - skipping VF doorbell\n"); + return 0; + } + + /* Sanitize doorbell address */ + if (!qed_db_rec_sanity(cdev, db_addr, db_data)) + return -EINVAL; + + /* Obtain hwfn from doorbell address */ + p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr); + + /* Create entry */ + db_entry = kzalloc(sizeof(*db_entry), GFP_KERNEL); + if (!db_entry) { + DP_NOTICE(cdev, "Failed to allocate a db recovery entry\n"); + return -ENOMEM; + } + + /* Populate entry */ + db_entry->db_addr = db_addr; + db_entry->db_data = db_data; + db_entry->db_width = db_width; + db_entry->db_space = db_space; + db_entry->hwfn_idx = p_hwfn->my_id; + + /* Display */ + qed_db_recovery_dp_entry(p_hwfn, db_entry, "Adding"); + + /* Protect the list */ + spin_lock_bh(&p_hwfn->db_recovery_info.lock); + list_add_tail(&db_entry->list_entry, &p_hwfn->db_recovery_info.list); + spin_unlock_bh(&p_hwfn->db_recovery_info.lock); + + return 0; +} + +/* Remove an entry from the doorbell recovery mechanism */ +int qed_db_recovery_del(struct qed_dev *cdev, + void __iomem *db_addr, void *db_data) +{ + struct qed_db_recovery_entry *db_entry = NULL; + struct qed_hwfn *p_hwfn; + int rc = -EINVAL; + + /* Shortcircuit VFs, for now */ + if (IS_VF(cdev)) { + DP_VERBOSE(cdev, + QED_MSG_IOV, "db recovery - skipping VF doorbell\n"); + return 0; + } + + /* Sanitize doorbell address */ + if (!qed_db_rec_sanity(cdev, db_addr, db_data)) + return -EINVAL; + + /* Obtain hwfn from doorbell address */ + p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr); + + /* Protect 
the list */ + spin_lock_bh(&p_hwfn->db_recovery_info.lock); + list_for_each_entry(db_entry, + &p_hwfn->db_recovery_info.list, list_entry) { + /* search according to db_data addr since db_addr is not unique (roce) */ + if (db_entry->db_data == db_data) { + qed_db_recovery_dp_entry(p_hwfn, db_entry, "Deleting"); + list_del(&db_entry->list_entry); + rc = 0; + break; + } + } + + spin_unlock_bh(&p_hwfn->db_recovery_info.lock); + + if (rc == -EINVAL) + + DP_NOTICE(p_hwfn, + "Failed to find element in list. Key (db_data addr) was %p. db_addr was %p\n", + db_data, db_addr); + else + kfree(db_entry); + + return rc; +} + +/* Initialize the doorbell recovery mechanism */ +static int qed_db_recovery_setup(struct qed_hwfn *p_hwfn) +{ + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Setting up db recovery\n"); + + /* Make sure db_size was set in cdev */ + if (!p_hwfn->cdev->db_size) { + DP_ERR(p_hwfn->cdev, "db_size not set\n"); + return -EINVAL; + } + + INIT_LIST_HEAD(&p_hwfn->db_recovery_info.list); + spin_lock_init(&p_hwfn->db_recovery_info.lock); + p_hwfn->db_recovery_info.db_recovery_counter = 0; + + return 0; +} + +/* Destroy the doorbell recovery mechanism */ +static void qed_db_recovery_teardown(struct qed_hwfn *p_hwfn) +{ + struct qed_db_recovery_entry *db_entry = NULL; + + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Tearing down db recovery\n"); + if (!list_empty(&p_hwfn->db_recovery_info.list)) { + DP_VERBOSE(p_hwfn, + QED_MSG_SPQ, + "Doorbell Recovery teardown found the doorbell recovery list was not empty (Expected in disorderly driver unload (e.g. recovery) otherwise this probably means some flow forgot to db_recovery_del). 
Prepare to purge doorbell recovery list...\n"); + while (!list_empty(&p_hwfn->db_recovery_info.list)) { + db_entry = + list_first_entry(&p_hwfn->db_recovery_info.list, + struct qed_db_recovery_entry, + list_entry); + qed_db_recovery_dp_entry(p_hwfn, db_entry, "Purging"); + list_del(&db_entry->list_entry); + kfree(db_entry); + } + } + p_hwfn->db_recovery_info.db_recovery_counter = 0; +} + +/* Print the content of the doorbell recovery mechanism */ +void qed_db_recovery_dp(struct qed_hwfn *p_hwfn) +{ + struct qed_db_recovery_entry *db_entry = NULL; + + DP_NOTICE(p_hwfn, + "Dispalying doorbell recovery database. Counter was %d\n", + p_hwfn->db_recovery_info.db_recovery_counter); + + /* Protect the list */ + spin_lock_bh(&p_hwfn->db_recovery_info.lock); + list_for_each_entry(db_entry, + &p_hwfn->db_recovery_info.list, list_entry) { + qed_db_recovery_dp_entry(p_hwfn, db_entry, "Printing"); + } + + spin_unlock_bh(&p_hwfn->db_recovery_info.lock); +} + +/* Ring the doorbell of a single doorbell recovery entry */ +static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, + struct qed_db_recovery_entry *db_entry, + enum qed_db_rec_exec db_exec) +{ + if (db_exec != DB_REC_ONCE) { + /* Print according to width */ + if (db_entry->db_width == DB_REC_WIDTH_32B) { + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "%s doorbell address %p data %x\n", + db_exec == DB_REC_DRY_RUN ? + "would have rung" : "ringing", + db_entry->db_addr, + *(u32 *)db_entry->db_data); + } else { + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "%s doorbell address %p data %llx\n", + db_exec == DB_REC_DRY_RUN ? + "would have rung" : "ringing", + db_entry->db_addr, + *(u64 *)(db_entry->db_data)); + } + } + + /* Sanity */ + if (!qed_db_rec_sanity(p_hwfn->cdev, db_entry->db_addr, + db_entry->db_data)) + return; + + /* Flush the write combined buffer. Since there are multiple doorbelling + * entities using the same address, if we don't flush, a transaction + * could be lost. 
+ */ + wmb(); + + /* Ring the doorbell */ + if (db_exec == DB_REC_REAL_DEAL || db_exec == DB_REC_ONCE) { + if (db_entry->db_width == DB_REC_WIDTH_32B) + DIRECT_REG_WR(db_entry->db_addr, + *(u32 *)(db_entry->db_data)); + else + DIRECT_REG_WR64(db_entry->db_addr, + *(u64 *)(db_entry->db_data)); + } + + /* Flush the write combined buffer. Next doorbell may come from a + * different entity to the same address... + */ + wmb(); +} + +/* Traverse the doorbell recovery entry list and ring all the doorbells */ +void qed_db_recovery_execute(struct qed_hwfn *p_hwfn, + enum qed_db_rec_exec db_exec) +{ + struct qed_db_recovery_entry *db_entry = NULL; + + if (db_exec != DB_REC_ONCE) { + DP_NOTICE(p_hwfn, + "Executing doorbell recovery. Counter was %d\n", + p_hwfn->db_recovery_info.db_recovery_counter); + + /* Track amount of times recovery was executed */ + p_hwfn->db_recovery_info.db_recovery_counter++; + } + + /* Protect the list */ + spin_lock_bh(&p_hwfn->db_recovery_info.lock); + list_for_each_entry(db_entry, + &p_hwfn->db_recovery_info.list, list_entry) { + qed_db_recovery_ring(p_hwfn, db_entry, db_exec); + if (db_exec == DB_REC_ONCE) + break; + } + + spin_unlock_bh(&p_hwfn->db_recovery_info.lock); +} + +/******************** Doorbell Recovery end ****************/ + #define QED_MIN_DPIS (4) #define QED_MIN_PWM_REGION (QED_WID_SIZE * QED_MIN_DPIS) @@ -194,6 +506,9 @@ void qed_resc_free(struct qed_dev *cdev) qed_dmae_info_free(p_hwfn); qed_dcbx_info_free(p_hwfn); qed_dbg_user_data_free(p_hwfn); + + /* Destroy doorbell recovery mechanism */ + qed_db_recovery_teardown(p_hwfn); } } @@ -969,6 +1284,11 @@ int qed_resc_alloc(struct qed_dev *cdev) struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; u32 n_eqes, num_cons; + /* Initialize the doorbell recovery mechanism */ + rc = qed_db_recovery_setup(p_hwfn); + if (rc) + goto alloc_err; + /* First allocate the context manager structure */ rc = qed_cxt_mngr_alloc(p_hwfn); if (rc) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h 
b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index defdda1ffaa2..acccd85170aa 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -472,6 +472,34 @@ int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle); int qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle); +/** + * @brief db_recovery_add - add doorbell information to the doorbell + * recovery mechanism. + * + * @param cdev + * @param db_addr - doorbell address + * @param db_data - address of where db_data is stored + * @param db_width - doorbell is 32b or 64b + * @param db_space - doorbell recovery addresses are user or kernel space + */ +int qed_db_recovery_add(struct qed_dev *cdev, + void __iomem *db_addr, + void *db_data, + enum qed_db_rec_width db_width, + enum qed_db_rec_space db_space); + +/** + * @brief db_recovery_del - remove doorbell information from the doorbell + * recovery mechanism. db_data serves as key (db_addr is not unique). + * + * @param cdev + * @param db_addr - doorbell address + * @param db_data - address where db_data is stored. Serves as key for the + * entry to delete.
+ */ +int qed_db_recovery_del(struct qed_dev *cdev, + void __iomem *db_addr, void *db_data); + const char *qed_hw_get_resc_name(enum qed_resources res_id); #endif diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index a47321a0d572..eb851f89f417 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -47,6 +47,7 @@ #include #include #include +#include enum dcbx_protocol_type { DCBX_PROTOCOL_ISCSI, @@ -448,11 +449,24 @@ struct qed_mfw_tlv_iscsi { bool tx_bytes_set; }; +enum qed_db_rec_width { + DB_REC_WIDTH_32B, + DB_REC_WIDTH_64B, +}; + +enum qed_db_rec_space { + DB_REC_KERNEL, + DB_REC_USER, +}; + #define DIRECT_REG_WR(reg_addr, val) writel((u32)val, \ (void __iomem *)(reg_addr)) #define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr)) +#define DIRECT_REG_WR64(reg_addr, val) writeq((u32)val, \ + (void __iomem *)(reg_addr)) + #define QED_COALESCE_MAX 0x1FF #define QED_DEFAULT_RX_USECS 12 #define QED_DEFAULT_TX_USECS 48 -- cgit v1.2.3 From 0e1f10447e2aa79ba7d8960e5d0ed3cf2ea8356e Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 28 Nov 2018 18:16:06 +0200 Subject: qed: Expose the doorbell overflow recovery mechanism to the protocol drivers Most of the doorbelling entities are outside of the core module. L2 queues, Roce queues, iscsi and fcoe all need to register. Make the APIs available for these drivers. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Tomer Tayar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 ++ include/linux/qed/qed_if.h | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 5ec3f5d1d6b2..6adf5bda9811 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -2384,6 +2384,8 @@ const struct qed_common_ops qed_common_ops_pass = { .update_mac = &qed_update_mac, .update_mtu = &qed_update_mtu, .update_wol = &qed_update_wol, + .db_recovery_add = &qed_db_recovery_add, + .db_recovery_del = &qed_db_recovery_del, .read_module_eeprom = &qed_read_module_eeprom, }; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index eb851f89f417..91c536a01b56 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -1029,6 +1029,33 @@ struct qed_common_ops { */ int (*set_led)(struct qed_dev *cdev, enum qed_led_mode mode); +/** + * @brief db_recovery_add - add doorbell information to the doorbell + * recovery mechanism. + * + * @param cdev + * @param db_addr - doorbell address + * @param db_data - address of where db_data is stored + * @param db_width - doorbell is 32b or 64b + * @param db_space - doorbell recovery addresses are user or kernel space + */ + int (*db_recovery_add)(struct qed_dev *cdev, + void __iomem *db_addr, + void *db_data, + enum qed_db_rec_width db_width, + enum qed_db_rec_space db_space); + +/** + * @brief db_recovery_del - remove doorbell information from the doorbell + * recovery mechanism. db_data serves as key (db_addr is not unique). + * + * @param cdev + * @param db_addr - doorbell address + * @param db_data - address where db_data is stored. Serves as key for the + * entry to delete. + */ + int (*db_recovery_del)(struct qed_dev *cdev, + void __iomem *db_addr, void *db_data); /** * @brief update_drv_state - API to inform the change in the driver state.
-- cgit v1.2.3 From ea86ea2cdced20057da4d2c32965c1219c238197 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 30 Nov 2018 13:18:06 -0700 Subject: sbitmap: ammortize cost of clearing bits sbitmap maintains a set of words that we use to set and clear bits, with each bit representing a tag for blk-mq. Even though we spread the bits out and maintain a hint cache, one particular bit allocated will end up being cleared in the exact same spot. This introduces batched clearing of bits. Instead of clearing a given bit, the same bit is set in a cleared/free mask instead. If we fail allocating a bit from a given word, then we check the free mask, and batch move those cleared bits at that time. This trades 64 atomic bitops for 2 cmpxchg(). In a threaded poll test case, half the overhead of getting and clearing tags is removed with this change. On another poll test case with a single thread, performance is unchanged. Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 33 ++++++++++++++++---- lib/sbitmap.c | 81 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 100 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 804a50983ec5..81359d45751e 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -30,14 +30,24 @@ struct seq_file; */ struct sbitmap_word { /** - * @word: The bitmap word itself. + * @depth: Number of bits being used in @word/@cleared */ - unsigned long word; + unsigned long depth; /** - * @depth: Number of bits being used in @word. 
+ * @word: word holding free bits */ - unsigned long depth; + unsigned long word ____cacheline_aligned_in_smp; + + /** + * @cleared: word holding cleared bits + */ + unsigned long cleared ____cacheline_aligned_in_smp; + + /** + * @swap_lock: Held while swapping word <-> cleared + */ + spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** @@ -310,6 +320,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } +/* + * This one is special, since it doesn't actually clear the bit, rather it + * sets the corresponding bit in the ->cleared mask instead. Paired with + * the caller doing sbitmap_deferred_clear() if a given index is full, which + * will clear the previously freed entries in the corresponding ->word. + */ +static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr) +{ + unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared; + + set_bit(SB_NR_TO_BIT(sb, bitnr), addr); +} + static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb, unsigned int bitnr) { @@ -321,8 +344,6 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } -unsigned int sbitmap_weight(const struct sbitmap *sb); - /** * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. * @sb: Bitmap to show.
diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 45cab6bbc1c7..f99382e59314 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -59,6 +59,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, for (i = 0; i < sb->map_nr; i++) { sb->map[i].depth = min(depth, bits_per_word); depth -= sb->map[i].depth; + spin_lock_init(&sb->map[i].swap_lock); } return 0; } @@ -111,6 +112,57 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, return nr; } +/* + * See if we have deferred clears that we can batch move + */ +static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index) +{ + unsigned long mask, val; + bool ret = false; + + spin_lock(&sb->map[index].swap_lock); + + if (!sb->map[index].cleared) + goto out_unlock; + + /* + * First get a stable cleared mask, setting the old mask to 0. + */ + do { + mask = sb->map[index].cleared; + } while (cmpxchg(&sb->map[index].cleared, mask, 0) != mask); + + /* + * Now clear the masked bits in our free word + */ + do { + val = sb->map[index].word; + } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val); + + ret = true; +out_unlock: + spin_unlock(&sb->map[index].swap_lock); + return ret; +} + +static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, + unsigned int alloc_hint, bool round_robin) +{ + int nr; + + do { + nr = __sbitmap_get_word(&sb->map[index].word, + sb->map[index].depth, alloc_hint, + !round_robin); + if (nr != -1) + break; + if (!sbitmap_deferred_clear(sb, index)) + break; + } while (1); + + return nr; +} + int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) { unsigned int i, index; @@ -129,9 +181,8 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) alloc_hint = 0; for (i = 0; i < sb->map_nr; i++) { - nr = __sbitmap_get_word(&sb->map[index].word, - sb->map[index].depth, alloc_hint, - !round_robin); + nr = sbitmap_find_bit_in_index(sb, index, alloc_hint, + round_robin); if (nr != -1) { nr += 
index << sb->shift; break; @@ -206,23 +257,36 @@ bool sbitmap_any_bit_clear(const struct sbitmap *sb) } EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); -unsigned int sbitmap_weight(const struct sbitmap *sb) +static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) { unsigned int i, weight = 0; for (i = 0; i < sb->map_nr; i++) { const struct sbitmap_word *word = &sb->map[i]; - weight += bitmap_weight(&word->word, word->depth); + if (set) + weight += bitmap_weight(&word->word, word->depth); + else + weight += bitmap_weight(&word->cleared, word->depth); } return weight; } -EXPORT_SYMBOL_GPL(sbitmap_weight); + +static unsigned int sbitmap_weight(const struct sbitmap *sb) +{ + return __sbitmap_weight(sb, true); +} + +static unsigned int sbitmap_cleared(const struct sbitmap *sb) +{ + return __sbitmap_weight(sb, false); +} void sbitmap_show(struct sbitmap *sb, struct seq_file *m) { seq_printf(m, "depth=%u\n", sb->depth); - seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); + seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb)); + seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb)); seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); seq_printf(m, "map_nr=%u\n", sb->map_nr); } @@ -514,7 +578,8 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { - sbitmap_clear_bit_unlock(&sbq->sb, nr); + sbitmap_deferred_clear_bit(&sbq->sb, nr); + /* * Pairs with the memory barrier in set_current_state() to ensure the * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker -- cgit v1.2.3 From 5d2ee7122c73be6a3b6bfe90d237e8aed737cfaa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 29 Nov 2018 17:36:41 -0700 Subject: sbitmap: optimize wakeup check Even if we have no waiters on any of the sbitmap_queue wait states, we still have to loop every entry to check. We do this for every IO, so the cost adds up. 
Shift a bit of the cost to the slow path, when we actually have waiters. Wrap prepare_to_wait_exclusive() and finish_wait(), so we can maintain an internal count of how many are currently active. Then we can simply check this count in sbq_wake_ptr() and not have to loop if we don't have any sleepers. Convert the two users of sbitmap with waiting, blk-mq-tag and iSCSI. Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 11 +++++------ drivers/target/iscsi/iscsi_target_util.c | 12 ++++++----- include/linux/sbitmap.h | 34 ++++++++++++++++++++++++++++++++ lib/sbitmap.c | 28 ++++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 87bc5df72d48..2089c6c62f44 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -110,7 +110,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct sbitmap_queue *bt; struct sbq_wait_state *ws; - DEFINE_WAIT(wait); + DEFINE_SBQ_WAIT(wait); unsigned int tag_offset; bool drop_ctx; int tag; @@ -154,8 +154,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) if (tag != -1) break; - prepare_to_wait_exclusive(&ws->wait, &wait, - TASK_UNINTERRUPTIBLE); + sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE); tag = __blk_mq_get_tag(data, bt); if (tag != -1) @@ -167,6 +166,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) bt_prev = bt; io_schedule(); + sbitmap_finish_wait(bt, ws, &wait); + data->ctx = blk_mq_get_ctx(data->q); data->hctx = blk_mq_map_queue(data->q, data->cmd_flags, data->ctx->cpu); @@ -176,8 +177,6 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) else bt = &tags->bitmap_tags; - finish_wait(&ws->wait, &wait); - /* * If destination hw queue is changed, fake wake up on * previous queue for compensating the wake up miss, so @@ -192,7 +191,7 @@ unsigned int blk_mq_get_tag(struct 
blk_mq_alloc_data *data) if (drop_ctx && data->ctx) blk_mq_put_ctx(data->ctx); - finish_wait(&ws->wait, &wait); + sbitmap_finish_wait(bt, ws, &wait); found_tag: return tag + tag_offset; diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 36b742932c72..86987da86dd6 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -150,24 +150,26 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd) static int iscsit_wait_for_tag(struct se_session *se_sess, int state, int *cpup) { int tag = -1; - DEFINE_WAIT(wait); + DEFINE_SBQ_WAIT(wait); struct sbq_wait_state *ws; + struct sbitmap_queue *sbq; if (state == TASK_RUNNING) return tag; - ws = &se_sess->sess_tag_pool.ws[0]; + sbq = &se_sess->sess_tag_pool; + ws = &sbq->ws[0]; for (;;) { - prepare_to_wait_exclusive(&ws->wait, &wait, state); + sbitmap_prepare_to_wait(sbq, ws, &wait, state); if (signal_pending_state(state, current)) break; - tag = sbitmap_queue_get(&se_sess->sess_tag_pool, cpup); + tag = sbitmap_queue_get(sbq, cpup); if (tag >= 0) break; schedule(); } - finish_wait(&ws->wait, &wait); + sbitmap_finish_wait(sbq, ws, &wait); return tag; } diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 81359d45751e..92806a2dbab7 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -135,6 +135,11 @@ struct sbitmap_queue { */ struct sbq_wait_state *ws; + /* + * @ws_active: count of currently active ws waitqueues + */ + atomic_t ws_active; + /** * @round_robin: Allocate bits in strict round-robin order. 
*/ @@ -552,4 +557,33 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); */ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); +struct sbq_wait { + int accounted; + struct wait_queue_entry wait; +}; + +#define DEFINE_SBQ_WAIT(name) \ + struct sbq_wait name = { \ + .accounted = 0, \ + .wait = { \ + .private = current, \ + .func = autoremove_wake_function, \ + .entry = LIST_HEAD_INIT((name).wait.entry), \ + } \ + } + +/* + * Wrapper around prepare_to_wait_exclusive(), which maintains some extra + * internal state. + */ +void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, + struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait, int state); + +/* + * Must be paired with sbitmap_prepare_to_wait(). + */ +void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait); + #endif /* __LINUX_SCALE_BITMAP_H */ diff --git a/lib/sbitmap.c b/lib/sbitmap.c index f99382e59314..a89fbe7cf6ca 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -394,6 +394,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, sbq->min_shallow_depth = UINT_MAX; sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); atomic_set(&sbq->wake_index, 0); + atomic_set(&sbq->ws_active, 0); sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); if (!sbq->ws) { @@ -509,6 +510,9 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) { int i, wake_index; + if (!atomic_read(&sbq->ws_active)) + return NULL; + wake_index = atomic_read(&sbq->wake_index); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[wake_index]; @@ -634,6 +638,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); + seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active)); seq_puts(m, "ws={\n"); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { @@ -649,3 +654,26 
@@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_show); + +void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, + struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait, int state) +{ + if (!sbq_wait->accounted) { + atomic_inc(&sbq->ws_active); + sbq_wait->accounted = 1; + } + prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state); +} +EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait); + +void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait) +{ + finish_wait(&ws->wait, &sbq_wait->wait); + if (sbq_wait->accounted) { + atomic_dec(&sbq->ws_active); + sbq_wait->accounted = 0; + } +} +EXPORT_SYMBOL_GPL(sbitmap_finish_wait); -- cgit v1.2.3 From bbda5ec671d3fe62faefa1cab7270aa586042a4b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 30 Nov 2018 10:05:26 +0900 Subject: kbuild: simplify dependency generation for CONFIG_TRIM_UNUSED_KSYMS My main motivation of this commit is to clean up scripts/Kbuild.include and scripts/Makefile.build. Currently, CONFIG_TRIM_UNUSED_KSYMS works with a tricky gimmick; possibly exported symbols are detected by letting $(CPP) replace EXPORT_SYMBOL* with a special string '=== __KSYM_*===', which is post-processed by sed, and passed to fixdep. The extra preprocessing is costly, and hacking cmd_and_fixdep is ugly. I came up with a new way to find exported symbols; insert a dummy symbol __ksym_marker_* to each potentially exported symbol. Those dummy symbols are picked up by $(NM), post-processed by sed, then appended to .*.cmd files. I collected the post-process part to a new shell script scripts/gen_ksymdeps.sh for readability. The dummy symbols are put into the .discard.* section so that the linker script rips them off the final vmlinux or modules. A nice side-effect is building with CONFIG_TRIM_UNUSED_KSYMS will be much faster. 
Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Pitre --- include/asm-generic/export.h | 13 ++++++++----- include/linux/export.h | 18 +++++++++--------- scripts/Kbuild.include | 28 ---------------------------- scripts/Makefile.build | 7 +++++++ scripts/basic/fixdep.c | 31 ++++--------------------------- scripts/gen_ksymdeps.sh | 25 +++++++++++++++++++++++++ 6 files changed, 53 insertions(+), 69 deletions(-) create mode 100755 scripts/gen_ksymdeps.sh (limited to 'include/linux') diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h index 4d73e6e3c66c..294d6ae785d4 100644 --- a/include/asm-generic/export.h +++ b/include/asm-generic/export.h @@ -59,16 +59,19 @@ __kcrctab_\name: .endm #undef __put -#if defined(__KSYM_DEPS__) - -#define __EXPORT_SYMBOL(sym, val, sec) === __KSYM_##sym === - -#elif defined(CONFIG_TRIM_UNUSED_KSYMS) +#if defined(CONFIG_TRIM_UNUSED_KSYMS) #include #include +.macro __ksym_marker sym + .section ".discard.ksym","a" +__ksym_marker_\sym: + .previous +.endm + #define __EXPORT_SYMBOL(sym, val, sec) \ + __ksym_marker sym; \ __cond_export_sym(sym, val, sec, __is_defined(__KSYM_##sym)) #define __cond_export_sym(sym, val, sec, conf) \ ___cond_export_sym(sym, val, sec, conf) diff --git a/include/linux/export.h b/include/linux/export.h index ce764a5d2ee4..fd8711ed9ac4 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -92,22 +92,22 @@ struct kernel_symbol { */ #define __EXPORT_SYMBOL(sym, sec) -#elif defined(__KSYM_DEPS__) +#elif defined(CONFIG_TRIM_UNUSED_KSYMS) + +#include /* * For fine grained build dependencies, we want to tell the build system * about each possible exported symbol even if they're not actually exported. - * We use a string pattern that is unlikely to be valid code that the build - * system filters out from the preprocessor output (see ksym_dep_filter - * in scripts/Kbuild.include). 
+ * We use a symbol pattern __ksym_marker_ that the build system filters + * from the $(NM) output (see scripts/gen_ksymdeps.sh). These symbols are + * discarded in the final link stage. */ -#define __EXPORT_SYMBOL(sym, sec) === __KSYM_##sym === - -#elif defined(CONFIG_TRIM_UNUSED_KSYMS) - -#include +#define __ksym_marker(sym) \ + static int __ksym_marker_##sym[0] __section(".discard.ksym") __used #define __EXPORT_SYMBOL(sym, sec) \ + __ksym_marker(sym); \ __cond_export_sym(sym, sec, __is_defined(__KSYM_##sym)) #define __cond_export_sym(sym, sec, conf) \ ___cond_export_sym(sym, sec, conf) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 6cf6a8b83b97..4b943f4d2226 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -260,39 +260,11 @@ if_changed_dep = $(if $(strip $(any-prereq) $(arg-check) ), \ @set -e; \ $(cmd_and_fixdep), @:) -ifndef CONFIG_TRIM_UNUSED_KSYMS - cmd_and_fixdep = \ $(echo-cmd) $(cmd_$(1)); \ scripts/basic/fixdep $(depfile) $@ '$(make-cmd)' > $(dot-target).cmd;\ rm -f $(depfile); -else - -# Filter out exported kernel symbol names from the preprocessor output. -# See also __KSYM_DEPS__ in include/linux/export.h. -# We disable the depfile generation here, so as not to overwrite the existing -# depfile while fixdep is parsing it. 
-flags_nodeps = $(filter-out -Wp$(comma)-M%, $($(1))) -ksym_dep_filter = \ - case "$(1)" in \ - cc_*_c|cpp_i_c) \ - $(CPP) $(call flags_nodeps,c_flags) -D__KSYM_DEPS__ $< ;; \ - as_*_S|cpp_s_S) \ - $(CPP) $(call flags_nodeps,a_flags) -D__KSYM_DEPS__ $< ;; \ - boot*|build*|cpp_its_S|*cpp_lds_S|dtc|host*|vdso*) : ;; \ - *) echo "Don't know how to preprocess $(1)" >&2; false ;; \ - esac | tr ";" "\n" | sed -n 's/^.*=== __KSYM_\(.*\) ===.*$$/_\1/p' - -cmd_and_fixdep = \ - $(echo-cmd) $(cmd_$(1)); \ - $(ksym_dep_filter) | \ - scripts/basic/fixdep -e $(depfile) $@ '$(make-cmd)' \ - > $(dot-target).cmd; \ - rm -f $(depfile); - -endif - # Usage: $(call if_changed_rule,foo) # Will check if $(cmd_foo) or any of the prerequisites changed, # and if so will execute $(rule_foo). diff --git a/scripts/Makefile.build b/scripts/Makefile.build index cdb25d163b42..23ebf2508234 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -254,9 +254,15 @@ objtool_dep = $(objtool_obj) \ $(wildcard include/config/orc/unwinder.h \ include/config/stack/validation.h) +ifdef CONFIG_TRIM_UNUSED_KSYMS +cmd_gen_ksymdeps = \ + $(CONFIG_SHELL) $(srctree)/scripts/gen_ksymdeps.sh $@ >> $(dot-target).cmd; +endif + define rule_cc_o_c $(call echo-cmd,checksrc) $(cmd_checksrc) \ $(call cmd_and_fixdep,cc_o_c) \ + $(cmd_gen_ksymdeps) \ $(cmd_checkdoc) \ $(call echo-cmd,objtool) $(cmd_objtool) \ $(cmd_modversions_c) \ @@ -265,6 +271,7 @@ endef define rule_as_o_S $(call cmd_and_fixdep,as_o_S) \ + $(cmd_gen_ksymdeps) \ $(call echo-cmd,objtool) $(cmd_objtool) \ $(cmd_modversions_S) endef diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index 850966f3d602..facbd603adf6 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -105,8 +105,7 @@ static void usage(void) { - fprintf(stderr, "Usage: fixdep [-e] \n"); - fprintf(stderr, " -e insert extra dependencies given on stdin\n"); + fprintf(stderr, "Usage: fixdep \n"); exit(1); } @@ -131,21 +130,6 @@ static void print_dep(const char 
*m, int slen, const char *dir) printf(".h) \\\n"); } -static void do_extra_deps(void) -{ - char buf[80]; - - while (fgets(buf, sizeof(buf), stdin)) { - int len = strlen(buf); - - if (len < 2 || buf[len - 1] != '\n') { - fprintf(stderr, "fixdep: bad data on stdin\n"); - exit(1); - } - print_dep(buf, len - 1, "include/ksym"); - } -} - struct item { struct item *next; unsigned int len; @@ -293,7 +277,7 @@ static int is_ignored_file(const char *s, int len) * assignments are parsed not only by make, but also by the rather simple * parser in scripts/mod/sumversion.c. */ -static void parse_dep_file(char *m, const char *target, int insert_extra_deps) +static void parse_dep_file(char *m, const char *target) { char *p; int is_last, is_target; @@ -369,9 +353,6 @@ static void parse_dep_file(char *m, const char *target, int insert_extra_deps) exit(1); } - if (insert_extra_deps) - do_extra_deps(); - printf("\n%s: $(deps_%s)\n\n", target, target); printf("$(deps_%s):\n", target); } @@ -379,13 +360,9 @@ static void parse_dep_file(char *m, const char *target, int insert_extra_deps) int main(int argc, char *argv[]) { const char *depfile, *target, *cmdline; - int insert_extra_deps = 0; void *buf; - if (argc == 5 && !strcmp(argv[1], "-e")) { - insert_extra_deps = 1; - argv++; - } else if (argc != 4) + if (argc != 4) usage(); depfile = argv[1]; @@ -395,7 +372,7 @@ int main(int argc, char *argv[]) printf("cmd_%s := %s\n\n", target, cmdline); buf = read_file(depfile); - parse_dep_file(buf, target, insert_extra_deps); + parse_dep_file(buf, target); free(buf); return 0; diff --git a/scripts/gen_ksymdeps.sh b/scripts/gen_ksymdeps.sh new file mode 100755 index 000000000000..1324986e1362 --- /dev/null +++ b/scripts/gen_ksymdeps.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# List of exported symbols +ksyms=$($NM $1 | sed -n 's/.*__ksym_marker_\(.*\)/\1/p' | tr A-Z a-z) + +if [ -z "$ksyms" ]; then + exit 0 +fi + +echo +echo "ksymdeps_$1 := \\" + +for s in 
$ksyms +do + echo $s | sed -e 's:^_*: $(wildcard include/ksym/:' \ + -e 's:__*:/:g' -e 's/$/.h) \\/' +done + +echo +echo "$1: \$(ksymdeps_$1)" +echo +echo "\$(ksymdeps_$1):" -- cgit v1.2.3 From b18814e767a445534ab9ccba02e82a31208f85d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Nov 2018 17:27:56 +0100 Subject: dma-direct: provide page based alloc/free helpers Some architectures support remapping highmem into DMA coherent allocations. To use the common code for them we need variants of dma_direct_{alloc,free}_pages that do not use kernel virtual addresses. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-direct.h | 3 +++ kernel/dma/direct.c | 32 ++++++++++++++++++++++---------- 2 files changed, 25 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 9e66bfe369aa..61b78f934f64 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -67,6 +67,9 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); +void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page); dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 22a12ab5a5e9..680287779b0a 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -103,14 +103,13 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask); } -void *dma_direct_alloc_pages(struct device *dev, size_t size, +struct page 
*__dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; int page_order = get_order(size); struct page *page = NULL; u64 phys_mask; - void *ret; if (attrs & DMA_ATTR_NO_WARN) gfp |= __GFP_NOWARN; @@ -150,11 +149,22 @@ again: } } + return page; +} + +void *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +{ + struct page *page; + void *ret; + + page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); if (!page) return NULL; + ret = page_address(page); if (force_dma_unencrypted()) { - set_memory_decrypted((unsigned long)ret, 1 << page_order); + set_memory_decrypted((unsigned long)ret, 1 << get_order(size)); *dma_handle = __phys_to_dma(dev, page_to_phys(page)); } else { *dma_handle = phys_to_dma(dev, page_to_phys(page)); @@ -163,20 +173,22 @@ again: return ret; } -/* - * NOTE: this function must never look at the dma_addr argument, because we want - * to be able to use it as a helper for iommu implementations as well. 
- */ +void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + if (!dma_release_from_contiguous(dev, page, count)) + __free_pages(page, get_order(size)); +} + void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned int page_order = get_order(size); if (force_dma_unencrypted()) set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); - if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) - free_pages((unsigned long)cpu_addr, page_order); + __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr)); } void *dma_direct_alloc(struct device *dev, size_t size, -- cgit v1.2.3 From 0c3b3171ceccb8830c2bb5adff1b4e9b204c1450 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Nov 2018 20:29:28 +0100 Subject: dma-mapping: move the arm64 noncoherent alloc/free support to common code The arm64 codebase to implement coherent dma allocation for architectures with non-coherent DMA is a good start for a generic implementation, given that it uses the generic remap helpers, provides the atomic pool for allocations that can't sleep and still is relatively simple and well tested. Move it to kernel/dma and allow architectures to opt into it using a config symbol. Architectures just need to provide a new arch_dma_prep_coherent helper to write back and invalidate the caches for any memory that gets remapped for uncached access.
Signed-off-by: Christoph Hellwig Reviewed-by: Will Deacon Reviewed-by: Robin Murphy --- arch/arm64/Kconfig | 2 +- arch/arm64/mm/dma-mapping.c | 184 +++------------------------------------- include/linux/dma-mapping.h | 5 ++ include/linux/dma-noncoherent.h | 2 + kernel/dma/Kconfig | 5 ++ kernel/dma/remap.c | 158 +++++++++++++++++++++++++++++++++- 6 files changed, 180 insertions(+), 176 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5d065acb6d10..2e645ea693ea 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -82,7 +82,7 @@ config ARM64 select CRC32 select DCACHE_WORD_ACCESS select DMA_DIRECT_OPS - select DMA_REMAP + select DMA_DIRECT_REMAP select EDAC_SUPPORT select FRAME_POINTER select GENERIC_ALLOCATOR diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a3ac26284845..e2e7e5d0f94e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -33,113 +33,6 @@ #include -static struct gen_pool *atomic_pool __ro_after_init; - -#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K -static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; - -static int __init early_coherent_pool(char *p) -{ - atomic_pool_size = memparse(p, &p); - return 0; -} -early_param("coherent_pool", early_coherent_pool); - -static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) -{ - unsigned long val; - void *ptr = NULL; - - if (!atomic_pool) { - WARN(1, "coherent pool not initialised!\n"); - return NULL; - } - - val = gen_pool_alloc(atomic_pool, size); - if (val) { - phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); - - *ret_page = phys_to_page(phys); - ptr = (void *)val; - memset(ptr, 0, size); - } - - return ptr; -} - -static bool __in_atomic_pool(void *start, size_t size) -{ - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); -} - -static int __free_from_pool(void *start, size_t size) -{ - if (!__in_atomic_pool(start, size)) - 
return 0; - - gen_pool_free(atomic_pool, (unsigned long)start, size); - - return 1; -} - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flags, unsigned long attrs) -{ - struct page *page; - void *ptr, *coherent_ptr; - pgprot_t prot = pgprot_writecombine(PAGE_KERNEL); - - size = PAGE_ALIGN(size); - - if (!gfpflags_allow_blocking(flags)) { - struct page *page = NULL; - void *addr = __alloc_from_pool(size, &page, flags); - - if (addr) - *dma_handle = phys_to_dma(dev, page_to_phys(page)); - - return addr; - } - - ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs); - if (!ptr) - goto no_mem; - - /* remove any dirty cache lines on the kernel alias */ - __dma_flush_area(ptr, size); - - /* create a coherent mapping */ - page = virt_to_page(ptr); - coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, - prot, __builtin_return_address(0)); - if (!coherent_ptr) - goto no_map; - - return coherent_ptr; - -no_map: - dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs); -no_mem: - return NULL; -} - -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) { - void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle)); - - vunmap(vaddr); - dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs); - } -} - -long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, - dma_addr_t dma_addr) -{ - return __phys_to_pfn(dma_to_phys(dev, dma_addr)); -} - pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) { @@ -160,6 +53,11 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, __dma_unmap_area(phys_to_virt(paddr), size, dir); } +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + __dma_flush_area(page_address(page), size); +} + #ifdef CONFIG_IOMMU_DMA static int __swiotlb_get_sgtable_page(struct sg_table *sgt, struct page *page, size_t size) @@ 
-191,67 +89,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, } #endif /* CONFIG_IOMMU_DMA */ -static int __init atomic_pool_init(void) -{ - pgprot_t prot = __pgprot(PROT_NORMAL_NC); - unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT; - struct page *page; - void *addr; - unsigned int pool_size_order = get_order(atomic_pool_size); - - if (dev_get_cma_area(NULL)) - page = dma_alloc_from_contiguous(NULL, nr_pages, - pool_size_order, false); - else - page = alloc_pages(GFP_DMA32, pool_size_order); - - if (page) { - int ret; - void *page_addr = page_address(page); - - memset(page_addr, 0, atomic_pool_size); - __dma_flush_area(page_addr, atomic_pool_size); - - atomic_pool = gen_pool_create(PAGE_SHIFT, -1); - if (!atomic_pool) - goto free_page; - - addr = dma_common_contiguous_remap(page, atomic_pool_size, - VM_USERMAP, prot, atomic_pool_init); - - if (!addr) - goto destroy_genpool; - - ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr, - page_to_phys(page), - atomic_pool_size, -1); - if (ret) - goto remove_mapping; - - gen_pool_set_algo(atomic_pool, - gen_pool_first_fit_order_align, - NULL); - - pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n", - atomic_pool_size / 1024); - return 0; - } - goto out; - -remove_mapping: - dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP); -destroy_genpool: - gen_pool_destroy(atomic_pool); - atomic_pool = NULL; -free_page: - if (!dma_release_from_contiguous(NULL, page, nr_pages)) - __free_pages(page, pool_size_order); -out: - pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", - atomic_pool_size / 1024); - return -ENOMEM; -} - /******************************************** * The following APIs are for dummy DMA ops * ********************************************/ @@ -350,8 +187,7 @@ static int __init arm64_dma_init(void) TAINT_CPU_OUT_OF_SPEC, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", ARCH_DMA_MINALIGN, cache_line_size()); - - return 
atomic_pool_init(); + return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); } arch_initcall(arm64_dma_init); @@ -397,7 +233,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, page = alloc_pages(gfp, get_order(size)); addr = page ? page_address(page) : NULL; } else { - addr = __alloc_from_pool(size, &page, gfp); + addr = dma_alloc_from_pool(size, &page, gfp); } if (!addr) return NULL; @@ -407,7 +243,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, if (coherent) __free_pages(page, get_order(size)); else - __free_from_pool(addr, size); + dma_free_from_pool(addr, size); addr = NULL; } } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { @@ -471,9 +307,9 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, * coherent devices. * Hence how dodgy the below logic looks... */ - if (__in_atomic_pool(cpu_addr, size)) { + if (dma_in_atomic_pool(cpu_addr, size)) { iommu_dma_unmap_page(dev, handle, iosize, 0, 0); - __free_from_pool(cpu_addr, size); + dma_free_from_pool(cpu_addr, size); } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { struct page *page = vmalloc_to_page(cpu_addr); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0f81c713f6e9..1a0edcde7d14 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -455,6 +455,11 @@ void *dma_common_pages_remap(struct page **pages, size_t size, const void *caller); void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags); +int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot); +bool dma_in_atomic_pool(void *start, size_t size); +void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags); +bool dma_free_from_pool(void *start, size_t size); + /** * dma_mmap_attrs - map a coherent DMA allocation into user space * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 
9051b055beec..306557331d7d 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -69,4 +69,6 @@ static inline void arch_sync_dma_for_cpu_all(struct device *dev) } #endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ +void arch_dma_prep_coherent(struct page *page, size_t size); + #endif /* _LINUX_DMA_NONCOHERENT_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index c92e08173ed8..41c3b1df70eb 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -55,3 +55,8 @@ config SWIOTLB config DMA_REMAP depends on MMU bool + +config DMA_DIRECT_REMAP + bool + depends on DMA_DIRECT_OPS + select DMA_REMAP diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index a15c393ea4e5..b32bb08f96ae 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -1,8 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* + * Copyright (C) 2012 ARM Ltd. * Copyright (c) 2014 The Linux Foundation */ -#include +#include +#include +#include +#include +#include #include #include @@ -86,3 +91,154 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size)); vunmap(cpu_addr); } + +#ifdef CONFIG_DMA_DIRECT_REMAP +static struct gen_pool *atomic_pool __ro_after_init; + +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; + +static int __init early_coherent_pool(char *p) +{ + atomic_pool_size = memparse(p, &p); + return 0; +} +early_param("coherent_pool", early_coherent_pool); + +int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot) +{ + unsigned int pool_size_order = get_order(atomic_pool_size); + unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT; + struct page *page; + void *addr; + int ret; + + if (dev_get_cma_area(NULL)) + page = dma_alloc_from_contiguous(NULL, nr_pages, + pool_size_order, false); + else + page = alloc_pages(gfp, pool_size_order); + if (!page) + goto out; + + memset(page_address(page), 
0, atomic_pool_size); + arch_dma_prep_coherent(page, atomic_pool_size); + + atomic_pool = gen_pool_create(PAGE_SHIFT, -1); + if (!atomic_pool) + goto free_page; + + addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP, + prot, __builtin_return_address(0)); + if (!addr) + goto destroy_genpool; + + ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr, + page_to_phys(page), atomic_pool_size, -1); + if (ret) + goto remove_mapping; + gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL); + + pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n", + atomic_pool_size / 1024); + return 0; + +remove_mapping: + dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP); +destroy_genpool: + gen_pool_destroy(atomic_pool); + atomic_pool = NULL; +free_page: + if (!dma_release_from_contiguous(NULL, page, nr_pages)) + __free_pages(page, pool_size_order); +out: + pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", + atomic_pool_size / 1024); + return -ENOMEM; +} + +bool dma_in_atomic_pool(void *start, size_t size) +{ + return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); +} + +void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) +{ + unsigned long val; + void *ptr = NULL; + + if (!atomic_pool) { + WARN(1, "coherent pool not initialised!\n"); + return NULL; + } + + val = gen_pool_alloc(atomic_pool, size); + if (val) { + phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); + + *ret_page = pfn_to_page(__phys_to_pfn(phys)); + ptr = (void *)val; + memset(ptr, 0, size); + } + + return ptr; +} + +bool dma_free_from_pool(void *start, size_t size) +{ + if (!dma_in_atomic_pool(start, size)) + return false; + gen_pool_free(atomic_pool, (unsigned long)start, size); + return true; +} + +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flags, unsigned long attrs) +{ + struct page *page = NULL; + void *ret, *kaddr; + + size = PAGE_ALIGN(size); + 
+ if (!gfpflags_allow_blocking(flags)) { + ret = dma_alloc_from_pool(size, &page, flags); + if (!ret) + return NULL; + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + return ret; + } + + kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs); + if (!kaddr) + return NULL; + page = virt_to_page(kaddr); + + /* remove any dirty cache lines on the kernel alias */ + arch_dma_prep_coherent(page, size); + + /* create a coherent mapping */ + ret = dma_common_contiguous_remap(page, size, VM_USERMAP, + arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs), + __builtin_return_address(0)); + if (!ret) + dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs); + return ret; +} + +void arch_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) +{ + if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) { + void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle)); + + vunmap(vaddr); + dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs); + } +} + +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) +{ + return __phys_to_pfn(dma_to_phys(dev, dma_addr)); +} +#endif /* CONFIG_DMA_DIRECT_REMAP */ -- cgit v1.2.3 From e3e740544173ef0dd8bffbf158182a7748e6c678 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Nov 2018 13:53:34 -0800 Subject: percpu-rwsem: Replace synchronize_sched() with synchronize_rcu() Now that synchronize_rcu() waits for preempt-disable regions of code as well as RCU read-side critical sections, synchronize_sched() can be replaced by synchronize_rcu(). This commit therefore makes this change, even though it is but a comment. Signed-off-by: Paul E. 
McKenney Cc: Dennis Zhou Cc: Christoph Lameter Acked-by: Tejun Heo --- include/linux/percpu-rwsem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 79b99d653e03..71b75643c432 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -41,7 +41,7 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore * * cannot both change sem->state from readers_fast and start checking * counters while we are here. So if we see !sem->state, we know that * the writer won't be checking until we're past the preempt_enable() - * and that one the synchronize_sched() is done, the writer will see + * and that once the synchronize_rcu() is done, the writer will see * anything we did within this RCU-sched read-size critical section. */ __this_cpu_inc(*sem->read_count); -- cgit v1.2.3 From d5cccfc7b772b8a20b06557f1b7c066e7fc2c393 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Nov 2018 14:01:39 -0800 Subject: types: Remove call_rcu_bh() and call_rcu_sched() Now that call_rcu()'s callback is not invoked until after bh-disable and preempt-disable regions of code have completed (in addition to explicitly marked RCU read-side critical sections), call_rcu() can be used in place of call_rcu_bh() and call_rcu_sched(). This commit therefore removes these two API members from the callback_head structure's header comment. Signed-off-by: Paul E. McKenney Cc: Andrew Morton Cc: Pekka Enberg Cc: Masahiro Yamada Cc: Alexey Dobriyan --- include/linux/types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 9834e90aa010..c2615d6a019e 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -212,8 +212,8 @@ struct ustat { * weird ABI and we need to ask it explicitly. 
* * The alignment is required to guarantee that bit 0 of @next will be - * clear under normal conditions -- as long as we use call_rcu(), - * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. + * clear under normal conditions -- as long as we use call_rcu() or + * call_srcu() to queue the callback. * * This guarantee is important for few reasons: * - future call_rcu_lazy() will make use of lower bits in the pointer; -- cgit v1.2.3 From 4348433d8c0234f44adb6e12112e69343f50f0c5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Sun, 18 Nov 2018 21:18:30 +0100 Subject: mtd: fix mtd_oobavail() incoherent returned value mtd_oobavail() returns either mtd->oobavail or mtd->oobsize. Both values are unsigned 32-bit entities, so there is no reason to pretend returning a signed one. Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- include/linux/mtd/mtd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index cd0be91bdefa..035d641e8847 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -386,7 +386,7 @@ static inline struct device_node *mtd_get_of_node(struct mtd_info *mtd) return dev_of_node(&mtd->dev); } -static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) +static inline u32 mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) { return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize; } -- cgit v1.2.3 From 1186af457cc186c5ed01708da71b1ffbdf0a2638 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 20 Nov 2018 09:55:45 +0100 Subject: mtd: keep original flags for every struct mtd_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When allocating a new partition mtd subsystem runs internal tests in the allocate_partition(). They may result in modifying specified flags (e.g. dropping some /features/ like write access).
Those constraints don't have to be necessarily true for subpartitions. It may happen that the parent partition isn't block aligned (effectively disabling write access) while a subpartition may fit blocks nicely. In such a case all checks should be run again (starting with the original flags value). Signed-off-by: Rafał Miłecki Signed-off-by: Boris Brezillon --- drivers/mtd/mtdcore.c | 2 ++ drivers/mtd/mtdpart.c | 3 ++- include/linux/mtd/mtd.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index afb4b17fb670..b6b93291aba9 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -665,6 +665,8 @@ static void mtd_set_dev_defaults(struct mtd_info *mtd) } else { pr_debug("mtd device won't show a device symlink in sysfs\n"); } + + mtd->orig_flags = mtd->flags; } /** diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 99c460facd5e..2b6e53af47da 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -346,7 +346,8 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent, /* set up the MTD object for this partition */ slave->mtd.type = parent->type; - slave->mtd.flags = parent->flags & ~part->mask_flags; + slave->mtd.flags = parent->orig_flags & ~part->mask_flags; + slave->mtd.orig_flags = slave->mtd.flags; slave->mtd.size = part->size; slave->mtd.writesize = parent->writesize; slave->mtd.writebufsize = parent->writebufsize; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 035d641e8847..ba8fa9072aca 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -207,6 +207,7 @@ struct mtd_debug_info { struct mtd_info { u_char type; uint32_t flags; + uint32_t orig_flags; /* Flags as before running mtd checks */ uint64_t size; // Total size of the MTD /* "Major" erase size for the device.
Naïve users may take this -- cgit v1.2.3 From 576f1b4bc80220e1f88f1de5ecb25d99a6e9fa04 Mon Sep 17 00:00:00 2001 From: Houlong Wei Date: Thu, 29 Nov 2018 11:37:09 +0800 Subject: soc: mediatek: Add Mediatek CMDQ helper Add Mediatek CMDQ helper to create CMDQ packet and assemble GCE op code. Signed-off-by: Houlong Wei Signed-off-by: HS Liao Signed-off-by: Matthias Brugger --- drivers/soc/mediatek/Kconfig | 12 ++ drivers/soc/mediatek/Makefile | 1 + drivers/soc/mediatek/mtk-cmdq-helper.c | 300 +++++++++++++++++++++++++++++++++ include/linux/soc/mediatek/mtk-cmdq.h | 133 +++++++++++++++ 4 files changed, 446 insertions(+) create mode 100644 drivers/soc/mediatek/mtk-cmdq-helper.c create mode 100644 include/linux/soc/mediatek/mtk-cmdq.h (limited to 'include/linux') diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig index a7d0667338f2..17bd7590464f 100644 --- a/drivers/soc/mediatek/Kconfig +++ b/drivers/soc/mediatek/Kconfig @@ -4,6 +4,18 @@ menu "MediaTek SoC drivers" depends on ARCH_MEDIATEK || COMPILE_TEST +config MTK_CMDQ + tristate "MediaTek CMDQ Support" + depends on ARCH_MEDIATEK || COMPILE_TEST + select MAILBOX + select MTK_CMDQ_MBOX + select MTK_INFRACFG + help + Say yes here to add support for the MediaTek Command Queue (CMDQ) + driver. The CMDQ is used to help read/write registers with critical + time limitation, such as updating display configuration during the + vblank. 
+ config MTK_INFRACFG bool "MediaTek INFRACFG Support" select REGMAP diff --git a/drivers/soc/mediatek/Makefile b/drivers/soc/mediatek/Makefile index 12998b08819e..64ce5eeaba32 100644 --- a/drivers/soc/mediatek/Makefile +++ b/drivers/soc/mediatek/Makefile @@ -1,3 +1,4 @@ +obj-$(CONFIG_MTK_CMDQ) += mtk-cmdq-helper.o obj-$(CONFIG_MTK_INFRACFG) += mtk-infracfg.o obj-$(CONFIG_MTK_PMIC_WRAP) += mtk-pmic-wrap.o obj-$(CONFIG_MTK_SCPSYS) += mtk-scpsys.o diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c new file mode 100644 index 000000000000..ff9fef5a032b --- /dev/null +++ b/drivers/soc/mediatek/mtk-cmdq-helper.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright (c) 2018 MediaTek Inc. + +#include +#include +#include +#include +#include +#include + +#define CMDQ_ARG_A_WRITE_MASK 0xffff +#define CMDQ_WRITE_ENABLE_MASK BIT(0) +#define CMDQ_EOC_IRQ_EN BIT(0) +#define CMDQ_EOC_CMD ((u64)((CMDQ_CODE_EOC << CMDQ_OP_CODE_SHIFT)) \ + << 32 | CMDQ_EOC_IRQ_EN) + +static void cmdq_client_timeout(struct timer_list *t) +{ + struct cmdq_client *client = from_timer(client, t, timer); + + dev_err(client->client.dev, "cmdq timeout!\n"); +} + +struct cmdq_client *cmdq_mbox_create(struct device *dev, int index, u32 timeout) +{ + struct cmdq_client *client; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return (struct cmdq_client *)-ENOMEM; + + client->timeout_ms = timeout; + if (timeout != CMDQ_NO_TIMEOUT) { + spin_lock_init(&client->lock); + timer_setup(&client->timer, cmdq_client_timeout, 0); + } + client->pkt_cnt = 0; + client->client.dev = dev; + client->client.tx_block = false; + client->chan = mbox_request_channel(&client->client, index); + + if (IS_ERR(client->chan)) { + long err; + + dev_err(dev, "failed to request channel\n"); + err = PTR_ERR(client->chan); + kfree(client); + + return ERR_PTR(err); + } + + return client; +} +EXPORT_SYMBOL(cmdq_mbox_create); + +void cmdq_mbox_destroy(struct 
cmdq_client *client) +{ + if (client->timeout_ms != CMDQ_NO_TIMEOUT) { + spin_lock(&client->lock); + del_timer_sync(&client->timer); + spin_unlock(&client->lock); + } + mbox_free_channel(client->chan); + kfree(client); +} +EXPORT_SYMBOL(cmdq_mbox_destroy); + +struct cmdq_pkt *cmdq_pkt_create(struct cmdq_client *client, size_t size) +{ + struct cmdq_pkt *pkt; + struct device *dev; + dma_addr_t dma_addr; + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return ERR_PTR(-ENOMEM); + pkt->va_base = kzalloc(size, GFP_KERNEL); + if (!pkt->va_base) { + kfree(pkt); + return ERR_PTR(-ENOMEM); + } + pkt->buf_size = size; + pkt->cl = (void *)client; + + dev = client->chan->mbox->dev; + dma_addr = dma_map_single(dev, pkt->va_base, pkt->buf_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + dev_err(dev, "dma map failed, size=%u\n", (u32)(u64)size); + kfree(pkt->va_base); + kfree(pkt); + return ERR_PTR(-ENOMEM); + } + + pkt->pa_base = dma_addr; + + return pkt; +} +EXPORT_SYMBOL(cmdq_pkt_create); + +void cmdq_pkt_destroy(struct cmdq_pkt *pkt) +{ + struct cmdq_client *client = (struct cmdq_client *)pkt->cl; + + dma_unmap_single(client->chan->mbox->dev, pkt->pa_base, pkt->buf_size, + DMA_TO_DEVICE); + kfree(pkt->va_base); + kfree(pkt); +} +EXPORT_SYMBOL(cmdq_pkt_destroy); + +static int cmdq_pkt_append_command(struct cmdq_pkt *pkt, enum cmdq_code code, + u32 arg_a, u32 arg_b) +{ + u64 *cmd_ptr; + + if (unlikely(pkt->cmd_buf_size + CMDQ_INST_SIZE > pkt->buf_size)) { + /* + * In the case of allocated buffer size (pkt->buf_size) is used + * up, the real required size (pkt->cmd_buf_size) is still + * increased, so that the user knows how much memory should be + * ultimately allocated after appending all commands and + * flushing the command packet. Therefore, the user can call + * cmdq_pkt_create() again with the real required buffer size.
+ */ + pkt->cmd_buf_size += CMDQ_INST_SIZE; + WARN_ONCE(1, "%s: buffer size %u is too small !\n", + __func__, (u32)pkt->buf_size); + return -ENOMEM; + } + cmd_ptr = pkt->va_base + pkt->cmd_buf_size; + (*cmd_ptr) = (u64)((code << CMDQ_OP_CODE_SHIFT) | arg_a) << 32 | arg_b; + pkt->cmd_buf_size += CMDQ_INST_SIZE; + + return 0; +} + +int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 value, u32 subsys, u32 offset) +{ + u32 arg_a = (offset & CMDQ_ARG_A_WRITE_MASK) | + (subsys << CMDQ_SUBSYS_SHIFT); + + return cmdq_pkt_append_command(pkt, CMDQ_CODE_WRITE, arg_a, value); +} +EXPORT_SYMBOL(cmdq_pkt_write); + +int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u32 value, + u32 subsys, u32 offset, u32 mask) +{ + u32 offset_mask = offset; + int err = 0; + + if (mask != 0xffffffff) { + err = cmdq_pkt_append_command(pkt, CMDQ_CODE_MASK, 0, ~mask); + offset_mask |= CMDQ_WRITE_ENABLE_MASK; + } + err |= cmdq_pkt_write(pkt, value, subsys, offset_mask); + + return err; +} +EXPORT_SYMBOL(cmdq_pkt_write_mask); + +int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u32 event) +{ + u32 arg_b; + + if (event >= CMDQ_MAX_EVENT) + return -EINVAL; + + /* + * WFE arg_b + * bit 0-11: wait value + * bit 15: 1 - wait, 0 - no wait + * bit 16-27: update value + * bit 31: 1 - update, 0 - no update + */ + arg_b = CMDQ_WFE_UPDATE | CMDQ_WFE_WAIT | CMDQ_WFE_WAIT_VALUE; + + return cmdq_pkt_append_command(pkt, CMDQ_CODE_WFE, event, arg_b); +} +EXPORT_SYMBOL(cmdq_pkt_wfe); + +int cmdq_pkt_clear_event(struct cmdq_pkt *pkt, u32 event) +{ + if (event >= CMDQ_MAX_EVENT) + return -EINVAL; + + return cmdq_pkt_append_command(pkt, CMDQ_CODE_WFE, event, + CMDQ_WFE_UPDATE); +} +EXPORT_SYMBOL(cmdq_pkt_clear_event); + +static int cmdq_pkt_finalize(struct cmdq_pkt *pkt) +{ + int err; + + /* insert EOC and generate IRQ for each command iteration */ + err = cmdq_pkt_append_command(pkt, CMDQ_CODE_EOC, 0, CMDQ_EOC_IRQ_EN); + + /* JUMP to end */ + err |= cmdq_pkt_append_command(pkt, CMDQ_CODE_JUMP, 0, CMDQ_JUMP_PASS); + + return err; +} + 
+static void cmdq_pkt_flush_async_cb(struct cmdq_cb_data data) +{ + struct cmdq_pkt *pkt = (struct cmdq_pkt *)data.data; + struct cmdq_task_cb *cb = &pkt->cb; + struct cmdq_client *client = (struct cmdq_client *)pkt->cl; + + if (client->timeout_ms != CMDQ_NO_TIMEOUT) { + unsigned long flags = 0; + + spin_lock_irqsave(&client->lock, flags); + if (--client->pkt_cnt == 0) + del_timer(&client->timer); + else + mod_timer(&client->timer, jiffies + + msecs_to_jiffies(client->timeout_ms)); + spin_unlock_irqrestore(&client->lock, flags); + } + + dma_sync_single_for_cpu(client->chan->mbox->dev, pkt->pa_base, + pkt->cmd_buf_size, DMA_TO_DEVICE); + if (cb->cb) { + data.data = cb->data; + cb->cb(data); + } +} + +int cmdq_pkt_flush_async(struct cmdq_pkt *pkt, cmdq_async_flush_cb cb, + void *data) +{ + int err; + unsigned long flags = 0; + struct cmdq_client *client = (struct cmdq_client *)pkt->cl; + + err = cmdq_pkt_finalize(pkt); + if (err < 0) + return err; + + pkt->cb.cb = cb; + pkt->cb.data = data; + pkt->async_cb.cb = cmdq_pkt_flush_async_cb; + pkt->async_cb.data = pkt; + + dma_sync_single_for_device(client->chan->mbox->dev, pkt->pa_base, + pkt->cmd_buf_size, DMA_TO_DEVICE); + + if (client->timeout_ms != CMDQ_NO_TIMEOUT) { + spin_lock_irqsave(&client->lock, flags); + if (client->pkt_cnt++ == 0) + mod_timer(&client->timer, jiffies + + msecs_to_jiffies(client->timeout_ms)); + spin_unlock_irqrestore(&client->lock, flags); + } + + mbox_send_message(client->chan, pkt); + /* We can send next packet immediately, so just call txdone. 
*/ + mbox_client_txdone(client->chan, 0); + + return 0; +} +EXPORT_SYMBOL(cmdq_pkt_flush_async); + +struct cmdq_flush_completion { + struct completion cmplt; + bool err; +}; + +static void cmdq_pkt_flush_cb(struct cmdq_cb_data data) +{ + struct cmdq_flush_completion *cmplt; + + cmplt = (struct cmdq_flush_completion *)data.data; + if (data.sta != CMDQ_CB_NORMAL) + cmplt->err = true; + else + cmplt->err = false; + complete(&cmplt->cmplt); +} + +int cmdq_pkt_flush(struct cmdq_pkt *pkt) +{ + struct cmdq_flush_completion cmplt; + int err; + + init_completion(&cmplt.cmplt); + err = cmdq_pkt_flush_async(pkt, cmdq_pkt_flush_cb, &cmplt); + if (err < 0) + return err; + wait_for_completion(&cmplt.cmplt); + + return cmplt.err ? -EFAULT : 0; +} +EXPORT_SYMBOL(cmdq_pkt_flush); + +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h new file mode 100644 index 000000000000..54ade13a9b15 --- /dev/null +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018 MediaTek Inc. + * + */ + +#ifndef __MTK_CMDQ_H__ +#define __MTK_CMDQ_H__ + +#include +#include +#include + +#define CMDQ_NO_TIMEOUT 0xffffffffu + +/** cmdq event maximum */ +#define CMDQ_MAX_EVENT 0x3ff + +struct cmdq_pkt; + +struct cmdq_client { + spinlock_t lock; + u32 pkt_cnt; + struct mbox_client client; + struct mbox_chan *chan; + struct timer_list timer; + u32 timeout_ms; /* in unit of millisecond */ +}; + +/** + * cmdq_mbox_create() - create CMDQ mailbox client and channel + * @dev: device of CMDQ mailbox client + * @index: index of CMDQ mailbox channel + * @timeout: timeout of a pkt execution by GCE, in unit of millisecond, set + * CMDQ_NO_TIMEOUT if a timer is not used.
+ * + * Return: CMDQ mailbox client pointer + */ +struct cmdq_client *cmdq_mbox_create(struct device *dev, int index, + u32 timeout); + +/** + * cmdq_mbox_destroy() - destroy CMDQ mailbox client and channel + * @client: the CMDQ mailbox client + */ +void cmdq_mbox_destroy(struct cmdq_client *client); + +/** + * cmdq_pkt_create() - create a CMDQ packet + * @client: the CMDQ mailbox client + * @size: required CMDQ buffer size + * + * Return: CMDQ packet pointer + */ +struct cmdq_pkt *cmdq_pkt_create(struct cmdq_client *client, size_t size); + +/** + * cmdq_pkt_destroy() - destroy the CMDQ packet + * @pkt: the CMDQ packet + */ +void cmdq_pkt_destroy(struct cmdq_pkt *pkt); + +/** + * cmdq_pkt_write() - append write command to the CMDQ packet + * @pkt: the CMDQ packet + * @value: the specified target register value + * @subsys: the CMDQ sub system code + * @offset: register offset from CMDQ sub system + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 value, u32 subsys, u32 offset); + +/** + * cmdq_pkt_write_mask() - append write command with mask to the CMDQ packet + * @pkt: the CMDQ packet + * @value: the specified target register value + * @subsys: the CMDQ sub system code + * @offset: register offset from CMDQ sub system + * @mask: the specified target register mask + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u32 value, + u32 subsys, u32 offset, u32 mask); + +/** + * cmdq_pkt_wfe() - append wait for event command to the CMDQ packet + * @pkt: the CMDQ packet + * @event: the desired event type to "wait and CLEAR" + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u32 event); + +/** + * cmdq_pkt_clear_event() - append clear event command to the CMDQ packet + * @pkt: the CMDQ packet + * @event: the desired event to be cleared + * + * Return: 0 for success; else the error code is 
returned + */ +int cmdq_pkt_clear_event(struct cmdq_pkt *pkt, u32 event); + +/** + * cmdq_pkt_flush_async() - trigger CMDQ to asynchronously execute the CMDQ + * packet and call back at the end of done packet + * @pkt: the CMDQ packet + * @cb: called at the end of done packet + * @data: this data will pass back to cb + * + * Return: 0 for success; else the error code is returned + * + * Trigger CMDQ to asynchronously execute the CMDQ packet and call back + * at the end of done packet. Note that this is an ASYNC function. When the + * function returned, it may or may not be finished. + */ +int cmdq_pkt_flush_async(struct cmdq_pkt *pkt, cmdq_async_flush_cb cb, + void *data); + +/** + * cmdq_pkt_flush() - trigger CMDQ to execute the CMDQ packet + * @pkt: the CMDQ packet + * + * Return: 0 for success; else the error code is returned + * + * Trigger CMDQ to execute the CMDQ packet. Note that this is a + * synchronous flush function. When the function returned, the recorded + * commands have been done. + */ +int cmdq_pkt_flush(struct cmdq_pkt *pkt); + +#endif /* __MTK_CMDQ_H__ */ -- cgit v1.2.3 From 7ed98dddb764eebf2783881a17dc4980181a6e1a Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 8 Nov 2018 15:05:21 -0600 Subject: fsi: Add On-Chip Controller (OCC) driver The OCC is a device embedded on a POWER processor that collects and aggregates sensor data from the processor and system. The OCC can provide the raw sensor data as well as perform thermal and power management on the system. This driver provides an atomic communications channel between a service processor (e.g. a BMC) and the OCC. The driver is dependent on the FSI SBEFIFO driver to get hardware access through the SBE to the OCC SRAM. Commands are issued to the SBE to send or fetch data to the SRAM. 
Signed-off-by: Eddie James Signed-off-by: Andrew Jeffery Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Joel Stanley Signed-off-by: Guenter Roeck --- drivers/fsi/Kconfig | 10 + drivers/fsi/Makefile | 1 + drivers/fsi/fsi-occ.c | 599 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fsi-occ.h | 25 ++ 4 files changed, 635 insertions(+) create mode 100644 drivers/fsi/fsi-occ.c create mode 100644 include/linux/fsi-occ.h (limited to 'include/linux') diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig index 99c99a5d57fe..5cc20f3c3fd6 100644 --- a/drivers/fsi/Kconfig +++ b/drivers/fsi/Kconfig @@ -65,4 +65,14 @@ config FSI_SBEFIFO a pipe-like FSI device for communicating with the self boot engine (SBE) on POWER processors. +config FSI_OCC + tristate "OCC SBEFIFO client device driver" + depends on FSI_SBEFIFO + ---help--- + This option enables an SBEFIFO based On-Chip Controller (OCC) device + driver. The OCC is a device embedded on a POWER processor that collects + and aggregates sensor data from the processor and system. The OCC can + provide the raw sensor data as well as perform thermal and power + management on the system. 
+ endif diff --git a/drivers/fsi/Makefile b/drivers/fsi/Makefile index a50d6ce22fb3..62687ec86d2e 100644 --- a/drivers/fsi/Makefile +++ b/drivers/fsi/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_FSI_MASTER_GPIO) += fsi-master-gpio.o obj-$(CONFIG_FSI_MASTER_AST_CF) += fsi-master-ast-cf.o obj-$(CONFIG_FSI_SCOM) += fsi-scom.o obj-$(CONFIG_FSI_SBEFIFO) += fsi-sbefifo.o +obj-$(CONFIG_FSI_OCC) += fsi-occ.o diff --git a/drivers/fsi/fsi-occ.c b/drivers/fsi/fsi-occ.c new file mode 100644 index 000000000000..a2301cea1cbb --- /dev/null +++ b/drivers/fsi/fsi-occ.c @@ -0,0 +1,599 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OCC_SRAM_BYTES 4096 +#define OCC_CMD_DATA_BYTES 4090 +#define OCC_RESP_DATA_BYTES 4089 + +#define OCC_SRAM_CMD_ADDR 0xFFFBE000 +#define OCC_SRAM_RSP_ADDR 0xFFFBF000 + +/* + * Assume we don't have much FFDC, if we do we'll overflow and + * fail the command. This needs to be big enough for simple + * commands as well. 
+ */ +#define OCC_SBE_STATUS_WORDS 32 + +#define OCC_TIMEOUT_MS 1000 +#define OCC_CMD_IN_PRG_WAIT_MS 50 + +struct occ { + struct device *dev; + struct device *sbefifo; + char name[32]; + int idx; + struct miscdevice mdev; + struct mutex occ_lock; +}; + +#define to_occ(x) container_of((x), struct occ, mdev) + +struct occ_response { + u8 seq_no; + u8 cmd_type; + u8 return_status; + __be16 data_length; + u8 data[OCC_RESP_DATA_BYTES + 2]; /* two bytes checksum */ +} __packed; + +struct occ_client { + struct occ *occ; + struct mutex lock; + size_t data_size; + size_t read_offset; + u8 *buffer; +}; + +#define to_client(x) container_of((x), struct occ_client, xfr) + +static DEFINE_IDA(occ_ida); + +static int occ_open(struct inode *inode, struct file *file) +{ + struct occ_client *client = kzalloc(sizeof(*client), GFP_KERNEL); + struct miscdevice *mdev = file->private_data; + struct occ *occ = to_occ(mdev); + + if (!client) + return -ENOMEM; + + client->buffer = (u8 *)__get_free_page(GFP_KERNEL); + if (!client->buffer) { + kfree(client); + return -ENOMEM; + } + + client->occ = occ; + mutex_init(&client->lock); + file->private_data = client; + + /* We allocate a 1-page buffer, make sure it all fits */ + BUILD_BUG_ON((OCC_CMD_DATA_BYTES + 3) > PAGE_SIZE); + BUILD_BUG_ON((OCC_RESP_DATA_BYTES + 7) > PAGE_SIZE); + + return 0; +} + +static ssize_t occ_read(struct file *file, char __user *buf, size_t len, + loff_t *offset) +{ + struct occ_client *client = file->private_data; + ssize_t rc = 0; + + if (!client) + return -ENODEV; + + if (len > OCC_SRAM_BYTES) + return -EINVAL; + + mutex_lock(&client->lock); + + /* This should not be possible ... 
*/ + if (WARN_ON_ONCE(client->read_offset > client->data_size)) { + rc = -EIO; + goto done; + } + + /* Grab how much data we have to read */ + rc = min(len, client->data_size - client->read_offset); + if (copy_to_user(buf, client->buffer + client->read_offset, rc)) + rc = -EFAULT; + else + client->read_offset += rc; + + done: + mutex_unlock(&client->lock); + + return rc; +} + +static ssize_t occ_write(struct file *file, const char __user *buf, + size_t len, loff_t *offset) +{ + struct occ_client *client = file->private_data; + size_t rlen, data_length; + u16 checksum = 0; + ssize_t rc, i; + u8 *cmd; + + if (!client) + return -ENODEV; + + if (len > (OCC_CMD_DATA_BYTES + 3) || len < 3) + return -EINVAL; + + mutex_lock(&client->lock); + + /* Construct the command */ + cmd = client->buffer; + + /* Sequence number (we could increment and compare with response) */ + cmd[0] = 1; + + /* + * Copy the user command (assume user data follows the occ command + * format) + * byte 0: command type + * bytes 1-2: data length (msb first) + * bytes 3-n: data + */ + if (copy_from_user(&cmd[1], buf, len)) { + rc = -EFAULT; + goto done; + } + + /* Extract data length */ + data_length = (cmd[2] << 8) + cmd[3]; + if (data_length > OCC_CMD_DATA_BYTES) { + rc = -EINVAL; + goto done; + } + + /* Calculate checksum */ + for (i = 0; i < data_length + 4; ++i) + checksum += cmd[i]; + + cmd[data_length + 4] = checksum >> 8; + cmd[data_length + 5] = checksum & 0xFF; + + /* Submit command */ + rlen = PAGE_SIZE; + rc = fsi_occ_submit(client->occ->dev, cmd, data_length + 6, cmd, + &rlen); + if (rc) + goto done; + + /* Set read tracking data */ + client->data_size = rlen; + client->read_offset = 0; + + /* Done */ + rc = len; + + done: + mutex_unlock(&client->lock); + + return rc; +} + +static int occ_release(struct inode *inode, struct file *file) +{ + struct occ_client *client = file->private_data; + + free_page((unsigned long)client->buffer); + kfree(client); + + return 0; +} + +static const struct 
file_operations occ_fops = { + .owner = THIS_MODULE, + .open = occ_open, + .read = occ_read, + .write = occ_write, + .release = occ_release, +}; + +static int occ_verify_checksum(struct occ_response *resp, u16 data_length) +{ + /* Fetch the two bytes after the data for the checksum. */ + u16 checksum_resp = get_unaligned_be16(&resp->data[data_length]); + u16 checksum; + u16 i; + + checksum = resp->seq_no; + checksum += resp->cmd_type; + checksum += resp->return_status; + checksum += (data_length >> 8) + (data_length & 0xFF); + + for (i = 0; i < data_length; ++i) + checksum += resp->data[i]; + + if (checksum != checksum_resp) + return -EBADMSG; + + return 0; +} + +static int occ_getsram(struct occ *occ, u32 address, void *data, ssize_t len) +{ + u32 data_len = ((len + 7) / 8) * 8; /* must be multiples of 8 B */ + size_t resp_len, resp_data_len; + __be32 *resp, cmd[5]; + int rc; + + /* + * Magic sequence to do SBE getsram command. SBE will fetch data from + * specified SRAM address. + */ + cmd[0] = cpu_to_be32(0x5); + cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_OCC_SRAM); + cmd[2] = cpu_to_be32(1); + cmd[3] = cpu_to_be32(address); + cmd[4] = cpu_to_be32(data_len); + + resp_len = (data_len >> 2) + OCC_SBE_STATUS_WORDS; + resp = kzalloc(resp_len << 2, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + rc = sbefifo_submit(occ->sbefifo, cmd, 5, resp, &resp_len); + if (rc) + goto free; + + rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_GET_OCC_SRAM, + resp, resp_len, &resp_len); + if (rc) + goto free; + + resp_data_len = be32_to_cpu(resp[resp_len - 1]); + if (resp_data_len != data_len) { + dev_err(occ->dev, "SRAM read expected %d bytes got %zd\n", + data_len, resp_data_len); + rc = -EBADMSG; + } else { + memcpy(data, resp, len); + } + +free: + /* Convert positive SBEI status */ + if (rc > 0) { + dev_err(occ->dev, "SRAM read returned failure status: %08x\n", + rc); + rc = -EBADMSG; + } + + kfree(resp); + return rc; +} + +static int occ_putsram(struct occ *occ, u32 address, 
const void *data, + ssize_t len) +{ + size_t cmd_len, buf_len, resp_len, resp_data_len; + u32 data_len = ((len + 7) / 8) * 8; /* must be multiples of 8 B */ + __be32 *buf; + int rc; + + /* + * We use the same buffer for command and response, make + * sure it's big enough + */ + resp_len = OCC_SBE_STATUS_WORDS; + cmd_len = (data_len >> 2) + 5; + buf_len = max(cmd_len, resp_len); + buf = kzalloc(buf_len << 2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* + * Magic sequence to do SBE putsram command. SBE will transfer + * data to specified SRAM address. + */ + buf[0] = cpu_to_be32(cmd_len); + buf[1] = cpu_to_be32(SBEFIFO_CMD_PUT_OCC_SRAM); + buf[2] = cpu_to_be32(1); + buf[3] = cpu_to_be32(address); + buf[4] = cpu_to_be32(data_len); + + memcpy(&buf[5], data, len); + + rc = sbefifo_submit(occ->sbefifo, buf, cmd_len, buf, &resp_len); + if (rc) + goto free; + + rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_PUT_OCC_SRAM, + buf, resp_len, &resp_len); + if (rc) + goto free; + + if (resp_len != 1) { + dev_err(occ->dev, "SRAM write response length invalid: %zd\n", + resp_len); + rc = -EBADMSG; + } else { + resp_data_len = be32_to_cpu(buf[0]); + if (resp_data_len != data_len) { + dev_err(occ->dev, + "SRAM write expected %d bytes got %zd\n", + data_len, resp_data_len); + rc = -EBADMSG; + } + } + +free: + /* Convert positive SBEI status */ + if (rc > 0) { + dev_err(occ->dev, "SRAM write returned failure status: %08x\n", + rc); + rc = -EBADMSG; + } + + kfree(buf); + return rc; +} + +static int occ_trigger_attn(struct occ *occ) +{ + __be32 buf[OCC_SBE_STATUS_WORDS]; + size_t resp_len, resp_data_len; + int rc; + + BUILD_BUG_ON(OCC_SBE_STATUS_WORDS < 7); + resp_len = OCC_SBE_STATUS_WORDS; + + buf[0] = cpu_to_be32(0x5 + 0x2); /* Chip-op length in words */ + buf[1] = cpu_to_be32(SBEFIFO_CMD_PUT_OCC_SRAM); + buf[2] = cpu_to_be32(0x3); /* Mode: Circular */ + buf[3] = cpu_to_be32(0x0); /* Address: ignore in mode 3 */ + buf[4] = cpu_to_be32(0x8); /* Data length in bytes */ + 
buf[5] = cpu_to_be32(0x20010000); /* Trigger OCC attention */ + buf[6] = 0; + + rc = sbefifo_submit(occ->sbefifo, buf, 7, buf, &resp_len); + if (rc) + goto error; + + rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_PUT_OCC_SRAM, + buf, resp_len, &resp_len); + if (rc) + goto error; + + if (resp_len != 1) { + dev_err(occ->dev, "SRAM attn response length invalid: %zd\n", + resp_len); + rc = -EBADMSG; + } else { + resp_data_len = be32_to_cpu(buf[0]); + if (resp_data_len != 8) { + dev_err(occ->dev, + "SRAM attn expected 8 bytes got %zd\n", + resp_data_len); + rc = -EBADMSG; + } + } + + error: + /* Convert positive SBEI status */ + if (rc > 0) { + dev_err(occ->dev, "SRAM attn returned failure status: %08x\n", + rc); + rc = -EBADMSG; + } + + return rc; +} + +int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, + void *response, size_t *resp_len) +{ + const unsigned long timeout = msecs_to_jiffies(OCC_TIMEOUT_MS); + const unsigned long wait_time = + msecs_to_jiffies(OCC_CMD_IN_PRG_WAIT_MS); + struct occ *occ = dev_get_drvdata(dev); + struct occ_response *resp = response; + u16 resp_data_length; + unsigned long start; + int rc; + + if (!occ) + return -ENODEV; + + if (*resp_len < 7) { + dev_dbg(dev, "Bad resplen %zd\n", *resp_len); + return -EINVAL; + } + + mutex_lock(&occ->occ_lock); + + rc = occ_putsram(occ, OCC_SRAM_CMD_ADDR, request, req_len); + if (rc) + goto done; + + rc = occ_trigger_attn(occ); + if (rc) + goto done; + + /* Read occ response header */ + start = jiffies; + do { + rc = occ_getsram(occ, OCC_SRAM_RSP_ADDR, resp, 8); + if (rc) + goto done; + + if (resp->return_status == OCC_RESP_CMD_IN_PRG) { + rc = -ETIMEDOUT; + + if (time_after(jiffies, start + timeout)) + break; + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(wait_time); + } + } while (rc); + + /* Extract size of response data */ + resp_data_length = get_unaligned_be16(&resp->data_length); + + /* Message size is data length + 5 bytes header + 2 bytes checksum 
*/ + if ((resp_data_length + 7) > *resp_len) { + rc = -EMSGSIZE; + goto done; + } + + dev_dbg(dev, "resp_status=%02x resp_data_len=%d\n", + resp->return_status, resp_data_length); + + /* Grab the rest */ + if (resp_data_length > 1) { + /* already got 3 bytes resp, also need 2 bytes checksum */ + rc = occ_getsram(occ, OCC_SRAM_RSP_ADDR + 8, + &resp->data[3], resp_data_length - 1); + if (rc) + goto done; + } + + *resp_len = resp_data_length + 7; + rc = occ_verify_checksum(resp, resp_data_length); + + done: + mutex_unlock(&occ->occ_lock); + + return rc; +} +EXPORT_SYMBOL_GPL(fsi_occ_submit); + +static int occ_unregister_child(struct device *dev, void *data) +{ + struct platform_device *hwmon_dev = to_platform_device(dev); + + platform_device_unregister(hwmon_dev); + + return 0; +} + +static int occ_probe(struct platform_device *pdev) +{ + int rc; + u32 reg; + struct occ *occ; + struct platform_device *hwmon_dev; + struct device *dev = &pdev->dev; + struct platform_device_info hwmon_dev_info = { + .parent = dev, + .name = "occ-hwmon", + }; + + occ = devm_kzalloc(dev, sizeof(*occ), GFP_KERNEL); + if (!occ) + return -ENOMEM; + + occ->dev = dev; + occ->sbefifo = dev->parent; + mutex_init(&occ->occ_lock); + + if (dev->of_node) { + rc = of_property_read_u32(dev->of_node, "reg", &reg); + if (!rc) { + /* make sure we don't have a duplicate from dts */ + occ->idx = ida_simple_get(&occ_ida, reg, reg + 1, + GFP_KERNEL); + if (occ->idx < 0) + occ->idx = ida_simple_get(&occ_ida, 1, INT_MAX, + GFP_KERNEL); + } else { + occ->idx = ida_simple_get(&occ_ida, 1, INT_MAX, + GFP_KERNEL); + } + } else { + occ->idx = ida_simple_get(&occ_ida, 1, INT_MAX, GFP_KERNEL); + } + + platform_set_drvdata(pdev, occ); + + snprintf(occ->name, sizeof(occ->name), "occ%d", occ->idx); + occ->mdev.fops = &occ_fops; + occ->mdev.minor = MISC_DYNAMIC_MINOR; + occ->mdev.name = occ->name; + occ->mdev.parent = dev; + + rc = misc_register(&occ->mdev); + if (rc) { + dev_err(dev, "failed to register miscdevice: %d\n",
rc); + ida_simple_remove(&occ_ida, occ->idx); + return rc; + } + + hwmon_dev_info.id = occ->idx; + hwmon_dev = platform_device_register_full(&hwmon_dev_info); + if (!hwmon_dev) + dev_warn(dev, "failed to create hwmon device\n"); + + return 0; +} + +static int occ_remove(struct platform_device *pdev) +{ + struct occ *occ = platform_get_drvdata(pdev); + + misc_deregister(&occ->mdev); + + device_for_each_child(&pdev->dev, NULL, occ_unregister_child); + + ida_simple_remove(&occ_ida, occ->idx); + + return 0; +} + +static const struct of_device_id occ_match[] = { + { .compatible = "ibm,p9-occ" }, + { }, +}; + +static struct platform_driver occ_driver = { + .driver = { + .name = "occ", + .of_match_table = occ_match, + }, + .probe = occ_probe, + .remove = occ_remove, +}; + +static int occ_init(void) +{ + return platform_driver_register(&occ_driver); +} + +static void occ_exit(void) +{ + platform_driver_unregister(&occ_driver); + + ida_destroy(&occ_ida); +} + +module_init(occ_init); +module_exit(occ_exit); + +MODULE_AUTHOR("Eddie James "); +MODULE_DESCRIPTION("BMC P9 OCC driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/fsi-occ.h b/include/linux/fsi-occ.h new file mode 100644 index 000000000000..d4cdc2aa6e33 --- /dev/null +++ b/include/linux/fsi-occ.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef LINUX_FSI_OCC_H +#define LINUX_FSI_OCC_H + +struct device; + +#define OCC_RESP_CMD_IN_PRG 0xFF +#define OCC_RESP_SUCCESS 0 +#define OCC_RESP_CMD_INVAL 0x11 +#define OCC_RESP_CMD_LEN_INVAL 0x12 +#define OCC_RESP_DATA_INVAL 0x13 +#define OCC_RESP_CHKSUM_ERR 0x14 +#define OCC_RESP_INT_ERR 0x15 +#define OCC_RESP_BAD_STATE 0x16 +#define OCC_RESP_CRIT_EXCEPT 0xE0 +#define OCC_RESP_CRIT_INIT 0xE1 +#define OCC_RESP_CRIT_WATCHDOG 0xE2 +#define OCC_RESP_CRIT_OCB 0xE3 +#define OCC_RESP_CRIT_HW 0xE4 + +int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, + void *response, size_t *resp_len); + +#endif /* LINUX_FSI_OCC_H */ -- cgit v1.2.3 From 
dfcb245e28481256a10a9133441baf2a93d26642 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Dec 2018 10:05:56 +0100 Subject: sched: Fix various typos in comments Go over the scheduler source code and fix common typos in comments - and a typo in an actual variable name. No change in functionality intended. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- include/linux/sched/isolation.h | 4 ++-- include/linux/sched/mm.h | 2 +- include/linux/sched/stat.h | 2 +- kernel/sched/core.c | 2 +- kernel/sched/cputime.c | 2 +- kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 8 ++++---- kernel/sched/isolation.c | 14 +++++++------- kernel/sched/sched.h | 4 ++-- 10 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 291a9bd5b97f..b8c7ba0e3796 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,7 +176,7 @@ struct task_group; * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). * * However, with slightly different timing the wakeup TASK_RUNNING store can - * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not + * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not * a problem either because that will result in one extra go around the loop * and our @cond test will save the day. * @@ -515,7 +515,7 @@ struct sched_dl_entity { /* * Actual scheduling parameters. Initialized with the values above, - * they are continously updated during task execution. Note that + * they are continuously updated during task execution. Note that * the remaining runtime could be < 0 in case we are in overrun. 
*/ s64 runtime; /* Remaining runtime for this instance */ diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 4a6582c27dea..b0fb1446fe04 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -16,7 +16,7 @@ enum hk_flags { }; #ifdef CONFIG_CPU_ISOLATION -DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); +DECLARE_STATIC_KEY_FALSE(housekeeping_overridden); extern int housekeeping_any_cpu(enum hk_flags flags); extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); @@ -43,7 +43,7 @@ static inline void housekeeping_init(void) { } static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) { #ifdef CONFIG_CPU_ISOLATION - if (static_branch_unlikely(&housekeeping_overriden)) + if (static_branch_unlikely(&housekeeping_overridden)) return housekeeping_test_cpu(cpu, flags); #endif return true; diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index aebb370a0006..3bfa6a0cbba4 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -153,7 +153,7 @@ static inline gfp_t current_gfp_context(gfp_t flags) { /* * NOIO implies both NOIO and NOFS and it is a weaker context - * so always make sure it makes precendence + * so always make sure it makes precedence */ if (unlikely(current->flags & PF_MEMALLOC_NOIO)) flags &= ~(__GFP_IO | __GFP_FS); diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index f30954cc059d..568286411b43 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -8,7 +8,7 @@ * Various counters maintained by the scheduler and fork(), * exposed via /proc, sys.c or used by drivers via these APIs. * - * ( Note that all these values are aquired without locking, + * ( Note that all these values are acquired without locking, * so they can only be relied on in narrow circumstances. 
) */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8050f266751a..e4ca15d75541 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2857,7 +2857,7 @@ unsigned long nr_running(void) * preemption, thus the result might have a time-of-check-to-time-of-use * race. The caller is responsible to use it correctly, for example: * - * - from a non-preemptable section (of course) + * - from a non-preemptible section (of course) * * - from a thread that is bound to a single CPU * diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 0796f938c4f0..ba4a143bdcf3 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -525,7 +525,7 @@ void account_idle_ticks(unsigned long ticks) /* * Perform (stime * rtime) / total, but avoid multiplication overflow by - * loosing precision when the numbers are big. + * losing precision when the numbers are big. */ static u64 scale_stime(u64 stime, u64 rtime, u64 total) { diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 470ba6b464fe..b32bc1f7cd14 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -727,7 +727,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, * refill the runtime and set the deadline a period in the future, * because keeping the current (absolute) deadline of the task would * result in breaking guarantees promised to other tasks (refer to - * Documentation/scheduler/sched-deadline.txt for more informations). + * Documentation/scheduler/sched-deadline.txt for more information). 
* * This function returns true if: * diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e30dea59d215..fdc8356ea742 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -703,9 +703,9 @@ void init_entity_runnable_average(struct sched_entity *se) memset(sa, 0, sizeof(*sa)); /* - * Tasks are intialized with full load to be seen as heavy tasks until + * Tasks are initialized with full load to be seen as heavy tasks until * they get a chance to stabilize to their real load level. - * Group entities are intialized with zero load to reflect the fact that + * Group entities are initialized with zero load to reflect the fact that * nothing has been attached to the task group yet. */ if (entity_is_task(se)) @@ -3976,8 +3976,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) /* * When dequeuing a sched_entity, we must: * - Update loads to have both entity and cfs_rq synced with now. - * - Substract its load from the cfs_rq->runnable_avg. - * - Substract its previous weight from cfs_rq->load.weight. + * - Subtract its load from the cfs_rq->runnable_avg. + * - Subtract its previous weight from cfs_rq->load.weight. * - For group entity, update its weight to reflect the new share * of its group cfs_rq. 
*/ diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index e6802181900f..81faddba9e20 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -8,14 +8,14 @@ */ #include "sched.h" -DEFINE_STATIC_KEY_FALSE(housekeeping_overriden); -EXPORT_SYMBOL_GPL(housekeeping_overriden); +DEFINE_STATIC_KEY_FALSE(housekeeping_overridden); +EXPORT_SYMBOL_GPL(housekeeping_overridden); static cpumask_var_t housekeeping_mask; static unsigned int housekeeping_flags; int housekeeping_any_cpu(enum hk_flags flags) { - if (static_branch_unlikely(&housekeeping_overriden)) + if (static_branch_unlikely(&housekeeping_overridden)) if (housekeeping_flags & flags) return cpumask_any_and(housekeeping_mask, cpu_online_mask); return smp_processor_id(); @@ -24,7 +24,7 @@ EXPORT_SYMBOL_GPL(housekeeping_any_cpu); const struct cpumask *housekeeping_cpumask(enum hk_flags flags) { - if (static_branch_unlikely(&housekeeping_overriden)) + if (static_branch_unlikely(&housekeeping_overridden)) if (housekeeping_flags & flags) return housekeeping_mask; return cpu_possible_mask; @@ -33,7 +33,7 @@ EXPORT_SYMBOL_GPL(housekeeping_cpumask); void housekeeping_affine(struct task_struct *t, enum hk_flags flags) { - if (static_branch_unlikely(&housekeeping_overriden)) + if (static_branch_unlikely(&housekeeping_overridden)) if (housekeeping_flags & flags) set_cpus_allowed_ptr(t, housekeeping_mask); } @@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(housekeeping_affine); bool housekeeping_test_cpu(int cpu, enum hk_flags flags) { - if (static_branch_unlikely(&housekeeping_overriden)) + if (static_branch_unlikely(&housekeeping_overridden)) if (housekeeping_flags & flags) return cpumask_test_cpu(cpu, housekeeping_mask); return true; @@ -53,7 +53,7 @@ void __init housekeeping_init(void) if (!housekeeping_flags) return; - static_branch_enable(&housekeeping_overriden); + static_branch_enable(&housekeeping_overridden); if (housekeeping_flags & HK_FLAG_TICK) sched_tick_offload_init(); diff --git 
a/kernel/sched/sched.h b/kernel/sched/sched.h index 71cd8b710599..9bde60a11805 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -637,7 +637,7 @@ struct dl_rq { /* * Deadline values of the currently executing and the * earliest ready task on this rq. Caching these facilitates - * the decision wether or not a ready but not running task + * the decision whether or not a ready but not running task * should migrate somewhere else. */ struct { @@ -1434,7 +1434,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) #ifdef CONFIG_SMP /* * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be - * successfuly executed on another CPU. We must ensure that updates of + * successfully executed on another CPU. We must ensure that updates of * per-task data have been completed by this moment. */ smp_wmb(); -- cgit v1.2.3 From 4b3ab9372ffa569827c8f7b7ffc7b69ba544a3bd Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Mon, 3 Dec 2018 13:31:18 +0530 Subject: iio: adc: ti_am335x_tscadc: Improve accuracy of measurement When performing single ended measurements with TSCADC, it's recommended to set negative input (SEL_INM_SWC_3_0) of ADC step to ADC's VREFN in the corresponding STEP_CONFIGx register. Also, the positive (SEL_RFP_SWC_2_0) and negative (SEL_RFM_SWC_1_0) reference voltages for the ADC step need to be set to VREFP and VREFN respectively in STEP_CONFIGx register. Without these changes, there may be variation of as much as ~2% in the ADC's digital output which is bad for precise measurement.
Signed-off-by: Vignesh R Acked-by: Jonathan Cameron Signed-off-by: Lee Jones --- drivers/iio/adc/ti_am335x_adc.c | 5 ++++- include/linux/mfd/ti_am335x_tscadc.h | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index cafb1dcadc48..9d984f2a8ba7 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -142,7 +142,10 @@ static void tiadc_step_config(struct iio_dev *indio_dev) stepconfig |= STEPCONFIG_MODE_SWCNT; tiadc_writel(adc_dev, REG_STEPCONFIG(steps), - stepconfig | STEPCONFIG_INP(chan)); + stepconfig | STEPCONFIG_INP(chan) | + STEPCONFIG_INM_ADCREFM | + STEPCONFIG_RFP_VREFP | + STEPCONFIG_RFM_VREFN); if (adc_dev->open_delay[i] > STEPDELAY_OPEN_MASK) { dev_warn(dev, "chan %d open delay truncating to 0x3FFFF\n", diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index b9a53e013bff..483168403ae5 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -78,6 +78,8 @@ #define STEPCONFIG_YNN BIT(8) #define STEPCONFIG_XNP BIT(9) #define STEPCONFIG_YPN BIT(10) +#define STEPCONFIG_RFP(val) ((val) << 12) +#define STEPCONFIG_RFP_VREFP (0x3 << 12) #define STEPCONFIG_INM_MASK (0xF << 15) #define STEPCONFIG_INM(val) ((val) << 15) #define STEPCONFIG_INM_ADCREFM STEPCONFIG_INM(8) @@ -86,6 +88,8 @@ #define STEPCONFIG_INP_AN4 STEPCONFIG_INP(4) #define STEPCONFIG_INP_ADCREFM STEPCONFIG_INP(8) #define STEPCONFIG_FIFO1 BIT(26) +#define STEPCONFIG_RFM(val) ((val) << 23) +#define STEPCONFIG_RFM_VREFN (0x3 << 23) /* Delay register */ #define STEPDELAY_OPEN_MASK (0x3FFFF << 0) -- cgit v1.2.3 From 9ee4685c9ac591b71af755657c3f6ce428ebcca4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 4 Oct 2018 17:37:49 +0300 Subject: sysfs: constify sysfs create/remove files harder Let the passed in array be const (and thus placed in rodata) instead of a mutable array of const 
pointers. Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Reviewed-by: Rafael J. Wysocki Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20181004143750.30880-1-jani.nikula@intel.com --- fs/sysfs/file.c | 4 ++-- include/linux/sysfs.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 0a7252aecfa5..bb71db63c99c 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -334,7 +334,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, } EXPORT_SYMBOL_GPL(sysfs_create_file_ns); -int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr) +int sysfs_create_files(struct kobject *kobj, const struct attribute * const *ptr) { int err = 0; int i; @@ -493,7 +493,7 @@ bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr) return ret; } -void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr) +void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *ptr) { int i; for (i = 0; ptr[i]; i++) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 987cefa337de..786816cf4aa5 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -234,7 +234,7 @@ int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); int __must_check sysfs_create_files(struct kobject *kobj, - const struct attribute **attr); + const struct attribute * const *attr); int __must_check sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode); struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, @@ -243,7 +243,7 @@ void sysfs_unbreak_active_protection(struct kernfs_node *kn); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); -void 
sysfs_remove_files(struct kobject *kobj, const struct attribute **attr); +void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *attr); int __must_check sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr); @@ -342,7 +342,7 @@ static inline int sysfs_create_file_ns(struct kobject *kobj, } static inline int sysfs_create_files(struct kobject *kobj, - const struct attribute **attr) + const struct attribute * const *attr) { return 0; } @@ -377,7 +377,7 @@ static inline bool sysfs_remove_file_self(struct kobject *kobj, } static inline void sysfs_remove_files(struct kobject *kobj, - const struct attribute **attr) + const struct attribute * const *attr) { } -- cgit v1.2.3 From 078dec3326e2244c62e8a8d970ba24359e3464be Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 3 Dec 2018 13:36:14 +0100 Subject: dma-buf: add dma_fence_get_stub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract of useful code from the timeline work. This provides a function to return a stub or dummy fence which is always signaled. Signed-off-by: Christian König Reviewed-by: Chris Wilson Reviewed-by: Chunming Zhou Link: https://patchwork.freedesktop.org/patch/265248/ --- drivers/dma-buf/dma-fence.c | 36 +++++++++++++++++++++++++++++++++++- include/linux/dma-fence.h | 1 + 2 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 1551ca7df394..136ec04d683f 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -30,13 +30,16 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit); EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal); +static DEFINE_SPINLOCK(dma_fence_stub_lock); +static struct dma_fence dma_fence_stub; + /* * fence context counter: each execution context should have its own * fence context, this allows checking if fences belong to the same * context or not. 
One device can have multiple separate contexts, * and they're used if some engine can run independently of another. */ -static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0); +static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1); /** * DOC: DMA fences overview @@ -68,6 +71,37 @@ static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0); * &dma_buf.resv pointer. */ +static const char *dma_fence_stub_get_name(struct dma_fence *fence) +{ + return "stub"; +} + +static const struct dma_fence_ops dma_fence_stub_ops = { + .get_driver_name = dma_fence_stub_get_name, + .get_timeline_name = dma_fence_stub_get_name, +}; + +/** + * dma_fence_get_stub - return a signaled fence + * + * Return a stub fence which is already signaled. + */ +struct dma_fence *dma_fence_get_stub(void) +{ + spin_lock(&dma_fence_stub_lock); + if (!dma_fence_stub.ops) { + dma_fence_init(&dma_fence_stub, + &dma_fence_stub_ops, + &dma_fence_stub_lock, + 0, 0); + dma_fence_signal_locked(&dma_fence_stub); + } + spin_unlock(&dma_fence_stub_lock); + + return dma_fence_get(&dma_fence_stub); +} +EXPORT_SYMBOL(dma_fence_get_stub); + /** * dma_fence_context_alloc - allocate an array of fence contexts * @num: amount of contexts to allocate diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 02dba8cd033d..999e4b104410 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -541,6 +541,7 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr) return ret < 0 ? ret : 0; } +struct dma_fence *dma_fence_get_stub(void); u64 dma_fence_context_alloc(unsigned num); #define DMA_FENCE_TRACE(f, fmt, args...) 
\ -- cgit v1.2.3 From 6b03061f882de49b83ccf44beb3a12c920a2da1b Mon Sep 17 00:00:00 2001 From: Yogesh Narayan Gaur Date: Mon, 3 Dec 2018 08:39:06 +0000 Subject: spi: add support for octal mode I/O data transfer Add flags for Octal mode I/O data transfer Required for the SPI controller which can do the data transfer (TX/RX) on 8 data lines e.g. NXP FlexSPI controller. SPI_TX_OCTAL: transmit with 8 wires SPI_RX_OCTAL: receive with 8 wires Signed-off-by: Yogesh Gaur Reviewed-by: Boris Brezillon Signed-off-by: Mark Brown --- drivers/spi/spi.c | 12 ++++++++++-- include/linux/spi/spi.h | 4 +++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index b6fd8ea8ac0d..18ebc400249c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1633,6 +1633,9 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, case 4: spi->mode |= SPI_TX_QUAD; break; + case 8: + spi->mode |= SPI_TX_OCTAL; + break; default: dev_warn(&ctlr->dev, "spi-tx-bus-width %d not supported\n", @@ -1651,6 +1654,9 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, case 4: spi->mode |= SPI_RX_QUAD; break; + case 8: + spi->mode |= SPI_RX_OCTAL; + break; default: dev_warn(&ctlr->dev, "spi-rx-bus-width %d not supported\n", @@ -2839,7 +2845,8 @@ int spi_setup(struct spi_device *spi) /* if it is SPI_3WIRE mode, DUAL and QUAD should be forbidden */ if ((spi->mode & SPI_3WIRE) && (spi->mode & - (SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD))) + (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL | + SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL))) return -EINVAL; /* help drivers fail *cleanly* when they need options * that aren't supported with their current controller @@ -2848,7 +2855,8 @@ int spi_setup(struct spi_device *spi) */ bad_bits = spi->mode & ~(spi->controller->mode_bits | SPI_CS_WORD); ugly_bits = bad_bits & - (SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD); + 
(SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL | + SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL); if (ugly_bits) { dev_warn(&spi->dev, "setup: ignoring unsupported mode bits %x\n", diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 6be77fa5ab90..0c1ca5dedbb4 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -154,7 +154,9 @@ struct spi_device { #define SPI_TX_QUAD 0x200 /* transmit with 4 wires */ #define SPI_RX_DUAL 0x400 /* receive with 2 wires */ #define SPI_RX_QUAD 0x800 /* receive with 4 wires */ -#define SPI_CS_WORD 0x1000 /* toggle cs after each word */ +#define SPI_CS_WORD 0x1000 /* toggle cs after each word */ +#define SPI_TX_OCTAL 0x2000 /* transmit with 8 wires */ +#define SPI_RX_OCTAL 0x4000 /* receive with 8 wires */ int irq; void *controller_state; void *controller_data; -- cgit v1.2.3 From e983da27f70e8d29f4ae7262d52e4d07129498f3 Mon Sep 17 00:00:00 2001 From: "A.s. Dong" Date: Wed, 14 Nov 2018 13:01:39 +0000 Subject: clk: fractional-divider: add CLK_FRAC_DIVIDER_ZERO_BASED flag support Add the CLK_FRAC_DIVIDER_ZERO_BASED flag to indicate that the numerator and denominator values in the register start from 0. This can be used to support frac dividers like below: Divider output clock = Divider input clock x [(frac +1) / (div +1)] where frac/div in register is: 000b - Divide by 1. 001b - Divide by 2. 010b - Divide by 3.
Cc: Stephen Boyd Cc: Michael Turquette Signed-off-by: Dong Aisheng Signed-off-by: Stephen Boyd --- drivers/clk/clk-fractional-divider.c | 10 ++++++++++ include/linux/clk-provider.h | 8 ++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clk-fractional-divider.c b/drivers/clk/clk-fractional-divider.c index fdf625fb10fa..7ccde6bd8dd5 100644 --- a/drivers/clk/clk-fractional-divider.c +++ b/drivers/clk/clk-fractional-divider.c @@ -40,6 +40,11 @@ static unsigned long clk_fd_recalc_rate(struct clk_hw *hw, m = (val & fd->mmask) >> fd->mshift; n = (val & fd->nmask) >> fd->nshift; + if (fd->flags & CLK_FRAC_DIVIDER_ZERO_BASED) { + m++; + n++; + } + if (!n || !m) return parent_rate; @@ -103,6 +108,11 @@ static int clk_fd_set_rate(struct clk_hw *hw, unsigned long rate, GENMASK(fd->mwidth - 1, 0), GENMASK(fd->nwidth - 1, 0), &m, &n); + if (fd->flags & CLK_FRAC_DIVIDER_ZERO_BASED) { + m--; + n--; + } + if (fd->lock) spin_lock_irqsave(fd->lock, flags); else diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 60c51871b04b..fa0bad94f26b 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -601,6 +601,12 @@ void clk_hw_unregister_fixed_factor(struct clk_hw *hw); * @lock: register lock * * Clock with adjustable fractional divider affecting its output frequency. + * + * Flags: + * CLK_FRAC_DIVIDER_ZERO_BASED - by default the numerator and denominator + * is the value read from the register. If CLK_FRAC_DIVIDER_ZERO_BASED + * is set then the numerator and denominator are both the value read + * plus one. 
*/ struct clk_fractional_divider { struct clk_hw hw; @@ -620,6 +626,8 @@ struct clk_fractional_divider { #define to_clk_fd(_hw) container_of(_hw, struct clk_fractional_divider, hw) +#define CLK_FRAC_DIVIDER_ZERO_BASED BIT(0) + extern const struct clk_ops clk_fractional_divider_ops; struct clk *clk_register_fractional_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, -- cgit v1.2.3 From 0d5102fe85302aa06a3e5fd8e63b09294aed4c48 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 28 Nov 2018 13:45:29 +0200 Subject: i2c: acpi: Introduce i2c_acpi_get_i2c_resource() helper Besides current two users one more is coming. Definitely makes sense to introduce a helper. No functional change intended. Signed-off-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Reviewed-by: Hans de Goede Acked-by: Mika Westerberg Acked-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 41 +++++++++++++++++++++++++++++------------ include/linux/acpi.h | 11 +++++++++++ 2 files changed, 40 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 8a88586e0902..272800692088 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -45,6 +45,33 @@ struct i2c_acpi_lookup { u32 min_speed; }; +/** + * i2c_acpi_get_i2c_resource - Gets I2cSerialBus resource if type matches + * @ares: ACPI resource + * @i2c: Pointer to I2cSerialBus resource will be returned here + * + * Checks if the given ACPI resource is of type I2cSerialBus. + * In this case, returns a pointer to it to the caller. + * + * Returns true if resource type is of I2cSerialBus, otherwise false. 
+ */ +bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, + struct acpi_resource_i2c_serialbus **i2c) +{ + struct acpi_resource_i2c_serialbus *sb; + + if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) + return false; + + sb = &ares->data.i2c_serial_bus; + if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) + return false; + + *i2c = sb; + return true; +} +EXPORT_SYMBOL_GPL(i2c_acpi_get_i2c_resource); + static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data) { struct i2c_acpi_lookup *lookup = data; @@ -52,11 +79,7 @@ static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data) struct acpi_resource_i2c_serialbus *sb; acpi_status status; - if (info->addr || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) - return 1; - - sb = &ares->data.i2c_serial_bus; - if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) + if (info->addr || !i2c_acpi_get_i2c_resource(ares, &sb)) return 1; if (lookup->index != -1 && lookup->n++ != lookup->index) @@ -534,13 +557,7 @@ i2c_acpi_space_handler(u32 function, acpi_physical_address command, goto err; } - if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) { - ret = AE_BAD_PARAMETER; - goto err; - } - - sb = &ares->data.i2c_serial_bus; - if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) { + if (!value64 || !i2c_acpi_get_i2c_resource(ares, &sb)) { ret = AE_BAD_PARAMETER; goto err; } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ed80f147bd50..6afc6e3c4c5c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1054,6 +1054,17 @@ static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) } #endif +#if defined(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) +bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, + struct acpi_resource_i2c_serialbus **i2c); +#else +static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, + struct acpi_resource_i2c_serialbus **i2c) +{ + return false; +} +#endif + /* Device properties */ #ifdef CONFIG_ACPI -- cgit v1.2.3 From 
c2a70a319afb9e3dee16567cec4d9bf8dd358b59 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 17 Jun 2018 19:02:15 +0200 Subject: dmaengine: pxa: make the filter function internal As the pxa architecture and all its related drivers do not rely anymore on the filter function, thanks to the slave map conversion, make pxad_filter_fn() static, and remove it from the global namespace. Signed-off-by: Robert Jarzmik Acked-by: Vinod Koul --- drivers/dma/pxa_dma.c | 5 ++--- include/linux/dma/pxa-dma.h | 11 ----------- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 825725057e00..c7a328f81485 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -179,7 +179,7 @@ static unsigned int pxad_drcmr(unsigned int line) return 0x1000 + line * 4; } -bool pxad_filter_fn(struct dma_chan *chan, void *param); +static bool pxad_filter_fn(struct dma_chan *chan, void *param); /* * Debug fs @@ -1500,7 +1500,7 @@ static struct platform_driver pxad_driver = { .remove = pxad_remove, }; -bool pxad_filter_fn(struct dma_chan *chan, void *param) +static bool pxad_filter_fn(struct dma_chan *chan, void *param) { struct pxad_chan *c = to_pxad_chan(chan); struct pxad_param *p = param; @@ -1513,7 +1513,6 @@ bool pxad_filter_fn(struct dma_chan *chan, void *param) return true; } -EXPORT_SYMBOL_GPL(pxad_filter_fn); module_platform_driver(pxad_driver); diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h index 9fc594f69eff..fceb5df07097 100644 --- a/include/linux/dma/pxa-dma.h +++ b/include/linux/dma/pxa-dma.h @@ -23,15 +23,4 @@ struct pxad_param { enum pxad_chan_prio prio; }; -struct dma_chan; - -#ifdef CONFIG_PXA_DMA -bool pxad_filter_fn(struct dma_chan *chan, void *param); -#else -static inline bool pxad_filter_fn(struct dma_chan *chan, void *param) -{ - return false; -} -#endif - #endif /* _PXA_DMA_H_ */ -- cgit v1.2.3 From 82208d0d54ab85d8fedbb1c9a1960bd401a4ca1a Mon Sep 17 
00:00:00 2001 From: NeilBrown Date: Fri, 30 Nov 2018 10:26:50 +1100 Subject: rhashtable: detect when object movement between tables might have invalidated a lookup Some users of rhashtables might need to move an object from one table to another - this appears to be the reason for the incomplete usage of NULLS markers. To support these, we store a unique NULLS_MARKER at the end of each chain, and when a search fails to find a match, we check if the NULLS marker found was the expected one. If not, the search may not have examined all objects in the target bucket, so it is repeated. The unique NULLS_MARKER is derived from the address of the head of the chain. As this cannot be derived at load-time the static rhnull in rht_bucket_nested() needs to be initialised at run time. Any caller of a lookup function must still be prepared for the possibility that the object returned is in a different table - it might have been there for some time. Note that this does NOT provide support for other uses of NULLS_MARKERs such as allocating with SLAB_TYPESAFE_BY_RCU or changing the key of an object and re-inserting it in the same table. These could only be done safely if new objects were inserted at the *start* of a hash chain, and that is not currently the case. Signed-off-by: NeilBrown Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 34 ++++++++++++++++++++++++++-------- lib/rhashtable.c | 8 +++++--- 2 files changed, 31 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index eb7111039247..20f9c6af7473 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -75,8 +75,19 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; +/* + * NULLS_MARKER() expects a hash value with the low + * bits mostly likely to be significant, and it discards + * the msb. 
+ * We give it an address, in which the bottom 2 bits are + * always 0, and the msb might be significant. + * So we shift the address down one bit to align with + * expectations and avoid losing a significant bit. + */ +#define RHT_NULLS_MARKER(ptr) \ + ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1)) #define INIT_RHT_NULLS_HEAD(ptr) \ - ((ptr) = (typeof(ptr)) NULLS_MARKER(0)) + ((ptr) = RHT_NULLS_MARKER(&(ptr))) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) { @@ -471,6 +482,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; + struct rhash_head __rcu * const *head; struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -478,13 +490,19 @@ static inline struct rhash_head *__rhashtable_lookup( tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); - rht_for_each_rcu(he, tbl, hash) { - if (params.obj_cmpfn ? - params.obj_cmpfn(&arg, rht_obj(ht, he)) : - rhashtable_compare(&arg, rht_obj(ht, he))) - continue; - return he; - } + head = rht_bucket(tbl, hash); + do { + rht_for_each_rcu_continue(he, *head, tbl, hash) { + if (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, he)) : + rhashtable_compare(&arg, rht_obj(ht, he))) + continue; + return he; + } + /* An object might have been moved to a different hash chain, + * while we walk along it - better check and retry. + */ + } while (he != RHT_NULLS_MARKER(head)); /* Ensure we see any new tables.
*/ smp_rmb(); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 30526afa8343..852ffa5160f1 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -1179,8 +1179,7 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); - static struct rhash_head __rcu *rhnull = - (struct rhash_head __rcu *)NULLS_MARKER(0); + static struct rhash_head __rcu *rhnull; unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; unsigned int subhash = hash; @@ -1198,8 +1197,11 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, subhash >>= shift; } - if (!ntbl) + if (!ntbl) { + if (!rhnull) + INIT_RHT_NULLS_HEAD(rhnull); return &rhnull; + } return &ntbl[subhash].bucket; -- cgit v1.2.3 From 0e839df92cf37be4adef7e661813206cd2b32d66 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 30 Nov 2018 09:20:57 +0100 Subject: net: ethernet: provide nvmem_get_mac_address() We already have of_get_nvmem_mac_address() but some non-DT systems want to read the MAC address from NVMEM too. Implement a generalized routine that takes struct device as argument. Signed-off-by: Bartosz Golaszewski Signed-off-by: David S. 
Miller --- include/linux/etherdevice.h | 1 + net/ethernet/eth.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 572e11bb8696..2c0af7b00715 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -32,6 +32,7 @@ struct device; int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); unsigned char *arch_get_platform_mac_address(void); +int nvmem_get_mac_address(struct device *dev, void *addrbuf); u32 eth_get_headlen(void *data, unsigned int max_len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 58933fa50bb5..4c520110b04f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -550,3 +551,40 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) return 0; } EXPORT_SYMBOL(eth_platform_get_mac_address); + +/** + * Obtain the MAC address from an nvmem cell named 'mac-address' associated + * with given device. + * + * @dev: Device with which the mac-address cell is associated. + * @addrbuf: Buffer to which the MAC address will be copied on success. + * + * Returns 0 on success or a negative error number on failure. 
+ */ +int nvmem_get_mac_address(struct device *dev, void *addrbuf) +{ + struct nvmem_cell *cell; + const void *mac; + size_t len; + + cell = nvmem_cell_get(dev, "mac-address"); + if (IS_ERR(cell)) + return PTR_ERR(cell); + + mac = nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); + + if (IS_ERR(mac)) + return PTR_ERR(mac); + + if (len != ETH_ALEN || !is_valid_ether_addr(mac)) { + kfree(mac); + return -EINVAL; + } + + ether_addr_copy(addrbuf, mac); + kfree(mac); + + return 0; +} +EXPORT_SYMBOL(nvmem_get_mac_address); -- cgit v1.2.3 From afa64a72b862a7a9d04f8d07fba632eaf06b23f8 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 30 Nov 2018 09:20:59 +0100 Subject: of: net: kill of_get_nvmem_mac_address() We've switched all users to nvmem_get_mac_address(). Remove the now dead code. Signed-off-by: Bartosz Golaszewski Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- drivers/of/of_net.c | 39 --------------------------------------- include/linux/of_net.h | 6 ------ 2 files changed, 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c index 53189d4022a6..810ab0fbcccb 100644 --- a/drivers/of/of_net.c +++ b/drivers/of/of_net.c @@ -81,42 +81,3 @@ const void *of_get_mac_address(struct device_node *np) return of_get_mac_addr(np, "address"); } EXPORT_SYMBOL(of_get_mac_address); - -/** - * Obtain the MAC address from an nvmem provider named 'mac-address' through - * device tree. - * On success, copies the new address into memory pointed to by addr and - * returns 0. Returns a negative error code otherwise. 
- * @np: Device tree node containing the nvmem-cells phandle - * @addr: Pointer to receive the MAC address using ether_addr_copy() - */ -int of_get_nvmem_mac_address(struct device_node *np, void *addr) -{ - struct nvmem_cell *cell; - const void *mac; - size_t len; - int ret; - - cell = of_nvmem_cell_get(np, "mac-address"); - if (IS_ERR(cell)) - return PTR_ERR(cell); - - mac = nvmem_cell_read(cell, &len); - - nvmem_cell_put(cell); - - if (IS_ERR(mac)) - return PTR_ERR(mac); - - if (len < ETH_ALEN || !is_valid_ether_addr(mac)) { - ret = -EINVAL; - } else { - ether_addr_copy(addr, mac); - ret = 0; - } - - kfree(mac); - - return ret; -} -EXPORT_SYMBOL(of_get_nvmem_mac_address); diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 90d81ee9e6a0..9cd72aab76fe 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -13,7 +13,6 @@ struct net_device; extern int of_get_phy_mode(struct device_node *np); extern const void *of_get_mac_address(struct device_node *np); -extern int of_get_nvmem_mac_address(struct device_node *np, void *addr); extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else static inline int of_get_phy_mode(struct device_node *np) @@ -26,11 +25,6 @@ static inline const void *of_get_mac_address(struct device_node *np) return NULL; } -static inline int of_get_nvmem_mac_address(struct device_node *np, void *addr) -{ - return -ENODEV; -} - static inline struct net_device *of_find_net_device_by_node(struct device_node *np) { return NULL; -- cgit v1.2.3 From b5947e5d1e710c35ea281247bd27e6975250285c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 30 Nov 2018 15:32:39 -0500 Subject: udp: msg_zerocopy Extend zerocopy to udp sockets. Allow setting sockopt SO_ZEROCOPY and interpret flag MSG_ZEROCOPY. This patch was previously part of the zerocopy RFC patchsets. Zerocopy is not effective at small MTU. 
With segmentation offload building larger datagrams, the benefit of page flipping outweighs the cost of generating a completion notification. tools/testing/selftests/net/msg_zerocopy.sh after applying follow-on test patch and making skb_orphan_frags_rx same as skb_orphan_frags: ipv4 udp -t 1 tx=191312 (11938 MB) txc=0 zc=n rx=191312 (11938 MB) ipv4 udp -z -t 1 tx=304507 (19002 MB) txc=304507 zc=y rx=304507 (19002 MB) ok ipv6 udp -t 1 tx=174485 (10888 MB) txc=0 zc=n rx=174485 (10888 MB) ipv6 udp -z -t 1 tx=294801 (18396 MB) txc=294801 zc=y rx=294801 (18396 MB) ok Changes v1 -> v2 - Fixup reverse christmas tree violation v2 -> v3 - Split refcount avoidance optimization into separate patch - Fix refcount leak on error in fragmented case (thanks to Paolo Abeni for pointing this one out!) - Fix refcount inc on zero - Test sock_flag SOCK_ZEROCOPY directly in __ip_append_data. This is needed since commit 5cf4a8532c99 ("tcp: really ignore MSG_ZEROCOPY if no SO_ZEROCOPY") did the same for tcp. Signed-off-by: Willem de Bruijn Acked-by: Paolo Abeni Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 6 ++++++ net/core/sock.c | 5 ++++- net/ipv4/ip_output.c | 23 ++++++++++++++++++++++- net/ipv6/ip6_output.c | 23 ++++++++++++++++++++++- 5 files changed, 55 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 73902acf2b71..04f52e719571 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -485,6 +485,7 @@ void sock_zerocopy_put_abort(struct ubuf_info *uarg); void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); +int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len); int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3c814565ed7c..1350901c5cb8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1105,6 +1105,12 @@ EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort); extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length); +int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) +{ + return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); +} +EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram); + int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg) diff --git a/net/core/sock.c b/net/core/sock.c index 6d7e189e3cd9..f5bb89785e47 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1018,7 +1018,10 @@ set_rcvbuf: case SO_ZEROCOPY: if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { - if (sk->sk_protocol != IPPROTO_TCP) + if (!((sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP) || + (sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP))) ret = -ENOTSUPP; } else if (sk->sk_family != PF_RDS) { ret = -ENOTSUPP; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 
5dbec21856f4..6f843aff628c 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -867,6 +867,7 @@ static int __ip_append_data(struct sock *sk, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); + struct ubuf_info *uarg = NULL; struct sk_buff *skb; struct ip_options *opt = cork->opt; @@ -916,6 +917,19 @@ static int __ip_append_data(struct sock *sk, (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM))) csummode = CHECKSUM_PARTIAL; + if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { + uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); + if (!uarg) + return -ENOBUFS; + if (rt->dst.dev->features & NETIF_F_SG && + csummode == CHECKSUM_PARTIAL) { + paged = true; + } else { + uarg->zerocopy = 0; + skb_zcopy_set(skb, uarg); + } + } + cork->length += length; /* So, what's going on in the loop below? @@ -1006,6 +1020,7 @@ alloc_new_skb: cork->tx_flags = 0; skb_shinfo(skb)->tskey = tskey; tskey = 0; + skb_zcopy_set(skb, uarg); /* * Find where to start putting bytes. 
@@ -1068,7 +1083,7 @@ alloc_new_skb: err = -EFAULT; goto error; } - } else { + } else if (!uarg || !uarg->zerocopy) { int i = skb_shinfo(skb)->nr_frags; err = -ENOMEM; @@ -1098,6 +1113,10 @@ alloc_new_skb: skb->data_len += copy; skb->truesize += copy; wmem_alloc_delta += copy; + } else { + err = skb_zerocopy_iter_dgram(skb, from, copy); + if (err < 0) + goto error; } offset += copy; length -= copy; @@ -1105,11 +1124,13 @@ alloc_new_skb: if (wmem_alloc_delta) refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); + sock_zerocopy_put(uarg); return 0; error_efault: err = -EFAULT; error: + sock_zerocopy_put_abort(uarg); cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 827a3f5ff3bb..7df04d20a91f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1245,6 +1245,7 @@ static int __ip6_append_data(struct sock *sk, { struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; + struct ubuf_info *uarg = NULL; int exthdrlen = 0; int dst_exthdrlen = 0; int hh_len; @@ -1322,6 +1323,19 @@ emsgsize: rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; + if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { + uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); + if (!uarg) + return -ENOBUFS; + if (rt->dst.dev->features & NETIF_F_SG && + csummode == CHECKSUM_PARTIAL) { + paged = true; + } else { + uarg->zerocopy = 0; + skb_zcopy_set(skb, uarg); + } + } + /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. 
@@ -1445,6 +1459,7 @@ alloc_new_skb: cork->tx_flags = 0; skb_shinfo(skb)->tskey = tskey; tskey = 0; + skb_zcopy_set(skb, uarg); /* * Find where to start putting bytes @@ -1506,7 +1521,7 @@ alloc_new_skb: err = -EFAULT; goto error; } - } else { + } else if (!uarg || !uarg->zerocopy) { int i = skb_shinfo(skb)->nr_frags; err = -ENOMEM; @@ -1536,6 +1551,10 @@ alloc_new_skb: skb->data_len += copy; skb->truesize += copy; wmem_alloc_delta += copy; + } else { + err = skb_zerocopy_iter_dgram(skb, from, copy); + if (err < 0) + goto error; } offset += copy; length -= copy; @@ -1543,11 +1562,13 @@ alloc_new_skb: if (wmem_alloc_delta) refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); + sock_zerocopy_put(uarg); return 0; error_efault: err = -EFAULT; error: + sock_zerocopy_put_abort(uarg); cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); -- cgit v1.2.3 From 52900d22288e7d45846037e1db277c665bbc40db Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 30 Nov 2018 15:32:40 -0500 Subject: udp: elide zerocopy operation in hot path With MSG_ZEROCOPY, each skb holds a reference to a struct ubuf_info. Release of its last reference triggers a completion notification. The TCP stack in tcp_sendmsg_locked holds an extra ref independent of the skbs, because it can build, send and free skbs within its loop, possibly reaching refcount zero and freeing the ubuf_info too soon. The UDP stack currently also takes this extra ref, but does not need it as all skbs are sent after return from __ip(6)_append_data. Avoid the extra refcount_inc and refcount_dec_and_test, and generally the sock_zerocopy_put in the common path, by passing the initial reference to the first skb. This approach is taken instead of initializing the refcount to 0, as that would generate error "refcount_t: increment on 0" on the next skb_zcopy_set. 
Changes v3 -> v4 - Move skb_zcopy_set below the only kfree_skb that might cause a premature uarg destroy before skb_zerocopy_put_abort - Move the entire skb_shinfo assignment block, to keep that cacheline access in one place Signed-off-by: Willem de Bruijn Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++++---- net/core/skbuff.c | 9 +++++---- net/ipv4/ip_output.c | 22 +++++++++++----------- net/ipv4/tcp.c | 2 +- net/ipv6/ip6_output.c | 22 +++++++++++----------- 5 files changed, 36 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 04f52e719571..75d50ab7997c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -481,7 +481,7 @@ static inline void sock_zerocopy_get(struct ubuf_info *uarg) } void sock_zerocopy_put(struct ubuf_info *uarg); -void sock_zerocopy_put_abort(struct ubuf_info *uarg); +void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); @@ -1326,10 +1326,14 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) return is_zcopy ? 
skb_uarg(skb) : NULL; } -static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) +static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, + bool *have_ref) { if (skb && uarg && !skb_zcopy(skb)) { - sock_zerocopy_get(uarg); + if (unlikely(have_ref && *have_ref)) + *have_ref = false; + else + sock_zerocopy_get(uarg); skb_shinfo(skb)->destructor_arg = uarg; skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; } @@ -1374,7 +1378,7 @@ static inline void skb_zcopy_abort(struct sk_buff *skb) struct ubuf_info *uarg = skb_zcopy(skb); if (uarg) { - sock_zerocopy_put_abort(uarg); + sock_zerocopy_put_abort(uarg, false); skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1350901c5cb8..c78ce114537e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1089,7 +1089,7 @@ void sock_zerocopy_put(struct ubuf_info *uarg) } EXPORT_SYMBOL_GPL(sock_zerocopy_put); -void sock_zerocopy_put_abort(struct ubuf_info *uarg) +void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) { if (uarg) { struct sock *sk = skb_from_uarg(uarg)->sk; @@ -1097,7 +1097,8 @@ void sock_zerocopy_put_abort(struct ubuf_info *uarg) atomic_dec(&sk->sk_zckey); uarg->len--; - sock_zerocopy_put(uarg); + if (have_uref) + sock_zerocopy_put(uarg); } } EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort); @@ -1137,7 +1138,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, return err; } - skb_zcopy_set(skb, uarg); + skb_zcopy_set(skb, uarg, NULL); return skb->len - orig_len; } EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream); @@ -1157,7 +1158,7 @@ static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig, if (skb_copy_ubufs(nskb, GFP_ATOMIC)) return -EIO; } - skb_zcopy_set(nskb, skb_uarg(orig)); + skb_zcopy_set(nskb, skb_uarg(orig), NULL); } return 0; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6f843aff628c..78f028bdad30 100644 --- a/net/ipv4/ip_output.c +++ 
b/net/ipv4/ip_output.c @@ -881,8 +881,8 @@ static int __ip_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; unsigned int wmem_alloc_delta = 0; + bool paged, extra_uref; u32 tskey = 0; - bool paged; skb = skb_peek_tail(queue); @@ -921,12 +921,13 @@ static int __ip_append_data(struct sock *sk, uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); if (!uarg) return -ENOBUFS; + extra_uref = true; if (rt->dst.dev->features & NETIF_F_SG && csummode == CHECKSUM_PARTIAL) { paged = true; } else { uarg->zerocopy = 0; - skb_zcopy_set(skb, uarg); + skb_zcopy_set(skb, uarg, &extra_uref); } } @@ -1015,13 +1016,6 @@ alloc_new_skb: skb->csum = 0; skb_reserve(skb, hh_len); - /* only the initial fragment is time stamped */ - skb_shinfo(skb)->tx_flags = cork->tx_flags; - cork->tx_flags = 0; - skb_shinfo(skb)->tskey = tskey; - tskey = 0; - skb_zcopy_set(skb, uarg); - /* * Find where to start putting bytes. */ @@ -1054,6 +1048,13 @@ alloc_new_skb: exthdrlen = 0; csummode = CHECKSUM_NONE; + /* only the initial fragment is time stamped */ + skb_shinfo(skb)->tx_flags = cork->tx_flags; + cork->tx_flags = 0; + skb_shinfo(skb)->tskey = tskey; + tskey = 0; + skb_zcopy_set(skb, uarg, &extra_uref); + if ((flags & MSG_CONFIRM) && !skb_prev) skb_set_dst_pending_confirm(skb, 1); @@ -1124,13 +1125,12 @@ alloc_new_skb: if (wmem_alloc_delta) refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); - sock_zerocopy_put(uarg); return 0; error_efault: err = -EFAULT; error: - sock_zerocopy_put_abort(uarg); + sock_zerocopy_put_abort(uarg, extra_uref); cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 215e4d3b3616..dc68c408bba0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1423,7 +1423,7 @@ do_error: if (copied + copied_syn) goto out; out_err: - sock_zerocopy_put_abort(uarg); + sock_zerocopy_put_abort(uarg, true); 
err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7df04d20a91f..ec8c235ea891 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1258,7 +1258,7 @@ static int __ip6_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; unsigned int wmem_alloc_delta = 0; - bool paged; + bool paged, extra_uref; skb = skb_peek_tail(queue); if (!skb) { @@ -1327,12 +1327,13 @@ emsgsize: uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); if (!uarg) return -ENOBUFS; + extra_uref = true; if (rt->dst.dev->features & NETIF_F_SG && csummode == CHECKSUM_PARTIAL) { paged = true; } else { uarg->zerocopy = 0; - skb_zcopy_set(skb, uarg); + skb_zcopy_set(skb, uarg, &extra_uref); } } @@ -1454,13 +1455,6 @@ alloc_new_skb: skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + dst_exthdrlen); - /* Only the initial fragment is time stamped */ - skb_shinfo(skb)->tx_flags = cork->tx_flags; - cork->tx_flags = 0; - skb_shinfo(skb)->tskey = tskey; - tskey = 0; - skb_zcopy_set(skb, uarg); - /* * Find where to start putting bytes */ @@ -1492,6 +1486,13 @@ alloc_new_skb: exthdrlen = 0; dst_exthdrlen = 0; + /* Only the initial fragment is time stamped */ + skb_shinfo(skb)->tx_flags = cork->tx_flags; + cork->tx_flags = 0; + skb_shinfo(skb)->tskey = tskey; + tskey = 0; + skb_zcopy_set(skb, uarg, &extra_uref); + if ((flags & MSG_CONFIRM) && !skb_prev) skb_set_dst_pending_confirm(skb, 1); @@ -1562,13 +1563,12 @@ alloc_new_skb: if (wmem_alloc_delta) refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); - sock_zerocopy_put(uarg); return 0; error_efault: err = -EFAULT; error: - sock_zerocopy_put_abort(uarg); + sock_zerocopy_put_abort(uarg, extra_uref); cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 
-- cgit v1.2.3 From 8c2def893afc60d88160d524acf345765cf0c447 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 3 Dec 2018 14:45:43 -0800 Subject: sbitmap: fix sbitmap_for_each_set() We need to ignore bits in the cleared mask when iterating over all set bits. Fixes: ea86ea2cdced ("sbitmap: ammortize cost of clearing bits") Reported-by: Jens Axboe Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 92806a2dbab7..03f50fcedc79 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -265,12 +265,14 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, nr = SB_NR_TO_BIT(sb, start); while (scanned < sb->depth) { - struct sbitmap_word *word = &sb->map[index]; - unsigned int depth = min_t(unsigned int, word->depth - nr, + unsigned long word; + unsigned int depth = min_t(unsigned int, + sb->map[index].depth - nr, sb->depth - scanned); scanned += depth; - if (!word->word) + word = sb->map[index].word & ~sb->map[index].cleared; + if (!word) goto next; /* @@ -280,7 +282,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, */ depth += nr; while (1) { - nr = find_next_bit(&word->word, depth, nr); + nr = find_next_bit(&word, depth, nr); if (nr >= depth) break; if (!fn(sb, (index << sb->shift) + nr, data)) -- cgit v1.2.3 From 7684bd334d9d4ca4f09873e88d9c0131a2cf6c3b Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Tue, 30 Oct 2018 15:52:34 +0800 Subject: pstore: Avoid duplicate call of persistent_ram_zap() When initializing a prz, if invalid data is found (no PERSISTENT_RAM_SIG), the function call path looks like this: ramoops_init_prz -> persistent_ram_new -> persistent_ram_post_init -> persistent_ram_zap persistent_ram_zap As we can see, persistent_ram_zap() is called twice. 
We can avoid this by adding an option to persistent_ram_new(), and only call persistent_ram_zap() when it is needed. Signed-off-by: Peng Wang [kees: minor tweak to exit path and commit log] Signed-off-by: Kees Cook --- fs/pstore/ram.c | 4 +--- fs/pstore/ram_core.c | 15 +++++++++------ include/linux/pstore_ram.h | 1 + 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index e02a9039b5ea..768759841491 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -640,7 +640,7 @@ static int ramoops_init_prz(const char *name, label = kasprintf(GFP_KERNEL, "ramoops:%s", name); *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, - cxt->memtype, 0, label); + cxt->memtype, PRZ_FLAG_ZAP_OLD, label); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); @@ -649,8 +649,6 @@ static int ramoops_init_prz(const char *name, return err; } - persistent_ram_zap(*prz); - *paddr += sz; return 0; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 12e21f789194..23ca6f2c98a0 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -489,6 +489,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, struct persistent_ram_ecc_info *ecc_info) { int ret; + bool zap = !!(prz->flags & PRZ_FLAG_ZAP_OLD); ret = persistent_ram_init_ecc(prz, ecc_info); if (ret) @@ -498,23 +499,25 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, if (prz->buffer->sig == sig) { if (buffer_size(prz) > prz->buffer_size || - buffer_start(prz) > buffer_size(prz)) + buffer_start(prz) > buffer_size(prz)) { pr_info("found existing invalid buffer, size %zu, start %zu\n", buffer_size(prz), buffer_start(prz)); - else { + zap = true; + } else { pr_debug("found existing buffer, size %zu, start %zu\n", buffer_size(prz), buffer_start(prz)); persistent_ram_save_old(prz); - return 0; } } else { pr_debug("no valid data in buffer (sig = 0x%08x)\n", prz->buffer->sig); + prz->buffer->sig = sig; + 
zap = true; } - /* Rewind missing or invalid memory area. */ - prz->buffer->sig = sig; - persistent_ram_zap(prz); + /* Reset missing, invalid, or single-use memory area. */ + if (zap) + persistent_ram_zap(prz); return 0; } diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 602d64725222..6e94980357d2 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -30,6 +30,7 @@ * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required. */ #define PRZ_FLAG_NO_LOCK BIT(0) +#define PRZ_FLAG_ZAP_OLD BIT(1) struct persistent_ram_buffer; struct rs_control; -- cgit v1.2.3 From c208f7d4b037e1c71e5c839bb5dfcc3e0df19890 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Nov 2018 15:11:47 -0700 Subject: pstore/ram: Add kern-doc for struct persistent_ram_zone The struct persistent_ram_zone wasn't well documented. This adds kern-doc for it. Signed-off-by: Kees Cook --- fs/pstore/ram_core.c | 10 ++++++++++ include/linux/pstore_ram.h | 46 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 62830734deee..3e9e3ba4fb07 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -29,6 +29,16 @@ #include #include +/** + * struct persistent_ram_buffer - persistent circular RAM buffer + * + * @sig: + * signature to indicate header (PERSISTENT_RAM_SIG xor PRZ-type value) + * @start: + * offset into @data where the beginning of the stored bytes begin + * @size: + * number of valid bytes stored in @data + */ struct persistent_ram_buffer { uint32_t sig; atomic_t start; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 6e94980357d2..5d10ad51c1c4 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -30,6 +30,10 @@ * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required. 
*/ #define PRZ_FLAG_NO_LOCK BIT(0) +/* + * If a PRZ should only have a single-boot lifetime, this marks it as + * getting wiped after its contents get copied out after boot. + */ #define PRZ_FLAG_ZAP_OLD BIT(1) struct persistent_ram_buffer; @@ -43,17 +47,53 @@ struct persistent_ram_ecc_info { uint16_t *par; }; +/** + * struct persistent_ram_zone - Details of a persistent RAM zone (PRZ) + * used as a pstore backend + * + * @paddr: physical address of the mapped RAM area + * @size: size of mapping + * @label: unique name of this PRZ + * @flags: holds PRZ_FLAGS_* bits + * + * @buffer_lock: + * locks access to @buffer "size" bytes and "start" offset + * @buffer: + * pointer to actual RAM area managed by this PRZ + * @buffer_size: + * bytes in @buffer->data (not including any trailing ECC bytes) + * + * @par_buffer: + * pointer into @buffer->data containing ECC bytes for @buffer->data + * @par_header: + * pointer into @buffer->data containing ECC bytes for @buffer header + * (i.e. all fields up to @data) + * @rs_decoder: + * RSLIB instance for doing ECC calculations + * @corrected_bytes: + * ECC corrected bytes accounting since boot + * @bad_blocks: + * ECC uncorrectable bytes accounting since boot + * @ecc_info: + * ECC configuration details + * + * @old_log: + * saved copy of @buffer->data prior to most recent wipe + * @old_log_size: + * bytes contained in @old_log + * + */ struct persistent_ram_zone { phys_addr_t paddr; size_t size; void *vaddr; char *label; - struct persistent_ram_buffer *buffer; - size_t buffer_size; u32 flags; + raw_spinlock_t buffer_lock; + struct persistent_ram_buffer *buffer; + size_t buffer_size; - /* ECC correction */ char *par_buffer; char *par_header; struct rs_control *rs_decoder; -- cgit v1.2.3 From 0eed84ffb094bbddfb4b9378ef0a2eccf4dda99c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Nov 2018 14:03:07 -0700 Subject: pstore: Improve and update some comments and status output This improves and updates some comments: - dump handler 
comment out of sync from calling convention - fix kern-doc typo and improves status output: - reminder that only kernel crash dumps are compressed - do not be silent about ECC infrastructure failures Signed-off-by: Kees Cook --- fs/pstore/platform.c | 7 +++---- fs/pstore/ram_core.c | 4 +++- include/linux/pstore.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index a75756c48e10..32340e7dd6a5 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -304,7 +304,7 @@ static void allocate_buf_for_compression(void) big_oops_buf_sz = size; big_oops_buf = buf; - pr_info("Using compression: %s\n", zbackend->name); + pr_info("Using crash dump compression: %s\n", zbackend->name); } static void free_buf_for_compression(void) @@ -354,9 +354,8 @@ void pstore_record_init(struct pstore_record *record, } /* - * callback from kmsg_dump. (s2,l2) has the most recently - * written bytes, older bytes are in (s1,l1). Save as much - * as we can from the end of the buffer. + * callback from kmsg_dump. Save as much as we can (up to kmsg_bytes) from the + * end of the buffer. 
*/ static void pstore_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 3e9e3ba4fb07..e6375439c5ac 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -503,8 +503,10 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, bool zap = !!(prz->flags & PRZ_FLAG_ZAP_OLD); ret = persistent_ram_init_ecc(prz, ecc_info); - if (ret) + if (ret) { + pr_warn("ECC failed %s\n", prz->label); return ret; + } sig ^= PERSISTENT_RAM_SIG; diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 30fcec375a3a..81669aa80027 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -85,7 +85,7 @@ struct pstore_record { /** * struct pstore_info - backend pstore driver structure * - * @owner: module which is repsonsible for this backend driver + * @owner: module which is responsible for this backend driver * @name: name of the backend driver * * @buf_lock: spinlock to serialize access to @buf -- cgit v1.2.3 From 4af62a6423d0ad98e3eee2bec4305dde8deefefe Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Nov 2018 15:30:05 -0700 Subject: pstore: Replace open-coded << with BIT() Minor clean-up to use BIT() (as already done in pstore_ram.h). 
Signed-off-by: Kees Cook --- include/linux/pstore.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 81669aa80027..f46e5df76b58 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -192,10 +192,10 @@ struct pstore_info { }; /* Supported frontends */ -#define PSTORE_FLAGS_DMESG (1 << 0) -#define PSTORE_FLAGS_CONSOLE (1 << 1) -#define PSTORE_FLAGS_FTRACE (1 << 2) -#define PSTORE_FLAGS_PMSG (1 << 3) +#define PSTORE_FLAGS_DMESG BIT(0) +#define PSTORE_FLAGS_CONSOLE BIT(1) +#define PSTORE_FLAGS_FTRACE BIT(2) +#define PSTORE_FLAGS_PMSG BIT(3) extern int pstore_register(struct pstore_info *); extern void pstore_unregister(struct pstore_info *); -- cgit v1.2.3 From f0f23e5469dc80b482d985898a930be0e249a162 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 3 Nov 2018 16:38:16 -0700 Subject: pstore: Map PSTORE_TYPE_* to strings In later patches we will need to map types to names, so create a constant table for that which can also be used in different parts of old and new code. This saves the type in the PRZ which will be useful in later patches. Instead of having an explicit PSTORE_TYPE_UNKNOWN, just use ..._MAX. This includes removing the now redundant filename templates which can use a single format string. Also, there's no reason to limit the "is it still compressed?" test to only PSTORE_TYPE_DMESG when building the pstorefs filename. Records are zero-initialized, so a backend would need to have explicitly set compressed=1. 
Signed-off-by: Joel Fernandes (Google) Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- drivers/acpi/apei/erst.c | 2 +- fs/pstore/inode.c | 51 ++++------------------------------------------ fs/pstore/platform.c | 37 +++++++++++++++++++++++++++++++++ fs/pstore/ram.c | 4 +++- include/linux/pstore.h | 17 +++++++++++++--- include/linux/pstore_ram.h | 3 +++ 6 files changed, 62 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 3c5ea7cb693e..a5e1d963208e 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1035,7 +1035,7 @@ skip: CPER_SECTION_TYPE_MCE) == 0) record->type = PSTORE_TYPE_MCE; else - record->type = PSTORE_TYPE_UNKNOWN; + record->type = PSTORE_TYPE_MAX; if (rcd->hdr.validation_bits & CPER_VALID_TIMESTAMP) record->time.tv_sec = rcd->hdr.timestamp; diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 8cf2218b46a7..c60ee46f3e39 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -335,53 +335,10 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) goto fail_alloc; private->record = record; - switch (record->type) { - case PSTORE_TYPE_DMESG: - scnprintf(name, sizeof(name), "dmesg-%s-%llu%s", - record->psi->name, record->id, - record->compressed ? 
".enc.z" : ""); - break; - case PSTORE_TYPE_CONSOLE: - scnprintf(name, sizeof(name), "console-%s-%llu", - record->psi->name, record->id); - break; - case PSTORE_TYPE_FTRACE: - scnprintf(name, sizeof(name), "ftrace-%s-%llu", - record->psi->name, record->id); - break; - case PSTORE_TYPE_MCE: - scnprintf(name, sizeof(name), "mce-%s-%llu", - record->psi->name, record->id); - break; - case PSTORE_TYPE_PPC_RTAS: - scnprintf(name, sizeof(name), "rtas-%s-%llu", - record->psi->name, record->id); - break; - case PSTORE_TYPE_PPC_OF: - scnprintf(name, sizeof(name), "powerpc-ofw-%s-%llu", - record->psi->name, record->id); - break; - case PSTORE_TYPE_PPC_COMMON: - scnprintf(name, sizeof(name), "powerpc-common-%s-%llu", - record->psi->name, record->id); - break; - case PSTORE_TYPE_PMSG: - scnprintf(name, sizeof(name), "pmsg-%s-%llu", - record->psi->name, record->id); - break; - case PSTORE_TYPE_PPC_OPAL: - scnprintf(name, sizeof(name), "powerpc-opal-%s-%llu", - record->psi->name, record->id); - break; - case PSTORE_TYPE_UNKNOWN: - scnprintf(name, sizeof(name), "unknown-%s-%llu", - record->psi->name, record->id); - break; - default: - scnprintf(name, sizeof(name), "type%d-%s-%llu", - record->type, record->psi->name, record->id); - break; - } + scnprintf(name, sizeof(name), "%s-%s-%llu%s", + pstore_type_to_name(record->type), + record->psi->name, record->id, + record->compressed ? 
".enc.z" : ""); dentry = d_alloc_name(root, name); if (!dentry) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 32340e7dd6a5..2387cb74f729 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -59,6 +59,19 @@ MODULE_PARM_DESC(update_ms, "milliseconds before pstore updates its content " "enabling this option is not safe, it may lead to further " "corruption on Oopses)"); +/* Names should be in the same order as the enum pstore_type_id */ +static const char * const pstore_type_names[] = { + "dmesg", + "mce", + "console", + "ftrace", + "rtas", + "powerpc-ofw", + "powerpc-common", + "pmsg", + "powerpc-opal", +}; + static int pstore_new_entry; static void pstore_timefunc(struct timer_list *); @@ -104,6 +117,30 @@ void pstore_set_kmsg_bytes(int bytes) /* Tag each group of saved records with a sequence number */ static int oopscount; +const char *pstore_type_to_name(enum pstore_type_id type) +{ + BUILD_BUG_ON(ARRAY_SIZE(pstore_type_names) != PSTORE_TYPE_MAX); + + if (WARN_ON_ONCE(type >= PSTORE_TYPE_MAX)) + return "unknown"; + + return pstore_type_names[type]; +} +EXPORT_SYMBOL_GPL(pstore_type_to_name); + +enum pstore_type_id pstore_name_to_type(const char *name) +{ + int i; + + for (i = 0; i < PSTORE_TYPE_MAX; i++) { + if (!strcmp(pstore_type_names[i], name)) + return i; + } + + return PSTORE_TYPE_MAX; +} +EXPORT_SYMBOL_GPL(pstore_name_to_type); + static const char *get_reason_str(enum kmsg_dump_reason reason) { switch (reason) { diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 10ac4d23c423..b174d0fc009f 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -611,6 +611,7 @@ static int ramoops_init_przs(const char *name, goto fail; } *paddr += zone_sz; + prz_ar[i]->type = pstore_name_to_type(name); } *przs = prz_ar; @@ -650,6 +651,7 @@ static int ramoops_init_prz(const char *name, } *paddr += sz; + (*prz)->type = pstore_name_to_type(name); return 0; } @@ -785,7 +787,7 @@ static int ramoops_probe(struct platform_device *pdev) dump_mem_sz = 
cxt->size - cxt->console_size - cxt->ftrace_size - cxt->pmsg_size; - err = ramoops_init_przs("dump", dev, cxt, &cxt->dprzs, &paddr, + err = ramoops_init_przs("dmesg", dev, cxt, &cxt->dprzs, &paddr, dump_mem_sz, cxt->record_size, &cxt->max_dump_cnt, 0, 0); if (err) diff --git a/include/linux/pstore.h b/include/linux/pstore.h index f46e5df76b58..a9ec285d85d1 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -32,21 +32,32 @@ struct module; -/* pstore record types (see fs/pstore/inode.c for filename templates) */ +/* + * pstore record types (see fs/pstore/platform.c for pstore_type_names[]) + * These values may be written to storage (see EFI vars backend), so + * they are kind of an ABI. Be careful changing the mappings. + */ enum pstore_type_id { + /* Frontend storage types */ PSTORE_TYPE_DMESG = 0, PSTORE_TYPE_MCE = 1, PSTORE_TYPE_CONSOLE = 2, PSTORE_TYPE_FTRACE = 3, - /* PPC64 partition types */ + + /* PPC64-specific partition types */ PSTORE_TYPE_PPC_RTAS = 4, PSTORE_TYPE_PPC_OF = 5, PSTORE_TYPE_PPC_COMMON = 6, PSTORE_TYPE_PMSG = 7, PSTORE_TYPE_PPC_OPAL = 8, - PSTORE_TYPE_UNKNOWN = 255 + + /* End of the list */ + PSTORE_TYPE_MAX }; +const char *pstore_type_to_name(enum pstore_type_id type); +enum pstore_type_id pstore_name_to_type(const char *name); + struct pstore_info; /** * struct pstore_record - details of a pstore record entry diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 5d10ad51c1c4..337971c41980 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -22,6 +22,7 @@ #include #include #include +#include #include /* @@ -54,6 +55,7 @@ struct persistent_ram_ecc_info { * @paddr: physical address of the mapped RAM area * @size: size of mapping * @label: unique name of this PRZ + * @type: frontend type for this PRZ * @flags: holds PRZ_FLAGS_* bits * * @buffer_lock: @@ -88,6 +90,7 @@ struct persistent_ram_zone { size_t size; void *vaddr; char *label; + enum pstore_type_id type; u32 flags; 
raw_spinlock_t buffer_lock; -- cgit v1.2.3 From ea84b580b95521644429cc6748b6c2bf27c8b0f3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 30 Nov 2018 14:36:58 -0800 Subject: pstore: Convert buf_lock to semaphore Instead of running with interrupts disabled, use a semaphore. This should make it easier for backends that may need to sleep (e.g. EFI) when performing a write: |BUG: sleeping function called from invalid context at kernel/sched/completion.c:99 |in_atomic(): 1, irqs_disabled(): 1, pid: 2236, name: sig-xstate-bum |Preemption disabled at: |[] pstore_dump+0x72/0x330 |CPU: 26 PID: 2236 Comm: sig-xstate-bum Tainted: G D 4.20.0-rc3 #45 |Call Trace: | dump_stack+0x4f/0x6a | ___might_sleep.cold.91+0xd3/0xe4 | __might_sleep+0x50/0x90 | wait_for_completion+0x32/0x130 | virt_efi_query_variable_info+0x14e/0x160 | efi_query_variable_store+0x51/0x1a0 | efivar_entry_set_safe+0xa3/0x1b0 | efi_pstore_write+0x109/0x140 | pstore_dump+0x11c/0x330 | kmsg_dump+0xa4/0xd0 | oops_exit+0x22/0x30 ... Reported-by: Sebastian Andrzej Siewior Fixes: 21b3ddd39fee ("efi: Don't use spinlocks for efi vars") Signed-off-by: Kees Cook --- arch/powerpc/kernel/nvram_64.c | 2 -- drivers/acpi/apei/erst.c | 1 - drivers/firmware/efi/efi-pstore.c | 4 +--- fs/pstore/platform.c | 44 ++++++++++++++++++++------------------- fs/pstore/ram.c | 1 - include/linux/pstore.h | 7 +++---- 6 files changed, 27 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 22e9d281324d..e7d4ce6964ae 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -563,8 +563,6 @@ static int nvram_pstore_init(void) nvram_pstore_info.buf = oops_data; nvram_pstore_info.bufsize = oops_data_sz; - spin_lock_init(&nvram_pstore_info.buf_lock); - rc = pstore_register(&nvram_pstore_info); if (rc && (rc != -EPERM)) /* Print error only when pstore.backend == nvram */ diff --git a/drivers/acpi/apei/erst.c 
b/drivers/acpi/apei/erst.c index a5e1d963208e..9953e50667ec 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1176,7 +1176,6 @@ static int __init erst_init(void) "Error Record Serialization Table (ERST) support is initialized.\n"); buf = kmalloc(erst_erange.size, GFP_KERNEL); - spin_lock_init(&erst_info.buf_lock); if (buf) { erst_info.buf = buf + sizeof(struct cper_pstore_record); erst_info.bufsize = erst_erange.size - diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index cfe87b465819..0f7d97917197 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -259,8 +259,7 @@ static int efi_pstore_write(struct pstore_record *record) efi_name[i] = name[i]; ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES, - !pstore_cannot_block_path(record->reason), - record->size, record->psi->buf); + preemptible(), record->size, record->psi->buf); if (record->reason == KMSG_DUMP_OOPS) efivar_run_worker(); @@ -369,7 +368,6 @@ static __init int efivars_pstore_init(void) return -ENOMEM; efi_pstore_info.bufsize = 1024; - spin_lock_init(&efi_pstore_info.buf_lock); if (pstore_register(&efi_pstore_info)) { kfree(efi_pstore_info.buf); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 2387cb74f729..2d1066ed3c28 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -161,26 +161,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason) } } -bool pstore_cannot_block_path(enum kmsg_dump_reason reason) +/* + * Should pstore_dump() wait for a concurrent pstore_dump()? If + * not, the current pstore_dump() will report a failure to dump + * and return. + */ +static bool pstore_cannot_wait(enum kmsg_dump_reason reason) { - /* - * In case of NMI path, pstore shouldn't be blocked - * regardless of reason. - */ + /* In NMI path, pstore shouldn't block regardless of reason. 
*/ if (in_nmi()) return true; switch (reason) { /* In panic case, other cpus are stopped by smp_send_stop(). */ case KMSG_DUMP_PANIC: - /* Emergency restart shouldn't be blocked by spin lock. */ + /* Emergency restart shouldn't be blocked. */ case KMSG_DUMP_EMERG: return true; default: return false; } } -EXPORT_SYMBOL_GPL(pstore_cannot_block_path); #if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS) static int zbufsize_deflate(size_t size) @@ -400,23 +401,23 @@ static void pstore_dump(struct kmsg_dumper *dumper, unsigned long total = 0; const char *why; unsigned int part = 1; - unsigned long flags = 0; - int is_locked; int ret; why = get_reason_str(reason); - if (pstore_cannot_block_path(reason)) { - is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags); - if (!is_locked) { - pr_err("pstore dump routine blocked in %s path, may corrupt error record\n" - , in_nmi() ? "NMI" : why); + if (down_trylock(&psinfo->buf_lock)) { + /* Failed to acquire lock: give up if we cannot wait. */ + if (pstore_cannot_wait(reason)) { + pr_err("dump skipped in %s path: may corrupt error record\n", + in_nmi() ? 
"NMI" : why); + return; + } + if (down_interruptible(&psinfo->buf_lock)) { + pr_err("could not grab semaphore?!\n"); return; } - } else { - spin_lock_irqsave(&psinfo->buf_lock, flags); - is_locked = 1; } + oopscount++; while (total < kmsg_bytes) { char *dst; @@ -433,7 +434,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, record.part = part; record.buf = psinfo->buf; - if (big_oops_buf && is_locked) { + if (big_oops_buf) { dst = big_oops_buf; dst_size = big_oops_buf_sz; } else { @@ -451,7 +452,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, dst_size, &dump_size)) break; - if (big_oops_buf && is_locked) { + if (big_oops_buf) { zipped_len = pstore_compress(dst, psinfo->buf, header_size + dump_size, psinfo->bufsize); @@ -474,8 +475,8 @@ static void pstore_dump(struct kmsg_dumper *dumper, total += record.size; part++; } - if (is_locked) - spin_unlock_irqrestore(&psinfo->buf_lock, flags); + + up(&psinfo->buf_lock); } static struct kmsg_dumper pstore_dumper = { @@ -594,6 +595,7 @@ int pstore_register(struct pstore_info *psi) psi->write_user = pstore_write_user_compat; psinfo = psi; mutex_init(&psinfo->read_mutex); + sema_init(&psinfo->buf_lock, 1); spin_unlock(&pstore_lock); if (owner && !try_module_get(owner)) { diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 202eaa82bcc6..e6d9560ea455 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -815,7 +815,6 @@ static int ramoops_probe(struct platform_device *pdev) err = -ENOMEM; goto fail_clear; } - spin_lock_init(&cxt->pstore.buf_lock); cxt->pstore.flags = PSTORE_FLAGS_DMESG; if (cxt->console_size) diff --git a/include/linux/pstore.h b/include/linux/pstore.h index a9ec285d85d1..b146181e8709 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include @@ -99,7 +99,7 @@ struct pstore_record { * @owner: module which is responsible for this backend driver * @name: name of the backend driver * - * @buf_lock: spinlock to 
serialize access to @buf + * @buf_lock: semaphore to serialize access to @buf * @buf: preallocated crash dump buffer * @bufsize: size of @buf available for crash dump bytes (must match * smallest number of bytes available for writing to a @@ -184,7 +184,7 @@ struct pstore_info { struct module *owner; char *name; - spinlock_t buf_lock; + struct semaphore buf_lock; char *buf; size_t bufsize; @@ -210,7 +210,6 @@ struct pstore_info { extern int pstore_register(struct pstore_info *); extern void pstore_unregister(struct pstore_info *); -extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason); struct pstore_ftrace_record { unsigned long ip; -- cgit v1.2.3 From 96f1e097457506f215adfe3c47aacc15a88f6dd7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 3 Dec 2018 23:16:07 -0500 Subject: jbd2: avoid long hold times of j_state_lock while committing a transaction We can hold j_state_lock for writing at the beginning of jbd2_journal_commit_transaction() for a rather long time (reportedly for 30 ms) due to cleaning revoke bits of all revoked buffers under it. The handling of revoke tables as well as cleaning of t_reserved_list, and checkpoint lists does not need j_state_lock for anything. It is only needed to prevent new handles from joining the transaction. Generally T_LOCKED transaction state prevents new handles from joining the transaction - except for reserved handles which have to be allowed to join while we wait for other handles to complete. To prevent reserved handles from joining the transaction while cleaning up lists, add new transaction state T_SWITCH and watch for it when starting reserved handles. With this we can just drop the lock for operations that don't need it. Reported-and-tested-by: Adrian Hunter Suggested-by: "Theodore Y. 
Ts'o" Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 3 +++ fs/jbd2/transaction.c | 43 ++++++++++++++++++++++++++++++++++++++----- include/linux/jbd2.h | 1 + 3 files changed, 42 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 150cc030b4d7..2eb55c3361a8 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -439,6 +439,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) finish_wait(&journal->j_wait_updates, &wait); } spin_unlock(&commit_transaction->t_handle_lock); + commit_transaction->t_state = T_SWITCH; + write_unlock(&journal->j_state_lock); J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <= journal->j_max_transaction_buffers); @@ -505,6 +507,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) atomic_sub(atomic_read(&journal->j_reserved_credits), &commit_transaction->t_outstanding_credits); + write_lock(&journal->j_state_lock); trace_jbd2_commit_flushing(journal, commit_transaction); stats.run.rs_flushing = jiffies; stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index c0b66a7a795b..116d8251fbff 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -138,9 +138,9 @@ static inline void update_t_max_wait(transaction_t *transaction, } /* - * Wait until running transaction passes T_LOCKED state. Also starts the commit - * if needed. The function expects running transaction to exist and releases - * j_state_lock. + * Wait until running transaction passes to T_FLUSH state and new transaction + * can thus be started. Also starts the commit if needed. The function expects + * running transaction to exist and releases j_state_lock. 
*/ static void wait_transaction_locked(journal_t *journal) __releases(journal->j_state_lock) @@ -160,6 +160,32 @@ static void wait_transaction_locked(journal_t *journal) finish_wait(&journal->j_wait_transaction_locked, &wait); } +/* + * Wait until running transaction transitions from T_SWITCH to T_FLUSH + * state and new transaction can thus be started. The function releases + * j_state_lock. + */ +static void wait_transaction_switching(journal_t *journal) + __releases(journal->j_state_lock) +{ + DEFINE_WAIT(wait); + + if (WARN_ON(!journal->j_running_transaction || + journal->j_running_transaction->t_state != T_SWITCH)) + return; + prepare_to_wait(&journal->j_wait_transaction_locked, &wait, + TASK_UNINTERRUPTIBLE); + read_unlock(&journal->j_state_lock); + /* + * We don't call jbd2_might_wait_for_commit() here as there's no + * waiting for outstanding handles happening anymore in T_SWITCH state + * and handling of reserved handles actually relies on that for + * correctness. + */ + schedule(); + finish_wait(&journal->j_wait_transaction_locked, &wait); +} + static void sub_reserved_credits(journal_t *journal, int blocks) { atomic_sub(blocks, &journal->j_reserved_credits); @@ -183,7 +209,8 @@ static int add_transaction_credits(journal_t *journal, int blocks, * If the current transaction is locked down for commit, wait * for the lock to be released. */ - if (t->t_state == T_LOCKED) { + if (t->t_state != T_RUNNING) { + WARN_ON_ONCE(t->t_state >= T_FLUSH); wait_transaction_locked(journal); return 1; } @@ -360,8 +387,14 @@ repeat: /* * We have handle reserved so we are allowed to join T_LOCKED * transaction and we don't have to check for transaction size - * and journal space. + * and journal space. But we still have to wait while running + * transaction is being switched to a committing one as it + * won't wait for any handles anymore. 
*/ + if (transaction->t_state == T_SWITCH) { + wait_transaction_switching(journal); + goto repeat; + } sub_reserved_credits(journal, blocks); handle->h_reserved = 0; } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b708e5169d1d..118d00a64184 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -575,6 +575,7 @@ struct transaction_s enum { T_RUNNING, T_LOCKED, + T_SWITCH, T_FLUSH, T_COMMIT, T_COMMIT_DFLUSH, -- cgit v1.2.3 From 32ea275008d8c76fa3f40d10d0ffc694a214dfef Mon Sep 17 00:00:00 2001 From: Alexander Lochmann Date: Tue, 4 Dec 2018 00:30:22 -0500 Subject: jbd2: update locking documentation for transaction_t The following members of struct transaction_s aka transaction_t were turned into lock-free variables in the past: - t_updates - t_outstanding_credits - t_handle_count However, the documentation has not been updated yet. This commit replaced the annotated lock by [none]. Found by LockDoc (Alexander Lochmann, Horst Schirmeier and Olaf Spinczyk) Signed-off-by: Alexander Lochmann Signed-off-by: Horst Schirmeier Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 118d00a64184..0f919d5fe84f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -663,13 +663,13 @@ struct transaction_s /* * Number of outstanding updates running on this transaction - * [t_handle_lock] + * [none] */ atomic_t t_updates; /* * Number of buffers reserved for use by all handles in this transaction - * handle but not yet modified. [t_handle_lock] + * handle but not yet modified. [none] */ atomic_t t_outstanding_credits; @@ -691,7 +691,7 @@ struct transaction_s ktime_t t_start_time; /* - * How many handles used this transaction? [t_handle_lock] + * How many handles used this transaction? 
[none] */ atomic_t t_handle_count; -- cgit v1.2.3 From 6cd0014ab90f6959fa1f8cc8b3f38d302457c919 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:33 +0200 Subject: net/mlx5: Align SRQ licenses and copyright information Ensure that both RDMA and netdev parts of the SRQ implementation have the same copyright and license information annotated by SPDX tags. Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/srq.c | 31 ++------------------------- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 31 ++------------------------- include/linux/mlx5/srq.h | 31 ++------------------------- 3 files changed, 6 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index d012e7dbcc38..28794780062e 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. */ #include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 79c5f0d57956..10036aaa200a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. */ #include diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 1b1f3c20c6a3..77bc4264066d 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved. */ #ifndef MLX5_SRQ_H -- cgit v1.2.3 From 5b5f0f16276021794038f12adc56df70cec42b4f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:34 +0200 Subject: net/mlx5: Remove dead transobj code Delete functions which are not called and not needed. Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 66 ---------------------- include/linux/mlx5/transobj.h | 5 -- 2 files changed, 71 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index a1ee9a8a769e..ab482124e901 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -301,72 +301,6 @@ int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) -{ - void *in; - void *rmpc; - void *wq; - void *bitmask; - int err; - - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); - bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); - MLX5_SET(modify_rmp_in, in, rmpn, rmpn); - MLX5_SET(wq, wq, lwm, lwm); - MLX5_SET(rmp_bitmask, bitmask, lwm, 1); - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); - - kvfree(in); - - return err; -} - -int 
mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *xsrqn) -{ - u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0}; - int err; - - MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - if (!err) - *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); - - return err; -} - -int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) -{ - u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; - - MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) -{ - u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - - MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); - MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, in, op_mod, - MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn) { diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 7f5ca2cd3a32..39ebb699875b 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -63,11 +63,6 @@ int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); -int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); -int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn); -int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 
rmpn); -int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn); -- cgit v1.2.3 From f02d0d6e53ac2c8a75b6cc87dc86675a9351d84d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:37 +0200 Subject: net/mlx5: Move SRQ functions to RDMA part There is no need to keep SRQ which is RDMA object in mlx5_core. In this patch, we partially move the execution code, while next patches will move table initialization/release logic too. Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/Makefile | 4 +- drivers/infiniband/hw/mlx5/cq.c | 1 + drivers/infiniband/hw/mlx5/srq.c | 2 +- drivers/infiniband/hw/mlx5/srq.h | 46 ++ drivers/infiniband/hw/mlx5/srq_cmd.c | 666 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/srq.c | 634 -------------------- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 43 -- include/linux/mlx5/driver.h | 8 - include/linux/mlx5/srq.h | 31 - include/linux/mlx5/transobj.h | 6 - 10 files changed, 717 insertions(+), 724 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/srq.h create mode 100644 drivers/infiniband/hw/mlx5/srq_cmd.c (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index b8e4b15e2674..33f5adb14e4e 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,6 +1,8 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ + srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ + cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 
7d769b5538b4..c5d2824ada59 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -35,6 +35,7 @@ #include #include #include "mlx5_ib.h" +#include "srq.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) { diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 28794780062e..a86d9f153805 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -9,8 +9,8 @@ #include #include #include - #include "mlx5_ib.h" +#include "srq.h" /* not supported currently */ static int srq_signature; diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h new file mode 100644 index 000000000000..f23d5de12973 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved. + */ + +#ifndef MLX5_IB_SRQ_H +#define MLX5_IB_SRQ_H + +enum { + MLX5_SRQ_FLAG_ERR = (1 << 0), + MLX5_SRQ_FLAG_WQ_SIG = (1 << 1), + MLX5_SRQ_FLAG_RNDV = (1 << 2), +}; + +struct mlx5_srq_attr { + u32 type; + u32 flags; + u32 log_size; + u32 wqe_shift; + u32 log_page_size; + u32 wqe_cnt; + u32 srqn; + u32 xrcd; + u32 page_offset; + u32 cqn; + u32 pd; + u32 lwm; + u32 user_index; + u64 db_record; + __be64 *pas; + u32 tm_log_list_size; + u32 tm_next_tag; + u32 tm_hw_phase_cnt; + u32 tm_sw_phase_cnt; + u16 uid; +}; + +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in); +int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); +int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out); +int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq); +struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); +#endif /* MLX5_IB_SRQ_H */ diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c 
b/drivers/infiniband/hw/mlx5/srq_cmd.c new file mode 100644 index 000000000000..4a64ad4c9b25 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -0,0 +1,666 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include +#include +#include "srq.h" + +static int get_pas_size(struct mlx5_srq_attr *in) +{ + u32 log_page_size = in->log_page_size + 12; + u32 log_srq_size = in->log_size; + u32 log_rq_stride = in->wqe_shift; + u32 page_offset = in->page_offset; + u32 po_quanta = 1 << (log_page_size - 6); + u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); + u32 page_size = 1 << log_page_size; + u32 rq_sz_po = rq_sz + (page_offset * po_quanta); + u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); + + return rq_num_pas * sizeof(u64); +} + +static void set_wq(void *wq, struct mlx5_srq_attr *in) +{ + MLX5_SET(wq, wq, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); + MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); + MLX5_SET(wq, wq, log_wq_sz, in->log_size); + MLX5_SET(wq, wq, page_offset, in->page_offset); + MLX5_SET(wq, wq, lwm, in->lwm); + MLX5_SET(wq, wq, pd, in->pd); + MLX5_SET64(wq, wq, dbr_addr, in->db_record); +} + +static void set_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + MLX5_SET(srqc, srqc, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); + MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); + MLX5_SET(srqc, srqc, log_srq_size, in->log_size); + MLX5_SET(srqc, srqc, page_offset, in->page_offset); + MLX5_SET(srqc, srqc, lwm, in->lwm); + MLX5_SET(srqc, srqc, pd, in->pd); + MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); + MLX5_SET(srqc, srqc, xrcd, in->xrcd); + MLX5_SET(srqc, srqc, cqn, in->cqn); +} + +static void get_wq(void *wq, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(wq, wq, wq_signature)) + in->flags &= 
MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); + in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; + in->log_size = MLX5_GET(wq, wq, log_wq_sz); + in->page_offset = MLX5_GET(wq, wq, page_offset); + in->lwm = MLX5_GET(wq, wq, lwm); + in->pd = MLX5_GET(wq, wq, pd); + in->db_record = MLX5_GET64(wq, wq, dbr_addr); +} + +static void get_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(srqc, srqc, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); + in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); + in->log_size = MLX5_GET(srqc, srqc, log_srq_size); + in->page_offset = MLX5_GET(srqc, srqc, page_offset); + in->lwm = MLX5_GET(srqc, srqc, lwm); + in->pd = MLX5_GET(srqc, srqc, pd); + in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); +} + +struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + return srq; +} + +static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; + void *create_in; + void *srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + MLX5_SET(create_srq_in, create_in, uid, in->uid); + srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); + pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); + + set_srqc(srqc, in); + memcpy(pas, in->pas, pas_size); + + MLX5_SET(create_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_SRQ); + + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + 
kvfree(create_in); + if (!err) { + srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); + srq->uid = in->uid; + } + + return err; +} + +static int destroy_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; + + MLX5_SET(destroy_srq_in, srq_in, opcode, + MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); +} + +static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + + MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); + MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, srq_in, lwm, lwm); + MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); +} + +static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 *srq_out; + void *srqc; + int err; + + srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); + if (!srq_out) + return -ENOMEM; + + MLX5_SET(query_srq_in, srq_in, opcode, + MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); + err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, MLX5_ST_SZ_BYTES(query_srq_out)); + if (err) + goto out; + + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + get_srqc(srqc, out); + if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; +out: + kvfree(srq_out); + return err; +} + +static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct 
mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + void *create_in; + void *xrc_srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, + xrc_srq_context_entry); + pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); + + set_srqc(xrc_srqc, in); + MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); + memcpy(pas, in->pas, pas_size); + MLX5_SET(create_xrc_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(create_out, 0, sizeof(create_out)); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + if (err) + goto out; + + srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); + srq->uid = in->uid; +out: + kvfree(create_in); + return err; +} + +static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; + + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, u16 lwm) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; + + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + 
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 *xrcsrq_out; + void *xrc_srqc; + int err; + + xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); + if (!xrcsrq_out) + return -ENOMEM; + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + + MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + + err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (err) + goto out; + + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, + xrc_srq_context_entry); + get_srqc(xrc_srqc, out); + if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(xrcsrq_out); + return err; +} + +static int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn) +{ + u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = { 0 }; + int err; + + MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *rmpn = MLX5_GET(create_rmp_out, out, rmpn); + + return err; +} + +static int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; + + MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +static int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; + int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); + + MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); + MLX5_SET(query_rmp_in, 
in, rmpn, rmpn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + void *create_in; + void *rmpc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(create_rmp_in, create_in, uid, in->uid); + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + + err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); + if (!err) + srq->uid = in->uid; + + kvfree(create_in); + return err; +} + +static int destroy_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(destroy_rmp_in, in, uid, srq->uid); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + void *in; + void *rmpc; + void *wq; + void *bitmask; + int err; + + in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(modify_rmp_in, in, uid, srq->uid); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + + err = mlx5_core_modify_rmp(dev, in, 
MLX5_ST_SZ_BYTES(modify_rmp_in)); + + kvfree(in); + return err; +} + +static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 *rmp_out; + void *rmpc; + int err; + + rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL); + if (!rmp_out) + return -ENOMEM; + + err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); + if (err) + goto out; + + rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); + get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); + if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(rmp_out); + return err; +} + +static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; + void *create_in; + void *xrqc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + + if (in->type == IB_SRQT_TM) { + MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); + if (in->flags & MLX5_SRQ_FLAG_RNDV) + MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); + MLX5_SET(xrqc, xrqc, + tag_matching_topology_context.log_matching_list_sz, + in->tm_log_list_size); + } + MLX5_SET(xrqc, xrqc, user_index, in->user_index); + MLX5_SET(xrqc, xrqc, cqn, in->cqn); + MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + MLX5_SET(create_xrq_in, create_in, uid, in->uid); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) { + srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + srq->uid = in->uid; + } + + return err; +} + +static int 
destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; + + MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, uid, srq->uid); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_xrq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); + MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; + u32 *xrq_out; + int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); + void *xrqc; + int err; + + xrq_out = kvzalloc(outlen, GFP_KERNEL); + if (!xrq_out) + return -ENOMEM; + + MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); + MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); + + err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); + if (err) + goto out; + + xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); + get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); + if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + out->tm_next_tag = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.append_next_index); + out->tm_hw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.hw_phase_cnt); + out->tm_sw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.sw_phase_cnt); + +out: + kvfree(xrq_out); + return err; +} + +static int create_srq_split(struct 
mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + if (!dev->issi) + return create_srq_cmd(dev, srq, in); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return create_xrc_srq_cmd(dev, srq, in); + case MLX5_RES_XRQ: + return create_xrq_cmd(dev, srq, in); + default: + return create_rmp_cmd(dev, srq, in); + } +} + +static int destroy_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + if (!dev->issi) + return destroy_srq_cmd(dev, srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return destroy_xrc_srq_cmd(dev, srq); + case MLX5_RES_XRQ: + return destroy_xrq_cmd(dev, srq); + default: + return destroy_rmp_cmd(dev, srq); + } +} + +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + int err; + struct mlx5_srq_table *table = &dev->priv.srq_table; + + switch (in->type) { + case IB_SRQT_XRC: + srq->common.res = MLX5_RES_XSRQ; + break; + case IB_SRQT_TM: + srq->common.res = MLX5_RES_XRQ; + break; + default: + srq->common.res = MLX5_RES_SRQ; + } + + err = create_srq_split(dev, srq, in); + if (err) + return err; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, srq->srqn, srq); + spin_unlock_irq(&table->lock); + if (err) + goto err_destroy_srq_split; + + return 0; + +err_destroy_srq_split: + destroy_srq_split(dev, srq); + + return err; +} + +int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *tmp; + int err; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, srq->srqn); + spin_unlock_irq(&table->lock); + if (!tmp || tmp != srq) + return -EINVAL; + + err = destroy_srq_split(dev, srq); + if (err) + return err; + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + return 0; +} + +int 
mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + if (!dev->issi) + return query_srq_cmd(dev, srq, out); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return query_xrc_srq_cmd(dev, srq, out); + case MLX5_RES_XRQ: + return query_xrq_cmd(dev, srq, out); + default: + return query_rmp_cmd(dev, srq, out); + } +} + +int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + if (!dev->issi) + return arm_srq_cmd(dev, srq, lwm, is_srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return arm_xrc_srq_cmd(dev, srq, lwm); + case MLX5_RES_XRQ: + return arm_xrq_cmd(dev, srq, lwm); + default: + return arm_rmp_cmd(dev, srq, lwm); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 690815234838..0e80ddbe2510 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -4,12 +4,8 @@ */ #include -#include #include -#include #include -#include -#include static int srq_event_notifier(struct notifier_block *nb, unsigned long type, void *data) @@ -47,636 +43,6 @@ static int srq_event_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static int get_pas_size(struct mlx5_srq_attr *in) -{ - u32 log_page_size = in->log_page_size + 12; - u32 log_srq_size = in->log_size; - u32 log_rq_stride = in->wqe_shift; - u32 page_offset = in->page_offset; - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); - - return rq_num_pas * sizeof(u64); -} - -static void set_wq(void *wq, struct mlx5_srq_attr *in) -{ - MLX5_SET(wq, wq, wq_signature, !!(in->flags - & MLX5_SRQ_FLAG_WQ_SIG)); - MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); - MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift 
+ 4); - MLX5_SET(wq, wq, log_wq_sz, in->log_size); - MLX5_SET(wq, wq, page_offset, in->page_offset); - MLX5_SET(wq, wq, lwm, in->lwm); - MLX5_SET(wq, wq, pd, in->pd); - MLX5_SET64(wq, wq, dbr_addr, in->db_record); -} - -static void set_srqc(void *srqc, struct mlx5_srq_attr *in) -{ - MLX5_SET(srqc, srqc, wq_signature, !!(in->flags - & MLX5_SRQ_FLAG_WQ_SIG)); - MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); - MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); - MLX5_SET(srqc, srqc, log_srq_size, in->log_size); - MLX5_SET(srqc, srqc, page_offset, in->page_offset); - MLX5_SET(srqc, srqc, lwm, in->lwm); - MLX5_SET(srqc, srqc, pd, in->pd); - MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); - MLX5_SET(srqc, srqc, xrcd, in->xrcd); - MLX5_SET(srqc, srqc, cqn, in->cqn); -} - -static void get_wq(void *wq, struct mlx5_srq_attr *in) -{ - if (MLX5_GET(wq, wq, wq_signature)) - in->flags &= MLX5_SRQ_FLAG_WQ_SIG; - in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); - in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; - in->log_size = MLX5_GET(wq, wq, log_wq_sz); - in->page_offset = MLX5_GET(wq, wq, page_offset); - in->lwm = MLX5_GET(wq, wq, lwm); - in->pd = MLX5_GET(wq, wq, pd); - in->db_record = MLX5_GET64(wq, wq, dbr_addr); -} - -static void get_srqc(void *srqc, struct mlx5_srq_attr *in) -{ - if (MLX5_GET(srqc, srqc, wq_signature)) - in->flags &= MLX5_SRQ_FLAG_WQ_SIG; - in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); - in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); - in->log_size = MLX5_GET(srqc, srqc, log_srq_size); - in->page_offset = MLX5_GET(srqc, srqc, page_offset); - in->lwm = MLX5_GET(srqc, srqc, lwm); - in->pd = MLX5_GET(srqc, srqc, pd); - in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); -} - -struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *srq; - - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); - if (srq) - 
atomic_inc(&srq->refcount); - - spin_unlock(&table->lock); - - return srq; -} -EXPORT_SYMBOL(mlx5_core_get_srq); - -static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; - void *create_in; - void *srqc; - void *pas; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - MLX5_SET(create_srq_in, create_in, uid, in->uid); - srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); - pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); - - set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); - - MLX5_SET(create_srq_in, create_in, opcode, - MLX5_CMD_OP_CREATE_SRQ); - - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - kvfree(create_in); - if (!err) { - srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); - srq->uid = in->uid; - } - - return err; -} - -static int destroy_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; - - MLX5_SET(destroy_srq_in, srq_in, opcode, - MLX5_CMD_OP_DESTROY_SRQ); - MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); -} - -static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) -{ - u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - - MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); - MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, srq_in, lwm, lwm); - MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); - - return 
mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); -} - -static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; - u32 *srq_out; - void *srqc; - int err; - - srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); - if (!srq_out) - return -ENOMEM; - - MLX5_SET(query_srq_in, srq_in, opcode, - MLX5_CMD_OP_QUERY_SRQ); - MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); - err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, MLX5_ST_SZ_BYTES(query_srq_out)); - if (err) - goto out; - - srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); - get_srqc(srqc, out); - if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; -out: - kvfree(srq_out); - return err; -} - -static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; - void *create_in; - void *xrc_srqc; - void *pas; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); - xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, - xrc_srq_context_entry); - pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); - - set_srqc(xrc_srqc, in); - MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); - MLX5_SET(create_xrc_srq_in, create_in, opcode, - MLX5_CMD_OP_CREATE_XRC_SRQ); - - memset(create_out, 0, sizeof(create_out)); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - if (err) - goto out; - - srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); - srq->uid = in->uid; -out: - kvfree(create_in); - return err; -} - -static int 
destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; - - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); -} - -static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, u16 lwm) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); -} - -static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; - u32 *xrcsrq_out; - void *xrc_srqc; - int err; - - xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); - if (!xrcsrq_out) - return -ENOMEM; - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - - MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_QUERY_XRC_SRQ); - MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - - err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); - if (err) - goto out; - - xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, - xrc_srq_context_entry); - get_srqc(xrc_srqc, out); - if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; - 
-out: - kvfree(xrcsrq_out); - return err; -} - -static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - void *create_in; - void *rmpc; - void *wq; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - MLX5_SET(create_rmp_in, create_in, uid, in->uid); - set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); - - err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); - if (!err) - srq->uid = in->uid; - - kvfree(create_in); - return err; -} - -static int destroy_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; - - MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); - MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); - MLX5_SET(destroy_rmp_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int arm_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - u16 lwm) -{ - void *in; - void *rmpc; - void *wq; - void *bitmask; - int err; - - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); - bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); - MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); - MLX5_SET(modify_rmp_in, in, uid, srq->uid); - MLX5_SET(wq, wq, lwm, lwm); - MLX5_SET(rmp_bitmask, bitmask, lwm, 1); - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); - - 
kvfree(in); - return err; -} - -static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 *rmp_out; - void *rmpc; - int err; - - rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL); - if (!rmp_out) - return -ENOMEM; - - err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); - if (err) - goto out; - - rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); - get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); - if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) - out->flags |= MLX5_SRQ_FLAG_ERR; - -out: - kvfree(rmp_out); - return err; -} - -static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; - void *create_in; - void *xrqc; - void *wq; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); - wq = MLX5_ADDR_OF(xrqc, xrqc, wq); - - set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); - - if (in->type == IB_SRQT_TM) { - MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); - if (in->flags & MLX5_SRQ_FLAG_RNDV) - MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); - MLX5_SET(xrqc, xrqc, - tag_matching_topology_context.log_matching_list_sz, - in->tm_log_list_size); - } - MLX5_SET(xrqc, xrqc, user_index, in->user_index); - MLX5_SET(xrqc, xrqc, cqn, in->cqn); - MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); - MLX5_SET(create_xrq_in, create_in, uid, in->uid); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - kvfree(create_in); - if (!err) { - srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); - srq->uid = in->uid; - } - - return err; -} - -static int destroy_xrq_cmd(struct mlx5_core_dev 
*dev, struct mlx5_core_srq *srq) -{ - u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; - - MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); - MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); - MLX5_SET(destroy_xrq_in, in, uid, srq->uid); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int arm_xrq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - u16 lwm) -{ - u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - - MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); - MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, in, lwm, lwm); - MLX5_SET(arm_rq_in, in, uid, srq->uid); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; - u32 *xrq_out; - int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); - void *xrqc; - int err; - - xrq_out = kvzalloc(outlen, GFP_KERNEL); - if (!xrq_out) - return -ENOMEM; - - MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); - MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); - - err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); - if (err) - goto out; - - xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); - get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); - if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; - out->tm_next_tag = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.append_next_index); - out->tm_hw_phase_cnt = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.hw_phase_cnt); - out->tm_sw_phase_cnt = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.sw_phase_cnt); - -out: - kvfree(xrq_out); - return err; -} - -static int create_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq 
*srq, - struct mlx5_srq_attr *in) -{ - if (!dev->issi) - return create_srq_cmd(dev, srq, in); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return create_xrc_srq_cmd(dev, srq, in); - case MLX5_RES_XRQ: - return create_xrq_cmd(dev, srq, in); - default: - return create_rmp_cmd(dev, srq, in); - } -} - -static int destroy_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - if (!dev->issi) - return destroy_srq_cmd(dev, srq); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return destroy_xrc_srq_cmd(dev, srq); - case MLX5_RES_XRQ: - return destroy_xrq_cmd(dev, srq); - default: - return destroy_rmp_cmd(dev, srq); - } -} - -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - int err; - struct mlx5_srq_table *table = &dev->priv.srq_table; - - switch (in->type) { - case IB_SRQT_XRC: - srq->common.res = MLX5_RES_XSRQ; - break; - case IB_SRQT_TM: - srq->common.res = MLX5_RES_XRQ; - break; - default: - srq->common.res = MLX5_RES_SRQ; - } - - err = create_srq_split(dev, srq, in); - if (err) - return err; - - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); - - spin_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, srq->srqn, srq); - spin_unlock_irq(&table->lock); - if (err) - goto err_destroy_srq_split; - - return 0; - -err_destroy_srq_split: - destroy_srq_split(dev, srq); - - return err; -} -EXPORT_SYMBOL(mlx5_core_create_srq); - -int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *tmp; - int err; - - spin_lock_irq(&table->lock); - tmp = radix_tree_delete(&table->tree, srq->srqn); - spin_unlock_irq(&table->lock); - if (!tmp || tmp != srq) - return -EINVAL; - - err = destroy_srq_split(dev, srq); - if (err) - return err; - - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); - wait_for_completion(&srq->free); - - return 0; -} 
-EXPORT_SYMBOL(mlx5_core_destroy_srq); - -int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - if (!dev->issi) - return query_srq_cmd(dev, srq, out); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return query_xrc_srq_cmd(dev, srq, out); - case MLX5_RES_XRQ: - return query_xrq_cmd(dev, srq, out); - default: - return query_rmp_cmd(dev, srq, out); - } -} -EXPORT_SYMBOL(mlx5_core_query_srq); - -int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) -{ - if (!dev->issi) - return arm_srq_cmd(dev, srq, lwm, is_srq); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return arm_xrc_srq_cmd(dev, srq, lwm); - case MLX5_RES_XRQ: - return arm_xrq_cmd(dev, srq, lwm); - default: - return arm_rmp_cmd(dev, srq, lwm); - } -} -EXPORT_SYMBOL(mlx5_core_arm_srq); - void mlx5_init_srq_table(struct mlx5_core_dev *dev) { struct mlx5_srq_table *table = &dev->priv.srq_table; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index ab482124e901..c4d4b76096dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -258,49 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) } EXPORT_SYMBOL(mlx5_core_destroy_tis); -int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn) -{ - u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0}; - int err; - - MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - if (!err) - *rmpn = MLX5_GET(create_rmp_out, out, rmpn); - - return err; -} - -int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) -{ - u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; - - MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); -} - -int 
mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn) -{ - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0}; - - MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); - MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, - sizeof(out)); -} - -int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) -{ - u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; - int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); - - MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); - MLX5_SET(query_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 27a481b159ed..1096da4fb368 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -904,13 +904,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in); -int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); -int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out); -int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq); void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, @@ -942,7 +935,6 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 
srqn); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 77bc4264066d..9343306cd188 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -6,37 +6,6 @@ #ifndef MLX5_SRQ_H #define MLX5_SRQ_H -#include - -enum { - MLX5_SRQ_FLAG_ERR = (1 << 0), - MLX5_SRQ_FLAG_WQ_SIG = (1 << 1), - MLX5_SRQ_FLAG_RNDV = (1 << 2), -}; - -struct mlx5_srq_attr { - u32 type; - u32 flags; - u32 log_size; - u32 wqe_shift; - u32 log_page_size; - u32 wqe_cnt; - u32 srqn; - u32 xrcd; - u32 page_offset; - u32 cqn; - u32 pd; - u32 lwm; - u32 user_index; - u64 db_record; - __be64 *pas; - u32 tm_log_list_size; - u32 tm_next_tag; - u32 tm_hw_phase_cnt; - u32 tm_sw_phase_cnt; - u16 uid; -}; - struct mlx5_core_dev; void mlx5_init_srq_table(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 39ebb699875b..a261d5528ff7 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -58,12 +58,6 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, int inlen); void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); -int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn); -int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); -int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); -int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); - int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn); int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, -- cgit v1.2.3 From f3da6577da67a3cd44610ca54e308c6838c92157 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Nov 2018 20:53:41 +0200 Subject: RDMA/mlx5: 
Initialize SRQ tables on mlx5_ib Transfer initialization and cleanup from mlx5_priv struct of mlx5_core_dev to be part of mlx5_ib_dev. This completes removal of SRQ from mlx5_core. Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/ib_rep.c | 4 ++ drivers/infiniband/hw/mlx5/main.c | 7 +++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +- drivers/infiniband/hw/mlx5/srq.c | 1 - drivers/infiniband/hw/mlx5/srq.h | 25 ++++++++ drivers/infiniband/hw/mlx5/srq_cmd.c | 72 ++++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 -- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 63 --------------------- include/linux/mlx5/driver.h | 25 -------- include/linux/mlx5/srq.h | 14 ----- 11 files changed, 101 insertions(+), 122 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/srq.c delete mode 100644 include/linux/mlx5/srq.h (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 584ff2ea7810..8a682d86d634 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -4,6 +4,7 @@ */ #include "ib_rep.h" +#include "srq.h" static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, @@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 56472fa3e18b..96515a8c9d2c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -60,6 +60,7 @@ #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "srq.h" #include #include 
#include @@ -6308,6 +6309,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_roce_init, mlx5_ib_stage_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), @@ -6365,6 +6369,9 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 332d5c4d8ab3..861b68f2e330 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -50,6 +49,8 @@ #include #include +#include "srq.h" + #define mlx5_ib_dbg(_dev, format, arg...) 
\ dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ __LINE__, current->pid, ##arg) @@ -774,6 +775,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_CAPS, MLX5_IB_STAGE_NON_DEFAULT_CB, MLX5_IB_STAGE_ROCE, + MLX5_IB_STAGE_SRQ, MLX5_IB_STAGE_DEVICE_RESOURCES, MLX5_IB_STAGE_DEVICE_NOTIFIER, MLX5_IB_STAGE_ODP, @@ -942,6 +944,7 @@ struct mlx5_ib_dev { u64 sys_image_guid; struct mlx5_memic memic; u16 devx_whitelist_uid; + struct mlx5_srq_table srq_table; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 2b184c7f531a..91dcd3918d96 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 1110aeaa775e..75eb5839ae95 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -37,6 +37,28 @@ struct mlx5_srq_attr { struct mlx5_ib_dev; +struct mlx5_core_srq { + struct mlx5_core_rsc_common common; /* must be first */ + u32 srqn; + int max; + size_t max_gs; + size_t max_avail_gather; + int wqe_shift; + void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e); + + atomic_t refcount; + struct completion free; + u16 uid; +}; + +struct mlx5_srq_table { + struct notifier_block nb; + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in); int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); @@ -45,4 +67,7 @@ int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn); + +int mlx5_init_srq_table(struct mlx5_ib_dev *dev); +void 
mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev); #endif /* MLX5_IB_SRQ_H */ diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index fdb9443f49f0..6be89c6be40f 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -80,12 +80,9 @@ static void get_srqc(void *srqc, struct mlx5_srq_attr *in) struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) { - struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_srq_table *table; + struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *srq; - table = &mdev->priv.srq_table; - spin_lock(&table->lock); srq = radix_tree_lookup(&table->tree, srqn); @@ -576,12 +573,9 @@ static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { - struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_srq_table *table; + struct mlx5_srq_table *table = &dev->srq_table; int err; - table = &mdev->priv.srq_table; - switch (in->type) { case IB_SRQT_XRC: srq->common.res = MLX5_RES_XSRQ; @@ -616,13 +610,10 @@ err_destroy_srq_split: int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_srq_table *table; + struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *tmp; int err; - table = &mdev->priv.srq_table; - spin_lock_irq(&table->lock); tmp = radix_tree_delete(&table->tree, srq->srqn); spin_unlock_irq(&table->lock); @@ -669,3 +660,60 @@ int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, return arm_rmp_cmd(dev, srq, lwm); } } + +static int srq_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_srq_table *table; + struct mlx5_core_srq *srq; + struct mlx5_eqe *eqe; + u32 srqn; + + if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR && + type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT) + 
return NOTIFY_DONE; + + table = container_of(nb, struct mlx5_srq_table, nb); + + eqe = data; + srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + if (!srq) + return NOTIFY_OK; + + srq->event(srq, eqe->type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + + return NOTIFY_OK; +} + +int mlx5_init_srq_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_srq_table *table = &dev->srq_table; + + memset(table, 0, sizeof(*table)); + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + + table->nb.notifier_call = srq_event_notifier; + mlx5_notifier_register(dev->mdev, &table->nb); + + return 0; +} + +void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_srq_table *table = &dev->srq_table; + + mlx5_notifier_unregister(dev->mdev, &table->nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 26afe0779a0c..d499b3d00348 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5 core basic # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ - health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ + health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ diag/fs_tracepoint.o diag/fw_tracer.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4bc27a073dc4..778995573812 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -749,8 +748,6 @@ static 
int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_qp_table(dev); - mlx5_init_srq_table(dev); - mlx5_init_mkey_table(dev); mlx5_init_reserved_gids(dev); @@ -804,7 +801,6 @@ err_rl_cleanup: err_tables_cleanup: mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); err_events_cleanup: @@ -828,7 +824,6 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c deleted file mode 100644 index 0e80ddbe2510..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* - * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. 
- */ - -#include -#include -#include - -static int srq_event_notifier(struct notifier_block *nb, - unsigned long type, void *data) -{ - struct mlx5_srq_table *table; - struct mlx5_core_srq *srq; - struct mlx5_eqe *eqe; - u32 srqn; - - if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR && - type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT) - return NOTIFY_DONE; - - table = container_of(nb, struct mlx5_srq_table, nb); - - eqe = data; - srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); - if (srq) - atomic_inc(&srq->refcount); - - spin_unlock(&table->lock); - - if (!srq) - return NOTIFY_OK; - - srq->event(srq, eqe->type); - - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); - - return NOTIFY_OK; -} - -void mlx5_init_srq_table(struct mlx5_core_dev *dev) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - - memset(table, 0, sizeof(*table)); - spin_lock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); - - table->nb.notifier_call = srq_event_notifier; - mlx5_notifier_register(dev, &table->nb); -} - -void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - - mlx5_notifier_unregister(dev, &table->nb); -} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1096da4fb368..584d8a5df7eb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -50,7 +50,6 @@ #include #include -#include #include #include #include @@ -393,20 +392,6 @@ struct mlx5_core_rsc_common { struct completion free; }; -struct mlx5_core_srq { - struct mlx5_core_rsc_common common; /* must be first */ - u32 srqn; - int max; - size_t max_gs; - size_t max_avail_gather; - int wqe_shift; - void (*event) (struct mlx5_core_srq *, enum mlx5_event); - - atomic_t refcount; - struct completion free; - u16 uid; -}; - struct mlx5_uars_page { void __iomem *map; bool wc; @@ -464,14 +449,6 @@ struct mlx5_qp_table { 
struct radix_tree_root tree; }; -struct mlx5_srq_table { - struct notifier_block nb; - /* protect radix tree - */ - spinlock_t lock; - struct radix_tree_root tree; -}; - struct mlx5_mkey_table { /* protect radix tree */ @@ -547,8 +524,6 @@ struct mlx5_priv { struct mlx5_core_health health; - struct mlx5_srq_table srq_table; - /* start: qp staff */ struct mlx5_qp_table qp_table; struct dentry *qp_debugfs; diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h deleted file mode 100644 index 9343306cd188..000000000000 --- a/include/linux/mlx5/srq.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* - * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved. - */ - -#ifndef MLX5_SRQ_H -#define MLX5_SRQ_H - -struct mlx5_core_dev; - -void mlx5_init_srq_table(struct mlx5_core_dev *dev); -void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev); - -#endif /* MLX5_SRQ_H */ -- cgit v1.2.3 From 9d43faac02e3a4a26171f96f4de69fa650d3b6f6 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 26 Nov 2018 08:28:32 +0200 Subject: net/mlx5: Update mlx5_ifc with DEVX UCTX capabilities bits Expose device capabilities for DEVX user context, it includes which caps the device is supported and a matching bit to set as part of user context creation. 
Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6f64e814cc10..ece1b606c909 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -883,6 +883,10 @@ enum { MLX5_CAP_UMR_FENCE_NONE = 0x2, }; +enum { + MLX5_UCTX_CAP_RAW_TX = 1UL << 0, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x30]; u8 vhca_id[0x10]; @@ -1193,7 +1197,13 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 num_vhca_ports[0x8]; u8 reserved_at_618[0x6]; u8 sw_owner_id[0x1]; - u8 reserved_at_61f[0x1e1]; + u8 reserved_at_61f[0x1]; + + u8 reserved_at_620[0x80]; + + u8 uctx_cap[0x20]; + + u8 reserved_at_6c0[0x140]; }; enum mlx5_flow_destination_type { @@ -9276,7 +9286,9 @@ struct mlx5_ifc_umem_bits { struct mlx5_ifc_uctx_bits { u8 modify_field_select[0x40]; - u8 reserved_at_40[0x1c0]; + u8 cap[0x20]; + + u8 reserved_at_60[0x1a0]; }; struct mlx5_ifc_create_umem_in_bits { -- cgit v1.2.3 From 875e8939953483d856de226b72d14c6a000f9457 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 4 Dec 2018 08:15:10 +0000 Subject: skbuff: Rename 'offload_mr_fwd_mark' to 'offload_l3_fwd_mark' Commit abf4bb6b63d0 ("skbuff: Add the offload_mr_fwd_mark field") added the 'offload_mr_fwd_mark' field to indicate that a packet has already undergone L3 multicast routing by a capable device. The field is used to prevent the kernel from forwarding a packet through a netdev through which the device has already forwarded the packet. Currently, no unicast packet is routed by both the device and the kernel, but this is about to change by subsequent patches and we need to be able to mark such packets, so that they will not be forwarded twice.
Instead of adding yet another field to 'struct sk_buff', we can just rename 'offload_mr_fwd_mark' to 'offload_l3_fwd_mark', as a packet either has a multicast or a unicast destination IP. While at it, add a comment about both 'offload_fwd_mark' and 'offload_l3_fwd_mark'. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 10 +++++----- include/linux/skbuff.h | 4 +++- net/core/skbuff.c | 2 +- net/ipv4/ipmr.c | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c293ff1eed63..920085fbbf2a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3554,10 +3554,10 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } -static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb, +static void mlxsw_sp_rx_listener_l3_mark_func(struct sk_buff *skb, u8 local_port, void *priv) { - skb->offload_mr_fwd_mark = 1; + skb->offload_l3_fwd_mark = 1; skb->offload_fwd_mark = 1; return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } @@ -3605,8 +3605,8 @@ out: MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) -#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ - MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \ +#define MLXSW_SP_RXL_L3_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_l3_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) #define MLXSW_SP_EVENTL(_func, _trap_id) \ @@ -3683,7 +3683,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false), MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), 
MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), - MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), + MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), /* NVE traps */ MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false), }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 75d50ab7997c..b1831a5ca173 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -616,6 +616,8 @@ typedef unsigned char *sk_buff_data_t; * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs + * @offload_fwd_mark: Packet was L2-forwarded in hardware + * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress * @tc_redirected: packet was redirected by a tc action @@ -799,7 +801,7 @@ struct sk_buff { __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; - __u8 offload_mr_fwd_mark:1; + __u8 offload_l3_fwd_mark:1; #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c78ce114537e..40552547c69a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4885,7 +4885,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) #ifdef CONFIG_NET_SWITCHDEV skb->offload_fwd_mark = 0; - skb->offload_mr_fwd_mark = 0; + skb->offload_l3_fwd_mark = 0; #endif if (!xnet) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a6defbec4f1b..5cbc749a50aa 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1802,7 +1802,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, struct vif_device *out_vif = &mrt->vif_table[out_vifi]; struct vif_device *in_vif = &mrt->vif_table[in_vifi]; - if (!skb->offload_mr_fwd_mark) + if (!skb->offload_l3_fwd_mark) return false; if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) return false; -- cgit v1.2.3 From 
b3ed2ce024c36054e51cca2eb31a1cdbe4a5f11e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 4 Dec 2018 10:31:11 -0800 Subject: acpi/nfit: Add support for Intel DSM 1.8 commands Add command definition for security commands defined in Intel DSM specification v1.8 [1]. This includes "get security state", "set passphrase", "unlock unit", "freeze lock", "secure erase", "overwrite", "overwrite query", "master passphrase enable/disable", and "master erase". Since this adds several Intel definitions, move the relevant bits to their own header. These commands mutate physical data, but that manipulation is not cache coherent. The requirement to flush and invalidate caches makes these commands unsuitable to be called from userspace, so extra logic is added to detect and block these commands from being submitted via the ioctl command submission path. Lastly, the commands may contain sensitive key material that should not be dumped in a standard debug session. Update the nvdimm-command payload-dump facility to move security command payloads behind a default-off compile time switch. [1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/acpi/nfit/Kconfig | 11 +++++++ drivers/acpi/nfit/core.c | 46 ++++++++++++++++++++++++++--- drivers/acpi/nfit/intel.h | 74 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/acpi/nfit/nfit.h | 21 +++++++++++++- drivers/nvdimm/bus.c | 2 +- include/linux/libnvdimm.h | 2 +- 6 files changed, 149 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/nfit/Kconfig b/drivers/acpi/nfit/Kconfig index f7c57e33499e..52eefd732cf2 100644 --- a/drivers/acpi/nfit/Kconfig +++ b/drivers/acpi/nfit/Kconfig @@ -13,3 +13,14 @@ config ACPI_NFIT To compile this driver as a module, choose M here: the module will be called nfit.
+ +config NFIT_SECURITY_DEBUG + bool "Enable debug for NVDIMM security commands" + depends on ACPI_NFIT + help + Some NVDIMM devices and controllers support encryption and + other security features. The payloads for the commands that + enable those features may contain sensitive clear-text + security material. Disable debug of those command payloads + by default. If you are a kernel developer actively working + on NVDIMM security enabling say Y, otherwise say N. diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 14d9f5bea015..58fb4ce42548 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -24,6 +24,7 @@ #include #include #include +#include "intel.h" #include "nfit.h" #include "intel.h" @@ -380,6 +381,14 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func) [NVDIMM_INTEL_QUERY_FWUPDATE] = 2, [NVDIMM_INTEL_SET_THRESHOLD] = 2, [NVDIMM_INTEL_INJECT_ERROR] = 2, + [NVDIMM_INTEL_GET_SECURITY_STATE] = 2, + [NVDIMM_INTEL_SET_PASSPHRASE] = 2, + [NVDIMM_INTEL_DISABLE_PASSPHRASE] = 2, + [NVDIMM_INTEL_UNLOCK_UNIT] = 2, + [NVDIMM_INTEL_FREEZE_LOCK] = 2, + [NVDIMM_INTEL_SECURE_ERASE] = 2, + [NVDIMM_INTEL_OVERWRITE] = 2, + [NVDIMM_INTEL_QUERY_OVERWRITE] = 2, }, }; u8 id; @@ -394,6 +403,17 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func) return id; } +static bool payload_dumpable(struct nvdimm *nvdimm, unsigned int func) +{ + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + + if (nfit_mem && nfit_mem->family == NVDIMM_FAMILY_INTEL + && func >= NVDIMM_INTEL_GET_SECURITY_STATE + && func <= NVDIMM_INTEL_MASTER_SECURE_ERASE) + return IS_ENABLED(CONFIG_NFIT_SECURITY_DEBUG); + return true; +} + int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { @@ -478,9 +498,10 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, dev_dbg(dev, "%s cmd: %d: func: %d input length: %d\n", dimm_name, cmd, func, 
in_buf.buffer.length); - print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4, - in_buf.buffer.pointer, - min_t(u32, 256, in_buf.buffer.length), true); + if (payload_dumpable(nvdimm, func)) + print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4, + in_buf.buffer.pointer, + min_t(u32, 256, in_buf.buffer.length), true); /* call the BIOS, prefer the named methods over _DSM if available */ if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE @@ -3337,7 +3358,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) return 0; } -static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, +static int __acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); @@ -3359,6 +3380,23 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, return 0; } +/* prevent security commands from being issued via ioctl */ +static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf) +{ + struct nd_cmd_pkg *call_pkg = buf; + unsigned int func; + + if (nvdimm && cmd == ND_CMD_CALL && + call_pkg->nd_family == NVDIMM_FAMILY_INTEL) { + func = call_pkg->nd_command; + if ((1 << func) & NVDIMM_INTEL_SECURITY_CMDMASK) + return -EOPNOTSUPP; + } + + return __acpi_nfit_clear_to_send(nd_desc, nvdimm, cmd); +} + int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, enum nfit_ars_state req_type) { diff --git a/drivers/acpi/nfit/intel.h b/drivers/acpi/nfit/intel.h index 86746312381f..1802bd398c23 100644 --- a/drivers/acpi/nfit/intel.h +++ b/drivers/acpi/nfit/intel.h @@ -35,4 +35,78 @@ struct nd_intel_smart { }; } __packed; +#define ND_INTEL_STATUS_SIZE 4 +#define ND_INTEL_PASSPHRASE_SIZE 32 + +#define ND_INTEL_STATUS_NOT_SUPPORTED 1 +#define ND_INTEL_STATUS_RETRY 5 +#define ND_INTEL_STATUS_NOT_READY 9 +#define ND_INTEL_STATUS_INVALID_STATE 10 +#define 
ND_INTEL_STATUS_INVALID_PASS 11 +#define ND_INTEL_STATUS_OVERWRITE_UNSUPPORTED 0x10007 +#define ND_INTEL_STATUS_OQUERY_INPROGRESS 0x10007 +#define ND_INTEL_STATUS_OQUERY_SEQUENCE_ERR 0x20007 + +#define ND_INTEL_SEC_STATE_ENABLED 0x02 +#define ND_INTEL_SEC_STATE_LOCKED 0x04 +#define ND_INTEL_SEC_STATE_FROZEN 0x08 +#define ND_INTEL_SEC_STATE_PLIMIT 0x10 +#define ND_INTEL_SEC_STATE_UNSUPPORTED 0x20 +#define ND_INTEL_SEC_STATE_OVERWRITE 0x40 + +#define ND_INTEL_SEC_ESTATE_ENABLED 0x01 +#define ND_INTEL_SEC_ESTATE_PLIMIT 0x02 + +struct nd_intel_get_security_state { + u32 status; + u8 extended_state; + u8 reserved[3]; + u8 state; + u8 reserved1[3]; +} __packed; + +struct nd_intel_set_passphrase { + u8 old_pass[ND_INTEL_PASSPHRASE_SIZE]; + u8 new_pass[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_unlock_unit { + u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_disable_passphrase { + u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_freeze_lock { + u32 status; +} __packed; + +struct nd_intel_secure_erase { + u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_overwrite { + u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_query_overwrite { + u32 status; +} __packed; + +struct nd_intel_set_master_passphrase { + u8 old_pass[ND_INTEL_PASSPHRASE_SIZE]; + u8 new_pass[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; + +struct nd_intel_master_secure_erase { + u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; + u32 status; +} __packed; #endif diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index df0f6b8407e7..ecde13a9199d 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -60,14 +60,33 @@ enum nvdimm_family_cmds { NVDIMM_INTEL_QUERY_FWUPDATE = 16, NVDIMM_INTEL_SET_THRESHOLD = 17, NVDIMM_INTEL_INJECT_ERROR = 18, + NVDIMM_INTEL_GET_SECURITY_STATE = 19, + 
NVDIMM_INTEL_SET_PASSPHRASE = 20, + NVDIMM_INTEL_DISABLE_PASSPHRASE = 21, + NVDIMM_INTEL_UNLOCK_UNIT = 22, + NVDIMM_INTEL_FREEZE_LOCK = 23, + NVDIMM_INTEL_SECURE_ERASE = 24, + NVDIMM_INTEL_OVERWRITE = 25, + NVDIMM_INTEL_QUERY_OVERWRITE = 26, + NVDIMM_INTEL_SET_MASTER_PASSPHRASE = 27, + NVDIMM_INTEL_MASTER_SECURE_ERASE = 28, }; +#define NVDIMM_INTEL_SECURITY_CMDMASK \ +(1 << NVDIMM_INTEL_GET_SECURITY_STATE | 1 << NVDIMM_INTEL_SET_PASSPHRASE \ +| 1 << NVDIMM_INTEL_DISABLE_PASSPHRASE | 1 << NVDIMM_INTEL_UNLOCK_UNIT \ +| 1 << NVDIMM_INTEL_FREEZE_LOCK | 1 << NVDIMM_INTEL_SECURE_ERASE \ +| 1 << NVDIMM_INTEL_OVERWRITE | 1 << NVDIMM_INTEL_QUERY_OVERWRITE \ +| 1 << NVDIMM_INTEL_SET_MASTER_PASSPHRASE \ +| 1 << NVDIMM_INTEL_MASTER_SECURE_ERASE) + #define NVDIMM_INTEL_CMDMASK \ (NVDIMM_STANDARD_CMDMASK | 1 << NVDIMM_INTEL_GET_MODES \ | 1 << NVDIMM_INTEL_GET_FWINFO | 1 << NVDIMM_INTEL_START_FWUPDATE \ | 1 << NVDIMM_INTEL_SEND_FWUPDATE | 1 << NVDIMM_INTEL_FINISH_FWUPDATE \ | 1 << NVDIMM_INTEL_QUERY_FWUPDATE | 1 << NVDIMM_INTEL_SET_THRESHOLD \ - | 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN) + | 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN \ + | NVDIMM_INTEL_SECURITY_CMDMASK) enum nfit_uuids { /* for simplicity alias the uuid index with the family id */ diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index f1fb39921236..9743d8083538 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -902,7 +902,7 @@ static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, /* ask the bus provider if it would like to block this request */ if (nd_desc->clear_to_send) { - int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd); + int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd, data); if (rc) return rc; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 097072c5a852..472171af7f60 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -87,7 +87,7 @@ struct nvdimm_bus_descriptor { ndctl_fn 
ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, - struct nvdimm *nvdimm, unsigned int cmd); + struct nvdimm *nvdimm, unsigned int cmd, void *data); }; struct nd_cmd_desc { -- cgit v1.2.3 From e20ba6e1da029136ded295f33076483d65ddf50a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 2 Dec 2018 17:46:16 +0100 Subject: block: move queues types to the block layer Having another indirect call in the fast path doesn't really help in our post-spectre world. Also having too many queue types is just going to create confusion, so I'd rather manage them centrally. Note that the queue type naming and ordering changes a bit - the first index now is the default queue for everything not explicitly marked, the optional ones are read and poll queues. Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 9 ++++++- block/blk-mq.h | 21 ++++++++------- drivers/nvme/host/pci.c | 68 ++++++++++++++++++------------------------------- include/linux/blk-mq.h | 15 +++++------ 4 files changed, 51 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 6efef1f679f0..9c2df137256a 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -173,9 +173,16 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) return ret; } +static const char *const hctx_types[] = { + [HCTX_TYPE_DEFAULT] = "default", + [HCTX_TYPE_READ] = "read", + [HCTX_TYPE_POLL] = "poll", +}; + static ssize_t blk_mq_hw_sysfs_type_show(struct blk_mq_hw_ctx *hctx, char *page) { - return sprintf(page, "%u\n", hctx->type); + BUILD_BUG_ON(ARRAY_SIZE(hctx_types) != HCTX_MAX_TYPES); + return sprintf(page, "%s\n", hctx_types[hctx->type]); } static struct attribute *default_ctx_attrs[] = { diff --git a/block/blk-mq.h b/block/blk-mq.h index 7291e5379358..a664ea44ffd4 100644 --- a/block/blk-mq.h +++ 
b/block/blk-mq.h @@ -81,16 +81,14 @@ extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int); /* * blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue * @q: request queue - * @hctx_type: the hctx type index + * @type: the hctx type index * @cpu: CPU */ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, - unsigned int hctx_type, + enum hctx_type type, unsigned int cpu) { - struct blk_mq_tag_set *set = q->tag_set; - - return q->queue_hw_ctx[set->map[hctx_type].mq_map[cpu]]; + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; } /* @@ -103,12 +101,17 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, unsigned int flags, unsigned int cpu) { - int hctx_type = 0; + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if (q->tag_set->nr_maps > HCTX_TYPE_POLL && + ((flags & REQ_HIPRI) && test_bit(QUEUE_FLAG_POLL, &q->queue_flags))) + type = HCTX_TYPE_POLL; - if (q->mq_ops->rq_flags_to_type) - hctx_type = q->mq_ops->rq_flags_to_type(q, flags); + else if (q->tag_set->nr_maps > HCTX_TYPE_READ && + ((flags & REQ_OP_MASK) == REQ_OP_READ)) + type = HCTX_TYPE_READ; - return blk_mq_map_queue_type(q, hctx_type, cpu); + return blk_mq_map_queue_type(q, type, cpu); } /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 527907aa6903..a1bb4bb92e7f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -95,13 +95,6 @@ struct nvme_queue; static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); -enum { - NVMEQ_TYPE_READ, - NVMEQ_TYPE_WRITE, - NVMEQ_TYPE_POLL, - NVMEQ_TYPE_NR, -}; - /* * Represents an NVM Express device. Each nvme_dev is a PCI function. 
*/ @@ -115,7 +108,7 @@ struct nvme_dev { struct dma_pool *prp_small_pool; unsigned online_queues; unsigned max_qid; - unsigned io_queues[NVMEQ_TYPE_NR]; + unsigned io_queues[HCTX_MAX_TYPES]; unsigned int num_vecs; int q_depth; u32 db_stride; @@ -499,10 +492,10 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) map->nr_queues = dev->io_queues[i]; if (!map->nr_queues) { - BUG_ON(i == NVMEQ_TYPE_READ); + BUG_ON(i == HCTX_TYPE_DEFAULT); /* shared set, resuse read set parameters */ - map->nr_queues = dev->io_queues[NVMEQ_TYPE_READ]; + map->nr_queues = dev->io_queues[HCTX_TYPE_DEFAULT]; qoff = 0; offset = queue_irq_offset(dev); } @@ -512,7 +505,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) * affinity), so use the regular blk-mq cpu mapping */ map->queue_offset = qoff; - if (i != NVMEQ_TYPE_POLL) + if (i != HCTX_TYPE_POLL) blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset); else blk_mq_map_queues(map); @@ -961,16 +954,6 @@ out_free_cmd: return ret; } -static int nvme_rq_flags_to_type(struct request_queue *q, unsigned int flags) -{ - if ((flags & REQ_HIPRI) && test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - return NVMEQ_TYPE_POLL; - if ((flags & REQ_OP_MASK) == REQ_OP_READ) - return NVMEQ_TYPE_READ; - - return NVMEQ_TYPE_WRITE; -} - static void nvme_pci_complete_rq(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -1634,7 +1617,6 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { #define NVME_SHARED_MQ_OPS \ .queue_rq = nvme_queue_rq, \ .commit_rqs = nvme_commit_rqs, \ - .rq_flags_to_type = nvme_rq_flags_to_type, \ .complete = nvme_pci_complete_rq, \ .init_hctx = nvme_init_hctx, \ .init_request = nvme_init_request, \ @@ -1785,9 +1767,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev) } max = min(dev->max_qid, dev->ctrl.queue_count - 1); - if (max != 1 && dev->io_queues[NVMEQ_TYPE_POLL]) { - rw_queues = dev->io_queues[NVMEQ_TYPE_READ] + - dev->io_queues[NVMEQ_TYPE_WRITE]; + if (max != 1 && 
dev->io_queues[HCTX_TYPE_POLL]) { + rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] + + dev->io_queues[HCTX_TYPE_READ]; } else { rw_queues = max; } @@ -2076,9 +2058,9 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues) * Setup read/write queue split */ if (nr_io_queues == 1) { - dev->io_queues[NVMEQ_TYPE_READ] = 1; - dev->io_queues[NVMEQ_TYPE_WRITE] = 0; - dev->io_queues[NVMEQ_TYPE_POLL] = 0; + dev->io_queues[HCTX_TYPE_DEFAULT] = 1; + dev->io_queues[HCTX_TYPE_READ] = 0; + dev->io_queues[HCTX_TYPE_POLL] = 0; return; } @@ -2095,10 +2077,10 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues) this_p_queues = nr_io_queues - 1; } - dev->io_queues[NVMEQ_TYPE_POLL] = this_p_queues; + dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; nr_io_queues -= this_p_queues; } else - dev->io_queues[NVMEQ_TYPE_POLL] = 0; + dev->io_queues[HCTX_TYPE_POLL] = 0; /* * If 'write_queues' is set, ensure it leaves room for at least @@ -2112,11 +2094,11 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues) * a queue set. 
*/ if (!this_w_queues) { - dev->io_queues[NVMEQ_TYPE_WRITE] = 0; - dev->io_queues[NVMEQ_TYPE_READ] = nr_io_queues; + dev->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues; + dev->io_queues[HCTX_TYPE_READ] = 0; } else { - dev->io_queues[NVMEQ_TYPE_WRITE] = this_w_queues; - dev->io_queues[NVMEQ_TYPE_READ] = nr_io_queues - this_w_queues; + dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues; + dev->io_queues[HCTX_TYPE_READ] = nr_io_queues - this_w_queues; } } @@ -2138,8 +2120,8 @@ static int nvme_setup_irqs(struct nvme_dev *dev, int nr_io_queues) */ do { nvme_calc_io_queues(dev, nr_io_queues); - irq_sets[0] = dev->io_queues[NVMEQ_TYPE_READ]; - irq_sets[1] = dev->io_queues[NVMEQ_TYPE_WRITE]; + irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT]; + irq_sets[1] = dev->io_queues[HCTX_TYPE_READ]; if (!irq_sets[1]) affd.nr_sets = 1; @@ -2226,12 +2208,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) dev->num_vecs = result; result = max(result - 1, 1); - dev->max_qid = result + dev->io_queues[NVMEQ_TYPE_POLL]; + dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL]; - dev_info(dev->ctrl.device, "%d/%d/%d read/write/poll queues\n", - dev->io_queues[NVMEQ_TYPE_READ], - dev->io_queues[NVMEQ_TYPE_WRITE], - dev->io_queues[NVMEQ_TYPE_POLL]); + dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n", + dev->io_queues[HCTX_TYPE_DEFAULT], + dev->io_queues[HCTX_TYPE_READ], + dev->io_queues[HCTX_TYPE_POLL]); /* * Should investigate if there's a performance win from allocating @@ -2332,13 +2314,13 @@ static int nvme_dev_add(struct nvme_dev *dev) int ret; if (!dev->ctrl.tagset) { - if (!dev->io_queues[NVMEQ_TYPE_POLL]) + if (!dev->io_queues[HCTX_TYPE_POLL]) dev->tagset.ops = &nvme_mq_ops; else dev->tagset.ops = &nvme_mq_poll_noirq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; - dev->tagset.nr_maps = NVMEQ_TYPE_NR; + dev->tagset.nr_maps = HCTX_MAX_TYPES; dev->tagset.timeout = NVME_IO_TIMEOUT; dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = diff 
--git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 467f1dd21ccf..57eda7b20243 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -81,8 +81,12 @@ struct blk_mq_queue_map { unsigned int queue_offset; }; -enum { - HCTX_MAX_TYPES = 3, +enum hctx_type { + HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */ + HCTX_TYPE_READ, /* just for READ I/O */ + HCTX_TYPE_POLL, /* polled I/O of any kind */ + + HCTX_MAX_TYPES, }; struct blk_mq_tag_set { @@ -118,8 +122,6 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *); -/* takes rq->cmd_flags as input, returns a hardware type index */ -typedef int (rq_flags_to_type_fn)(struct request_queue *, unsigned int); typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); @@ -154,11 +156,6 @@ struct blk_mq_ops { */ commit_rqs_fn *commit_rqs; - /* - * Return a queue map type for the given request/bio flags - */ - rq_flags_to_type_fn *rq_flags_to_type; - /* * Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the -- cgit v1.2.3 From 529262d56dbebe6a26df5d2fd24cc0e1bc8579e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 2 Dec 2018 17:46:26 +0100 Subject: block: remove ->poll_fn This was intended to support users like nvme multipath, but is just getting in the way and adding another indirect call. 
Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 23 ----------------------- block/blk-mq.c | 24 +++++++++++++++++++----- include/linux/blkdev.h | 2 -- 3 files changed, 19 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index a1a5e1c14898..ad59102ee30a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1250,29 +1250,6 @@ blk_qc_t submit_bio(struct bio *bio) } EXPORT_SYMBOL(submit_bio); -/** - * blk_poll - poll for IO completions - * @q: the queue - * @cookie: cookie passed back at IO submission time - * @spin: whether to spin for completions - * - * Description: - * Poll for completions on the passed in queue. Returns number of - * completed entries found. If @spin is true, then blk_poll will continue - * looping until at least one completion is found, unless the task is - * otherwise marked running (or we need to reschedule). - */ -int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) -{ - if (!q->poll_fn || !blk_qc_t_valid(cookie)) - return 0; - - if (current->plug) - blk_flush_plug_list(current->plug, false); - return q->poll_fn(q, cookie, spin); -} -EXPORT_SYMBOL_GPL(blk_poll); - /** * blk_cloned_rq_check_limits - Helper function to check a cloned request * for new the queue limits diff --git a/block/blk-mq.c b/block/blk-mq.c index e09d7f500077..50d529602e05 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -38,7 +38,6 @@ #include "blk-mq-sched.h" #include "blk-rq-qos.h" -static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, bool spin); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); @@ -2838,8 +2837,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, spin_lock_init(&q->requeue_lock); blk_queue_make_request(q, blk_mq_make_request); - if (q->mq_ops->poll) - q->poll_fn = blk_mq_poll; /* * Do this after 
blk_queue_make_request() overrides it... @@ -3400,14 +3397,30 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, return blk_mq_poll_hybrid_sleep(q, hctx, rq); } -static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, bool spin) +/** + * blk_poll - poll for IO completions + * @q: the queue + * @cookie: cookie passed back at IO submission time + * @spin: whether to spin for completions + * + * Description: + * Poll for completions on the passed in queue. Returns number of + * completed entries found. If @spin is true, then blk_poll will continue + * looping until at least one completion is found, unless the task is + * otherwise marked running (or we need to reschedule). + */ +int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) { struct blk_mq_hw_ctx *hctx; long state; - if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + if (!blk_qc_t_valid(cookie) || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) return 0; + if (current->plug) + blk_flush_plug_list(current->plug, false); + hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; /* @@ -3448,6 +3461,7 @@ static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, bool spin) __set_current_state(TASK_RUNNING); return 0; } +EXPORT_SYMBOL_GPL(blk_poll); unsigned int blk_mq_rq_cpu(struct request *rq) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 08d940f85fa0..0b3874bdbc6a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -283,7 +283,6 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); -typedef int (poll_q_fn) (struct request_queue *q, blk_qc_t, bool spin); struct bio_vec; typedef int (dma_drain_needed_fn)(struct request *); @@ -401,7 +400,6 @@ struct request_queue { struct rq_qos *rq_qos; make_request_fn *make_request_fn; - poll_q_fn *poll_fn; dma_drain_needed_fn *dma_drain_needed; const struct blk_mq_ops *mq_ops; -- cgit v1.2.3 
From 719598c98d1961e78e2ad514a2cc15deb5e41db5 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 26 Nov 2018 08:28:37 +0200 Subject: IB/mlx5: Update the supported DEVX commands Update the supported DEVX commands, it includes adding to the query/modify command's list and to the encoding handling. In addition, a valid range for general commands was added to be used for future commands. Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/devx.c | 17 +++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 10 ++++++++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 80053324dd31..5271469aad10 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -314,6 +314,8 @@ static u64 devx_get_obj_id(const void *in) MLX5_GET(query_dct_in, in, dctn)); break; case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(query_xrq_in, in, xrqn)); break; @@ -340,9 +342,16 @@ static u64 devx_get_obj_id(const void *in) MLX5_GET(drain_dct_in, in, dctn)); break; case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(arm_xrq_in, in, xrqn)); break; + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: + obj_id = get_enc_obj_id + (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT, + MLX5_GET(query_packet_reformat_context_in, + in, packet_reformat_id)); + break; default: obj_id = 0; } @@ -601,6 +610,7 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: return true; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: { @@ -642,6 +652,9 @@ static bool 
devx_is_obj_query_cmd(const void *in) case MLX5_CMD_OP_QUERY_XRC_SRQ: case MLX5_CMD_OP_QUERY_DCT: case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: return true; default: return false; @@ -685,6 +698,10 @@ static bool devx_is_general_cmd(void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + if (opcode >= MLX5_CMD_OP_GENERAL_START && + opcode < MLX5_CMD_OP_GENERAL_END) + return true; + switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ece1b606c909..171d68663640 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -144,6 +144,9 @@ enum { MLX5_CMD_OP_DESTROY_XRQ = 0x718, MLX5_CMD_OP_QUERY_XRQ = 0x719, MLX5_CMD_OP_ARM_XRQ = 0x71a, + MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, + MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, + MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -245,6 +248,7 @@ enum { MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e, + MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT = 0x93f, MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942, @@ -260,6 +264,12 @@ enum { MLX5_CMD_OP_MAX }; +/* Valid range for general commands that don't work over an object */ +enum { + MLX5_CMD_OP_GENERAL_START = 0xb00, + MLX5_CMD_OP_GENERAL_END = 0xd00, +}; + struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_dmac[0x1]; u8 outer_smac[0x1]; -- cgit v1.2.3 From 770399df90b6e43bd086653f0a35888dca056576 Mon Sep 17 00:00:00 2001 From: Eric Long Date: Tue, 6 Nov 2018 13:01:36 +0800 
Subject: dmaengine: sprd: Support DMA 2-stage transfer mode The Spreadtrum DMA controller supports channel 2-stage transfer mode, that means we can request 2 dma channels, one for source channel, and another one for destination channel. Once the source channel's transaction is done, it will trigger the destination channel's transaction automatically by hardware signal. Signed-off-by: Eric Long Signed-off-by: Baolin Wang Signed-off-by: Vinod Koul --- drivers/dma/sprd-dma.c | 98 +++++++++++++++++++++++++++++++++++++++++++- include/linux/dma/sprd-dma.h | 62 ++++++++++++++++++++++++++-- 2 files changed, 156 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c index cefe42fb7100..50d6569585b4 100644 --- a/drivers/dma/sprd-dma.c +++ b/drivers/dma/sprd-dma.c @@ -36,6 +36,8 @@ #define SPRD_DMA_GLB_CHN_EN_STS 0x1c #define SPRD_DMA_GLB_DEBUG_STS 0x20 #define SPRD_DMA_GLB_ARB_SEL_STS 0x24 +#define SPRD_DMA_GLB_2STAGE_GRP1 0x28 +#define SPRD_DMA_GLB_2STAGE_GRP2 0x2c #define SPRD_DMA_GLB_REQ_UID(uid) (0x4 * ((uid) - 1)) #define SPRD_DMA_GLB_REQ_UID_OFFSET 0x2000 @@ -57,6 +59,18 @@ #define SPRD_DMA_CHN_SRC_BLK_STEP 0x38 #define SPRD_DMA_CHN_DES_BLK_STEP 0x3c +/* SPRD_DMA_GLB_2STAGE_GRP register definition */ +#define SPRD_DMA_GLB_2STAGE_EN BIT(24) +#define SPRD_DMA_GLB_CHN_INT_MASK GENMASK(23, 20) +#define SPRD_DMA_GLB_LIST_DONE_TRG BIT(19) +#define SPRD_DMA_GLB_TRANS_DONE_TRG BIT(18) +#define SPRD_DMA_GLB_BLOCK_DONE_TRG BIT(17) +#define SPRD_DMA_GLB_FRAG_DONE_TRG BIT(16) +#define SPRD_DMA_GLB_TRG_OFFSET 16 +#define SPRD_DMA_GLB_DEST_CHN_MASK GENMASK(13, 8) +#define SPRD_DMA_GLB_DEST_CHN_OFFSET 8 +#define SPRD_DMA_GLB_SRC_CHN_MASK GENMASK(5, 0) + /* SPRD_DMA_CHN_INTC register definition */ #define SPRD_DMA_INT_MASK GENMASK(4, 0) #define SPRD_DMA_INT_CLR_OFFSET 24 @@ -118,6 +132,10 @@ #define SPRD_DMA_SRC_TRSF_STEP_OFFSET 0 #define SPRD_DMA_TRSF_STEP_MASK GENMASK(15, 0) +/* define DMA channel mode & trigger mode 
mask */ +#define SPRD_DMA_CHN_MODE_MASK GENMASK(7, 0) +#define SPRD_DMA_TRG_MODE_MASK GENMASK(7, 0) + /* define the DMA transfer step type */ #define SPRD_DMA_NONE_STEP 0 #define SPRD_DMA_BYTE_STEP 1 @@ -170,6 +188,8 @@ struct sprd_dma_chn { struct dma_slave_config slave_cfg; u32 chn_num; u32 dev_id; + enum sprd_dma_chn_mode chn_mode; + enum sprd_dma_trg_mode trg_mode; struct sprd_dma_desc *cur_desc; }; @@ -206,6 +226,16 @@ static inline struct sprd_dma_desc *to_sprd_dma_desc(struct virt_dma_desc *vd) return container_of(vd, struct sprd_dma_desc, vd); } +static void sprd_dma_glb_update(struct sprd_dma_dev *sdev, u32 reg, + u32 mask, u32 val) +{ + u32 orig = readl(sdev->glb_base + reg); + u32 tmp; + + tmp = (orig & ~mask) | val; + writel(tmp, sdev->glb_base + reg); +} + static void sprd_dma_chn_update(struct sprd_dma_chn *schan, u32 reg, u32 mask, u32 val) { @@ -389,6 +419,49 @@ static enum sprd_dma_req_mode sprd_dma_get_req_type(struct sprd_dma_chn *schan) return (frag_reg >> SPRD_DMA_REQ_MODE_OFFSET) & SPRD_DMA_REQ_MODE_MASK; } +static int sprd_dma_set_2stage_config(struct sprd_dma_chn *schan) +{ + struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan); + u32 val, chn = schan->chn_num + 1; + + switch (schan->chn_mode) { + case SPRD_DMA_SRC_CHN0: + val = chn & SPRD_DMA_GLB_SRC_CHN_MASK; + val |= BIT(schan->trg_mode - 1) << SPRD_DMA_GLB_TRG_OFFSET; + val |= SPRD_DMA_GLB_2STAGE_EN; + sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP1, val, val); + break; + + case SPRD_DMA_SRC_CHN1: + val = chn & SPRD_DMA_GLB_SRC_CHN_MASK; + val |= BIT(schan->trg_mode - 1) << SPRD_DMA_GLB_TRG_OFFSET; + val |= SPRD_DMA_GLB_2STAGE_EN; + sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP2, val, val); + break; + + case SPRD_DMA_DST_CHN0: + val = (chn << SPRD_DMA_GLB_DEST_CHN_OFFSET) & + SPRD_DMA_GLB_DEST_CHN_MASK; + val |= SPRD_DMA_GLB_2STAGE_EN; + sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP1, val, val); + break; + + case SPRD_DMA_DST_CHN1: + val = (chn << 
SPRD_DMA_GLB_DEST_CHN_OFFSET) & + SPRD_DMA_GLB_DEST_CHN_MASK; + val |= SPRD_DMA_GLB_2STAGE_EN; + sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP2, val, val); + break; + + default: + dev_err(sdev->dma_dev.dev, "invalid channel mode setting %d\n", + schan->chn_mode); + return -EINVAL; + } + + return 0; +} + static void sprd_dma_set_chn_config(struct sprd_dma_chn *schan, struct sprd_dma_desc *sdesc) { @@ -422,6 +495,13 @@ static void sprd_dma_start(struct sprd_dma_chn *schan) list_del(&vd->node); schan->cur_desc = to_sprd_dma_desc(vd); + /* + * Set 2-stage configuration if the channel starts one 2-stage + * transfer. + */ + if (schan->chn_mode && sprd_dma_set_2stage_config(schan)) + return; + /* * Copy the DMA configuration from DMA descriptor to this hardware * channel. @@ -617,6 +697,7 @@ static int sprd_dma_fill_desc(struct dma_chan *chan, { struct sprd_dma_dev *sdev = to_sprd_dma_dev(chan); struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); + enum sprd_dma_chn_mode chn_mode = schan->chn_mode; u32 req_mode = (flags >> SPRD_DMA_REQ_SHIFT) & SPRD_DMA_REQ_MODE_MASK; u32 int_mode = flags & SPRD_DMA_INT_MASK; int src_datawidth, dst_datawidth, src_step, dst_step; @@ -628,7 +709,16 @@ static int sprd_dma_fill_desc(struct dma_chan *chan, dev_err(sdev->dma_dev.dev, "invalid source step\n"); return src_step; } - dst_step = SPRD_DMA_NONE_STEP; + + /* + * For 2-stage transfer, destination channel step can not be 0, + * since destination device is AON IRAM. 
+ */ + if (chn_mode == SPRD_DMA_DST_CHN0 || + chn_mode == SPRD_DMA_DST_CHN1) + dst_step = src_step; + else + dst_step = SPRD_DMA_NONE_STEP; } else { dst_step = sprd_dma_get_step(slave_cfg->dst_addr_width); if (dst_step < 0) { @@ -855,6 +945,12 @@ sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } } + /* Set channel mode and trigger mode for 2-stage transfer */ + schan->chn_mode = + (flags >> SPRD_DMA_CHN_MODE_SHIFT) & SPRD_DMA_CHN_MODE_MASK; + schan->trg_mode = + (flags >> SPRD_DMA_TRG_MODE_SHIFT) & SPRD_DMA_TRG_MODE_MASK; + ret = sprd_dma_fill_desc(chan, &sdesc->chn_hw, 0, 0, src, dst, len, dir, flags, slave_cfg); if (ret) { diff --git a/include/linux/dma/sprd-dma.h b/include/linux/dma/sprd-dma.h index b42b80e52cc2..ab82df64682a 100644 --- a/include/linux/dma/sprd-dma.h +++ b/include/linux/dma/sprd-dma.h @@ -3,9 +3,65 @@ #ifndef _SPRD_DMA_H_ #define _SPRD_DMA_H_ -#define SPRD_DMA_REQ_SHIFT 16 -#define SPRD_DMA_FLAGS(req_mode, int_type) \ - ((req_mode) << SPRD_DMA_REQ_SHIFT | (int_type)) +#define SPRD_DMA_REQ_SHIFT 8 +#define SPRD_DMA_TRG_MODE_SHIFT 16 +#define SPRD_DMA_CHN_MODE_SHIFT 24 +#define SPRD_DMA_FLAGS(chn_mode, trg_mode, req_mode, int_type) \ + ((chn_mode) << SPRD_DMA_CHN_MODE_SHIFT | \ + (trg_mode) << SPRD_DMA_TRG_MODE_SHIFT | \ + (req_mode) << SPRD_DMA_REQ_SHIFT | (int_type)) + +/* + * The Spreadtrum DMA controller supports channel 2-stage tansfer, that means + * we can request 2 dma channels, one for source channel, and another one for + * destination channel. Each channel is independent, and has its own + * configurations. Once the source channel's transaction is done, it will + * trigger the destination channel's transaction automatically by hardware + * signal. + * + * To support 2-stage tansfer, we must configure the channel mode and trigger + * mode as below definition. 
+ */ + +/* + * enum sprd_dma_chn_mode: define the DMA channel mode for 2-stage transfer + * @SPRD_DMA_CHN_MODE_NONE: No channel mode setting which means channel doesn't + * support the 2-stage transfer. + * @SPRD_DMA_SRC_CHN0: Channel used as source channel 0. + * @SPRD_DMA_SRC_CHN1: Channel used as source channel 1. + * @SPRD_DMA_DST_CHN0: Channel used as destination channel 0. + * @SPRD_DMA_DST_CHN1: Channel used as destination channel 1. + * + * Now the DMA controller can supports 2 groups 2-stage transfer. + */ +enum sprd_dma_chn_mode { + SPRD_DMA_CHN_MODE_NONE, + SPRD_DMA_SRC_CHN0, + SPRD_DMA_SRC_CHN1, + SPRD_DMA_DST_CHN0, + SPRD_DMA_DST_CHN1, +}; + +/* + * enum sprd_dma_trg_mode: define the DMA channel trigger mode for 2-stage + * transfer + * @SPRD_DMA_NO_TRG: No trigger setting. + * @SPRD_DMA_FRAG_DONE_TRG: Trigger the transaction of destination channel + * automatically once the source channel's fragment request is done. + * @SPRD_DMA_BLOCK_DONE_TRG: Trigger the transaction of destination channel + * automatically once the source channel's block request is done. + * @SPRD_DMA_TRANS_DONE_TRG: Trigger the transaction of destination channel + * automatically once the source channel's transfer request is done. + * @SPRD_DMA_LIST_DONE_TRG: Trigger the transaction of destination channel + * automatically once the source channel's link-list request is done. + */ +enum sprd_dma_trg_mode { + SPRD_DMA_NO_TRG, + SPRD_DMA_FRAG_DONE_TRG, + SPRD_DMA_BLOCK_DONE_TRG, + SPRD_DMA_TRANS_DONE_TRG, + SPRD_DMA_LIST_DONE_TRG, +}; /* * enum sprd_dma_req_mode: define the DMA request mode -- cgit v1.2.3 From 617654aae50eb59dd98aa53fb562e850937f4cde Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 16 Aug 2018 12:28:48 +0300 Subject: PCI / ACPI: Identify untrusted PCI devices A malicious PCI device may use DMA to attack the system. An external Thunderbolt port is a convenient point to attach such a device. The OS may use IOMMU to defend against DMA attacks. 
Some BIOSes mark these externally facing root ports with this ACPI _DSD [1]: Name (_DSD, Package () { ToUUID ("efcc06cc-73ac-4bc3-bff0-76143807c389"), Package () { Package () {"ExternalFacingPort", 1}, Package () {"UID", 0 } } }) If we find such a root port, mark it and all its children as untrusted. The rest of the OS may use this information to enable DMA protection against malicious devices. For instance the device may be put behind an IOMMU to keep it from accessing memory outside of what the driver has allocated for it. While at it, add a comment on top of prp_guids array explaining the possible caveat resulting when these GUIDs are treated equivalent. [1] https://docs.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports#identifying-externally-exposed-pcie-root-ports Signed-off-by: Mika Westerberg Acked-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas --- drivers/acpi/property.c | 11 +++++++++++ drivers/pci/pci-acpi.c | 19 +++++++++++++++++++ drivers/pci/probe.c | 15 +++++++++++++++ include/linux/pci.h | 8 ++++++++ 4 files changed, 53 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 8c7c4583b52d..77abe0ec4043 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -24,6 +24,14 @@ static int acpi_data_get_property_array(const struct acpi_device_data *data, acpi_object_type type, const union acpi_object **obj); +/* + * The GUIDs here are made equivalent to each other in order to avoid extra + * complexity in the properties handling code, with the caveat that the + * kernel will accept certain combinations of GUID and properties that are + * not defined without a warning. For instance if any of the properties + * from different GUID appear in a property list of another, it will be + * accepted by the kernel. Firmware validation tools should catch these. 
+ */ static const guid_t prp_guids[] = { /* ACPI _DSD device properties GUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301 */ GUID_INIT(0xdaffd814, 0x6eba, 0x4d8c, @@ -31,6 +39,9 @@ static const guid_t prp_guids[] = { /* Hotplug in D3 GUID: 6211e2c0-58a3-4af3-90e1-927a4e0c55a4 */ GUID_INIT(0x6211e2c0, 0x58a3, 0x4af3, 0x90, 0xe1, 0x92, 0x7a, 0x4e, 0x0c, 0x55, 0xa4), + /* External facing port GUID: efcc06cc-73ac-4bc3-bff0-76143807c389 */ + GUID_INIT(0xefcc06cc, 0x73ac, 0x4bc3, + 0xbf, 0xf0, 0x76, 0x14, 0x38, 0x07, 0xc3, 0x89), }; static const guid_t ads_guid = diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 921db6f80340..e1949f7efd9c 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -789,6 +789,24 @@ static void pci_acpi_optimize_delay(struct pci_dev *pdev, ACPI_FREE(obj); } +static void pci_acpi_set_untrusted(struct pci_dev *dev) +{ + u8 val; + + if (pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) + return; + if (device_property_read_u8(&dev->dev, "ExternalFacingPort", &val)) + return; + + /* + * These root ports expose PCIe (including DMA) outside of the + * system so make sure we treat them and everything behind as + * untrusted. + */ + if (val) + dev->untrusted = 1; +} + static void pci_acpi_setup(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); @@ -798,6 +816,7 @@ static void pci_acpi_setup(struct device *dev) return; pci_acpi_optimize_delay(pci_dev, adev->handle); + pci_acpi_set_untrusted(pci_dev); pci_acpi_add_pm_notifier(adev, pci_dev); if (!adev->wakeup.flags.valid) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b1c05b5054a0..257b9f6f2ebb 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1378,6 +1378,19 @@ static void set_pcie_thunderbolt(struct pci_dev *dev) } } +static void set_pcie_untrusted(struct pci_dev *dev) +{ + struct pci_dev *parent; + + /* + * If the upstream bridge is untrusted we treat this device + * untrusted as well. 
+ */ + parent = pci_upstream_bridge(dev); + if (parent && parent->untrusted) + dev->untrusted = true; +} + /** * pci_ext_cfg_is_aliased - Is ext config space just an alias of std config? * @dev: PCI device @@ -1638,6 +1651,8 @@ int pci_setup_device(struct pci_dev *dev) /* Need to have dev->cfg_size ready */ set_pcie_thunderbolt(dev); + set_pcie_untrusted(dev); + /* "Unknown power state" */ dev->current_state = PCI_UNKNOWN; diff --git a/include/linux/pci.h b/include/linux/pci.h index 11c71c4ecf75..c786a2f27bee 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -396,6 +396,14 @@ struct pci_dev { unsigned int is_hotplug_bridge:1; unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ + /* + * Devices marked being untrusted are the ones that can potentially + * execute DMA attacks and similar. They are typically connected + * through external ports such as Thunderbolt but not limited to + * that. When an IOMMU is enabled they should be getting full + * mappings to make sure they cannot access arbitrary memory. + */ + unsigned int untrusted:1; unsigned int __aer_firmware_first_valid:1; unsigned int __aer_firmware_first:1; unsigned int broken_intx_masking:1; /* INTx masking can't be used */ -- cgit v1.2.3 From 89a6079df791aeace2044ea93be1b397195824ec Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Oct 2018 15:45:01 +0800 Subject: iommu/vt-d: Force IOMMU on for platform opt in hint Intel VT-d spec added a new DMA_CTRL_PLATFORM_OPT_IN_FLAG flag in DMAR ACPI table [1] for BIOS to report compliance about platform initiated DMA restricted to RMRR ranges when transferring control to the OS. This means that during OS boot, before it enables IOMMU none of the connected devices can bypass DMA protection for instance by overwriting the data structures used by the IOMMU. The OS also treats this as a hint that the IOMMU should be enabled to prevent DMA attacks from possible malicious devices. 
A use of this flag is Kernel DMA protection for Thunderbolt [2] which in practice means that IOMMU should be enabled for PCIe devices connected to the Thunderbolt ports. With IOMMU enabled for these devices, all DMA operations are limited in the range reserved for it, thus the DMA attacks are prevented. All these devices are enumerated in the PCI/PCIe module and marked with an untrusted flag. This forces IOMMU to be enabled if DMA_CTRL_PLATFORM_OPT_IN_FLAG is set in DMAR ACPI table and there are PCIe devices marked as untrusted in the system. This can be turned off by adding "intel_iommu=off" in the kernel command line, if any problems are found. [1] https://software.intel.com/sites/default/files/managed/c5/15/vt-directed-io-spec.pdf [2] https://docs.microsoft.com/en-us/windows/security/information-protection/kernel-dma-protection-for-thunderbolt Cc: Jacob Pan Cc: Sohil Mehta Signed-off-by: Lu Baolu Signed-off-by: Mika Westerberg Reviewed-by: Ashok Raj Reviewed-by: Joerg Roedel Acked-by: Joerg Roedel --- drivers/iommu/dmar.c | 25 +++++++++++++++++++++ drivers/iommu/intel-iommu.c | 53 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/dmar.h | 8 +++++++ 3 files changed, 84 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index d9c748b6f9e4..1edf2a251336 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -2042,3 +2042,28 @@ int dmar_device_remove(acpi_handle handle) { return dmar_device_hotplug(handle, false); } + +/* + * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table + * + * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in + * the ACPI DMAR table. This means that the platform boot firmware has made + * sure no device can issue DMA outside of RMRR regions. 
+ */ +bool dmar_platform_optin(void) +{ + struct acpi_table_dmar *dmar; + acpi_status status; + bool ret; + + status = acpi_get_table(ACPI_SIG_DMAR, 0, + (struct acpi_table_header **)&dmar); + if (ACPI_FAILURE(status)) + return false; + + ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN); + acpi_put_table((struct acpi_table_header *)dmar); + + return ret; +} +EXPORT_SYMBOL_GPL(dmar_platform_optin); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 41a4b8808802..30e8584137f5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -184,6 +184,7 @@ static int rwbf_quirk; */ static int force_on = 0; int intel_iommu_tboot_noforce; +static int no_platform_optin; #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) @@ -503,6 +504,7 @@ static int __init intel_iommu_setup(char *str) pr_info("IOMMU enabled\n"); } else if (!strncmp(str, "off", 3)) { dmar_disabled = 1; + no_platform_optin = 1; pr_info("IOMMU disabled\n"); } else if (!strncmp(str, "igfx_off", 8)) { dmar_map_gfx = 0; @@ -2895,6 +2897,13 @@ static int iommu_should_identity_map(struct device *dev, int startup) if (device_is_rmrr_locked(dev)) return 0; + /* + * Prevent any device marked as untrusted from getting + * placed into the statically identity mapping domain. 
+ */ + if (pdev->untrusted) + return 0; + if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) return 1; @@ -4728,14 +4737,54 @@ const struct attribute_group *intel_iommu_groups[] = { NULL, }; +static int __init platform_optin_force_iommu(void) +{ + struct pci_dev *pdev = NULL; + bool has_untrusted_dev = false; + + if (!dmar_platform_optin() || no_platform_optin) + return 0; + + for_each_pci_dev(pdev) { + if (pdev->untrusted) { + has_untrusted_dev = true; + break; + } + } + + if (!has_untrusted_dev) + return 0; + + if (no_iommu || dmar_disabled) + pr_info("Intel-IOMMU force enabled due to platform opt in\n"); + + /* + * If Intel-IOMMU is disabled by default, we will apply identity + * map for all devices except those marked as being untrusted. + */ + if (dmar_disabled) + iommu_identity_mapping |= IDENTMAP_ALL; + + dmar_disabled = 0; +#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB) + swiotlb = 0; +#endif + no_iommu = 0; + + return 1; +} + int __init intel_iommu_init(void) { int ret = -ENODEV; struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - /* VT-d is required for a TXT/tboot launch, so enforce that */ - force_on = tboot_force_iommu(); + /* + * Intel IOMMU is required for a TXT/tboot launch or platform + * opt in, so enforce that. 
+ */ + force_on = tboot_force_iommu() || platform_optin_force_iommu(); if (iommu_init_mempool()) { if (force_on) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 843a41ba7e28..f8af1d770520 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -39,6 +39,7 @@ struct acpi_dmar_header; /* DMAR Flags */ #define DMAR_INTR_REMAP 0x1 #define DMAR_X2APIC_OPT_OUT 0x2 +#define DMAR_PLATFORM_OPT_IN 0x4 struct intel_iommu; @@ -170,6 +171,8 @@ static inline int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert) { return 0; } #endif /* CONFIG_IRQ_REMAP */ +extern bool dmar_platform_optin(void); + #else /* CONFIG_DMAR_TABLE */ static inline int dmar_device_add(void *handle) @@ -182,6 +185,11 @@ static inline int dmar_device_remove(void *handle) return 0; } +static inline bool dmar_platform_optin(void) +{ + return false; +} + #endif /* CONFIG_DMAR_TABLE */ struct irte { -- cgit v1.2.3 From 6b69753fa0078c5222d6b4aeb5017c5503e0dc8e Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 15 Nov 2018 19:03:21 -0800 Subject: usb: gadget: Introduce frame_number to usb_request Add a field frame_number to the usb_request to report the interval number in (micro)frames in which the isochronous transfer was transmitted or received. The gadget driver can use this knowledge to synchronize with the host. Also, this option is useful for debugging purposes. Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index e5cd84a0f84a..7595056b96c1 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -61,6 +61,8 @@ struct usb_ep; * invalidated by the error may first be dequeued. * @context: For use by the completion callback * @list: For use by the gadget driver. 
+ * @frame_number: Reports the interval number in (micro)frame in which the + * isochronous transfer was transmitted or received. * @status: Reports completion code, zero or a negative errno. * Normally, faults block the transfer queue from advancing until * the completion callback returns. @@ -112,6 +114,8 @@ struct usb_request { void *context; struct list_head list; + unsigned frame_number; /* ISO ONLY */ + int status; unsigned actual; }; -- cgit v1.2.3 From c96cf923a98d1b094df9f0cf97a83e118817e31b Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 1 Nov 2018 00:24:48 +0000 Subject: tty: Don't block on IO when ldisc change is pending There might be situations where tty_ldisc_lock() has blocked, but there is already IO on tty and it prevents line discipline changes. It might theoretically turn into dead-lock. Basically, provide more priority to pending tty_ldisc_lock() than to servicing reads/writes over tty. User-visible issue was reported by Mikulas where on pa-risc with Debian 5 reboot took either 80 seconds, 3 minutes or 3:25 after proper locking in tty_reopen(). 
Cc: Jiri Slaby Reported-by: Mikulas Patocka Signed-off-by: Dmitry Safonov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_hdlc.c | 4 ++-- drivers/tty/n_r3964.c | 2 +- drivers/tty/n_tty.c | 8 ++++---- drivers/tty/tty_ldisc.c | 7 +++++++ include/linux/tty.h | 7 +++++++ 5 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index dabb391909aa..99460af61b77 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -612,7 +612,7 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, } /* no data */ - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { ret = -EAGAIN; break; } @@ -679,7 +679,7 @@ static ssize_t n_hdlc_tty_write(struct tty_struct *tty, struct file *file, if (tbuf) break; - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { error = -EAGAIN; break; } diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index 749a608c40b0..f75696f0ee2d 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -1085,7 +1085,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, pMsg = remove_msg(pInfo, pClient); if (pMsg == NULL) { /* no messages available. 
*/ - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { ret = -EAGAIN; goto unlock; } diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 3ad460219fd6..5dc9686697cf 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1702,7 +1702,7 @@ n_tty_receive_buf_common(struct tty_struct *tty, const unsigned char *cp, down_read(&tty->termios_rwsem); - while (1) { + do { /* * When PARMRK is set, each input char may take up to 3 chars * in the read buf; reduce the buffer space avail by 3x @@ -1744,7 +1744,7 @@ n_tty_receive_buf_common(struct tty_struct *tty, const unsigned char *cp, fp += n; count -= n; rcvd += n; - } + } while (!test_bit(TTY_LDISC_CHANGING, &tty->flags)); tty->receive_room = room; @@ -2211,7 +2211,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, break; if (!timeout) break; - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { retval = -EAGAIN; break; } @@ -2365,7 +2365,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, } if (!nr) break; - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { retval = -EAGAIN; break; } diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index fc4c97cae01e..9434d20cf3ca 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -327,6 +327,11 @@ int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; + /* Kindly asking blocked readers to release the read side */ + set_bit(TTY_LDISC_CHANGING, &tty->flags); + wake_up_interruptible_all(&tty->read_wait); + wake_up_interruptible_all(&tty->write_wait); + ret = __tty_ldisc_lock(tty, timeout); if (!ret) return -EBUSY; @@ -337,6 +342,8 @@ int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) void tty_ldisc_unlock(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); + /* Can be cleared here - ldisc_unlock will wake up writers firstly */ + clear_bit(TTY_LDISC_CHANGING, &tty->flags); 
__tty_ldisc_unlock(tty); } diff --git a/include/linux/tty.h b/include/linux/tty.h index 414db2bce715..80ae5528ef8e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -366,6 +366,7 @@ struct tty_file_private { #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ #define TTY_HUPPING 19 /* Hangup in progress */ +#define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ /* Values for tty->flow_change */ @@ -383,6 +384,12 @@ static inline void tty_set_flow_change(struct tty_struct *tty, int val) smp_mb(); } +static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) +{ + return file->f_flags & O_NONBLOCK || + test_bit(TTY_LDISC_CHANGING, &tty->flags); +} + static inline bool tty_io_error(struct tty_struct *tty) { return test_bit(TTY_IO_ERROR, &tty->flags); -- cgit v1.2.3 From 2fc00c1e0f9d2abe0df74c33cf9f40d12b9b892f Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Mon, 3 Dec 2018 14:46:20 +0800 Subject: HID: use macros in IS_INPUT_APPLICATION Add missing definition for HID_DG_WHITEBOARD then replace the hid usage hex with macros for better readability.
Signed-off-by: Chris Chiu Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index a355d61940f2..ce5f996c8d3d 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -238,6 +238,7 @@ struct hid_item { #define HID_DG_LIGHTPEN 0x000d0003 #define HID_DG_TOUCHSCREEN 0x000d0004 #define HID_DG_TOUCHPAD 0x000d0005 +#define HID_DG_WHITEBOARD 0x000d0006 #define HID_DG_STYLUS 0x000d0020 #define HID_DG_PUCK 0x000d0021 #define HID_DG_FINGER 0x000d0022 @@ -836,7 +837,10 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, /* Applications from HID Usage Tables 4/8/99 Version 1.1 */ /* We ignore a few input applications that are not widely used */ -#define IS_INPUT_APPLICATION(a) (((a >= 0x00010000) && (a <= 0x00010008)) || (a == 0x00010080) || (a == 0x000c0001) || ((a >= 0x000d0002) && (a <= 0x000d0006))) +#define IS_INPUT_APPLICATION(a) \ + (((a >= HID_UP_GENDESK) && (a <= HID_GD_MULTIAXIS)) \ + || ((a >= HID_DG_PEN) && (a <= HID_DG_WHITEBOARD)) \ + || (a == HID_GD_SYSTEM_CONTROL) || (a == HID_CP_CONSUMER_CONTROL)) /* HID core API */ -- cgit v1.2.3 From 7f5592742a429b4de770fc5b796d18de43a15fdc Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Mon, 3 Dec 2018 14:46:21 +0800 Subject: HID: input: support Microsoft wireless radio control hotkey The ASUS laptops start to support the airplane mode radio management to replace the original mechanism of airplane mode toggle hotkey. On the ASUS P5440FF, it presents as a HID device connecting via I2C, named i2c-AMPD0001. When pressing it, the Embedded Controller send hid report via I2C and switch the airplane mode indicator LED based on the status. However, it's not working because it fails to be identified as a hidinput device. It fails in hidinput_connect() due to the macro IS_INPUT_APPLICATION doesn't have HID_GD_WIRELESS_RADIO_CTLS as a legit application code. 
It's easy to add the HID I2C vendor and product id to the quirk list and apply HID_QUIRK_HIDINPUT_FORCE to make it work. But it makes more sense to support it as a generic input application. Signed-off-by: Chris Chiu Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index ce5f996c8d3d..42079116fb61 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -840,7 +840,8 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, #define IS_INPUT_APPLICATION(a) \ (((a >= HID_UP_GENDESK) && (a <= HID_GD_MULTIAXIS)) \ || ((a >= HID_DG_PEN) && (a <= HID_DG_WHITEBOARD)) \ - || (a == HID_GD_SYSTEM_CONTROL) || (a == HID_CP_CONSUMER_CONTROL)) + || (a == HID_GD_SYSTEM_CONTROL) || (a == HID_CP_CONSUMER_CONTROL) \ + || (a == HID_GD_WIRELESS_RADIO_CTLS)) /* HID core API */ -- cgit v1.2.3 From 51eb78098ab79bba8b1df24da2304e61deb74629 Mon Sep 17 00:00:00 2001 From: tom Date: Tue, 4 Dec 2018 18:27:34 +0000 Subject: iommu: Change tlb_range_add to iotlb_range_add and tlb_sync to iotlb_sync Someone forgot to update this comment. 
Signed-off-by: Tom Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a1d28f42cb77..11db18b9ffe8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -168,8 +168,8 @@ struct iommu_resv_region { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain - * @tlb_range_add: Add a given iova range to the flush queue for this domain - * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush + * @iotlb_range_add: Add a given iova range to the flush queue for this domain + * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue * @iova_to_phys: translate iova to physical address * @add_device: add device to iommu grouping -- cgit v1.2.3 From 7693b5643fd2d682de90733b67fc8032b9646911 Mon Sep 17 00:00:00 2001 From: Oskari Lemmela Date: Tue, 20 Nov 2018 19:52:09 +0200 Subject: power: supply: add AC power supply driver for AXP813 AXP813 and AXP803 PMICs can control input current and minimum voltage. Both of these values are configurable. 
Signed-off-by: Oskari Lemmela Reviewed-by: Quentin Schulz Reviewed-by: Chen-Yu Tsai Acked-by: Lee Jones Signed-off-by: Sebastian Reichel --- drivers/power/supply/axp20x_ac_power.c | 94 ++++++++++++++++++++++++++++++++++ include/linux/mfd/axp20x.h | 1 + 2 files changed, 95 insertions(+) (limited to 'include/linux') diff --git a/drivers/power/supply/axp20x_ac_power.c b/drivers/power/supply/axp20x_ac_power.c index 0771f951b11f..59b4c8d3b961 100644 --- a/drivers/power/supply/axp20x_ac_power.c +++ b/drivers/power/supply/axp20x_ac_power.c @@ -27,6 +27,16 @@ #define AXP20X_PWR_STATUS_ACIN_PRESENT BIT(7) #define AXP20X_PWR_STATUS_ACIN_AVAIL BIT(6) +#define AXP813_VHOLD_MASK GENMASK(5, 3) +#define AXP813_VHOLD_UV_TO_BIT(x) ((((x) / 100000) - 40) << 3) +#define AXP813_VHOLD_REG_TO_UV(x) \ + (((((x) & AXP813_VHOLD_MASK) >> 3) + 40) * 100000) + +#define AXP813_CURR_LIMIT_MASK GENMASK(2, 0) +#define AXP813_CURR_LIMIT_UA_TO_BIT(x) (((x) / 500000) - 3) +#define AXP813_CURR_LIMIT_REG_TO_UA(x) \ + ((((x) & AXP813_CURR_LIMIT_MASK) + 3) * 500000) + #define DRVNAME "axp20x-ac-power-supply" struct axp20x_ac_power { @@ -102,6 +112,57 @@ static int axp20x_ac_power_get_property(struct power_supply *psy, return 0; + case POWER_SUPPLY_PROP_VOLTAGE_MIN: + ret = regmap_read(power->regmap, AXP813_ACIN_PATH_CTRL, &reg); + if (ret) + return ret; + + val->intval = AXP813_VHOLD_REG_TO_UV(reg); + + return 0; + + case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT: + ret = regmap_read(power->regmap, AXP813_ACIN_PATH_CTRL, &reg); + if (ret) + return ret; + + val->intval = AXP813_CURR_LIMIT_REG_TO_UA(reg); + /* AXP813 datasheet defines values 11x as 4000mA */ + if (val->intval > 4000000) + val->intval = 4000000; + + return 0; + + default: + return -EINVAL; + } + + return -EINVAL; +} + +static int axp813_ac_power_set_property(struct power_supply *psy, + enum power_supply_property psp, + const union power_supply_propval *val) +{ + struct axp20x_ac_power *power = power_supply_get_drvdata(psy); + + switch (psp) { + case
POWER_SUPPLY_PROP_VOLTAGE_MIN: + if (val->intval < 4000000 || val->intval > 4700000) + return -EINVAL; + + return regmap_update_bits(power->regmap, AXP813_ACIN_PATH_CTRL, + AXP813_VHOLD_MASK, + AXP813_VHOLD_UV_TO_BIT(val->intval)); + + case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT: + if (val->intval < 1500000 || val->intval > 4000000) + return -EINVAL; + + return regmap_update_bits(power->regmap, AXP813_ACIN_PATH_CTRL, + AXP813_CURR_LIMIT_MASK, + AXP813_CURR_LIMIT_UA_TO_BIT(val->intval)); + default: return -EINVAL; } @@ -109,6 +170,13 @@ static int axp20x_ac_power_get_property(struct power_supply *psy, return -EINVAL; } +static int axp813_ac_power_prop_writeable(struct power_supply *psy, + enum power_supply_property psp) +{ + return psp == POWER_SUPPLY_PROP_VOLTAGE_MIN || + psp == POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT; +} + static enum power_supply_property axp20x_ac_power_properties[] = { POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_PRESENT, @@ -123,6 +191,14 @@ static enum power_supply_property axp22x_ac_power_properties[] = { POWER_SUPPLY_PROP_ONLINE, }; +static enum power_supply_property axp813_ac_power_properties[] = { + POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_ONLINE, + POWER_SUPPLY_PROP_VOLTAGE_MIN, + POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT, +}; + static const struct power_supply_desc axp20x_ac_power_desc = { .name = "axp20x-ac", .type = POWER_SUPPLY_TYPE_MAINS, @@ -139,6 +215,16 @@ static const struct power_supply_desc axp22x_ac_power_desc = { .get_property = axp20x_ac_power_get_property, }; +static const struct power_supply_desc axp813_ac_power_desc = { + .name = "axp813-ac", + .type = POWER_SUPPLY_TYPE_MAINS, + .properties = axp813_ac_power_properties, + .num_properties = ARRAY_SIZE(axp813_ac_power_properties), + .property_is_writeable = axp813_ac_power_prop_writeable, + .get_property = axp20x_ac_power_get_property, + .set_property = axp813_ac_power_set_property, +}; + struct axp_data { const struct power_supply_desc 
*power_desc; bool acin_adc; @@ -154,6 +240,11 @@ static const struct axp_data axp22x_data = { .acin_adc = false, }; +static const struct axp_data axp813_data = { + .power_desc = &axp813_ac_power_desc, + .acin_adc = false, +}; + static int axp20x_ac_power_probe(struct platform_device *pdev) { struct axp20x_dev *axp20x = dev_get_drvdata(pdev->dev.parent); @@ -234,6 +325,9 @@ static const struct of_device_id axp20x_ac_power_match[] = { }, { .compatible = "x-powers,axp221-ac-power-supply", .data = &axp22x_data, + }, { + .compatible = "x-powers,axp813-ac-power-supply", + .data = &axp813_data, }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, axp20x_ac_power_match); diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 517e60eecbcb..2302b620d238 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -266,6 +266,7 @@ enum axp20x_variants { #define AXP288_RT_BATT_V_H 0xa0 #define AXP288_RT_BATT_V_L 0xa1 +#define AXP813_ACIN_PATH_CTRL 0x3a #define AXP813_ADC_RATE 0x85 /* Fuel Gauge */ -- cgit v1.2.3 From 16ad9501b1f2edebe24f8cf3c09da0695871986b Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 21 Nov 2018 21:32:25 -0500 Subject: firmware: qcom: scm: fix compilation error when disabled This fixes the case when CONFIG_QCOM_SCM is not enabled, and linux/errno.h has not been included previously. 
Signed-off-by: Jonathan Marek Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/qcom_scm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 06996ad4f2bc..1637385bcc17 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -67,6 +67,9 @@ extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); #else + +#include <linux/errno.h> + static inline int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) { -- cgit v1.2.3 From 5f15eed245bc6d7c82d44f0ebcaf62071a9d55bd Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 5 Dec 2018 21:49:40 +0100 Subject: net: mii: Fix autoneg in mii_lpa_to_linkmode_lpa_t() mii_adv_to_linkmode_adv_t() clears all bits before setting the ones it needs to set. This means the freshly set Autoneg gets cleared. Change the order, and add comments about it clearing the old content of the bitmap. Fixes: c0ec3c273677 ("net: phy: Convert u32 phydev->lp_advertising to linkmode") Reported-by: Heiner Kallweit Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mii.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index fb7ae4ae8ce3..57365224306c 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -378,7 +378,8 @@ static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa) * @adv: value of the MII_ADVERTISE register * * A small helper function that translates MII_ADVERTISE bits - * to linkmode advertisement settings. + * to linkmode advertisement settings. Clears the old value + * of advertising.
*/ static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising, u32 adv) @@ -408,16 +409,18 @@ static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising, * @adv: value of the MII_LPA register * * A small helper function that translates MII_LPA bits, when in - * 1000Base-T mode, to linkmode LP advertisement settings. + * 1000Base-T mode, to linkmode LP advertisement settings. Clears the + * old value of advertising */ static inline void mii_lpa_to_linkmode_lpa_t(unsigned long *lp_advertising, u32 lpa) { + mii_adv_to_linkmode_adv_t(lp_advertising, lpa); + if (lpa & LPA_LPACK) linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, lp_advertising); - mii_adv_to_linkmode_adv_t(lp_advertising, lpa); } /** -- cgit v1.2.3 From 78a24df370072ea3b7c0a466efd776fc8f87c73a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 5 Dec 2018 21:49:41 +0100 Subject: net: mii: Rename mii_stat1000_to_linkmode_lpa_t Rename mii_stat1000_to_linkmode_lpa_t to mii_stat1000_mod_linkmode_lpa_t to indicate it modifies the passed linkmode bitmap, without clearing any other bits. Add a helper to set/clear bits in a linkmode. Use this helper to ensure bit are clear which the stat1000 indicates should not be set. Fixes: c0ec3c273677 ("net: phy: Convert u32 phydev->lp_advertising to linkmode") Suggested-by: Heiner Kallweit Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/marvell.c | 2 +- drivers/net/phy/marvell10g.c | 2 +- drivers/net/phy/phy_device.c | 4 ++-- include/linux/linkmode.h | 9 +++++++++ include/linux/mii.h | 20 ++++++++++---------- 5 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 6a9881942e53..03dafe0e68a2 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1138,7 +1138,7 @@ static int marvell_read_status_page_an(struct phy_device *phydev, if (!fiber) { mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, lpa); - mii_stat1000_to_linkmode_lpa_t(phydev->lp_advertising, lpagb); + mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, lpagb); if (phydev->duplex == DUPLEX_FULL) { phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 6f6e886fc836..82ab6ed3b74e 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -490,7 +490,7 @@ static int mv3310_read_status(struct phy_device *phydev) if (val < 0) return val; - mii_stat1000_to_linkmode_lpa_t(phydev->lp_advertising, val); + mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, val); if (phydev->autoneg == AUTONEG_ENABLE) phy_resolve_aneg_linkmode(phydev); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e6720e2a2da6..c20b5ecc0f4b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1739,8 +1739,8 @@ int genphy_read_status(struct phy_device *phydev) return -ENOLINK; } - mii_stat1000_to_linkmode_lpa_t(phydev->lp_advertising, - lpagb); + mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, + lpagb); common_adv_gb = lpagb & adv << 2; } diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 22443d7fb5cd..a99c58866860 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -57,6 +57,15 @@ static inline void linkmode_clear_bit(int nr, volatile 
unsigned long *addr) __clear_bit(nr, addr); } +static inline void linkmode_mod_bit(int nr, volatile unsigned long *addr, + int set) +{ + if (set) + linkmode_set_bit(nr, addr); + else + linkmode_clear_bit(nr, addr); +} + static inline void linkmode_change_bit(int nr, volatile unsigned long *addr) { __change_bit(nr, addr); diff --git a/include/linux/mii.h b/include/linux/mii.h index 57365224306c..b915ef6c3692 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -288,22 +288,22 @@ static inline u32 mii_stat1000_to_ethtool_lpa_t(u32 lpa) } /** - * mii_stat1000_to_linkmode_lpa_t + * mii_stat1000_mod_linkmode_lpa_t * @advertising: target the linkmode advertisement settings * @adv: value of the MII_STAT1000 register * * A small helper function that translates MII_STAT1000 bits, when in - * 1000Base-T mode, to linkmode advertisement settings. + * 1000Base-T mode, to linkmode advertisement settings. Other bits in + * advertising are not changed. */ -static inline void mii_stat1000_to_linkmode_lpa_t(unsigned long *advertising, - u32 lpa) +static inline void mii_stat1000_mod_linkmode_lpa_t(unsigned long *advertising, + u32 lpa) { - if (lpa & LPA_1000HALF) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - advertising); - if (lpa & LPA_1000FULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - advertising); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + advertising, lpa & LPA_1000HALF); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + advertising, lpa & LPA_1000FULL); } /** -- cgit v1.2.3 From d3351931a37bdb329b5ea761424579fa91c866ee Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 5 Dec 2018 21:49:43 +0100 Subject: net: mii: Add mii_lpa_mod_linkmode_lpa_t Add a _mod_ variant of mii_lpa_to_linkmode_lpa_t. Use this to fix the genphy_read_status() where the 1G link partner features are getting lost.
Fixes: c0ec3c273677 ("net: phy: Convert u32 phydev->lp_advertising to linkmode") Reported-by: Heiner Kallweit Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- include/linux/mii.h | 68 +++++++++++++++++++++++++++++++++----------- 2 files changed, 53 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index c20b5ecc0f4b..7d5d698604aa 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1748,7 +1748,7 @@ int genphy_read_status(struct phy_device *phydev) if (lpa < 0) return lpa; - mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, lpa); + mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa); adv = phy_read(phydev, MII_ADVERTISE); if (adv < 0) diff --git a/include/linux/mii.h b/include/linux/mii.h index b915ef6c3692..e72447778a08 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -372,6 +372,36 @@ static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa) return result | mii_adv_to_ethtool_adv_x(lpa); } +/** + * mii_adv_mod_linkmode_adv_t + * @advertising:pointer to destination link mode. + * @adv: value of the MII_ADVERTISE register + * + * A small helper function that translates MII_ADVERTISE bits to + * linkmode advertisement settings. Leaves other bits unchanged. 
+ */ +static inline void mii_adv_mod_linkmode_adv_t(unsigned long *advertising, + u32 adv) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + advertising, adv & ADVERTISE_10HALF); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + advertising, adv & ADVERTISE_10FULL); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + advertising, adv & ADVERTISE_100HALF); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + advertising, adv & ADVERTISE_100FULL); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising, + adv & ADVERTISE_PAUSE_CAP); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + advertising, adv & ADVERTISE_PAUSE_ASYM); +} + /** * mii_adv_to_linkmode_adv_t * @advertising:pointer to destination link mode. @@ -386,22 +416,7 @@ static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising, { linkmode_zero(advertising); - if (adv & ADVERTISE_10HALF) - linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, - advertising); - if (adv & ADVERTISE_10FULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, - advertising); - if (adv & ADVERTISE_100HALF) - linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, - advertising); - if (adv & ADVERTISE_100FULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, - advertising); - if (adv & ADVERTISE_PAUSE_CAP) - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising); - if (adv & ADVERTISE_PAUSE_ASYM) - linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising); + mii_adv_mod_linkmode_adv_t(advertising, adv); } /** @@ -423,6 +438,27 @@ static inline void mii_lpa_to_linkmode_lpa_t(unsigned long *lp_advertising, } +/** + * mii_lpa_mod_linkmode_lpa_t + * @adv: value of the MII_LPA register + * + * A small helper function that translates MII_LPA bits, when in + * 1000Base-T mode, to linkmode LP advertisement settings. Leaves + * other bits unchanged. 
+ */ +static inline void mii_lpa_mod_linkmode_lpa_t(unsigned long *lp_advertising, + u32 lpa) +{ + mii_adv_mod_linkmode_adv_t(lp_advertising, lpa); + + if (lpa & LPA_LPACK) + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + lp_advertising); + else + linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + lp_advertising); +} + /** * linkmode_adv_to_lcl_adv_t * @advertising:pointer to linkmode advertising -- cgit v1.2.3 From 6dbd0090f999c443763c0742b574da1ce189404c Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 5 Dec 2018 21:49:44 +0100 Subject: net: mii: mii_lpa_mod_linkmode_lpa_t: Make use of linkmode_mod_bit helper Replace the if else code structure with a call to the helper linkmode_mod_bit. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mii.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index e72447778a08..6fee8b1a4400 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -451,12 +451,8 @@ static inline void mii_lpa_mod_linkmode_lpa_t(unsigned long *lp_advertising, { mii_adv_mod_linkmode_adv_t(lp_advertising, lpa); - if (lpa & LPA_LPACK) - linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - lp_advertising); - else - linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - lp_advertising); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + lp_advertising, lpa & LPA_LPACK); } /** -- cgit v1.2.3 From 186bddb28ff9f61250d1b33554321d0bf5d085f6 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 3 Dec 2018 13:44:35 -0300 Subject: kref/kobject: Improve documentation The current kref and kobject documentation may be insufficient to understand these common pitfalls regarding object lifetime and object releasing. Add a bit more documentation and improve the warnings seen by the user, pointing to the right piece of documentation. 
Also, it's important to understand that making fun of people publicly is not at all helpful, doesn't provide any value, and it's not a healthy way of encouraging developers to do better. "Mocking mercilessly" will, if anything, make developers feel bad and go away. This kind of behavior should not be encouraged or justified. Signed-off-by: Ezequiel Garcia Signed-off-by: Enric Balletbo i Serra Signed-off-by: Gustavo Padovan Signed-off-by: Matthias Brugger Acked-by: Daniel Vetter Acked-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Documentation/kobject.txt | 10 +++++++--- drivers/base/core.c | 3 +-- include/linux/kref.h | 5 +---- lib/kobject.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt index fc9485d79061..ff4c25098119 100644 --- a/Documentation/kobject.txt +++ b/Documentation/kobject.txt @@ -279,10 +279,14 @@ such a method has a form like:: One important point cannot be overstated: every kobject must have a release() method, and the kobject must persist (in a consistent state) until that method is called. If these constraints are not met, the code is -flawed. Note that the kernel will warn you if you forget to provide a +flawed. Note that the kernel will warn you if you forget to provide a release() method. Do not try to get rid of this warning by providing an -"empty" release function; you will be mocked mercilessly by the kobject -maintainer if you attempt this. +"empty" release function. + +If all your cleanup function needs to do is call kfree(), then you must +create a wrapper function which uses container_of() to upcast to the correct +type (as shown in the example above) and then calls kfree() on the overall +structure. Note, the name of the kobject is available in the release function, but it must NOT be changed within this callback. 
Otherwise there will be a memory diff --git a/drivers/base/core.c b/drivers/base/core.c index ed145fbfeddf..e2285059161d 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -897,8 +897,7 @@ static void device_release(struct kobject *kobj) else if (dev->class && dev->class->dev_release) dev->class->dev_release(dev); else - WARN(1, KERN_ERR "Device '%s' does not have a release() " - "function, it is broken and must be fixed.\n", + WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt.\n", dev_name(dev)); kfree(p); } diff --git a/include/linux/kref.h b/include/linux/kref.h index 29220724bf1c..cb00a0268061 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -53,10 +53,7 @@ static inline void kref_get(struct kref *kref) * @release: pointer to the function that will clean up the object when the * last reference to the object is released. * This pointer is required, and it is not acceptable to pass kfree - * in as this function. If the caller does pass kfree to this - * function, you will be publicly mocked mercilessly by the kref - * maintainer, and anyone else who happens to notice it. You have - * been warned. + * in as this function. * * Decrement the refcount, and if 0, call release(). * Return 1 if the object was removed, otherwise return 0. Beware, if this diff --git a/lib/kobject.c b/lib/kobject.c index 97d86dc17c42..b72e00fd7d09 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -639,7 +639,7 @@ static void kobject_cleanup(struct kobject *kobj) kobject_name(kobj), kobj, __func__, kobj->parent); if (t && !t->release) - pr_debug("kobject: '%s' (%p): does not have a release() function, it is broken and must be fixed.\n", + pr_debug("kobject: '%s' (%p): does not have a release() function, it is broken and must be fixed. 
See Documentation/kobject.txt.\n", kobject_name(kobj), kobj); /* send "remove" if the caller did not do it but sent "add" */ -- cgit v1.2.3 From 69c32972d59388c041268e8206e8eb1acff29b9a Mon Sep 17 00:00:00 2001 From: "Kulkarni, Ganapatrao" Date: Thu, 6 Dec 2018 11:51:31 +0000 Subject: drivers/perf: Add Cavium ThunderX2 SoC UNCORE PMU driver This patch adds a perf driver for the PMU UNCORE devices DDR4 Memory Controller(DMC) and Level 3 Cache(L3C). Each PMU supports up to 4 counters. All counters lack overflow interrupt and are sampled periodically. Reviewed-by: Suzuki K Poulose Signed-off-by: Ganapatrao Kulkarni [will: consistent enum cpuhp_state naming] Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 9 + drivers/perf/Makefile | 1 + drivers/perf/thunderx2_pmu.c | 861 +++++++++++++++++++++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 4 files changed, 872 insertions(+) create mode 100644 drivers/perf/thunderx2_pmu.c (limited to 'include/linux') diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 08ebaf7cca8b..af9bc178495d 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -87,6 +87,15 @@ config QCOM_L3_PMU Adds the L3 cache PMU into the perf events subsystem for monitoring L3 cache events. +config THUNDERX2_PMU + tristate "Cavium ThunderX2 SoC PMU UNCORE" + depends on ARCH_THUNDER2 && ARM64 && ACPI && NUMA + default m + help + Provides support for ThunderX2 UNCORE events. + The SoC has PMU support in its L3 cache controller (L3C) and + in the DDR4 Memory Controller (DMC). 
+ config XGENE_PMU depends on ARCH_XGENE bool "APM X-Gene SoC PMU" diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index b3902bd37d53..909f27fd9db3 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -7,5 +7,6 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o obj-$(CONFIG_HISI_PMU) += hisilicon/ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o +obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c new file mode 100644 index 000000000000..c9a1701d3e54 --- /dev/null +++ b/drivers/perf/thunderx2_pmu.c @@ -0,0 +1,861 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CAVIUM THUNDERX2 SoC PMU UNCORE + * Copyright (C) 2018 Cavium Inc. + * Author: Ganapatrao Kulkarni + */ + +#include +#include +#include +#include + +/* Each ThunderX2(TX2) Socket has a L3C and DMC UNCORE PMU device. + * Each UNCORE PMU device consists of 4 independent programmable counters. + * Counters are 32 bit and do not support overflow interrupt, + * they need to be sampled before overflow(i.e, at every 2 seconds). + */ + +#define TX2_PMU_MAX_COUNTERS 4 +#define TX2_PMU_DMC_CHANNELS 8 +#define TX2_PMU_L3_TILES 16 + +#define TX2_PMU_HRTIMER_INTERVAL (2 * NSEC_PER_SEC) +#define GET_EVENTID(ev) ((ev->hw.config) & 0x1f) +#define GET_COUNTERID(ev) ((ev->hw.idx) & 0x3) + /* 1 byte per counter(4 counters). 
+ * Event id is encoded in bits [5:1] of a byte, + */ +#define DMC_EVENT_CFG(idx, val) ((val) << (((idx) * 8) + 1)) + +#define L3C_COUNTER_CTL 0xA8 +#define L3C_COUNTER_DATA 0xAC +#define DMC_COUNTER_CTL 0x234 +#define DMC_COUNTER_DATA 0x240 + +/* L3C event IDs */ +#define L3_EVENT_READ_REQ 0xD +#define L3_EVENT_WRITEBACK_REQ 0xE +#define L3_EVENT_INV_N_WRITE_REQ 0xF +#define L3_EVENT_INV_REQ 0x10 +#define L3_EVENT_EVICT_REQ 0x13 +#define L3_EVENT_INV_N_WRITE_HIT 0x14 +#define L3_EVENT_INV_HIT 0x15 +#define L3_EVENT_READ_HIT 0x17 +#define L3_EVENT_MAX 0x18 + +/* DMC event IDs */ +#define DMC_EVENT_COUNT_CYCLES 0x1 +#define DMC_EVENT_WRITE_TXNS 0xB +#define DMC_EVENT_DATA_TRANSFERS 0xD +#define DMC_EVENT_READ_TXNS 0xF +#define DMC_EVENT_MAX 0x10 + +enum tx2_uncore_type { + PMU_TYPE_L3C, + PMU_TYPE_DMC, + PMU_TYPE_INVALID, +}; + +/* + * pmu on each socket has 2 uncore devices(dmc and l3c), + * each device has 4 counters. + */ +struct tx2_uncore_pmu { + struct hlist_node hpnode; + struct list_head entry; + struct pmu pmu; + char *name; + int node; + int cpu; + u32 max_counters; + u32 prorate_factor; + u32 max_events; + u64 hrtimer_interval; + void __iomem *base; + DECLARE_BITMAP(active_counters, TX2_PMU_MAX_COUNTERS); + struct perf_event *events[TX2_PMU_MAX_COUNTERS]; + struct device *dev; + struct hrtimer hrtimer; + const struct attribute_group **attr_groups; + enum tx2_uncore_type type; + void (*init_cntr_base)(struct perf_event *event, + struct tx2_uncore_pmu *tx2_pmu); + void (*stop_event)(struct perf_event *event); + void (*start_event)(struct perf_event *event, int flags); +}; + +static LIST_HEAD(tx2_pmus); + +static inline struct tx2_uncore_pmu *pmu_to_tx2_pmu(struct pmu *pmu) +{ + return container_of(pmu, struct tx2_uncore_pmu, pmu); +} + +PMU_FORMAT_ATTR(event, "config:0-4"); + +static struct attribute *l3c_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute *dmc_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, 
+}; + +static const struct attribute_group l3c_pmu_format_attr_group = { + .name = "format", + .attrs = l3c_pmu_format_attrs, +}; + +static const struct attribute_group dmc_pmu_format_attr_group = { + .name = "format", + .attrs = dmc_pmu_format_attrs, +}; + +/* + * sysfs event attributes + */ +static ssize_t tx2_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + return sprintf(buf, "event=0x%lx\n", (unsigned long) eattr->var); +} + +#define TX2_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR(name, tx2_pmu_event_attr_##name, \ + config, tx2_pmu_event_show) + +TX2_EVENT_ATTR(read_request, L3_EVENT_READ_REQ); +TX2_EVENT_ATTR(writeback_request, L3_EVENT_WRITEBACK_REQ); +TX2_EVENT_ATTR(inv_nwrite_request, L3_EVENT_INV_N_WRITE_REQ); +TX2_EVENT_ATTR(inv_request, L3_EVENT_INV_REQ); +TX2_EVENT_ATTR(evict_request, L3_EVENT_EVICT_REQ); +TX2_EVENT_ATTR(inv_nwrite_hit, L3_EVENT_INV_N_WRITE_HIT); +TX2_EVENT_ATTR(inv_hit, L3_EVENT_INV_HIT); +TX2_EVENT_ATTR(read_hit, L3_EVENT_READ_HIT); + +static struct attribute *l3c_pmu_events_attrs[] = { + &tx2_pmu_event_attr_read_request.attr.attr, + &tx2_pmu_event_attr_writeback_request.attr.attr, + &tx2_pmu_event_attr_inv_nwrite_request.attr.attr, + &tx2_pmu_event_attr_inv_request.attr.attr, + &tx2_pmu_event_attr_evict_request.attr.attr, + &tx2_pmu_event_attr_inv_nwrite_hit.attr.attr, + &tx2_pmu_event_attr_inv_hit.attr.attr, + &tx2_pmu_event_attr_read_hit.attr.attr, + NULL, +}; + +TX2_EVENT_ATTR(cnt_cycles, DMC_EVENT_COUNT_CYCLES); +TX2_EVENT_ATTR(write_txns, DMC_EVENT_WRITE_TXNS); +TX2_EVENT_ATTR(data_transfers, DMC_EVENT_DATA_TRANSFERS); +TX2_EVENT_ATTR(read_txns, DMC_EVENT_READ_TXNS); + +static struct attribute *dmc_pmu_events_attrs[] = { + &tx2_pmu_event_attr_cnt_cycles.attr.attr, + &tx2_pmu_event_attr_write_txns.attr.attr, + &tx2_pmu_event_attr_data_transfers.attr.attr, + 
&tx2_pmu_event_attr_read_txns.attr.attr, + NULL, +}; + +static const struct attribute_group l3c_pmu_events_attr_group = { + .name = "events", + .attrs = l3c_pmu_events_attrs, +}; + +static const struct attribute_group dmc_pmu_events_attr_group = { + .name = "events", + .attrs = dmc_pmu_events_attrs, +}; + +/* + * sysfs cpumask attributes + */ +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tx2_uncore_pmu *tx2_pmu; + + tx2_pmu = pmu_to_tx2_pmu(dev_get_drvdata(dev)); + return cpumap_print_to_pagebuf(true, buf, cpumask_of(tx2_pmu->cpu)); +} +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *tx2_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group pmu_cpumask_attr_group = { + .attrs = tx2_pmu_cpumask_attrs, +}; + +/* + * Per PMU device attribute groups + */ +static const struct attribute_group *l3c_pmu_attr_groups[] = { + &l3c_pmu_format_attr_group, + &pmu_cpumask_attr_group, + &l3c_pmu_events_attr_group, + NULL +}; + +static const struct attribute_group *dmc_pmu_attr_groups[] = { + &dmc_pmu_format_attr_group, + &pmu_cpumask_attr_group, + &dmc_pmu_events_attr_group, + NULL +}; + +static inline u32 reg_readl(unsigned long addr) +{ + return readl((void __iomem *)addr); +} + +static inline void reg_writel(u32 val, unsigned long addr) +{ + writel(val, (void __iomem *)addr); +} + +static int alloc_counter(struct tx2_uncore_pmu *tx2_pmu) +{ + int counter; + + counter = find_first_zero_bit(tx2_pmu->active_counters, + tx2_pmu->max_counters); + if (counter == tx2_pmu->max_counters) + return -ENOSPC; + + set_bit(counter, tx2_pmu->active_counters); + return counter; +} + +static inline void free_counter(struct tx2_uncore_pmu *tx2_pmu, int counter) +{ + clear_bit(counter, tx2_pmu->active_counters); +} + +static void init_cntr_base_l3c(struct perf_event *event, + struct tx2_uncore_pmu *tx2_pmu) +{ + struct hw_perf_event *hwc = &event->hw; + + /* counter ctrl/data reg 
offset at 8 */ + hwc->config_base = (unsigned long)tx2_pmu->base + + L3C_COUNTER_CTL + (8 * GET_COUNTERID(event)); + hwc->event_base = (unsigned long)tx2_pmu->base + + L3C_COUNTER_DATA + (8 * GET_COUNTERID(event)); +} + +static void init_cntr_base_dmc(struct perf_event *event, + struct tx2_uncore_pmu *tx2_pmu) +{ + struct hw_perf_event *hwc = &event->hw; + + hwc->config_base = (unsigned long)tx2_pmu->base + + DMC_COUNTER_CTL; + /* counter data reg offset at 0xc */ + hwc->event_base = (unsigned long)tx2_pmu->base + + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event)); +} + +static void uncore_start_event_l3c(struct perf_event *event, int flags) +{ + u32 val; + struct hw_perf_event *hwc = &event->hw; + + /* event id encoded in bits [07:03] */ + val = GET_EVENTID(event) << 3; + reg_writel(val, hwc->config_base); + local64_set(&hwc->prev_count, 0); + reg_writel(0, hwc->event_base); +} + +static inline void uncore_stop_event_l3c(struct perf_event *event) +{ + reg_writel(0, event->hw.config_base); +} + +static void uncore_start_event_dmc(struct perf_event *event, int flags) +{ + u32 val; + struct hw_perf_event *hwc = &event->hw; + int idx = GET_COUNTERID(event); + int event_id = GET_EVENTID(event); + + /* enable and start counters. + * 8 bits for each counter, bits[05:01] of a counter to set event type. 
+ */ + val = reg_readl(hwc->config_base); + val &= ~DMC_EVENT_CFG(idx, 0x1f); + val |= DMC_EVENT_CFG(idx, event_id); + reg_writel(val, hwc->config_base); + local64_set(&hwc->prev_count, 0); + reg_writel(0, hwc->event_base); +} + +static void uncore_stop_event_dmc(struct perf_event *event) +{ + u32 val; + struct hw_perf_event *hwc = &event->hw; + int idx = GET_COUNTERID(event); + + /* clear event type(bits[05:01]) to stop counter */ + val = reg_readl(hwc->config_base); + val &= ~DMC_EVENT_CFG(idx, 0x1f); + reg_writel(val, hwc->config_base); +} + +static void tx2_uncore_event_update(struct perf_event *event) +{ + s64 prev, delta, new = 0; + struct hw_perf_event *hwc = &event->hw; + struct tx2_uncore_pmu *tx2_pmu; + enum tx2_uncore_type type; + u32 prorate_factor; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + type = tx2_pmu->type; + prorate_factor = tx2_pmu->prorate_factor; + + new = reg_readl(hwc->event_base); + prev = local64_xchg(&hwc->prev_count, new); + + /* handles rollover of 32 bit counter */ + delta = (u32)(((1UL << 32) - prev) + new); + + /* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */ + if (type == PMU_TYPE_DMC && + GET_EVENTID(event) == DMC_EVENT_DATA_TRANSFERS) + delta = delta/4; + + /* L3C and DMC has 16 and 8 interleave channels respectively. + * The sampled value is for channel 0 and multiplied with + * prorate_factor to get the count for a device. 
+ */ + local64_add(delta * prorate_factor, &event->count); +} + +static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev) +{ + int i = 0; + struct acpi_tx2_pmu_device { + __u8 id[ACPI_ID_LEN]; + enum tx2_uncore_type type; + } devices[] = { + {"CAV901D", PMU_TYPE_L3C}, + {"CAV901F", PMU_TYPE_DMC}, + {"", PMU_TYPE_INVALID} + }; + + while (devices[i].type != PMU_TYPE_INVALID) { + if (!strcmp(acpi_device_hid(adev), devices[i].id)) + break; + i++; + } + + return devices[i].type; +} + +static bool tx2_uncore_validate_event(struct pmu *pmu, + struct perf_event *event, int *counters) +{ + if (is_software_event(event)) + return true; + /* Reject groups spanning multiple HW PMUs. */ + if (event->pmu != pmu) + return false; + + *counters = *counters + 1; + return true; +} + +/* + * Make sure the group of events can be scheduled at once + * on the PMU. + */ +static bool tx2_uncore_validate_event_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + int counters = 0; + + if (event->group_leader == event) + return true; + + if (!tx2_uncore_validate_event(event->pmu, leader, &counters)) + return false; + + for_each_sibling_event(sibling, leader) { + if (!tx2_uncore_validate_event(event->pmu, sibling, &counters)) + return false; + } + + if (!tx2_uncore_validate_event(event->pmu, event, &counters)) + return false; + + /* + * If the group requires more counters than the HW has, + * it cannot ever be scheduled. + */ + return counters <= TX2_PMU_MAX_COUNTERS; +} + + +static int tx2_uncore_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct tx2_uncore_pmu *tx2_pmu; + + /* Test the event attr type check for PMU enumeration */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* + * SOC PMU counters are shared across all cores. + * Therefore, it does not support per-process mode. + * Also, it does not support event sampling mode. 
+ */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + /* We have no filtering of any kind */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + if (tx2_pmu->cpu >= nr_cpu_ids) + return -EINVAL; + event->cpu = tx2_pmu->cpu; + + if (event->attr.config >= tx2_pmu->max_events) + return -EINVAL; + + /* store event id */ + hwc->config = event->attr.config; + + /* Validate the group */ + if (!tx2_uncore_validate_event_group(event)) + return -EINVAL; + + return 0; +} + +static void tx2_uncore_event_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct tx2_uncore_pmu *tx2_pmu; + + hwc->state = 0; + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + + tx2_pmu->start_event(event, flags); + perf_event_update_userpage(event); + + /* Start timer for first event */ + if (bitmap_weight(tx2_pmu->active_counters, + tx2_pmu->max_counters) == 1) { + hrtimer_start(&tx2_pmu->hrtimer, + ns_to_ktime(tx2_pmu->hrtimer_interval), + HRTIMER_MODE_REL_PINNED); + } +} + +static void tx2_uncore_event_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct tx2_uncore_pmu *tx2_pmu; + + if (hwc->state & PERF_HES_UPTODATE) + return; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + tx2_pmu->stop_event(event); + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + if (flags & PERF_EF_UPDATE) { + tx2_uncore_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int tx2_uncore_event_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct tx2_uncore_pmu *tx2_pmu; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + + /* Allocate a free counter */ + hwc->idx = alloc_counter(tx2_pmu); + if 
(hwc->idx < 0) + return -EAGAIN; + + tx2_pmu->events[hwc->idx] = event; + /* set counter control and data registers base address */ + tx2_pmu->init_cntr_base(event, tx2_pmu); + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) + tx2_uncore_event_start(event, flags); + + return 0; +} + +static void tx2_uncore_event_del(struct perf_event *event, int flags) +{ + struct tx2_uncore_pmu *tx2_pmu = pmu_to_tx2_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + tx2_uncore_event_stop(event, PERF_EF_UPDATE); + + /* clear the assigned counter */ + free_counter(tx2_pmu, GET_COUNTERID(event)); + + perf_event_update_userpage(event); + tx2_pmu->events[hwc->idx] = NULL; + hwc->idx = -1; +} + +static void tx2_uncore_event_read(struct perf_event *event) +{ + tx2_uncore_event_update(event); +} + +static enum hrtimer_restart tx2_hrtimer_callback(struct hrtimer *timer) +{ + struct tx2_uncore_pmu *tx2_pmu; + int max_counters, idx; + + tx2_pmu = container_of(timer, struct tx2_uncore_pmu, hrtimer); + max_counters = tx2_pmu->max_counters; + + if (bitmap_empty(tx2_pmu->active_counters, max_counters)) + return HRTIMER_NORESTART; + + for_each_set_bit(idx, tx2_pmu->active_counters, max_counters) { + struct perf_event *event = tx2_pmu->events[idx]; + + tx2_uncore_event_update(event); + } + hrtimer_forward_now(timer, ns_to_ktime(tx2_pmu->hrtimer_interval)); + return HRTIMER_RESTART; +} + +static int tx2_uncore_pmu_register( + struct tx2_uncore_pmu *tx2_pmu) +{ + struct device *dev = tx2_pmu->dev; + char *name = tx2_pmu->name; + + /* Perf event registration */ + tx2_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .attr_groups = tx2_pmu->attr_groups, + .task_ctx_nr = perf_invalid_context, + .event_init = tx2_uncore_event_init, + .add = tx2_uncore_event_add, + .del = tx2_uncore_event_del, + .start = tx2_uncore_event_start, + .stop = tx2_uncore_event_stop, + .read = tx2_uncore_event_read, + }; + + tx2_pmu->pmu.name = devm_kasprintf(dev, GFP_KERNEL, + 
"%s", name); + + return perf_pmu_register(&tx2_pmu->pmu, tx2_pmu->pmu.name, -1); +} + +static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu) +{ + int ret, cpu; + + cpu = cpumask_any_and(cpumask_of_node(tx2_pmu->node), + cpu_online_mask); + + tx2_pmu->cpu = cpu; + hrtimer_init(&tx2_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + tx2_pmu->hrtimer.function = tx2_hrtimer_callback; + + ret = tx2_uncore_pmu_register(tx2_pmu); + if (ret) { + dev_err(tx2_pmu->dev, "%s PMU: Failed to init driver\n", + tx2_pmu->name); + return -ENODEV; + } + + /* register hotplug callback for the pmu */ + ret = cpuhp_state_add_instance( + CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, + &tx2_pmu->hpnode); + if (ret) { + dev_err(tx2_pmu->dev, "Error %d registering hotplug", ret); + return ret; + } + + /* Add to list */ + list_add(&tx2_pmu->entry, &tx2_pmus); + + dev_dbg(tx2_pmu->dev, "%s PMU UNCORE registered\n", + tx2_pmu->pmu.name); + return ret; +} + +static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, + acpi_handle handle, struct acpi_device *adev, u32 type) +{ + struct tx2_uncore_pmu *tx2_pmu; + void __iomem *base; + struct resource res; + struct resource_entry *rentry; + struct list_head list; + int ret; + + INIT_LIST_HEAD(&list); + ret = acpi_dev_get_resources(adev, &list, NULL, NULL); + if (ret <= 0) { + dev_err(dev, "failed to parse _CRS method, error %d\n", ret); + return NULL; + } + + list_for_each_entry(rentry, &list, node) { + if (resource_type(rentry->res) == IORESOURCE_MEM) { + res = *rentry->res; + break; + } + } + + if (!rentry->res) + return NULL; + + acpi_dev_free_resource_list(&list); + base = devm_ioremap_resource(dev, &res); + if (IS_ERR(base)) { + dev_err(dev, "PMU type %d: Fail to map resource\n", type); + return NULL; + } + + tx2_pmu = devm_kzalloc(dev, sizeof(*tx2_pmu), GFP_KERNEL); + if (!tx2_pmu) + return NULL; + + tx2_pmu->dev = dev; + tx2_pmu->type = type; + tx2_pmu->base = base; + tx2_pmu->node = dev_to_node(dev); + 
INIT_LIST_HEAD(&tx2_pmu->entry); + + switch (tx2_pmu->type) { + case PMU_TYPE_L3C: + tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS; + tx2_pmu->prorate_factor = TX2_PMU_L3_TILES; + tx2_pmu->max_events = L3_EVENT_MAX; + tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL; + tx2_pmu->attr_groups = l3c_pmu_attr_groups; + tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, + "uncore_l3c_%d", tx2_pmu->node); + tx2_pmu->init_cntr_base = init_cntr_base_l3c; + tx2_pmu->start_event = uncore_start_event_l3c; + tx2_pmu->stop_event = uncore_stop_event_l3c; + break; + case PMU_TYPE_DMC: + tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS; + tx2_pmu->prorate_factor = TX2_PMU_DMC_CHANNELS; + tx2_pmu->max_events = DMC_EVENT_MAX; + tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL; + tx2_pmu->attr_groups = dmc_pmu_attr_groups; + tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, + "uncore_dmc_%d", tx2_pmu->node); + tx2_pmu->init_cntr_base = init_cntr_base_dmc; + tx2_pmu->start_event = uncore_start_event_dmc; + tx2_pmu->stop_event = uncore_stop_event_dmc; + break; + case PMU_TYPE_INVALID: + devm_kfree(dev, tx2_pmu); + return NULL; + } + + return tx2_pmu; +} + +static acpi_status tx2_uncore_pmu_add(acpi_handle handle, u32 level, + void *data, void **return_value) +{ + struct tx2_uncore_pmu *tx2_pmu; + struct acpi_device *adev; + enum tx2_uncore_type type; + + if (acpi_bus_get_device(handle, &adev)) + return AE_OK; + if (acpi_bus_get_status(adev) || !adev->status.present) + return AE_OK; + + type = get_tx2_pmu_type(adev); + if (type == PMU_TYPE_INVALID) + return AE_OK; + + tx2_pmu = tx2_uncore_pmu_init_dev((struct device *)data, + handle, adev, type); + + if (!tx2_pmu) + return AE_ERROR; + + if (tx2_uncore_pmu_add_dev(tx2_pmu)) { + /* Can't add the PMU device, abort */ + return AE_ERROR; + } + return AE_OK; +} + +static int tx2_uncore_pmu_online_cpu(unsigned int cpu, + struct hlist_node *hpnode) +{ + struct tx2_uncore_pmu *tx2_pmu; + + tx2_pmu = hlist_entry_safe(hpnode, + struct tx2_uncore_pmu, 
hpnode); + + /* Pick this CPU, If there is no CPU/PMU association and both are + * from same node. + */ + if ((tx2_pmu->cpu >= nr_cpu_ids) && + (tx2_pmu->node == cpu_to_node(cpu))) + tx2_pmu->cpu = cpu; + + return 0; +} + +static int tx2_uncore_pmu_offline_cpu(unsigned int cpu, + struct hlist_node *hpnode) +{ + int new_cpu; + struct tx2_uncore_pmu *tx2_pmu; + struct cpumask cpu_online_mask_temp; + + tx2_pmu = hlist_entry_safe(hpnode, + struct tx2_uncore_pmu, hpnode); + + if (cpu != tx2_pmu->cpu) + return 0; + + hrtimer_cancel(&tx2_pmu->hrtimer); + cpumask_copy(&cpu_online_mask_temp, cpu_online_mask); + cpumask_clear_cpu(cpu, &cpu_online_mask_temp); + new_cpu = cpumask_any_and( + cpumask_of_node(tx2_pmu->node), + &cpu_online_mask_temp); + + tx2_pmu->cpu = new_cpu; + if (new_cpu >= nr_cpu_ids) + return 0; + perf_pmu_migrate_context(&tx2_pmu->pmu, cpu, new_cpu); + + return 0; +} + +static const struct acpi_device_id tx2_uncore_acpi_match[] = { + {"CAV901C", 0}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, tx2_uncore_acpi_match); + +static int tx2_uncore_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + acpi_handle handle; + acpi_status status; + + set_dev_node(dev, acpi_get_node(ACPI_HANDLE(dev))); + + if (!has_acpi_companion(dev)) + return -ENODEV; + + handle = ACPI_HANDLE(dev); + if (!handle) + return -EINVAL; + + /* Walk through the tree for all PMU UNCORE devices */ + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, + tx2_uncore_pmu_add, + NULL, dev, NULL); + if (ACPI_FAILURE(status)) { + dev_err(dev, "failed to probe PMU devices\n"); + return_ACPI_STATUS(status); + } + + dev_info(dev, "node%d: pmu uncore registered\n", dev_to_node(dev)); + return 0; +} + +static int tx2_uncore_remove(struct platform_device *pdev) +{ + struct tx2_uncore_pmu *tx2_pmu, *temp; + struct device *dev = &pdev->dev; + + if (!list_empty(&tx2_pmus)) { + list_for_each_entry_safe(tx2_pmu, temp, &tx2_pmus, entry) { + if (tx2_pmu->node == dev_to_node(dev)) { + 
cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, + &tx2_pmu->hpnode); + perf_pmu_unregister(&tx2_pmu->pmu); + list_del(&tx2_pmu->entry); + } + } + } + return 0; +} + +static struct platform_driver tx2_uncore_driver = { + .driver = { + .name = "tx2-uncore-pmu", + .acpi_match_table = ACPI_PTR(tx2_uncore_acpi_match), + }, + .probe = tx2_uncore_probe, + .remove = tx2_uncore_remove, +}; + +static int __init tx2_uncore_driver_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, + "perf/tx2/uncore:online", + tx2_uncore_pmu_online_cpu, + tx2_uncore_pmu_offline_cpu); + if (ret) { + pr_err("TX2 PMU: setup hotplug failed(%d)\n", ret); + return ret; + } + ret = platform_driver_register(&tx2_uncore_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE); + + return ret; +} +module_init(tx2_uncore_driver_init); + +static void __exit tx2_uncore_driver_exit(void) +{ + platform_driver_unregister(&tx2_uncore_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE); +} +module_exit(tx2_uncore_driver_exit); + +MODULE_DESCRIPTION("ThunderX2 UNCORE PMU driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Ganapatrao Kulkarni "); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d007a319dfd4..fd586d0301e7 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -165,6 +165,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, + CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, -- cgit v1.2.3 From 92a98a2b9f64a8b3c200a7709ceae04d09c39451 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 15 Nov 2018 14:52:41 +0900 Subject: kexec_file: make kexec_image_post_load_cleanup_default() global Change this function from 
static to global so that arm64 can implement its own arch_kimage_file_post_load_cleanup() later using kexec_image_post_load_cleanup_default(). Signed-off-by: AKASHI Takahiro Acked-by: Dave Young Cc: Vivek Goyal Cc: Baoquan He Signed-off-by: Will Deacon --- include/linux/kexec.h | 1 + kernel/kexec_file.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 9e4e638fb505..49ab758f4d91 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -143,6 +143,7 @@ extern const struct kexec_file_ops * const kexec_file_loaders[]; int kexec_image_probe_default(struct kimage *image, void *buf, unsigned long buf_len); +int kexec_image_post_load_cleanup_default(struct kimage *image); /** * struct kexec_buf - parameters for finding a place for a buffer in memory diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 35cf0ad29718..9ce6672f4fa3 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -76,7 +76,7 @@ void * __weak arch_kexec_kernel_image_load(struct kimage *image) return kexec_image_load_default(image); } -static int kexec_image_post_load_cleanup_default(struct kimage *image) +int kexec_image_post_load_cleanup_default(struct kimage *image) { if (!image->fops || !image->fops->cleanup) return 0; -- cgit v1.2.3 From b6664ba42f1424d2768b605dd60cecc4428d9364 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 15 Nov 2018 14:52:42 +0900 Subject: s390, kexec_file: drop arch_kexec_mem_walk() Since s390 already knows where to locate buffers, calling arch_kexec_mem_walk() has no sense. So we can just drop it as kbuf->mem indicates this while all other architectures sets it to 0 initially. This change is a preparatory work for the next patch, where all the variant memory walks, either on system resource or memblock, will be put in one common place so that it will satisfy all the architectures' need. 
Signed-off-by: AKASHI Takahiro Reviewed-by: Philipp Rudo Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Dave Young Cc: Vivek Goyal Cc: Baoquan He Signed-off-by: Will Deacon --- arch/s390/kernel/machine_kexec_file.c | 10 ---------- include/linux/kexec.h | 8 ++++++++ kernel/kexec_file.c | 4 ++++ 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index f413f57f8d20..32023b4f9dc0 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -134,16 +134,6 @@ int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data, return ret; } -/* - * The kernel is loaded to a fixed location. Turn off kexec_locate_mem_hole - * and provide kbuf->mem by hand. - */ -int arch_kexec_walk_mem(struct kexec_buf *kbuf, - int (*func)(struct resource *, void *)) -{ - return 1; -} - int arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, const Elf_Shdr *relsec, diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 49ab758f4d91..f378cb786f1b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -145,6 +145,14 @@ int kexec_image_probe_default(struct kimage *image, void *buf, unsigned long buf_len); int kexec_image_post_load_cleanup_default(struct kimage *image); +/* + * If kexec_buf.mem is set to this value, kexec_locate_mem_hole() + * will try to allocate free memory. Arch may overwrite it. + */ +#ifndef KEXEC_BUF_MEM_UNKNOWN +#define KEXEC_BUF_MEM_UNKNOWN 0 +#endif + /** * struct kexec_buf - parameters for finding a place for a buffer in memory * @image: kexec image in which memory to search. 
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 9ce6672f4fa3..9e6529da12ed 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -532,6 +532,10 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf) { int ret; + /* Arch knows where to place */ + if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN) + return 0; + ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback); return ret == 1 ? 0 : -EADDRNOTAVAIL; -- cgit v1.2.3 From 735c2f90e333b3d0adee52a8e7e855a0c0eca284 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 15 Nov 2018 14:52:43 +0900 Subject: powerpc, kexec_file: factor out memblock-based arch_kexec_walk_mem() Memblock list is another source for usable system memory layout. So move powerpc's arch_kexec_walk_mem() to common code so that other memblock-based architectures, particularly arm64, can also utilise it. A moved function is now renamed to kexec_walk_memblock() and integrated into kexec_locate_mem_hole(), which will now be usable for all architectures with no need for overriding arch_kexec_walk_mem(). With this change, arch_kexec_walk_mem() need no longer be a weak function, and was now renamed to kexec_walk_resources(). Since powerpc doesn't support kdump in its kexec_file_load(), the current kexec_walk_memblock() won't work for kdump either in this form, this will be fixed in the next patch. Signed-off-by: AKASHI Takahiro Cc: "Eric W. 
Biederman" Acked-by: Dave Young Cc: Vivek Goyal Cc: Baoquan He Acked-by: James Morse Signed-off-by: Will Deacon --- arch/powerpc/kernel/machine_kexec_file_64.c | 54 ------------------------- include/linux/kexec.h | 2 - kernel/kexec_file.c | 61 +++++++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c index c77e95e9b384..0d20c7ad40fa 100644 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -24,7 +24,6 @@ #include #include -#include #include #include #include @@ -46,59 +45,6 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, return kexec_image_probe_default(image, buf, buf_len); } -/** - * arch_kexec_walk_mem - call func(data) for each unreserved memory block - * @kbuf: Context info for the search. Also passed to @func. - * @func: Function to call for each memory block. - * - * This function is used by kexec_add_buffer and kexec_locate_mem_hole - * to find unreserved memory to load kexec segments into. - * - * Return: The memory walk will stop when func returns a non-zero value - * and that value will be returned. If all free regions are visited without - * func returning non-zero, then zero will be returned. - */ -int arch_kexec_walk_mem(struct kexec_buf *kbuf, - int (*func)(struct resource *, void *)) -{ - int ret = 0; - u64 i; - phys_addr_t mstart, mend; - struct resource res = { }; - - if (kbuf->top_down) { - for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, - &mstart, &mend, NULL) { - /* - * In memblock, end points to the first byte after the - * range while in kexec, end points to the last byte - * in the range. 
- */ - res.start = mstart; - res.end = mend - 1; - ret = func(&res, kbuf); - if (ret) - break; - } - } else { - for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend, - NULL) { - /* - * In memblock, end points to the first byte after the - * range while in kexec, end points to the last byte - * in the range. - */ - res.start = mstart; - res.end = mend - 1; - ret = func(&res, kbuf); - if (ret) - break; - } - } - - return ret; -} - /** * setup_purgatory - initialize the purgatory's global variables * @image: kexec image. diff --git a/include/linux/kexec.h b/include/linux/kexec.h index f378cb786f1b..d58d1f2fab10 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -192,8 +192,6 @@ int __weak arch_kexec_apply_relocations(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab); -int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf, - int (*func)(struct resource *, void *)); extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 9e6529da12ed..d03195a8cb6e 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -499,8 +500,57 @@ static int locate_mem_hole_callback(struct resource *res, void *arg) return locate_mem_hole_bottom_up(start, end, kbuf); } +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK +static int kexec_walk_memblock(struct kexec_buf *kbuf, + int (*func)(struct resource *, void *)) +{ + return 0; +} +#else +static int kexec_walk_memblock(struct kexec_buf *kbuf, + int (*func)(struct resource *, void *)) +{ + int ret = 0; + u64 i; + phys_addr_t mstart, mend; + struct resource res = { }; + + if (kbuf->top_down) { + for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, + &mstart, &mend, NULL) { + /* + * In memblock, end points to the first byte after the + * range while in kexec, end points to the last byte + * in the range. 
+ */ + res.start = mstart; + res.end = mend - 1; + ret = func(&res, kbuf); + if (ret) + break; + } + } else { + for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend, + NULL) { + /* + * In memblock, end points to the first byte after the + * range while in kexec, end points to the last byte + * in the range. + */ + res.start = mstart; + res.end = mend - 1; + ret = func(&res, kbuf); + if (ret) + break; + } + } + + return ret; +} +#endif + /** - * arch_kexec_walk_mem - call func(data) on free memory regions + * kexec_walk_resources - call func(data) on free memory regions * @kbuf: Context info for the search. Also passed to @func. * @func: Function to call for each memory region. * @@ -508,8 +558,8 @@ static int locate_mem_hole_callback(struct resource *res, void *arg) * and that value will be returned. If all free regions are visited without * func returning non-zero, then zero will be returned. */ -int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf, - int (*func)(struct resource *, void *)) +static int kexec_walk_resources(struct kexec_buf *kbuf, + int (*func)(struct resource *, void *)) { if (kbuf->image->type == KEXEC_TYPE_CRASH) return walk_iomem_res_desc(crashk_res.desc, @@ -536,7 +586,10 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf) if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN) return 0; - ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback); + if (IS_ENABLED(CONFIG_ARCH_DISCARD_MEMBLOCK)) + ret = kexec_walk_resources(kbuf, locate_mem_hole_callback); + else + ret = kexec_walk_memblock(kbuf, locate_mem_hole_callback); return ret == 1 ? 0 : -EADDRNOTAVAIL; } -- cgit v1.2.3 From 702ed5bb75306c030ab6598b24b56ba8d21a48dd Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 15 Nov 2018 14:52:53 +0900 Subject: include: pe.h: remove message[] from mz header definition message[] field won't be part of the definition of mz header. 
This change is crucial for enabling kexec_file_load on arm64 because arm64's "Image" binary, as in PE format, doesn't have any data for it and accordingly the following check in pefile_parse_binary() will fail: chkaddr(cursor, mz->peaddr, sizeof(*pe)); Signed-off-by: AKASHI Takahiro Reviewed-by: Ard Biesheuvel Cc: David Howells Cc: Vivek Goyal Cc: Herbert Xu Cc: David S. Miller Signed-off-by: Will Deacon --- include/linux/pe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pe.h b/include/linux/pe.h index 143ce75be5f0..3482b18a48b5 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -166,7 +166,7 @@ struct mz_hdr { uint16_t oem_info; /* oem specific */ uint16_t reserved1[10]; /* reserved */ uint32_t peaddr; /* address of pe header */ - char message[64]; /* message to print */ + char message[]; /* message to print */ }; struct mz_reloc { -- cgit v1.2.3 From 16688453661b6d5159be558a1f8c1f54463a420f Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 30 Nov 2018 11:53:20 +0000 Subject: nvmem: add type attribute Add a type attribute so userspace is able to know how the data is stored as this can help taking the correct decision when selecting which device to use. This will also help program display the proper warnings when burning fuses for example. 
Signed-off-by: Alexandre Belloni Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 21 +++++++++++++++++++++ include/linux/nvmem-provider.h | 16 ++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 27f67dfa649d..d9fd11033c1c 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -28,6 +28,7 @@ struct nvmem_device { size_t size; bool read_only; int flags; + enum nvmem_type type; struct bin_attribute eeprom; struct device *base_dev; struct list_head cells; @@ -83,6 +84,21 @@ static int nvmem_reg_write(struct nvmem_device *nvmem, unsigned int offset, return -EINVAL; } +static ssize_t type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvmem_device *nvmem = to_nvmem_device(dev); + + return sprintf(buf, "%s\n", nvmem_type_str[nvmem->type]); +} + +static DEVICE_ATTR_RO(type); + +static struct attribute *nvmem_attrs[] = { + &dev_attr_type.attr, + NULL, +}; + static ssize_t bin_attr_nvmem_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t pos, size_t count) @@ -168,6 +184,7 @@ static struct bin_attribute *nvmem_bin_rw_attributes[] = { static const struct attribute_group nvmem_bin_rw_group = { .bin_attrs = nvmem_bin_rw_attributes, + .attrs = nvmem_attrs, }; static const struct attribute_group *nvmem_rw_dev_groups[] = { @@ -191,6 +208,7 @@ static struct bin_attribute *nvmem_bin_ro_attributes[] = { static const struct attribute_group nvmem_bin_ro_group = { .bin_attrs = nvmem_bin_ro_attributes, + .attrs = nvmem_attrs, }; static const struct attribute_group *nvmem_ro_dev_groups[] = { @@ -215,6 +233,7 @@ static struct bin_attribute *nvmem_bin_rw_root_attributes[] = { static const struct attribute_group nvmem_bin_rw_root_group = { .bin_attrs = nvmem_bin_rw_root_attributes, + .attrs = nvmem_attrs, }; static const struct attribute_group *nvmem_rw_root_dev_groups[] = 
{ @@ -238,6 +257,7 @@ static struct bin_attribute *nvmem_bin_ro_root_attributes[] = { static const struct attribute_group nvmem_bin_ro_root_group = { .bin_attrs = nvmem_bin_ro_root_attributes, + .attrs = nvmem_attrs, }; static const struct attribute_group *nvmem_ro_root_dev_groups[] = { @@ -605,6 +625,7 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) nvmem->dev.bus = &nvmem_bus_type; nvmem->dev.parent = config->dev; nvmem->priv = config->priv; + nvmem->type = config->type; nvmem->reg_read = config->reg_read; nvmem->reg_write = config->reg_write; nvmem->dev.of_node = config->dev->of_node; diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 1e3283c2af77..00ff92571683 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -19,6 +19,20 @@ typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, void *val, size_t bytes); +enum nvmem_type { + NVMEM_TYPE_UNKNOWN = 0, + NVMEM_TYPE_EEPROM, + NVMEM_TYPE_OTP, + NVMEM_TYPE_BATTERY_BACKED, +}; + +static const char * const nvmem_type_str[] = { + [NVMEM_TYPE_UNKNOWN] = "Unknown", + [NVMEM_TYPE_EEPROM] = "EEPROM", + [NVMEM_TYPE_OTP] = "OTP", + [NVMEM_TYPE_BATTERY_BACKED] = "Battery backed", +}; + /** * struct nvmem_config - NVMEM device configuration * @@ -28,6 +42,7 @@ typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, * @owner: Pointer to exporter module. Used for refcounting. * @cells: Optional array of pre-defined NVMEM cells. * @ncells: Number of elements in cells. + * @type: Type of the nvmem storage * @read_only: Device is read-only. * @root_only: Device is accessibly to root only. * @reg_read: Callback to read data. 
@@ -51,6 +66,7 @@ struct nvmem_config { struct module *owner; const struct nvmem_cell_info *cells; int ncells; + enum nvmem_type type; bool read_only; bool root_only; nvmem_reg_read_t reg_read; -- cgit v1.2.3 From a8b44d5d2e38e94e4c20a3fba294c3375753b469 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Nov 2018 11:53:24 +0000 Subject: nvmem: Move nvmem_type_str array to its only user Since we put static variable to a header file it's copied to each module that includes the header. But not all of them are actually using it. Move nvmem_type_str array to its only user to make a compiler happy: In file included from include/linux/rtc.h:18, from drivers/rtc/rtc-proc.c:15: include/linux/nvmem-provider.h:29:27: warning: 'nvmem_type_str' defined but not used [-Wunused-const-variable=] static const char * const nvmem_type_str[] = { ^~~~~~~~~~~~~~ Suggested-by: Alexandre Belloni Suggested-by: Joe Perches Cc: Srinivas Kandagatla Signed-off-by: Andy Shevchenko Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 7 +++++++ include/linux/nvmem-provider.h | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index d9fd11033c1c..22345e65a301 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -61,6 +61,13 @@ static LIST_HEAD(nvmem_lookup_list); static BLOCKING_NOTIFIER_HEAD(nvmem_notifier); +static const char * const nvmem_type_str[] = { + [NVMEM_TYPE_UNKNOWN] = "Unknown", + [NVMEM_TYPE_EEPROM] = "EEPROM", + [NVMEM_TYPE_OTP] = "OTP", + [NVMEM_TYPE_BATTERY_BACKED] = "Battery backed", +}; + #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key eeprom_lock_key; #endif diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 00ff92571683..5b2dd0a987d2 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -26,13 +26,6 @@ enum nvmem_type { NVMEM_TYPE_BATTERY_BACKED, }; 
-static const char * const nvmem_type_str[] = { - [NVMEM_TYPE_UNKNOWN] = "Unknown", - [NVMEM_TYPE_EEPROM] = "EEPROM", - [NVMEM_TYPE_OTP] = "OTP", - [NVMEM_TYPE_BATTERY_BACKED] = "Battery backed", -}; - /** * struct nvmem_config - NVMEM device configuration * -- cgit v1.2.3 From 517f14d9cf3533d5ab4fded195ab6f80a92e378f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 30 Nov 2018 11:53:25 +0000 Subject: nvmem: add new config option We want to add nvmem support for MTD. TI DaVinci is the first platform that will be using it, but only in non-DT mode. In order not to introduce any new interface to supporting of which we would have to commit - add a new config option that tells nvmem not to use the DT node of the parent device. This way we won't be creating nvmem devices corresponding with MTD partitions defined in device tree. By default MTD will set this new field to true. Once a set of bindings for MTD nvmem cells is agreed upon, we'll be able to remove this option. Signed-off-by: Bartosz Golaszewski Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 3 ++- include/linux/nvmem-provider.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 22345e65a301..f7301bb4ef3b 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -635,7 +635,8 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) nvmem->type = config->type; nvmem->reg_read = config->reg_read; nvmem->reg_write = config->reg_write; - nvmem->dev.of_node = config->dev->of_node; + if (!config->no_of_node) + nvmem->dev.of_node = config->dev->of_node; if (config->id == -1 && config->name) { dev_set_name(&nvmem->dev, "%s", config->name); diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 5b2dd0a987d2..fe051323be0a 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -38,6 +38,7 @@ 
enum nvmem_type { * @type: Type of the nvmem storage * @read_only: Device is read-only. * @root_only: Device is accessibly to root only. + * @no_of_node: Device should not use the parent's of_node even if it's !NULL. * @reg_read: Callback to read data. * @reg_write: Callback to write data. * @size: Device size. @@ -62,6 +63,7 @@ struct nvmem_config { enum nvmem_type type; bool read_only; bool root_only; + bool no_of_node; nvmem_reg_read_t reg_read; nvmem_reg_write_t reg_write; int size; -- cgit v1.2.3 From c4dfa25ab307a277eafa7067cd927fbe4d9be4ba Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Tue, 13 Nov 2018 15:01:10 +0100 Subject: mtd: add support for reading MTD devices via the nvmem API Allow drivers that use the nvmem API to read data stored on MTD devices. For this the mtd devices are registered as read-only NVMEM providers. We don't support device tree systems for now. Signed-off-by: Alban Bedel [Bartosz: - include linux/nvmem-provider.h - set the name of the nvmem provider - set no_of_node to true in nvmem_config - don't check the return value of nvmem_unregister() - it cannot fail - tweaked the commit message] Signed-off-by: Bartosz Golaszewski Acked-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/Kconfig | 1 + drivers/mtd/mtdcore.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/mtd.h | 2 ++ 3 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index c77f537323ec..efbe7a6f1d8f 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -1,5 +1,6 @@ menuconfig MTD tristate "Memory Technology Device (MTD) support" + imply NVMEM help Memory Technology Devices are flash, RAM and similar chips, often used for solid state file systems on embedded devices. 
This option diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 97ac219c082e..5f1053d995b0 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -488,6 +489,50 @@ int mtd_pairing_groups(struct mtd_info *mtd) } EXPORT_SYMBOL_GPL(mtd_pairing_groups); +static int mtd_nvmem_reg_read(void *priv, unsigned int offset, + void *val, size_t bytes) +{ + struct mtd_info *mtd = priv; + size_t retlen; + int err; + + err = mtd_read(mtd, offset, bytes, &retlen, val); + if (err && err != -EUCLEAN) + return err; + + return retlen == bytes ? 0 : -EIO; +} + +static int mtd_nvmem_add(struct mtd_info *mtd) +{ + struct nvmem_config config = {}; + + config.dev = &mtd->dev; + config.name = mtd->name; + config.owner = THIS_MODULE; + config.reg_read = mtd_nvmem_reg_read; + config.size = mtd->size; + config.word_size = 1; + config.stride = 1; + config.read_only = true; + config.root_only = true; + config.no_of_node = true; + config.priv = mtd; + + mtd->nvmem = nvmem_register(&config); + if (IS_ERR(mtd->nvmem)) { + /* Just ignore if there is no NVMEM support in the kernel */ + if (PTR_ERR(mtd->nvmem) == -ENOSYS) { + mtd->nvmem = NULL; + } else { + dev_err(&mtd->dev, "Failed to register NVMEM device\n"); + return PTR_ERR(mtd->nvmem); + } + } + + return 0; +} + static struct dentry *dfs_dir_mtd; /** @@ -570,6 +615,11 @@ int add_mtd_device(struct mtd_info *mtd) if (error) goto fail_added; + /* Add the nvmem provider */ + error = mtd_nvmem_add(mtd); + if (error) + goto fail_nvmem_add; + if (!IS_ERR_OR_NULL(dfs_dir_mtd)) { mtd->dbg.dfs_dir = debugfs_create_dir(dev_name(&mtd->dev), dfs_dir_mtd); if (IS_ERR_OR_NULL(mtd->dbg.dfs_dir)) { @@ -595,6 +645,8 @@ int add_mtd_device(struct mtd_info *mtd) __module_get(THIS_MODULE); return 0; +fail_nvmem_add: + device_unregister(&mtd->dev); fail_added: of_node_put(mtd_get_of_node(mtd)); idr_remove(&mtd_idr, i); @@ -637,6 +689,10 @@ int del_mtd_device(struct 
mtd_info *mtd) mtd->index, mtd->name, mtd->usecount); ret = -EBUSY; } else { + /* Try to remove the NVMEM provider */ + if (mtd->nvmem) + nvmem_unregister(mtd->nvmem); + device_unregister(&mtd->dev); idr_remove(&mtd_idr, mtd->index); diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index cd0be91bdefa..545070c2ee64 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -341,6 +342,7 @@ struct mtd_info { struct device dev; int usecount; struct mtd_debug_info dbg; + struct nvmem_device *nvmem; }; int mtd_ooblayout_ecc(struct mtd_info *mtd, int section, -- cgit v1.2.3 From d693eb39f5f8500ac950378b010fba78452fcf14 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 15 Nov 2018 12:12:12 +0000 Subject: bus: fsl-mc: explicitly define the fsl_mc_command endianness Both the header and the command parameters of the fsl_mc_command are 64-bit little-endian words. Use the appropriate type to explicitly specify their endianness. 
Signed-off-by: Ioana Ciornei Reviewed-by: Laurentiu Tudor Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 9d3f668df7df..741f567253ef 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -210,8 +210,8 @@ struct mc_cmd_header { }; struct fsl_mc_command { - u64 header; - u64 params[MC_CMD_NUM_OF_PARAMS]; + __le64 header; + __le64 params[MC_CMD_NUM_OF_PARAMS]; }; enum mc_cmd_status { @@ -238,11 +238,11 @@ enum mc_cmd_status { /* Command completion flag */ #define MC_CMD_FLAG_INTR_DIS 0x01 -static inline u64 mc_encode_cmd_header(u16 cmd_id, - u32 cmd_flags, - u16 token) +static inline __le64 mc_encode_cmd_header(u16 cmd_id, + u32 cmd_flags, + u16 token) { - u64 header = 0; + __le64 header = 0; struct mc_cmd_header *hdr = (struct mc_cmd_header *)&header; hdr->cmd_id = cpu_to_le16(cmd_id); -- cgit v1.2.3 From 42ee3cae0ed38b6c04038bf851ea2496da2135bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 18:52:35 +0100 Subject: dma-mapping: provide a generic DMA_MAPPING_ERROR Error handling of the dma_map_single and dma_map_page APIs is a little problematic at the moment, in that we use different encodings in the returned dma_addr_t to indicate an error. That means we require an additional indirect call to figure out if a dma mapping call returned an error, and a lot of boilerplate code to implement these semantics. Instead return the maximum addressable value as the error. As long as we don't allow mapping single-byte ranges with single-byte alignment this value can never be a valid return. Additionally, if drivers do not check the return value from the dma_map* routines this value means they will generally not be pointed to actual memory. Once the default value is added here we can start removing the various mapping_error methods and just rely on this generic check.
Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Acked-by: Russell King Acked-by: Linus Torvalds --- include/linux/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 1a0edcde7d14..f89d277cc8ed 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -133,6 +133,8 @@ struct dma_map_ops { u64 (*get_required_mask)(struct device *dev); }; +#define DMA_MAPPING_ERROR (~(dma_addr_t)0) + extern const struct dma_map_ops dma_direct_ops; extern const struct dma_map_ops dma_virt_ops; @@ -581,8 +583,11 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) const struct dma_map_ops *ops = get_dma_ops(dev); debug_dma_mapping_error(dev, dma_addr); + if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); + if (dma_addr == DMA_MAPPING_ERROR) + return 1; return 0; } -- cgit v1.2.3 From b0cbeae4944924640bf550b75487729a20204c14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 18:52:35 +0100 Subject: dma-direct: remove the mapping_error dma_map_ops method The dma-direct code already returns (~(dma_addr_t)0x0) on mapping failures, so we can switch over to returning DMA_MAPPING_ERROR and let the core dma-mapping code handle the rest. 
Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/powerpc/kernel/dma-swiotlb.c | 1 - include/linux/dma-direct.h | 3 --- kernel/dma/direct.c | 8 +------- kernel/dma/swiotlb.c | 11 +++++------ 4 files changed, 6 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 5fc335f4d9cd..3d8df2cf8be9 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -59,7 +59,6 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = { .sync_single_for_device = swiotlb_sync_single_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, - .mapping_error = dma_direct_mapping_error, .get_required_mask = swiotlb_powerpc_get_required, }; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 61b78f934f64..6e5a47ae7d64 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -5,8 +5,6 @@ #include #include -#define DIRECT_MAPPING_ERROR (~(dma_addr_t)0) - #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else @@ -76,5 +74,4 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); -int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index c49849bcced6..308f88a750c8 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -289,7 +289,7 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, dma_addr_t dma_addr = phys_to_dma(dev, phys); if (!check_addr(dev, dma_addr, size, __func__)) - return DIRECT_MAPPING_ERROR; + return DMA_MAPPING_ERROR; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_device(dev, dma_addr, size, dir); @@ -336,11 
+336,6 @@ int dma_direct_supported(struct device *dev, u64 mask) return mask >= phys_to_dma(dev, min_mask); } -int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == DIRECT_MAPPING_ERROR; -} - const struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc, .free = dma_direct_free, @@ -359,7 +354,6 @@ const struct dma_map_ops dma_direct_ops = { #endif .get_required_mask = dma_direct_get_required_mask, .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, .cache_sync = arch_dma_cache_sync, }; EXPORT_SYMBOL(dma_direct_ops); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 045930e32c0e..ff1ce81bb623 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -631,21 +631,21 @@ static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys, if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) { dev_warn_ratelimited(dev, "Cannot do DMA to address %pa\n", phys); - return DIRECT_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } /* Oh well, have to allocate and map a bounce buffer. 
*/ *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), *phys, size, dir, attrs); if (*phys == SWIOTLB_MAP_ERROR) - return DIRECT_MAPPING_ERROR; + return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ dma_addr = __phys_to_dma(dev, *phys); if (unlikely(!dma_capable(dev, dma_addr, size))) { swiotlb_tbl_unmap_single(dev, *phys, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - return DIRECT_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } return dma_addr; @@ -680,7 +680,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0 && - dev_addr != DIRECT_MAPPING_ERROR) + dev_addr != DMA_MAPPING_ERROR) arch_sync_dma_for_device(dev, phys, size, dir); return dev_addr; @@ -789,7 +789,7 @@ swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, for_each_sg(sgl, sg, nelems, i) { sg->dma_address = swiotlb_map_page(dev, sg_page(sg), sg->offset, sg->length, dir, attrs); - if (sg->dma_address == DIRECT_MAPPING_ERROR) + if (sg->dma_address == DMA_MAPPING_ERROR) goto out_error; sg_dma_len(sg) = sg->length; } @@ -869,7 +869,6 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask) } const struct dma_map_ops swiotlb_dma_ops = { - .mapping_error = dma_direct_mapping_error, .alloc = dma_direct_alloc, .free = dma_direct_free, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, -- cgit v1.2.3 From cad34be747b8a92146e71c8267f2c1d6794e34c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:35:19 +0100 Subject: iommu/dma-iommu: remove the mapping_error dma_map_ops method Return DMA_MAPPING_ERROR instead of 0 on a dma mapping failure and let the core dma-mapping code handle the rest. 
Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/arm64/mm/dma-mapping.c | 7 +++---- drivers/iommu/dma-iommu.c | 23 ++++++++--------------- include/linux/dma-iommu.h | 1 - 3 files changed, 11 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3c2c088a3562..4c0f498069e8 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -233,7 +233,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, return NULL; *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); - if (iommu_dma_mapping_error(dev, *handle)) { + if (*handle == DMA_MAPPING_ERROR) { if (coherent) __free_pages(page, get_order(size)); else @@ -250,7 +250,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, return NULL; *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); - if (iommu_dma_mapping_error(dev, *handle)) { + if (*handle == DMA_MAPPING_ERROR) { dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); return NULL; @@ -410,7 +410,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - !iommu_dma_mapping_error(dev, dev_addr)) + dev_addr != DMA_MAPPING_ERROR) __dma_map_area(page_address(page) + offset, size, dir); return dev_addr; @@ -493,7 +493,6 @@ static const struct dma_map_ops iommu_dma_ops = { .sync_sg_for_device = __iommu_sync_sg_for_device, .map_resource = iommu_dma_map_resource, .unmap_resource = iommu_dma_unmap_resource, - .mapping_error = iommu_dma_mapping_error, }; static int __init __iommu_dma_init(void) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d1b04753b204..60c7e9e9901e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -32,8 +32,6 @@ #include #include -#define IOMMU_MAPPING_ERROR 0 - struct iommu_dma_msi_page { 
struct list_head list; dma_addr_t iova; @@ -523,7 +521,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, { __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size); __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); - *handle = IOMMU_MAPPING_ERROR; + *handle = DMA_MAPPING_ERROR; } /** @@ -556,7 +554,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, dma_addr_t iova; unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; - *handle = IOMMU_MAPPING_ERROR; + *handle = DMA_MAPPING_ERROR; min_size = alloc_sizes & -alloc_sizes; if (min_size < PAGE_SIZE) { @@ -649,11 +647,11 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); if (!iova) - return IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; if (iommu_map(domain, iova, phys - iova_off, size, prot)) { iommu_dma_free_iova(cookie, iova, size); - return IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } return iova + iova_off; } @@ -694,7 +692,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, s->offset += s_iova_off; s->length = s_length; - sg_dma_address(s) = IOMMU_MAPPING_ERROR; + sg_dma_address(s) = DMA_MAPPING_ERROR; sg_dma_len(s) = 0; /* @@ -737,11 +735,11 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) int i; for_each_sg(sg, s, nents, i) { - if (sg_dma_address(s) != IOMMU_MAPPING_ERROR) + if (sg_dma_address(s) != DMA_MAPPING_ERROR) s->offset += sg_dma_address(s); if (sg_dma_len(s)) s->length = sg_dma_len(s); - sg_dma_address(s) = IOMMU_MAPPING_ERROR; + sg_dma_address(s) = DMA_MAPPING_ERROR; sg_dma_len(s) = 0; } } @@ -858,11 +856,6 @@ void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } -int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == IOMMU_MAPPING_ERROR; -} - static struct 
iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) { @@ -882,7 +875,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, return NULL; iova = __iommu_dma_map(dev, msi_addr, size, prot, domain); - if (iommu_dma_mapping_error(dev, iova)) + if (iova == DMA_MAPPING_ERROR) goto out_free_page; INIT_LIST_HEAD(&msi_page->list); diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index e8ca5e654277..e760dc5d1fa8 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -69,7 +69,6 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs); void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs); -int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); /* The DMA API isn't _quite_ the whole story, though... */ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg); -- cgit v1.2.3 From 68c9ac1d1fd51233cfac15484c6153b90aaa4ca4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Dec 2018 14:33:24 -0800 Subject: dma-mapping: remove the mapping_error dma_map_ops method No users left except for vmd which just forwards it. 
Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- drivers/pci/controller/vmd.c | 6 ------ include/linux/dma-mapping.h | 5 ----- 2 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index e50b0b5815ff..98ce79eac128 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -394,11 +394,6 @@ static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); } -static int vmd_mapping_error(struct device *dev, dma_addr_t addr) -{ - return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr); -} - static int vmd_dma_supported(struct device *dev, u64 mask) { return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); @@ -446,7 +441,6 @@ static void vmd_setup_dma_ops(struct vmd_dev *vmd) ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device); ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu); ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); - ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); add_dma_domain(domain); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f89d277cc8ed..f4ac26d5294a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -128,7 +128,6 @@ struct dma_map_ops { enum dma_data_direction dir); void (*cache_sync)(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction); - int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); u64 (*get_required_mask)(struct device *dev); }; @@ -580,12 +579,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size, static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - const struct dma_map_ops *ops = get_dma_ops(dev); - 
debug_dma_mapping_error(dev, dma_addr); - if (ops->mapping_error) - return ops->mapping_error(dev, dma_addr); if (dma_addr == DMA_MAPPING_ERROR) return 1; return 0; -- cgit v1.2.3 From b14b9d25a3c707c85e7e31e15766a71365b52ab7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Nov 2018 10:59:37 +0100 Subject: dma-mapping: return an error code from dma_mapping_error Currently dma_mapping_error returns a boolean as int, with 1 meaning error. This is rather unusual and many callers have to convert it to errno value. The callers are highly inconsistent with error codes ranging from -ENOMEM over -EIO, -EINVAL and -EFAULT ranging to -EAGAIN. Return -ENOMEM which seems to be what the largest number of callers convert it to, and which also matches the typical error case where we are out of resources. Signed-off-by: Christoph Hellwig Acked-by: Russell King Acked-by: Linus Torvalds --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f4ac26d5294a..7799c2b27849 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -582,7 +582,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) debug_dma_mapping_error(dev, dma_addr); if (dma_addr == DMA_MAPPING_ERROR) - return 1; + return -ENOMEM; return 0; } -- cgit v1.2.3 From 7c703e54cc71df5baa962e24a5663d88173bba5c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 09:51:00 +0100 Subject: arch: switch the default on ARCH_HAS_SG_CHAIN These days architectures are mostly out of the business of dealing with struct scatterlist at all, unless they have architecture specific iommu drivers. Replace the ARCH_HAS_SG_CHAIN symbol with a ARCH_NO_SG_CHAIN one only enabled for architectures with horrible legacy iommu drivers like alpha and parisc, and conditionally for arm which wants to keep it disable for legacy platforms. 
Signed-off-by: Christoph Hellwig Reviewed-by: Palmer Dabbelt --- .../features/io/sg-chain/arch-support.txt | 33 ---------------------- arch/alpha/Kconfig | 1 + arch/arc/Kconfig | 1 - arch/arm/Kconfig | 2 +- arch/arm64/Kconfig | 1 - arch/ia64/Kconfig | 1 - arch/parisc/Kconfig | 1 + arch/powerpc/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/x86/Kconfig | 1 - arch/xtensa/Kconfig | 1 - include/linux/scatterlist.h | 6 ++-- lib/Kconfig | 2 +- lib/scatterlist.c | 2 +- 15 files changed, 8 insertions(+), 47 deletions(-) delete mode 100644 Documentation/features/io/sg-chain/arch-support.txt (limited to 'include/linux') diff --git a/Documentation/features/io/sg-chain/arch-support.txt b/Documentation/features/io/sg-chain/arch-support.txt deleted file mode 100644 index 6554f0372c3f..000000000000 --- a/Documentation/features/io/sg-chain/arch-support.txt +++ /dev/null @@ -1,33 +0,0 @@ -# -# Feature name: sg-chain -# Kconfig: ARCH_HAS_SG_CHAIN -# description: arch supports chained scatter-gather lists -# - ----------------------- - | arch |status| - ----------------------- - | alpha: | TODO | - | arc: | ok | - | arm: | ok | - | arm64: | ok | - | c6x: | TODO | - | h8300: | TODO | - | hexagon: | TODO | - | ia64: | ok | - | m68k: | TODO | - | microblaze: | TODO | - | mips: | TODO | - | nds32: | TODO | - | nios2: | TODO | - | openrisc: | TODO | - | parisc: | TODO | - | powerpc: | ok | - | riscv: | TODO | - | s390: | ok | - | sh: | TODO | - | sparc: | ok | - | um: | TODO | - | unicore32: | TODO | - | x86: | ok | - | xtensa: | TODO | - ----------------------- diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 5b4f88363453..a7e748a46c18 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -5,6 +5,7 @@ config ALPHA select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_NO_PREEMPT + select ARCH_NO_SG_CHAIN select ARCH_USE_CMPXCHG_LOCKREF select HAVE_AOUT select HAVE_IDE diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 
c9e2a1323536..fd48d698da29 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -13,7 +13,6 @@ config ARC select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select ARCH_HAS_SG_CHAIN select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3b2852df6eb3..a858ee791ef0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -19,6 +19,7 @@ config ARM select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW @@ -119,7 +120,6 @@ config ARM . config ARM_HAS_SG_CHAIN - select ARCH_HAS_SG_CHAIN bool config ARM_DMA_USE_IOMMU diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2e645ea693ea..06cf0ef24367 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -23,7 +23,6 @@ config ARM64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_MEMORY - select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYNC_DMA_FOR_DEVICE diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 36773def6920..d6f203658994 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -29,7 +29,6 @@ config IA64 select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING select ARCH_HAS_DMA_MARK_CLEAN - select ARCH_HAS_SG_CHAIN select VIRT_TO_BUS select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 92a339ee28b3..428ee50fc3db 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -11,6 +11,7 @@ config PARISC select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_MEMORY_FAILURE 
select RTC_CLASS select RTC_DRV_GENERIC diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8be31261aec8..4bc8edd83cee 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -138,7 +138,6 @@ config PPC select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64 - select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5173366af8f3..5624e8607054 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -73,7 +73,6 @@ config S390 select ARCH_HAS_KCOV select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_MEMORY - select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 490b2c95c212..8853b6ceae17 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -40,7 +40,6 @@ config SPARC select MODULES_USE_ELF_RELA select ODD_RT_SIGACTION select OLD_SIGSUSPEND - select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select LOCKDEP_SMALL if LOCKDEP select NEED_DMA_MAP_STATE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9d734f3c8234..adc845b66f01 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -66,7 +66,6 @@ config X86 select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE select ARCH_HAS_SET_MEMORY - select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 239bfb16c58b..75488b606edc 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 config XTENSA def_bool y - select ARCH_HAS_SG_CHAIN 
select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_COHERENT_DMA_MMAP if !MMU diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 093aa57120b0..b96f0d0b5b8f 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -324,10 +324,10 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, * Like SG_CHUNK_SIZE, but for archs that have sg chaining. This limit * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. */ -#ifdef CONFIG_ARCH_HAS_SG_CHAIN -#define SG_MAX_SEGMENTS 2048 -#else +#ifdef CONFIG_ARCH_NO_SG_CHAIN #define SG_MAX_SEGMENTS SG_CHUNK_SIZE +#else +#define SG_MAX_SEGMENTS 2048 #endif #ifdef CONFIG_SG_POOL diff --git a/lib/Kconfig b/lib/Kconfig index a9965f4af4dd..d5a5e2ebf286 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -577,7 +577,7 @@ config SG_POOL # sg chaining option # -config ARCH_HAS_SG_CHAIN +config ARCH_NO_SG_CHAIN def_bool n config ARCH_HAS_PMEM_API diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7c6096a71704..9ba349e775ef 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -271,7 +271,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, if (nents == 0) return -EINVAL; -#ifndef CONFIG_ARCH_HAS_SG_CHAIN +#ifdef CONFIG_ARCH_NO_SG_CHAIN if (WARN_ON_ONCE(nents > max_ents)) return -EINVAL; #endif -- cgit v1.2.3 From ded653ccbec0335a78fa7a7aff3ec9870349fafb Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 19 Sep 2018 21:41:04 -0700 Subject: signal: Add set_user_sigmask() Refactor reading sigset from userspace and updating sigmask into an api. This is useful for versions of syscalls that pass in the sigmask and expect the current->sigmask to be changed during, and restored after, the execution of the syscall. 
With the advent of new y2038 syscalls in the subsequent patches, we add two more new versions of the syscalls (for pselect, ppoll, and io_pgetevents) in addition to the existing native and compat versions. Adding such an api reduces the logic that would need to be replicated otherwise. Note that the calls to sigprocmask() ignored the return value from the api as the function only returns an error on an invalid first argument that is hardcoded at these call sites. The updated logic uses set_current_blocked() instead. Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- fs/aio.c | 23 +++++++---------------- fs/eventpoll.c | 22 ++++++---------------- fs/select.c | 50 ++++++++++++-------------------------------------- include/linux/compat.h | 4 ++++ include/linux/signal.h | 2 ++ kernel/signal.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 76 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index 301e6314183b..6ddb63ee8eb6 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2104,14 +2104,10 @@ SYSCALL_DEFINE6(io_pgetevents, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - if (ksig.sigmask) { - if (ksig.sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask))) - return -EFAULT; - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } + + ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + if (ret) + return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? 
&ts : NULL); if (signal_pending(current)) { @@ -2174,14 +2170,9 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - if (ksig.sigmask) { - if (ksig.sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (get_compat_sigset(&ksigmask, ksig.sigmask)) - return -EFAULT; - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } + ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + if (ret) + return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); if (signal_pending(current)) { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 42bbe6824b4b..2d86eeba837b 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2223,14 +2223,9 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ - if (sigmask) { - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) - return -EFAULT; - sigsaved = current->blocked; - set_current_blocked(&ksigmask); - } + error = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + if (error) + return error; error = do_epoll_wait(epfd, events, maxevents, timeout); @@ -2266,14 +2261,9 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, * If the caller wants a certain signal mask to be set during the wait, * we apply it here. 
*/ - if (sigmask) { - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (get_compat_sigset(&ksigmask, sigmask)) - return -EFAULT; - sigsaved = current->blocked; - set_current_blocked(&ksigmask); - } + err = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + if (err) + return err; err = do_epoll_wait(epfd, events, maxevents, timeout); diff --git a/fs/select.c b/fs/select.c index 22b3bf89f051..65c78b4147a2 100644 --- a/fs/select.c +++ b/fs/select.c @@ -717,16 +717,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, return -EINVAL; } - if (sigmask) { - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) - return -EFAULT; - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } + ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + if (ret) + return ret; ret = core_sys_select(n, inp, outp, exp, to); ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); @@ -1061,16 +1054,9 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, return -EINVAL; } - if (sigmask) { - /* XXX: Don't preclude handling different sized sigset_t's. 
*/ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) - return -EFAULT; - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } + ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + if (ret) + return ret; ret = do_sys_poll(ufds, nfds, to); @@ -1323,15 +1309,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return -EINVAL; } - if (sigmask) { - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (get_compat_sigset(&ksigmask, sigmask)) - return -EFAULT; - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } + ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + if (ret) + return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret); @@ -1389,15 +1369,9 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, return -EINVAL; } - if (sigmask) { - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (get_compat_sigset(&ksigmask, sigmask)) - return -EFAULT; - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } + ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + if (ret) + return ret; ret = do_sys_poll(ufds, nfds, to); diff --git a/include/linux/compat.h b/include/linux/compat.h index 88720b443cd6..17c497b82690 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -169,6 +169,10 @@ typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; +int set_compat_user_sigmask(const compat_sigset_t __user *usigmask, + sigset_t *set, sigset_t *oldset, + size_t sigsetsize); + struct compat_sigaction { #ifndef __ARCH_HAS_IRIX_SIGACTION compat_uptr_t sa_handler; diff --git a/include/linux/signal.h 
b/include/linux/signal.h index f428e86f4800..ce14b951befb 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -273,6 +273,8 @@ extern int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern int sigprocmask(int, sigset_t *, sigset_t *); +extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, + sigset_t *oldset, size_t sigsetsize); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; diff --git a/kernel/signal.c b/kernel/signal.c index 9a32bc2088c9..811b5d440617 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2735,6 +2735,51 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) } EXPORT_SYMBOL(sigprocmask); +/* + * The api helps set app-provided sigmasks. + * + * This is useful for syscalls such as ppoll, pselect, io_pgetevents and + * epoll_pwait where a new sigmask is passed from userland for the syscalls. 
+ */ +int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, + sigset_t *oldset, size_t sigsetsize) +{ + if (!usigmask) + return 0; + + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (copy_from_user(set, usigmask, sizeof(sigset_t))) + return -EFAULT; + + *oldset = current->blocked; + set_current_blocked(set); + + return 0; +} +EXPORT_SYMBOL(set_user_sigmask); + +#ifdef CONFIG_COMPAT +int set_compat_user_sigmask(const compat_sigset_t __user *usigmask, + sigset_t *set, sigset_t *oldset, + size_t sigsetsize) +{ + if (!usigmask) + return 0; + + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + if (get_compat_sigset(set, usigmask)) + return -EFAULT; + + *oldset = current->blocked; + set_current_blocked(set); + + return 0; +} +EXPORT_SYMBOL(set_compat_user_sigmask); +#endif + /** * sys_rt_sigprocmask - change the list of currently blocked signals * @how: whether to add, remove, or set signals -- cgit v1.2.3 From 854a6ed56839a40f6b5d02a2962f48841482eec4 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 19 Sep 2018 21:41:05 -0700 Subject: signal: Add restore_user_sigmask() Refactor the logic to restore the sigmask before the syscall returns into an api. This is useful for versions of syscalls that pass in the sigmask and expect the current->sigmask to be changed during the execution and restored after the execution of the syscall. With the advent of new y2038 syscalls in the subsequent patches, we add two more new versions of the syscalls (for pselect, ppoll and io_pgetevents) in addition to the existing native and compat versions. Adding such an api reduces the logic that would need to be replicated otherwise. 
Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- fs/aio.c | 29 +++++------------------- fs/eventpoll.c | 30 ++----------------------- fs/select.c | 60 +++++++------------------------------------------- include/linux/signal.h | 2 ++ kernel/signal.c | 33 +++++++++++++++++++++++++++ 5 files changed, 51 insertions(+), 103 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index 6ddb63ee8eb6..39a1f2df6805 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2110,18 +2110,9 @@ SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - if (signal_pending(current)) { - if (ksig.sigmask) { - current->saved_sigmask = sigsaved; - set_restore_sigmask(); - } - - if (!ret) - ret = -ERESTARTNOHAND; - } else { - if (ksig.sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - } + restore_user_sigmask(ksig.sigmask, &sigsaved); + if (signal_pending(current) && !ret) + ret = -ERESTARTNOHAND; return ret; } @@ -2175,17 +2166,9 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - if (signal_pending(current)) { - if (ksig.sigmask) { - current->saved_sigmask = sigsaved; - set_restore_sigmask(); - } - if (!ret) - ret = -ERESTARTNOHAND; - } else { - if (ksig.sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - } + restore_user_sigmask(ksig.sigmask, &sigsaved); + if (signal_pending(current) && !ret) + ret = -ERESTARTNOHAND; return ret; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 2d86eeba837b..8a5a1010886b 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2229,20 +2229,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, error = do_epoll_wait(epfd, events, maxevents, timeout); - /* - * If we changed the signal mask, we need to restore the original one. 
- * In case we've got a signal while waiting, we do not restore the - * signal mask yet, and we allow do_signal() to deliver the signal on - * the way back to userspace, before the signal mask is restored. - */ - if (sigmask) { - if (error == -EINTR) { - memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } else - set_current_blocked(&sigsaved); - } + restore_user_sigmask(sigmask, &sigsaved); return error; } @@ -2267,20 +2254,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, err = do_epoll_wait(epfd, events, maxevents, timeout); - /* - * If we changed the signal mask, we need to restore the original one. - * In case we've got a signal while waiting, we do not restore the - * signal mask yet, and we allow do_signal() to deliver the signal on - * the way back to userspace, before the signal mask is restored. - */ - if (sigmask) { - if (err == -EINTR) { - memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } else - set_current_blocked(&sigsaved); - } + restore_user_sigmask(sigmask, &sigsaved); return err; } diff --git a/fs/select.c b/fs/select.c index 65c78b4147a2..eb9132520197 100644 --- a/fs/select.c +++ b/fs/select.c @@ -724,19 +724,7 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, ret = core_sys_select(n, inp, outp, exp, to); ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); - if (ret == -ERESTARTNOHAND) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored.
- */ - if (sigmask) { - memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + restore_user_sigmask(sigmask, &sigsaved); return ret; } @@ -1060,21 +1048,11 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); + restore_user_sigmask(sigmask, &sigsaved); + /* We can restart this syscall, usually */ - if (ret == -EINTR) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. - */ - if (sigmask) { - memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } + if (ret == -EINTR) ret = -ERESTARTNOHAND; - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); @@ -1316,19 +1294,7 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, ret = compat_core_sys_select(n, inp, outp, exp, to); ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret); - if (ret == -ERESTARTNOHAND) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. - */ - if (sigmask) { - memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + restore_user_sigmask(sigmask, &sigsaved); return ret; } @@ -1375,21 +1341,11 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); + restore_user_sigmask(sigmask, &sigsaved); + /* We can restart this syscall, usually */ - if (ret == -EINTR) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored.
- */ - if (sigmask) { - memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } + if (ret == -EINTR) ret = -ERESTARTNOHAND; - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret); diff --git a/include/linux/signal.h b/include/linux/signal.h index ce14b951befb..cc7e2c1cd444 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -275,6 +275,8 @@ extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struc extern int sigprocmask(int, sigset_t *, sigset_t *); extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, sigset_t *oldset, size_t sigsetsize); +extern void restore_user_sigmask(const void __user *usigmask, + sigset_t *sigsaved); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; diff --git a/kernel/signal.c b/kernel/signal.c index 811b5d440617..3c8ea7a328e0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2780,6 +2780,39 @@ int set_compat_user_sigmask(const compat_sigset_t __user *usigmask, EXPORT_SYMBOL(set_compat_user_sigmask); #endif +/* + * restore_user_sigmask: + * usigmask: sigmask passed in from userland. + * sigsaved: saved sigmask when the syscall started and changed the sigmask to + * usigmask. + * + * This is useful for syscalls such as ppoll, pselect, io_pgetevents and + * epoll_pwait where a new sigmask is passed in from userland for the syscalls. + */ +void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) +{ + + if (!usigmask) + return; + /* + * When signals are pending, do not restore them here. + * Restoring sigmask here can lead to delivering signals that the above + * syscalls are intended to block because of the sigmask passed in.
+ */ + if (signal_pending(current)) { + current->saved_sigmask = *sigsaved; + set_restore_sigmask(); + return; + } + + /* + * This is needed because the fast syscall return path does not restore + * saved_sigmask when signals are not pending. + */ + set_current_blocked(sigsaved); +} +EXPORT_SYMBOL(restore_user_sigmask); + /** * sys_rt_sigprocmask - change the list of currently blocked signals * @how: whether to add, remove, or set signals -- cgit v1.2.3 From 8bd27a3004e80d3d0962534c97e5a841262d5093 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 19 Sep 2018 21:41:06 -0700 Subject: ppoll: use __kernel_timespec struct timespec is not y2038 safe. struct __kernel_timespec is the new y2038 safe structure for all syscalls that are using struct timespec. Update ppoll interfaces to use struct __kernel_timespec. sigset_t also has different representations on 32 bit and 64 bit architectures. Hence, we need to support the following different syscalls: New y2038 safe syscalls: (Controlled by CONFIG_64BIT_TIME for 32 bit ABIs) Native 64 bit(unchanged) and native 32 bit : sys_ppoll Compat : compat_sys_ppoll_time64 Older y2038 unsafe syscalls: (Controlled by CONFIG_32BIT_COMPAT_TIME for 32 bit ABIs) Native 32 bit : ppoll_time32 Compat : compat_sys_ppoll Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- fs/select.c | 166 +++++++++++++++++++++++++++++++---------------- include/linux/compat.h | 5 ++ include/linux/syscalls.h | 5 +- 3 files changed, 120 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/fs/select.c b/fs/select.c index eb9132520197..d332be059487 100644 --- a/fs/select.c +++ b/fs/select.c @@ -287,12 +287,18 @@ int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec) return 0; } +enum poll_time_type { + PT_TIMEVAL = 0, + PT_OLD_TIMEVAL = 1, + PT_TIMESPEC = 2, + PT_OLD_TIMESPEC = 3, +}; + static int poll_select_copy_remaining(struct timespec64 *end_time, void __user *p, - int timeval, int ret) + enum 
poll_time_type pt_type, int ret) { struct timespec64 rts; - struct timeval rtv; if (!p) return ret; @@ -310,18 +316,40 @@ static int poll_select_copy_remaining(struct timespec64 *end_time, rts.tv_sec = rts.tv_nsec = 0; - if (timeval) { - if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec)) - memset(&rtv, 0, sizeof(rtv)); - rtv.tv_sec = rts.tv_sec; - rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; + switch (pt_type) { + case PT_TIMEVAL: + { + struct timeval rtv; - if (!copy_to_user(p, &rtv, sizeof(rtv))) + if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec)) + memset(&rtv, 0, sizeof(rtv)); + rtv.tv_sec = rts.tv_sec; + rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; + if (!copy_to_user(p, &rtv, sizeof(rtv))) + return ret; + } + break; + case PT_OLD_TIMEVAL: + { + struct old_timeval32 rtv; + + rtv.tv_sec = rts.tv_sec; + rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; + if (!copy_to_user(p, &rtv, sizeof(rtv))) + return ret; + } + break; + case PT_TIMESPEC: + if (!put_timespec64(&rts, p)) return ret; - - } else if (!put_timespec64(&rts, p)) - return ret; - + break; + case PT_OLD_TIMESPEC: + if (!put_old_timespec32(&rts, p)) + return ret; + break; + default: + BUG(); + } /* * If an application puts its timeval in read-only memory, we * don't want the Linux-specific update to the timeval to @@ -689,7 +717,7 @@ static int kern_select(int n, fd_set __user *inp, fd_set __user *outp, } ret = core_sys_select(n, inp, outp, exp, to); - ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); + ret = poll_select_copy_remaining(&end_time, tvp, PT_TIMEVAL, ret); return ret; } @@ -722,7 +750,7 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, return ret; ret = core_sys_select(n, inp, outp, exp, to); - ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); + ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret); restore_user_sigmask(sigmask, &sigsaved); @@ -1026,7 +1054,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, 
nfds, } SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, - struct timespec __user *, tsp, const sigset_t __user *, sigmask, + struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask, size_t, sigsetsize) { sigset_t ksigmask, sigsaved; @@ -1054,60 +1082,50 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, if (ret == -EINTR) ret = -ERESTARTNOHAND; - ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); + ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret); return ret; } -#ifdef CONFIG_COMPAT -#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) +#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT) -static -int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *p, - int timeval, int ret) +SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, + struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask, + size_t, sigsetsize) { - struct timespec64 ts; + sigset_t ksigmask, sigsaved; + struct timespec64 ts, end_time, *to = NULL; + int ret; - if (!p) - return ret; + if (tsp) { + if (get_old_timespec32(&ts, tsp)) + return -EFAULT; - if (current->personality & STICKY_TIMEOUTS) - goto sticky; + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; + } - /* No update for zero timeout */ - if (!end_time->tv_sec && !end_time->tv_nsec) + ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + if (ret) return ret; - ktime_get_ts64(&ts); - ts = timespec64_sub(*end_time, ts); - if (ts.tv_sec < 0) - ts.tv_sec = ts.tv_nsec = 0; + ret = do_sys_poll(ufds, nfds, to); - if (timeval) { - struct old_timeval32 rtv; + restore_user_sigmask(sigmask, &sigsaved); - rtv.tv_sec = ts.tv_sec; - rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; + /* We can restart this syscall, usually */ + if (ret == -EINTR) + ret = -ERESTARTNOHAND; - if (!copy_to_user(p, &rtv, sizeof(rtv))) - return ret; - } else { - 
if (!put_old_timespec32(&ts, p)) - return ret; - } - /* - * If an application puts its timeval in read-only memory, we - * don't want the Linux-specific update to the timeval to - * cause a fault after the select has completed - * successfully. However, because we're not updating the - * timeval, we can't restart the system call. - */ + ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret); -sticky: - if (ret == -ERESTARTNOHAND) - ret = -EINTR; return ret; } +#endif + +#ifdef CONFIG_COMPAT +#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) /* * Ooo, nasty. We need here to frob 32-bit unsigned longs to @@ -1239,7 +1257,7 @@ static int do_compat_select(int n, compat_ulong_t __user *inp, } ret = compat_core_sys_select(n, inp, outp, exp, to); - ret = compat_poll_select_copy_remaining(&end_time, tvp, 1, ret); + ret = poll_select_copy_remaining(&end_time, tvp, PT_OLD_TIMEVAL, ret); return ret; } @@ -1292,7 +1310,7 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); - ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret); + ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret); restore_user_sigmask(sigmask, &sigsaved); @@ -1318,6 +1336,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, sigsetsize); } +#if defined(CONFIG_COMPAT_32BIT_TIME) COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) @@ -1347,8 +1366,45 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, if (ret == -EINTR) ret = -ERESTARTNOHAND; - ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret); + ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret); return ret; } #endif + +/* New compat syscall for 64 bit time_t*/ +COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, + 
unsigned int, nfds, struct __kernel_timespec __user *, tsp, + const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) +{ + sigset_t ksigmask, sigsaved; + struct timespec64 ts, end_time, *to = NULL; + int ret; + + if (tsp) { + if (get_timespec64(&ts, tsp)) + return -EFAULT; + + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; + } + + ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + if (ret) + return ret; + + ret = do_sys_poll(ufds, nfds, to); + + restore_user_sigmask(sigmask, &sigsaved); + + /* We can restart this syscall, usually */ + if (ret == -EINTR) + ret = -ERESTARTNOHAND; + + ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret); + + return ret; +} + +#endif diff --git a/include/linux/compat.h b/include/linux/compat.h index 17c497b82690..f309a524a4b7 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -652,6 +652,11 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, struct old_timespec32 __user *tsp, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); +asmlinkage long compat_sys_ppoll_time64(struct pollfd __user *ufds, + unsigned int nfds, + struct __kernel_timespec __user *tsp, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); /* fs/signalfd.c */ asmlinkage long compat_sys_signalfd4(int ufd, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2ac3d13a915b..4575ea1f22cd 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -469,7 +469,10 @@ asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, fd_set __user *, struct timespec __user *, void __user *); asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, - struct timespec __user *, const sigset_t __user *, + struct __kernel_timespec __user *, const sigset_t __user *, + size_t); +asmlinkage long sys_ppoll_time32(struct pollfd __user *, unsigned int, + struct old_timespec32 __user *, const 
sigset_t __user *, size_t); /* fs/signalfd.c */ -- cgit v1.2.3 From e024707bccae15abd493265ea0b72f46a4920727 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 19 Sep 2018 21:41:07 -0700 Subject: pselect6: use __kernel_timespec struct timespec is not y2038 safe. struct __kernel_timespec is the new y2038 safe structure for all syscalls that are using struct timespec. Update pselect interfaces to use struct __kernel_timespec. sigset_t also has different representations on 32 bit and 64 bit architectures. Hence, we need to support the following different syscalls: New y2038 safe syscalls: (Controlled by CONFIG_64BIT_TIME for 32 bit ABIs) Native 64 bit(unchanged) and native 32 bit : sys_pselect6 Compat : compat_sys_pselect6_time64 Older y2038 unsafe syscalls: (Controlled by CONFIG_32BIT_COMPAT_TIME for 32 bit ABIs) Native 32 bit : pselect6_time32 Compat : compat_sys_pselect6 Note that all other versions of select syscalls will not have y2038 safe versions. Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- fs/select.c | 94 +++++++++++++++++++++++++++++++++++++++++------- include/linux/compat.h | 5 +++ include/linux/syscalls.h | 5 ++- 3 files changed, 90 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/select.c b/fs/select.c index d332be059487..4c8652390c94 100644 --- a/fs/select.c +++ b/fs/select.c @@ -729,16 +729,27 @@ SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp, } static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timespec __user *tsp, - const sigset_t __user *sigmask, size_t sigsetsize) + fd_set __user *exp, void __user *tsp, + const sigset_t __user *sigmask, size_t sigsetsize, + enum poll_time_type type) { sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { - if (get_timespec64(&ts, tsp)) - return -EFAULT; + switch (type) { + case PT_TIMESPEC: + if (get_timespec64(&ts, tsp)) + return -EFAULT; + break; 
+ case PT_OLD_TIMESPEC: + if (get_old_timespec32(&ts, tsp)) + return -EFAULT; + break; + default: + BUG(); + } to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) @@ -750,7 +761,7 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, return ret; ret = core_sys_select(n, inp, outp, exp, to); - ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret); + ret = poll_select_copy_remaining(&end_time, tsp, type, ret); restore_user_sigmask(sigmask, &sigsaved); @@ -764,7 +775,27 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, * the sigset size. */ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, - fd_set __user *, exp, struct timespec __user *, tsp, + fd_set __user *, exp, struct __kernel_timespec __user *, tsp, + void __user *, sig) +{ + size_t sigsetsize = 0; + sigset_t __user *up = NULL; + + if (sig) { + if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t)) + || __get_user(up, (sigset_t __user * __user *)sig) + || __get_user(sigsetsize, + (size_t __user *)(sig+sizeof(void *)))) + return -EFAULT; + } + + return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize, PT_TIMESPEC); +} + +#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT) + +SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, outp, + fd_set __user *, exp, struct old_timespec32 __user *, tsp, void __user *, sig) { size_t sigsetsize = 0; @@ -778,9 +809,11 @@ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, return -EFAULT; } - return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize); + return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize, PT_OLD_TIMESPEC); } +#endif + #ifdef __ARCH_WANT_SYS_OLD_SELECT struct sel_arg_struct { unsigned long n; @@ -1289,16 +1322,26 @@ COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) static long do_compat_pselect(int n, compat_ulong_t __user *inp, compat_ulong_t __user 
*outp, compat_ulong_t __user *exp, - struct old_timespec32 __user *tsp, compat_sigset_t __user *sigmask, - compat_size_t sigsetsize) + void __user *tsp, compat_sigset_t __user *sigmask, + compat_size_t sigsetsize, enum poll_time_type type) { sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { - if (get_old_timespec32(&ts, tsp)) - return -EFAULT; + switch (type) { + case PT_OLD_TIMESPEC: + if (get_old_timespec32(&ts, tsp)) + return -EFAULT; + break; + case PT_TIMESPEC: + if (get_timespec64(&ts, tsp)) + return -EFAULT; + break; + default: + BUG(); + } to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) @@ -1310,13 +1353,35 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); - ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret); + ret = poll_select_copy_remaining(&end_time, tsp, type, ret); restore_user_sigmask(sigmask, &sigsaved); return ret; } +COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp, + compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, + struct __kernel_timespec __user *, tsp, void __user *, sig) +{ + compat_size_t sigsetsize = 0; + compat_uptr_t up = 0; + + if (sig) { + if (!access_ok(VERIFY_READ, sig, + sizeof(compat_uptr_t)+sizeof(compat_size_t)) || + __get_user(up, (compat_uptr_t __user *)sig) || + __get_user(sigsetsize, + (compat_size_t __user *)(sig+sizeof(up)))) + return -EFAULT; + } + + return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up), + sigsetsize, PT_TIMESPEC); +} + +#if defined(CONFIG_COMPAT_32BIT_TIME) + COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, struct old_timespec32 __user *, tsp, void __user *, sig) @@ -1332,10 +1397,13 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, (compat_size_t __user *)(sig+sizeof(up)))) return 
-EFAULT; } + return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up), - sigsetsize); + sigsetsize, PT_OLD_TIMESPEC); } +#endif + #if defined(CONFIG_COMPAT_32BIT_TIME) COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, diff --git a/include/linux/compat.h b/include/linux/compat.h index f309a524a4b7..5776733b763f 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -647,6 +647,11 @@ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, compat_ulong_t __user *exp, struct old_timespec32 __user *tsp, void __user *sig); +asmlinkage long compat_sys_pselect6_time64(int n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, + compat_ulong_t __user *exp, + struct __kernel_timespec __user *tsp, + void __user *sig); asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, struct old_timespec32 __user *tsp, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4575ea1f22cd..934e58e0dfa4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -466,7 +466,10 @@ asmlinkage long sys_sendfile64(int out_fd, int in_fd, /* fs/select.c */ asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, - fd_set __user *, struct timespec __user *, + fd_set __user *, struct __kernel_timespec __user *, + void __user *); +asmlinkage long sys_pselect6_time32(int, fd_set __user *, fd_set __user *, + fd_set __user *, struct old_timespec32 __user *, void __user *); asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, struct __kernel_timespec __user *, const sigset_t __user *, -- cgit v1.2.3 From 7a35397f8c06bffd4c747537095321ff971045a5 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 19 Sep 2018 21:41:08 -0700 Subject: io_pgetevents: use __kernel_timespec struct timespec is not y2038 safe. struct __kernel_timespec is the new y2038 safe structure for all syscalls that are using struct timespec. 
Update io_pgetevents interfaces to use struct __kernel_timespec. sigset_t also has different representations on 32 bit and 64 bit architectures. Hence, we need to support the following different syscalls: New y2038 safe syscalls: (Controlled by CONFIG_64BIT_TIME for 32 bit ABIs) Native 64 bit(unchanged) and native 32 bit : sys_io_pgetevents Compat : compat_sys_io_pgetevents_time64 Older y2038 unsafe syscalls: (Controlled by CONFIG_32BIT_COMPAT_TIME for 32 bit ABIs) Native 32 bit : sys_io_pgetevents_time32 Compat : compat_sys_io_pgetevents Note that io_getevents syscalls do not have a y2038 safe solution. Signed-off-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- fs/aio.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/compat.h | 6 ++++ include/linux/syscalls.h | 10 ++++-- 3 files changed, 95 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index 39a1f2df6805..62a0c560cd3d 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2062,11 +2062,13 @@ static long do_io_getevents(aio_context_t ctx_id, * specifies an infinite timeout. Note that the timeout pointed to by * timeout is relative. Will fail with -ENOSYS if not implemented. 
*/ +#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT) + SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, long, min_nr, long, nr, struct io_event __user *, events, - struct timespec __user *, timeout) + struct __kernel_timespec __user *, timeout) { struct timespec64 ts; int ret; @@ -2080,6 +2082,8 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, return ret; } +#endif + struct __aio_sigset { const sigset_t __user *sigmask; size_t sigsetsize; @@ -2090,7 +2094,7 @@ SYSCALL_DEFINE6(io_pgetevents, long, min_nr, long, nr, struct io_event __user *, events, - struct timespec __user *, timeout, + struct __kernel_timespec __user *, timeout, const struct __aio_sigset __user *, usig) { struct __aio_sigset ksig = { NULL, }; @@ -2104,6 +2108,39 @@ SYSCALL_DEFINE6(io_pgetevents, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; + ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + if (ret) + return ret; + + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? 
&ts : NULL); + restore_user_sigmask(ksig.sigmask, &sigsaved); + if (signal_pending(current) && !ret) + ret = -ERESTARTNOHAND; + + return ret; +} + +#if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT) + +SYSCALL_DEFINE6(io_pgetevents_time32, + aio_context_t, ctx_id, + long, min_nr, + long, nr, + struct io_event __user *, events, + struct old_timespec32 __user *, timeout, + const struct __aio_sigset __user *, usig) +{ + struct __aio_sigset ksig = { NULL, }; + sigset_t ksigmask, sigsaved; + struct timespec64 ts; + int ret; + + if (timeout && unlikely(get_old_timespec32(&ts, timeout))) + return -EFAULT; + + if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) + return -EFAULT; + ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); if (ret) @@ -2117,7 +2154,10 @@ SYSCALL_DEFINE6(io_pgetevents, return ret; } -#ifdef CONFIG_COMPAT +#endif + +#if defined(CONFIG_COMPAT_32BIT_TIME) + COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, compat_long_t, min_nr, compat_long_t, nr, @@ -2136,12 +2176,17 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, return ret; } +#endif + +#ifdef CONFIG_COMPAT struct __compat_aio_sigset { compat_sigset_t __user *sigmask; compat_size_t sigsetsize; }; +#if defined(CONFIG_COMPAT_32BIT_TIME) + COMPAT_SYSCALL_DEFINE6(io_pgetevents, compat_aio_context_t, ctx_id, compat_long_t, min_nr, @@ -2172,4 +2217,37 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, return ret; } + +#endif + +COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, + compat_aio_context_t, ctx_id, + compat_long_t, min_nr, + compat_long_t, nr, + struct io_event __user *, events, + struct __kernel_timespec __user *, timeout, + const struct __compat_aio_sigset __user *, usig) +{ + struct __compat_aio_sigset ksig = { NULL, }; + sigset_t ksigmask, sigsaved; + struct timespec64 t; + int ret; + + if (timeout && get_timespec64(&t, timeout)) + return -EFAULT; + + if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) + return -EFAULT; + + 
ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + if (ret) + return ret; + + ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); + restore_user_sigmask(ksig.sigmask, &sigsaved); + if (signal_pending(current) && !ret) + ret = -ERESTARTNOHAND; + + return ret; +} #endif diff --git a/include/linux/compat.h b/include/linux/compat.h index 5776733b763f..8be8daa38c9a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -562,6 +562,12 @@ asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id, struct io_event __user *events, struct old_timespec32 __user *timeout, const struct __compat_aio_sigset __user *usig); +asmlinkage long compat_sys_io_pgetevents_time64(compat_aio_context_t ctx_id, + compat_long_t min_nr, + compat_long_t nr, + struct io_event __user *events, + struct __kernel_timespec __user *timeout, + const struct __compat_aio_sigset __user *usig); /* fs/cookies.c */ asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 934e58e0dfa4..a27cf407de92 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -296,12 +296,18 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id, long min_nr, long nr, struct io_event __user *events, - struct timespec __user *timeout); + struct __kernel_timespec __user *timeout); asmlinkage long sys_io_pgetevents(aio_context_t ctx_id, long min_nr, long nr, struct io_event __user *events, - struct timespec __user *timeout, + struct __kernel_timespec __user *timeout, + const struct __aio_sigset *sig); +asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id, + long min_nr, + long nr, + struct io_event __user *events, + struct old_timespec32 __user *timeout, const struct __aio_sigset *sig); /* fs/xattr.c */ -- cgit v1.2.3 From 7cb3026411cf2b64797eb6b1caacfba6ca4258d9 Mon Sep 17 00:00:00 2001 From: Benjamin Young Date: Sat, 1 Dec 2018 
08:07:11 -0800 Subject: PCI: Remove unnecessary space before function pointer arguments Make spacing more consistent in the code for function pointer declarations based on checkpatch.pl. Signed-off-by: Benjamin Young [bhelgaas: make similar changes in include/linux/pci.h] Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/portdrv.h | 16 ++++++++-------- include/linux/pci.h | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index e495f04394d0..fbbf00b0992e 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -71,19 +71,19 @@ static inline void *get_service_data(struct pcie_device *dev) struct pcie_port_service_driver { const char *name; - int (*probe) (struct pcie_device *dev); - void (*remove) (struct pcie_device *dev); - int (*suspend) (struct pcie_device *dev); - int (*resume_noirq) (struct pcie_device *dev); - int (*resume) (struct pcie_device *dev); - int (*runtime_suspend) (struct pcie_device *dev); - int (*runtime_resume) (struct pcie_device *dev); + int (*probe)(struct pcie_device *dev); + void (*remove)(struct pcie_device *dev); + int (*suspend)(struct pcie_device *dev); + int (*resume_noirq)(struct pcie_device *dev); + int (*resume)(struct pcie_device *dev); + int (*runtime_suspend)(struct pcie_device *dev); + int (*runtime_resume)(struct pcie_device *dev); /* Device driver may resume normal operations */ void (*error_resume)(struct pci_dev *dev); /* Link Reset Capability - AER service driver specific */ - pci_ers_result_t (*reset_link) (struct pci_dev *dev); + pci_ers_result_t (*reset_link)(struct pci_dev *dev); int port_type; /* Type of the port this driver can handle */ u32 service; /* Port service this device represents */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 11c71c4ecf75..a6cd567c3fc1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -764,9 +764,9 @@ struct pci_driver { int 
(*suspend)(struct pci_dev *dev, pm_message_t state); /* Device suspended */ int (*suspend_late)(struct pci_dev *dev, pm_message_t state); int (*resume_early)(struct pci_dev *dev); - int (*resume) (struct pci_dev *dev); /* Device woken up */ - void (*shutdown) (struct pci_dev *dev); - int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* On PF */ + int (*resume)(struct pci_dev *dev); /* Device woken up */ + void (*shutdown)(struct pci_dev *dev); + int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */ const struct pci_error_handlers *err_handler; const struct attribute_group **groups; struct device_driver driver; -- cgit v1.2.3 From 00f54e68924eaf075f3f24be18557899d347bc4a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 6 Dec 2018 17:05:36 +0000 Subject: net: core: dev: Add extack argument to dev_open() In order to pass extack together with NETDEV_PRE_UP notifications, it's necessary to route the extack to __dev_open() from diverse (possibly indirect) callers. One prominent API through which the notification is invoked is dev_open(). Therefore extend dev_open() with an extra extack argument and update all users. Most of the calls end up just encoding NULL, but bond and team drivers have the extack readily available. Signed-off-by: Petr Machata Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S.
Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c | 2 +- drivers/net/ethernet/cisco/enic/enic_ethtool.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2 +- drivers/net/ethernet/sfc/ethtool.c | 2 +- drivers/net/ethernet/sfc/falcon/ethtool.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 4 ++-- drivers/net/net_failover.c | 8 ++++---- drivers/net/team/team.c | 2 +- drivers/net/wireless/intersil/hostap/hostap_main.c | 2 +- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 2 +- drivers/staging/unisys/visornic/visornic_main.c | 2 +- include/linux/netdevice.h | 2 +- net/bluetooth/6lowpan.c | 2 +- net/core/dev.c | 5 +++-- net/core/netpoll.c | 2 +- net/ipv4/ipmr.c | 4 ++-- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6mr.c | 2 +- 23 files changed, 30 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 333387f1f1fe..6b34dbefa7dd 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1538,7 +1538,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, slave_dev->flags |= IFF_SLAVE; /* open the slave since the application closed it */ - res = dev_open(slave_dev); + res = dev_open(slave_dev, extack); if (res) { netdev_dbg(bond_dev, "Opening slave %s failed\n", slave_dev->name); goto err_restore_mac; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a5fd71692c8b..43b42615ad84 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -525,7 +525,7 @@ static int aq_set_ringparam(struct net_device *ndev, } } if (ndev_running) - err = dev_open(ndev); + err = 
dev_open(ndev, NULL); err_exit: return err; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index f42f7a6e1559..ebd5c2cf1efe 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -241,7 +241,7 @@ static int enic_set_ringparam(struct net_device *netdev, } enic_init_vnic_resources(enic); if (running) { - err = dev_open(netdev); + err = dev_open(netdev, NULL); if (err) goto err_out; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 774beda040a1..8e9b95871d30 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -624,7 +624,7 @@ static void hns_nic_self_test(struct net_device *ndev, clear_bit(NIC_STATE_TESTING, &priv->state); if (if_running) - (void)dev_open(ndev); + (void)dev_open(ndev, NULL); } /* Online tests aren't run; pass by default */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 4563638367ac..e678b6939da3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -821,7 +821,7 @@ static int hns3_set_ringparam(struct net_device *ndev, } if (if_running) - ret = dev_open(ndev); + ret = dev_open(ndev, NULL); return ret; } diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 3143588ffd77..600d7b895cf2 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -539,7 +539,7 @@ static void efx_ethtool_self_test(struct net_device *net_dev, /* We need rx buffers and interrupts. 
*/ already_up = (efx->net_dev->flags & IFF_UP); if (!already_up) { - rc = dev_open(efx->net_dev); + rc = dev_open(efx->net_dev, NULL); if (rc) { netif_err(efx, drv, efx->net_dev, "failed opening device.\n"); diff --git a/drivers/net/ethernet/sfc/falcon/ethtool.c b/drivers/net/ethernet/sfc/falcon/ethtool.c index 1ccdb7a82e2a..72cedec945c1 100644 --- a/drivers/net/ethernet/sfc/falcon/ethtool.c +++ b/drivers/net/ethernet/sfc/falcon/ethtool.c @@ -517,7 +517,7 @@ static void ef4_ethtool_self_test(struct net_device *net_dev, /* We need rx buffers and interrupts. */ already_up = (efx->net_dev->flags & IFF_UP); if (!already_up) { - rc = dev_open(efx->net_dev); + rc = dev_open(efx->net_dev, NULL); if (rc) { netif_err(efx, drv, efx->net_dev, "failed opening device.\n"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c728ed1375b2..d20496f0ebd0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4082,7 +4082,7 @@ static void stmmac_reset_subtask(struct stmmac_priv *priv) set_bit(STMMAC_DOWN, &priv->state); dev_close(priv->dev); - dev_open(priv->dev); + dev_open(priv->dev, NULL); clear_bit(STMMAC_DOWN, &priv->state); clear_bit(STMMAC_RESETING, &priv->state); rtnl_unlock(); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 85936ed9e952..c65620adab52 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -137,7 +137,7 @@ static int netvsc_open(struct net_device *net) * slave as up. If open fails, then slave will be * still be offline (and not used). 
*/ - ret = dev_open(vf_netdev); + ret = dev_open(vf_netdev, NULL); if (ret) netdev_warn(net, "unable to open slave: %s: %d\n", @@ -2002,7 +2002,7 @@ static void __netvsc_vf_setup(struct net_device *ndev, netif_addr_unlock_bh(ndev); if (netif_running(ndev)) { - ret = dev_open(vf_netdev); + ret = dev_open(vf_netdev, NULL); if (ret) netdev_warn(vf_netdev, "unable to open: %d\n", ret); diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index e964d312f4ca..ed1166adaa2f 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -40,14 +40,14 @@ static int net_failover_open(struct net_device *dev) primary_dev = rtnl_dereference(nfo_info->primary_dev); if (primary_dev) { - err = dev_open(primary_dev); + err = dev_open(primary_dev, NULL); if (err) goto err_primary_open; } standby_dev = rtnl_dereference(nfo_info->standby_dev); if (standby_dev) { - err = dev_open(standby_dev); + err = dev_open(standby_dev, NULL); if (err) goto err_standby_open; } @@ -517,7 +517,7 @@ static int net_failover_slave_register(struct net_device *slave_dev, dev_hold(slave_dev); if (netif_running(failover_dev)) { - err = dev_open(slave_dev); + err = dev_open(slave_dev, NULL); if (err && (err != -EBUSY)) { netdev_err(failover_dev, "Opening slave %s failed err:%d\n", slave_dev->name, err); @@ -680,7 +680,7 @@ static int net_failover_slave_name_change(struct net_device *slave_dev, /* We need to bring up the slave after the rename by udev in case * open failed with EBUSY when it was registered. 
*/ - dev_open(slave_dev); + dev_open(slave_dev, NULL); return 0; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 364f514d56d8..93576e0240dd 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1212,7 +1212,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_port_enter; } - err = dev_open(port_dev); + err = dev_open(port_dev, extack); if (err) { netdev_dbg(dev, "Device %s opening failed\n", portname); diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c index 012930d35434..b0e7c0a0617e 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_main.c +++ b/drivers/net/wireless/intersil/hostap/hostap_main.c @@ -690,7 +690,7 @@ static int prism2_open(struct net_device *dev) /* Master radio interface is needed for all operation, so open * it automatically when any virtual net_device is opened. */ local->master_dev_auto_open = 1; - dev_open(local->dev); + dev_open(local->dev, NULL); } netif_device_attach(dev); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2836231c1c5d..f108d4b44605 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1007,7 +1007,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) qeth_l2_set_rx_mode(card->dev); } else { rtnl_lock(); - dev_open(card->dev); + dev_open(card->dev, NULL); rtnl_unlock(); } } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index eca68da39d05..42a7cdc59b76 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2417,7 +2417,7 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) __qeth_l3_open(card->dev); qeth_l3_set_rx_mode(card->dev); } else { - dev_open(card->dev); + dev_open(card->dev, NULL); } rtnl_unlock(); } diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c 
b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index 4fa37d6e598b..daabaceeea52 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -1172,7 +1172,7 @@ static int ethsw_open(struct ethsw_core *ethsw) for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { port_priv = ethsw->ports[i]; - err = dev_open(port_priv->netdev); + err = dev_open(port_priv->netdev, NULL); if (err) { netdev_err(port_priv->netdev, "dev_open err %d\n", err); return err; diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c index 3647b8f1ed28..5eeb4b93b45b 100644 --- a/drivers/staging/unisys/visornic/visornic_main.c +++ b/drivers/staging/unisys/visornic/visornic_main.c @@ -2095,7 +2095,7 @@ static int visornic_resume(struct visor_device *dev, mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2)); rtnl_lock(); - dev_open(netdev); + dev_open(netdev, NULL); rtnl_unlock(); complete_func(dev, 0); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94fb2e12f117..d79be3055f5f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2605,7 +2605,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); -int dev_open(struct net_device *dev); +int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); void dev_close(struct net_device *dev); void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 828e87fe8027..9d79c7de234a 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -607,7 +607,7 @@ static void ifup(struct net_device *netdev) int err; rtnl_lock(); - err = dev_open(netdev); + err = dev_open(netdev, 
NULL); if (err < 0) BT_INFO("iface %s cannot be opened (%d)", netdev->name, err); rtnl_unlock(); diff --git a/net/core/dev.c b/net/core/dev.c index 04a6b7100aac..b801c1aafd70 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1406,7 +1406,8 @@ static int __dev_open(struct net_device *dev) /** * dev_open - prepare an interface for use. - * @dev: device to open + * @dev: device to open + * @extack: netlink extended ack * * Takes a device from down to up state. The device's private open * function is invoked and then the multicast lists are loaded. Finally @@ -1416,7 +1417,7 @@ static int __dev_open(struct net_device *dev) * Calling this function on an active interface is a nop. On a failure * a negative errno code is returned. */ -int dev_open(struct net_device *dev) +int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) { int ret; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2b9fdbc43205..36a2b63ffd6d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -663,7 +663,7 @@ int netpoll_setup(struct netpoll *np) np_info(np, "device %s not up yet, forcing it\n", np->dev_name); - err = dev_open(ndev); + err = dev_open(ndev, NULL); if (err) { np_err(np, "failed to open %s\n", ndev->name); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5cbc749a50aa..ea04e38f56e9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -506,7 +506,7 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) dev->flags |= IFF_MULTICAST; if (!ipmr_init_vif_indev(dev)) goto failure; - if (dev_open(dev)) + if (dev_open(dev, NULL)) goto failure; dev_hold(dev); } @@ -589,7 +589,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) if (!ipmr_init_vif_indev(dev)) goto failure; - if (dev_open(dev)) + if (dev_open(dev, NULL)) goto failure; dev_hold(dev); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 045597b9a7c0..521e471f1cf9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2820,7 
+2820,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) dev = __dev_get_by_name(net, p.name); if (!dev) goto err_exit; - err = dev_open(dev); + err = dev_open(dev, NULL); } } #endif diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e2ea691e42c6..8c63494400c4 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -655,7 +655,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) return NULL; } - if (dev_open(dev)) + if (dev_open(dev, NULL)) goto failure; dev_hold(dev); -- cgit v1.2.3 From 567c5e13be5cc74d24f5eb54cf353c2e2277189b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 6 Dec 2018 17:05:42 +0000 Subject: net: core: dev: Add extack argument to dev_change_flags() In order to pass extack together with NETDEV_PRE_UP notifications, it's necessary to route the extack to __dev_open() from diverse (possibly indirect) callers. One prominent API through which the notification is invoked is dev_change_flags(). Therefore extend dev_change_flags() with an extra extack argument and update all users. Most of the calls end up just encoding NULL, but several sites (VLAN, ipvlan, VRF, rtnetlink) do have extack available. Since the function declaration line is changed anyway, name the other function arguments to placate checkpatch. Signed-off-by: Petr Machata Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 6 +++--- drivers/net/hyperv/netvsc_drv.c | 2 +- drivers/net/ipvlan/ipvlan_main.c | 12 ++++++++---- drivers/net/vrf.c | 4 ++-- include/linux/netdevice.h | 3 ++- net/8021q/vlan.c | 4 +++- net/core/dev.c | 4 +++- net/core/dev_ioctl.c | 2 +- net/core/net-sysfs.c | 2 +- net/core/rtnetlink.c | 3 ++- net/ipv4/devinet.c | 2 +- net/ipv4/ipconfig.c | 6 +++--- net/openvswitch/vport-geneve.c | 2 +- net/openvswitch/vport-gre.c | 2 +- net/openvswitch/vport-vxlan.c | 2 +- 15 files changed, 33 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 8710214594d8..6214d8c0d546 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -167,7 +167,7 @@ int ipoib_open(struct net_device *dev) if (flags & IFF_UP) continue; - dev_change_flags(cpriv->dev, flags | IFF_UP); + dev_change_flags(cpriv->dev, flags | IFF_UP, NULL); } up_read(&priv->vlan_rwsem); } @@ -207,7 +207,7 @@ static int ipoib_stop(struct net_device *dev) if (!(flags & IFF_UP)) continue; - dev_change_flags(cpriv->dev, flags & ~IFF_UP); + dev_change_flags(cpriv->dev, flags & ~IFF_UP, NULL); } up_read(&priv->vlan_rwsem); } @@ -1823,7 +1823,7 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev) * running ensures the it will not add more work. 
*/ rtnl_lock(); - dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); + dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP, NULL); rtnl_unlock(); /* ipoib_event() cannot be running once this returns */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c65620adab52..18b5584d6377 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1993,7 +1993,7 @@ static void __netvsc_vf_setup(struct net_device *ndev, "unable to change mtu to %u\n", ndev->mtu); /* set multicast etc flags on VF */ - dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE); + dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE, NULL); /* sync address list from ndev to VF */ netif_addr_lock_bh(ndev); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 14f1cbd3b96f..c3d3e458f541 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -85,10 +85,12 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, flags = ipvlan->dev->flags; if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) { err = dev_change_flags(ipvlan->dev, - flags | IFF_NOARP); + flags | IFF_NOARP, + extack); } else { err = dev_change_flags(ipvlan->dev, - flags & ~IFF_NOARP); + flags & ~IFF_NOARP, + extack); } if (unlikely(err)) goto fail; @@ -117,9 +119,11 @@ fail: flags = ipvlan->dev->flags; if (port->mode == IPVLAN_MODE_L3 || port->mode == IPVLAN_MODE_L3S) - dev_change_flags(ipvlan->dev, flags | IFF_NOARP); + dev_change_flags(ipvlan->dev, flags | IFF_NOARP, + NULL); else - dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP); + dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP, + NULL); } return err; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 1e9f2dc0de07..95909e262ba4 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -756,9 +756,9 @@ static void cycle_netdev(struct net_device *dev, if (!netif_running(dev)) return; - ret = dev_change_flags(dev, flags & ~IFF_UP); + 
ret = dev_change_flags(dev, flags & ~IFF_UP, extack); if (ret >= 0) - ret = dev_change_flags(dev, flags); + ret = dev_change_flags(dev, flags, extack); if (ret < 0) { netdev_err(dev, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d79be3055f5f..18cf464450ee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3612,7 +3612,8 @@ int dev_ifconf(struct net *net, struct ifconf *, int); int dev_ethtool(struct net *net, struct ifreq *); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *, unsigned int flags); -int dev_change_flags(struct net_device *, unsigned int); +int dev_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack); void __dev_notify_flags(struct net_device *, unsigned int old_flags, unsigned int gchanges); int dev_change_name(struct net_device *, const char *); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index aef1a977279c..dc4411165e43 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -358,6 +358,7 @@ static int __vlan_device_event(struct net_device *dev, unsigned long event) static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vlan_group *grp; struct vlan_info *vlan_info; @@ -460,7 +461,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) - dev_change_flags(vlandev, flgs | IFF_UP); + dev_change_flags(vlandev, flgs | IFF_UP, + extack); netif_stacked_transfer_operstate(dev, vlandev); } break; diff --git a/net/core/dev.c b/net/core/dev.c index b801c1aafd70..8bba6f98b545 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7595,11 +7595,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, * dev_change_flags - 
change device settings * @dev: device * @flags: device state flags + * @extack: netlink extended ack * * Change settings on device based state flags. The flags are * in the userspace exported format. */ -int dev_change_flags(struct net_device *dev, unsigned int flags) +int dev_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack) { int ret; unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 90e8aa36881e..da273ec3cc57 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -234,7 +234,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) switch (cmd) { case SIOCSIFFLAGS: /* Set interface flags */ - return dev_change_flags(dev, ifr->ifr_flags); + return dev_change_flags(dev, ifr->ifr_flags, NULL); case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index bd67c4d0fcfd..ff9fd2bb4ce4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -337,7 +337,7 @@ NETDEVICE_SHOW_RW(mtu, fmt_dec); static int change_flags(struct net_device *dev, unsigned long new_flags) { - return dev_change_flags(dev, (unsigned int)new_flags); + return dev_change_flags(dev, (unsigned int)new_flags, NULL); } static ssize_t flags_store(struct device *dev, struct device_attribute *attr, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 98876cd1e36c..4c9e4e187600 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2489,7 +2489,8 @@ static int do_setlink(const struct sk_buff *skb, } if (ifm->ifi_flags || ifm->ifi_change) { - err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm)); + err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), + extack); if (err < 0) goto errout; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a34602ae27de..5b9b6d497f71 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ 
-1100,7 +1100,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) inet_del_ifa(in_dev, ifap, 1); break; } - ret = dev_change_flags(dev, ifr->ifr_flags); + ret = dev_change_flags(dev, ifr->ifr_flags, NULL); break; case SIOCSIFADDR: /* Set interface address (and family) */ diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 88212615bf4c..55757764c381 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -220,7 +220,7 @@ static int __init ic_open_devs(void) for_each_netdev(&init_net, dev) { if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev)) continue; - if (dev_change_flags(dev, dev->flags | IFF_UP) < 0) + if (dev_change_flags(dev, dev->flags | IFF_UP, NULL) < 0) pr_err("IP-Config: Failed to open %s\n", dev->name); } @@ -238,7 +238,7 @@ static int __init ic_open_devs(void) if (ic_proto_enabled && !able) continue; oflags = dev->flags; - if (dev_change_flags(dev, oflags | IFF_UP) < 0) { + if (dev_change_flags(dev, oflags | IFF_UP, NULL) < 0) { pr_err("IP-Config: Failed to open %s\n", dev->name); continue; @@ -315,7 +315,7 @@ static void __init ic_close_devs(void) dev = d->dev; if (d != ic_dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); - dev_change_flags(dev, d->flags); + dev_change_flags(dev, d->flags, NULL); } kfree(d); } diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 5aaf3babfc3f..acb6077b7478 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -93,7 +93,7 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) return ERR_CAST(dev); } - err = dev_change_flags(dev, dev->flags | IFF_UP); + err = dev_change_flags(dev, dev->flags | IFF_UP, NULL); if (err < 0) { rtnl_delete_link(dev); rtnl_unlock(); diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 0e72d95b0e8f..c38a62464b85 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -68,7 +68,7 @@ static struct 
vport *gre_tnl_create(const struct vport_parms *parms) return ERR_CAST(dev); } - err = dev_change_flags(dev, dev->flags | IFF_UP); + err = dev_change_flags(dev, dev->flags | IFF_UP, NULL); if (err < 0) { rtnl_delete_link(dev); rtnl_unlock(); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 7e6301b2ec4d..8f16f11f7ad3 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -131,7 +131,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) return ERR_CAST(dev); } - err = dev_change_flags(dev, dev->flags | IFF_UP); + err = dev_change_flags(dev, dev->flags | IFF_UP, NULL); if (err < 0) { rtnl_delete_link(dev); rtnl_unlock(); -- cgit v1.2.3 From 6d0403216d030e5623de3911168fceeaac2e14d6 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 6 Dec 2018 17:05:43 +0000 Subject: net: core: dev: Add extack argument to __dev_change_flags() In order to pass extack together with NETDEV_PRE_UP notifications, it's necessary to route the extack to __dev_open() from diverse (possibly indirect) callers. The last missing API is __dev_change_flags(). Therefore extend __dev_change_flags() with an extra extack argument and update the two existing users. Since the function declaration line is changed anyway, name the struct net_device argument to placate checkpatch. Signed-off-by: Petr Machata Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 ++- net/core/dev.c | 5 +++-- net/core/rtnetlink.c | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 18cf464450ee..fc6ba71513be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3611,7 +3611,8 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, int dev_ifconf(struct net *net, struct ifconf *, int); int dev_ethtool(struct net *net, struct ifreq *); unsigned int dev_get_flags(const struct net_device *); -int __dev_change_flags(struct net_device *, unsigned int flags); +int __dev_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack); int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); void __dev_notify_flags(struct net_device *, unsigned int old_flags, diff --git a/net/core/dev.c b/net/core/dev.c index 8bba6f98b545..b37e320def13 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7498,7 +7498,8 @@ unsigned int dev_get_flags(const struct net_device *dev) } EXPORT_SYMBOL(dev_get_flags); -int __dev_change_flags(struct net_device *dev, unsigned int flags) +int __dev_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack) { unsigned int old_flags = dev->flags; int ret; @@ -7606,7 +7607,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, int ret; unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; - ret = __dev_change_flags(dev, flags); + ret = __dev_change_flags(dev, flags, extack); if (ret < 0) return ret; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4c9e4e187600..91a0f7477f8e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2871,7 +2871,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) old_flags = dev->flags; if (ifm && (ifm->ifi_flags || ifm->ifi_change)) { - 
err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm)); + err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), + NULL); if (err < 0) return err; } -- cgit v1.2.3 From 3fd3c80acc172fcaab2356c15e5e3c05758a9d51 Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Fri, 30 Nov 2018 13:22:04 +0200 Subject: net/mlx5: Expose packet based credit mode Packet based credit mode bit determines whether the credit mode is done per message or packet. Expose the QP creation flag and the HCA capability. Signed-off-by: Danit Goldberg Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ece1b606c909..91d6e85e3cef 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1047,7 +1047,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vector_calc[0x1]; u8 umr_ptr_rlky[0x1]; u8 imaicl[0x1]; - u8 reserved_at_232[0x4]; + u8 qp_packet_based[0x1]; + u8 reserved_at_233[0x3]; u8 qkv[0x1]; u8 pkv[0x1]; u8 set_deth_sqpn[0x1]; @@ -2259,7 +2260,8 @@ struct mlx5_ifc_qpc_bits { u8 st[0x8]; u8 reserved_at_10[0x3]; u8 pm_state[0x2]; - u8 reserved_at_15[0x3]; + u8 reserved_at_15[0x1]; + u8 req_e2e_credit_mode[0x2]; u8 offload_type[0x4]; u8 end_padding_mode[0x2]; u8 reserved_at_1e[0x2]; -- cgit v1.2.3 From 2ced26078fcff26db532d6300a1b5f8ffd11a5e1 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:16 +0000 Subject: crypto: user - made crypto_user_stat optional Even if CRYPTO_STATS is set to n, some parts of CRYPTO_STATS are still compiled. This patch makes all parts of crypto_user_stat compile out in that case. 
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- crypto/Makefile | 3 ++- crypto/algapi.c | 2 ++ include/crypto/internal/cryptouser.h | 17 +++++++++++++++++ include/linux/crypto.h | 2 ++ 4 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/crypto/Makefile b/crypto/Makefile index 5e789dc2d4fd..799ed5e94606 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -54,7 +54,8 @@ cryptomgr-y := algboss.o testmgr.o obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o obj-$(CONFIG_CRYPTO_USER) += crypto_user.o -crypto_user-y := crypto_user_base.o crypto_user_stat.o +crypto_user-y := crypto_user_base.o +crypto_user-$(CONFIG_CRYPTO_STATS) += crypto_user_stat.o obj-$(CONFIG_CRYPTO_CMAC) += cmac.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_VMAC) += vmac.o diff --git a/crypto/algapi.c b/crypto/algapi.c index 2545c5f89c4c..f5396c88e8cd 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -258,6 +258,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) list_add(&alg->cra_list, &crypto_alg_list); list_add(&larval->alg.cra_list, &crypto_alg_list); +#ifdef CONFIG_CRYPTO_STATS atomic_set(&alg->encrypt_cnt, 0); atomic_set(&alg->decrypt_cnt, 0); atomic64_set(&alg->encrypt_tlen, 0); @@ -265,6 +266,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) atomic_set(&alg->verify_cnt, 0); atomic_set(&alg->cipher_err_cnt, 0); atomic_set(&alg->sign_cnt, 0); +#endif out: return larval; diff --git a/include/crypto/internal/cryptouser.h b/include/crypto/internal/cryptouser.h index 8db299c25566..3492ab42eefb 100644 --- a/include/crypto/internal/cryptouser.h +++ b/include/crypto/internal/cryptouser.h @@ -3,6 +3,23 @@ struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact); +#ifdef CONFIG_CRYPTO_STATS int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb); int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs); int 
crypto_dump_reportstat_done(struct netlink_callback *cb); +#else +static int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -ENOTSUPP; +} + +static int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs) +{ + return -ENOTSUPP; +} + +static int crypto_dump_reportstat_done(struct netlink_callback *cb) +{ + return -ENOTSUPP; +} +#endif diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 3634ad6fe202..3e05053b8d57 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -515,6 +515,7 @@ struct crypto_alg { struct module *cra_module; +#ifdef CONFIG_CRYPTO_STATS union { atomic_t encrypt_cnt; atomic_t compress_cnt; @@ -552,6 +553,7 @@ struct crypto_alg { atomic_t compute_shared_secret_cnt; }; atomic_t sign_cnt; +#endif /* CONFIG_CRYPTO_STATS */ } CRYPTO_MINALIGN_ATTR; -- cgit v1.2.3 From 6e8e72cd206e2ba68801e4f2490f639d41808c8d Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:18 +0000 Subject: crypto: user - convert all stats from u32 to u64 All the 32-bit fields need to be 64-bit. In some cases, UINT32_MAX crypto operations can be done in seconds. 
Reported-by: Eric Biggers Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- crypto/algapi.c | 10 ++-- crypto/crypto_user_stat.c | 114 +++++++++++++++++++--------------------- include/crypto/acompress.h | 8 +-- include/crypto/aead.h | 8 +-- include/crypto/akcipher.h | 16 +++--- include/crypto/hash.h | 6 +-- include/crypto/kpp.h | 12 ++--- include/crypto/rng.h | 8 +-- include/crypto/skcipher.h | 8 +-- include/linux/crypto.h | 46 ++++++++-------- include/uapi/linux/cryptouser.h | 38 +++++++------- 11 files changed, 133 insertions(+), 141 deletions(-) (limited to 'include/linux') diff --git a/crypto/algapi.c b/crypto/algapi.c index f5396c88e8cd..42fe316f80ee 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -259,13 +259,13 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) list_add(&larval->alg.cra_list, &crypto_alg_list); #ifdef CONFIG_CRYPTO_STATS - atomic_set(&alg->encrypt_cnt, 0); - atomic_set(&alg->decrypt_cnt, 0); + atomic64_set(&alg->encrypt_cnt, 0); + atomic64_set(&alg->decrypt_cnt, 0); atomic64_set(&alg->encrypt_tlen, 0); atomic64_set(&alg->decrypt_tlen, 0); - atomic_set(&alg->verify_cnt, 0); - atomic_set(&alg->cipher_err_cnt, 0); - atomic_set(&alg->sign_cnt, 0); + atomic64_set(&alg->verify_cnt, 0); + atomic64_set(&alg->cipher_err_cnt, 0); + atomic64_set(&alg->sign_cnt, 0); #endif out: diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c index a6fb2e6f618d..352569f378a0 100644 --- a/crypto/crypto_user_stat.c +++ b/crypto/crypto_user_stat.c @@ -35,22 +35,21 @@ static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_stat raead; u64 v64; - u32 v32; memset(&raead, 0, sizeof(raead)); strscpy(raead.type, "aead", sizeof(raead.type)); - v32 = atomic_read(&alg->encrypt_cnt); - raead.stat_encrypt_cnt = v32; + v64 = atomic64_read(&alg->encrypt_cnt); + raead.stat_encrypt_cnt = v64; v64 = atomic64_read(&alg->encrypt_tlen); raead.stat_encrypt_tlen = v64; - v32 = 
atomic_read(&alg->decrypt_cnt); - raead.stat_decrypt_cnt = v32; + v64 = atomic64_read(&alg->decrypt_cnt); + raead.stat_decrypt_cnt = v64; v64 = atomic64_read(&alg->decrypt_tlen); raead.stat_decrypt_tlen = v64; - v32 = atomic_read(&alg->aead_err_cnt); - raead.stat_aead_err_cnt = v32; + v64 = atomic64_read(&alg->aead_err_cnt); + raead.stat_aead_err_cnt = v64; return nla_put(skb, CRYPTOCFGA_STAT_AEAD, sizeof(raead), &raead); } @@ -59,22 +58,21 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_stat rcipher; u64 v64; - u32 v32; memset(&rcipher, 0, sizeof(rcipher)); strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); - v32 = atomic_read(&alg->encrypt_cnt); - rcipher.stat_encrypt_cnt = v32; + v64 = atomic64_read(&alg->encrypt_cnt); + rcipher.stat_encrypt_cnt = v64; v64 = atomic64_read(&alg->encrypt_tlen); rcipher.stat_encrypt_tlen = v64; - v32 = atomic_read(&alg->decrypt_cnt); - rcipher.stat_decrypt_cnt = v32; + v64 = atomic64_read(&alg->decrypt_cnt); + rcipher.stat_decrypt_cnt = v64; v64 = atomic64_read(&alg->decrypt_tlen); rcipher.stat_decrypt_tlen = v64; - v32 = atomic_read(&alg->cipher_err_cnt); - rcipher.stat_cipher_err_cnt = v32; + v64 = atomic64_read(&alg->cipher_err_cnt); + rcipher.stat_cipher_err_cnt = v64; return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); } @@ -83,21 +81,20 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_stat rcomp; u64 v64; - u32 v32; memset(&rcomp, 0, sizeof(rcomp)); strscpy(rcomp.type, "compression", sizeof(rcomp.type)); - v32 = atomic_read(&alg->compress_cnt); - rcomp.stat_compress_cnt = v32; + v64 = atomic64_read(&alg->compress_cnt); + rcomp.stat_compress_cnt = v64; v64 = atomic64_read(&alg->compress_tlen); rcomp.stat_compress_tlen = v64; - v32 = atomic_read(&alg->decompress_cnt); - rcomp.stat_decompress_cnt = v32; + v64 = atomic64_read(&alg->decompress_cnt); + rcomp.stat_decompress_cnt = v64; v64 = 
atomic64_read(&alg->decompress_tlen); rcomp.stat_decompress_tlen = v64; - v32 = atomic_read(&alg->cipher_err_cnt); - rcomp.stat_compress_err_cnt = v32; + v64 = atomic64_read(&alg->cipher_err_cnt); + rcomp.stat_compress_err_cnt = v64; return nla_put(skb, CRYPTOCFGA_STAT_COMPRESS, sizeof(rcomp), &rcomp); } @@ -106,21 +103,20 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_stat racomp; u64 v64; - u32 v32; memset(&racomp, 0, sizeof(racomp)); strscpy(racomp.type, "acomp", sizeof(racomp.type)); - v32 = atomic_read(&alg->compress_cnt); - racomp.stat_compress_cnt = v32; + v64 = atomic64_read(&alg->compress_cnt); + racomp.stat_compress_cnt = v64; v64 = atomic64_read(&alg->compress_tlen); racomp.stat_compress_tlen = v64; - v32 = atomic_read(&alg->decompress_cnt); - racomp.stat_decompress_cnt = v32; + v64 = atomic64_read(&alg->decompress_cnt); + racomp.stat_decompress_cnt = v64; v64 = atomic64_read(&alg->decompress_tlen); racomp.stat_decompress_tlen = v64; - v32 = atomic_read(&alg->cipher_err_cnt); - racomp.stat_compress_err_cnt = v32; + v64 = atomic64_read(&alg->cipher_err_cnt); + racomp.stat_compress_err_cnt = v64; return nla_put(skb, CRYPTOCFGA_STAT_ACOMP, sizeof(racomp), &racomp); } @@ -129,25 +125,24 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_stat rakcipher; u64 v64; - u32 v32; memset(&rakcipher, 0, sizeof(rakcipher)); strscpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); - v32 = atomic_read(&alg->encrypt_cnt); - rakcipher.stat_encrypt_cnt = v32; + v64 = atomic64_read(&alg->encrypt_cnt); + rakcipher.stat_encrypt_cnt = v64; v64 = atomic64_read(&alg->encrypt_tlen); rakcipher.stat_encrypt_tlen = v64; - v32 = atomic_read(&alg->decrypt_cnt); - rakcipher.stat_decrypt_cnt = v32; + v64 = atomic64_read(&alg->decrypt_cnt); + rakcipher.stat_decrypt_cnt = v64; v64 = atomic64_read(&alg->decrypt_tlen); rakcipher.stat_decrypt_tlen = v64; - v32 = atomic_read(&alg->sign_cnt); - 
rakcipher.stat_sign_cnt = v32; - v32 = atomic_read(&alg->verify_cnt); - rakcipher.stat_verify_cnt = v32; - v32 = atomic_read(&alg->akcipher_err_cnt); - rakcipher.stat_akcipher_err_cnt = v32; + v64 = atomic64_read(&alg->sign_cnt); + rakcipher.stat_sign_cnt = v64; + v64 = atomic64_read(&alg->verify_cnt); + rakcipher.stat_verify_cnt = v64; + v64 = atomic64_read(&alg->akcipher_err_cnt); + rakcipher.stat_akcipher_err_cnt = v64; return nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER, sizeof(rakcipher), &rakcipher); @@ -156,19 +151,19 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_stat rkpp; - u32 v; + u64 v; memset(&rkpp, 0, sizeof(rkpp)); strscpy(rkpp.type, "kpp", sizeof(rkpp.type)); - v = atomic_read(&alg->setsecret_cnt); + v = atomic64_read(&alg->setsecret_cnt); rkpp.stat_setsecret_cnt = v; - v = atomic_read(&alg->generate_public_key_cnt); + v = atomic64_read(&alg->generate_public_key_cnt); rkpp.stat_generate_public_key_cnt = v; - v = atomic_read(&alg->compute_shared_secret_cnt); + v = atomic64_read(&alg->compute_shared_secret_cnt); rkpp.stat_compute_shared_secret_cnt = v; - v = atomic_read(&alg->kpp_err_cnt); + v = atomic64_read(&alg->kpp_err_cnt); rkpp.stat_kpp_err_cnt = v; return nla_put(skb, CRYPTOCFGA_STAT_KPP, sizeof(rkpp), &rkpp); @@ -178,18 +173,17 @@ static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_stat rhash; u64 v64; - u32 v32; memset(&rhash, 0, sizeof(rhash)); strscpy(rhash.type, "ahash", sizeof(rhash.type)); - v32 = atomic_read(&alg->hash_cnt); - rhash.stat_hash_cnt = v32; + v64 = atomic64_read(&alg->hash_cnt); + rhash.stat_hash_cnt = v64; v64 = atomic64_read(&alg->hash_tlen); rhash.stat_hash_tlen = v64; - v32 = atomic_read(&alg->hash_err_cnt); - rhash.stat_hash_err_cnt = v32; + v64 = atomic64_read(&alg->hash_err_cnt); + rhash.stat_hash_err_cnt = v64; return nla_put(skb, CRYPTOCFGA_STAT_HASH, 
sizeof(rhash), &rhash); } @@ -198,18 +192,17 @@ static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_stat rhash; u64 v64; - u32 v32; memset(&rhash, 0, sizeof(rhash)); strscpy(rhash.type, "shash", sizeof(rhash.type)); - v32 = atomic_read(&alg->hash_cnt); - rhash.stat_hash_cnt = v32; + v64 = atomic64_read(&alg->hash_cnt); + rhash.stat_hash_cnt = v64; v64 = atomic64_read(&alg->hash_tlen); rhash.stat_hash_tlen = v64; - v32 = atomic_read(&alg->hash_err_cnt); - rhash.stat_hash_err_cnt = v32; + v64 = atomic64_read(&alg->hash_err_cnt); + rhash.stat_hash_err_cnt = v64; return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash); } @@ -218,20 +211,19 @@ static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_stat rrng; u64 v64; - u32 v32; memset(&rrng, 0, sizeof(rrng)); strscpy(rrng.type, "rng", sizeof(rrng.type)); - v32 = atomic_read(&alg->generate_cnt); - rrng.stat_generate_cnt = v32; + v64 = atomic64_read(&alg->generate_cnt); + rrng.stat_generate_cnt = v64; v64 = atomic64_read(&alg->generate_tlen); rrng.stat_generate_tlen = v64; - v32 = atomic_read(&alg->seed_cnt); - rrng.stat_seed_cnt = v32; - v32 = atomic_read(&alg->hash_err_cnt); - rrng.stat_rng_err_cnt = v32; + v64 = atomic64_read(&alg->seed_cnt); + rrng.stat_seed_cnt = v64; + v64 = atomic64_read(&alg->hash_err_cnt); + rrng.stat_rng_err_cnt = v64; return nla_put(skb, CRYPTOCFGA_STAT_RNG, sizeof(rrng), &rrng); } diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 22e6f412c595..f79918196811 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -240,9 +240,9 @@ static inline void crypto_stat_compress(struct acomp_req *req, int ret) struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->compress_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->compress_err_cnt); } else { - atomic_inc(&tfm->base.__crt_alg->compress_cnt); 
+ atomic64_inc(&tfm->base.__crt_alg->compress_cnt); atomic64_add(req->slen, &tfm->base.__crt_alg->compress_tlen); } #endif @@ -254,9 +254,9 @@ static inline void crypto_stat_decompress(struct acomp_req *req, int ret) struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->compress_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->compress_err_cnt); } else { - atomic_inc(&tfm->base.__crt_alg->decompress_cnt); + atomic64_inc(&tfm->base.__crt_alg->decompress_cnt); atomic64_add(req->slen, &tfm->base.__crt_alg->decompress_tlen); } #endif diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 0d765d7bfb82..99afd78c665d 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -312,9 +312,9 @@ static inline void crypto_stat_aead_encrypt(struct aead_request *req, int ret) struct crypto_aead *tfm = crypto_aead_reqtfm(req); if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->aead_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->aead_err_cnt); } else { - atomic_inc(&tfm->base.__crt_alg->encrypt_cnt); + atomic64_inc(&tfm->base.__crt_alg->encrypt_cnt); atomic64_add(req->cryptlen, &tfm->base.__crt_alg->encrypt_tlen); } #endif @@ -326,9 +326,9 @@ static inline void crypto_stat_aead_decrypt(struct aead_request *req, int ret) struct crypto_aead *tfm = crypto_aead_reqtfm(req); if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->aead_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->aead_err_cnt); } else { - atomic_inc(&tfm->base.__crt_alg->decrypt_cnt); + atomic64_inc(&tfm->base.__crt_alg->decrypt_cnt); atomic64_add(req->cryptlen, &tfm->base.__crt_alg->decrypt_tlen); } #endif diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h index afac71119396..3dc05cf7e0a9 100644 --- a/include/crypto/akcipher.h +++ b/include/crypto/akcipher.h @@ -278,9 +278,9 @@ static inline void crypto_stat_akcipher_encrypt(struct 
akcipher_request *req, struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->akcipher_err_cnt); } else { - atomic_inc(&tfm->base.__crt_alg->encrypt_cnt); + atomic64_inc(&tfm->base.__crt_alg->encrypt_cnt); atomic64_add(req->src_len, &tfm->base.__crt_alg->encrypt_tlen); } #endif @@ -293,9 +293,9 @@ static inline void crypto_stat_akcipher_decrypt(struct akcipher_request *req, struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->akcipher_err_cnt); } else { - atomic_inc(&tfm->base.__crt_alg->decrypt_cnt); + atomic64_inc(&tfm->base.__crt_alg->decrypt_cnt); atomic64_add(req->src_len, &tfm->base.__crt_alg->decrypt_tlen); } #endif @@ -308,9 +308,9 @@ static inline void crypto_stat_akcipher_sign(struct akcipher_request *req, struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->akcipher_err_cnt); else - atomic_inc(&tfm->base.__crt_alg->sign_cnt); + atomic64_inc(&tfm->base.__crt_alg->sign_cnt); #endif } @@ -321,9 +321,9 @@ static inline void crypto_stat_akcipher_verify(struct akcipher_request *req, struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->akcipher_err_cnt); else - atomic_inc(&tfm->base.__crt_alg->verify_cnt); + atomic64_inc(&tfm->base.__crt_alg->verify_cnt); #endif } diff --git a/include/crypto/hash.h b/include/crypto/hash.h index bc7796600338..52920bed05ba 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -418,7 +418,7 @@ static inline void crypto_stat_ahash_update(struct 
ahash_request *req, int ret) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic_inc(&tfm->base.__crt_alg->hash_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->hash_err_cnt); else atomic64_add(req->nbytes, &tfm->base.__crt_alg->hash_tlen); #endif @@ -430,9 +430,9 @@ static inline void crypto_stat_ahash_final(struct ahash_request *req, int ret) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->hash_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->hash_err_cnt); } else { - atomic_inc(&tfm->base.__crt_alg->hash_cnt); + atomic64_inc(&tfm->base.__crt_alg->hash_cnt); atomic64_add(req->nbytes, &tfm->base.__crt_alg->hash_tlen); } #endif diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index f517ba6d3a27..bd5103a80919 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -272,9 +272,9 @@ static inline void crypto_stat_kpp_set_secret(struct crypto_kpp *tfm, int ret) { #ifdef CONFIG_CRYPTO_STATS if (ret) - atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->kpp_err_cnt); else - atomic_inc(&tfm->base.__crt_alg->setsecret_cnt); + atomic64_inc(&tfm->base.__crt_alg->setsecret_cnt); #endif } @@ -285,9 +285,9 @@ static inline void crypto_stat_kpp_generate_public_key(struct kpp_request *req, struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); if (ret) - atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->kpp_err_cnt); else - atomic_inc(&tfm->base.__crt_alg->generate_public_key_cnt); + atomic64_inc(&tfm->base.__crt_alg->generate_public_key_cnt); #endif } @@ -298,9 +298,9 @@ static inline void crypto_stat_kpp_compute_shared_secret(struct kpp_request *req struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); if (ret) - atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->kpp_err_cnt); else - 
atomic_inc(&tfm->base.__crt_alg->compute_shared_secret_cnt); + atomic64_inc(&tfm->base.__crt_alg->compute_shared_secret_cnt); #endif } diff --git a/include/crypto/rng.h b/include/crypto/rng.h index 6d258f5b68f1..966615bba45e 100644 --- a/include/crypto/rng.h +++ b/include/crypto/rng.h @@ -126,9 +126,9 @@ static inline void crypto_stat_rng_seed(struct crypto_rng *tfm, int ret) { #ifdef CONFIG_CRYPTO_STATS if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic_inc(&tfm->base.__crt_alg->rng_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->rng_err_cnt); else - atomic_inc(&tfm->base.__crt_alg->seed_cnt); + atomic64_inc(&tfm->base.__crt_alg->seed_cnt); #endif } @@ -137,9 +137,9 @@ static inline void crypto_stat_rng_generate(struct crypto_rng *tfm, { #ifdef CONFIG_CRYPTO_STATS if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&tfm->base.__crt_alg->rng_err_cnt); + atomic64_inc(&tfm->base.__crt_alg->rng_err_cnt); } else { - atomic_inc(&tfm->base.__crt_alg->generate_cnt); + atomic64_inc(&tfm->base.__crt_alg->generate_cnt); atomic64_add(dlen, &tfm->base.__crt_alg->generate_tlen); } #endif diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 925f547cdcfa..dff54731ddf4 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -491,9 +491,9 @@ static inline void crypto_stat_skcipher_encrypt(struct skcipher_request *req, { #ifdef CONFIG_CRYPTO_STATS if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&alg->cipher_err_cnt); + atomic64_inc(&alg->cipher_err_cnt); } else { - atomic_inc(&alg->encrypt_cnt); + atomic64_inc(&alg->encrypt_cnt); atomic64_add(req->cryptlen, &alg->encrypt_tlen); } #endif @@ -504,9 +504,9 @@ static inline void crypto_stat_skcipher_decrypt(struct skcipher_request *req, { #ifdef CONFIG_CRYPTO_STATS if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&alg->cipher_err_cnt); + atomic64_inc(&alg->cipher_err_cnt); } else { - atomic_inc(&alg->decrypt_cnt); + atomic64_inc(&alg->decrypt_cnt); 
atomic64_add(req->cryptlen, &alg->decrypt_tlen); } #endif diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 3e05053b8d57..b109b50906e7 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -517,11 +517,11 @@ struct crypto_alg { #ifdef CONFIG_CRYPTO_STATS union { - atomic_t encrypt_cnt; - atomic_t compress_cnt; - atomic_t generate_cnt; - atomic_t hash_cnt; - atomic_t setsecret_cnt; + atomic64_t encrypt_cnt; + atomic64_t compress_cnt; + atomic64_t generate_cnt; + atomic64_t hash_cnt; + atomic64_t setsecret_cnt; }; union { atomic64_t encrypt_tlen; @@ -530,29 +530,29 @@ struct crypto_alg { atomic64_t hash_tlen; }; union { - atomic_t akcipher_err_cnt; - atomic_t cipher_err_cnt; - atomic_t compress_err_cnt; - atomic_t aead_err_cnt; - atomic_t hash_err_cnt; - atomic_t rng_err_cnt; - atomic_t kpp_err_cnt; + atomic64_t akcipher_err_cnt; + atomic64_t cipher_err_cnt; + atomic64_t compress_err_cnt; + atomic64_t aead_err_cnt; + atomic64_t hash_err_cnt; + atomic64_t rng_err_cnt; + atomic64_t kpp_err_cnt; }; union { - atomic_t decrypt_cnt; - atomic_t decompress_cnt; - atomic_t seed_cnt; - atomic_t generate_public_key_cnt; + atomic64_t decrypt_cnt; + atomic64_t decompress_cnt; + atomic64_t seed_cnt; + atomic64_t generate_public_key_cnt; }; union { atomic64_t decrypt_tlen; atomic64_t decompress_tlen; }; union { - atomic_t verify_cnt; - atomic_t compute_shared_secret_cnt; + atomic64_t verify_cnt; + atomic64_t compute_shared_secret_cnt; }; - atomic_t sign_cnt; + atomic64_t sign_cnt; #endif /* CONFIG_CRYPTO_STATS */ } CRYPTO_MINALIGN_ATTR; @@ -983,9 +983,9 @@ static inline void crypto_stat_ablkcipher_encrypt(struct ablkcipher_request *req crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&crt->base->base.__crt_alg->cipher_err_cnt); + atomic64_inc(&crt->base->base.__crt_alg->cipher_err_cnt); } else { - atomic_inc(&crt->base->base.__crt_alg->encrypt_cnt); + 
atomic64_inc(&crt->base->base.__crt_alg->encrypt_cnt); atomic64_add(req->nbytes, &crt->base->base.__crt_alg->encrypt_tlen); } #endif @@ -999,9 +999,9 @@ static inline void crypto_stat_ablkcipher_decrypt(struct ablkcipher_request *req crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic_inc(&crt->base->base.__crt_alg->cipher_err_cnt); + atomic64_inc(&crt->base->base.__crt_alg->cipher_err_cnt); } else { - atomic_inc(&crt->base->base.__crt_alg->decrypt_cnt); + atomic64_inc(&crt->base->base.__crt_alg->decrypt_cnt); atomic64_add(req->nbytes, &crt->base->base.__crt_alg->decrypt_tlen); } #endif diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 6dafbc3e4414..9f8187077ce4 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -79,11 +79,11 @@ struct crypto_user_alg { struct crypto_stat { char type[CRYPTO_MAX_NAME]; union { - __u32 stat_encrypt_cnt; - __u32 stat_compress_cnt; - __u32 stat_generate_cnt; - __u32 stat_hash_cnt; - __u32 stat_setsecret_cnt; + __u64 stat_encrypt_cnt; + __u64 stat_compress_cnt; + __u64 stat_generate_cnt; + __u64 stat_hash_cnt; + __u64 stat_setsecret_cnt; }; union { __u64 stat_encrypt_tlen; @@ -92,29 +92,29 @@ struct crypto_stat { __u64 stat_hash_tlen; }; union { - __u32 stat_akcipher_err_cnt; - __u32 stat_cipher_err_cnt; - __u32 stat_compress_err_cnt; - __u32 stat_aead_err_cnt; - __u32 stat_hash_err_cnt; - __u32 stat_rng_err_cnt; - __u32 stat_kpp_err_cnt; + __u64 stat_akcipher_err_cnt; + __u64 stat_cipher_err_cnt; + __u64 stat_compress_err_cnt; + __u64 stat_aead_err_cnt; + __u64 stat_hash_err_cnt; + __u64 stat_rng_err_cnt; + __u64 stat_kpp_err_cnt; }; union { - __u32 stat_decrypt_cnt; - __u32 stat_decompress_cnt; - __u32 stat_seed_cnt; - __u32 stat_generate_public_key_cnt; + __u64 stat_decrypt_cnt; + __u64 stat_decompress_cnt; + __u64 stat_seed_cnt; + __u64 stat_generate_public_key_cnt; }; union { __u64 stat_decrypt_tlen; 
__u64 stat_decompress_tlen; }; union { - __u32 stat_verify_cnt; - __u32 stat_compute_shared_secret_cnt; + __u64 stat_verify_cnt; + __u64 stat_compute_shared_secret_cnt; }; - __u32 stat_sign_cnt; + __u64 stat_sign_cnt; }; struct crypto_report_larval { -- cgit v1.2.3 From f7d76e05d058b832b373237566cc1af8251371b5 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:21 +0000 Subject: crypto: user - fix use_after_free of struct xxx_request All crypto_stats functions use the struct xxx_request for feeding stats, but in some cases this structure could already be freed. To fix this, the needed parameters (len and alg) are stored before the request is executed. Fixes: cac5818c25d0 ("crypto: user - Implement a generic crypto statistics") Reported-by: syzbot Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- crypto/ahash.c | 17 +++- crypto/algapi.c | 233 +++++++++++++++++++++++++++++++++++++++++++++ crypto/rng.c | 4 +- include/crypto/acompress.h | 38 ++------ include/crypto/aead.h | 38 ++------ include/crypto/akcipher.h | 74 +++----------- include/crypto/hash.h | 32 +------ include/crypto/kpp.h | 48 ++-------- include/crypto/rng.h | 27 +----- include/crypto/skcipher.h | 36 ++----- include/linux/crypto.h | 105 +++++++++++++------- 11 files changed, 376 insertions(+), 276 deletions(-) (limited to 'include/linux') diff --git a/crypto/ahash.c b/crypto/ahash.c index 3a348fbcf8f9..5d320a811f75 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -364,20 +364,28 @@ static int crypto_ahash_op(struct ahash_request *req, int crypto_ahash_final(struct ahash_request *req) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final); - crypto_stat_ahash_final(req, ret); + crypto_stats_ahash_final(nbytes, ret, alg); return ret; } EXPORT_SYMBOL_GPL(crypto_ahash_final); int
crypto_ahash_finup(struct ahash_request *req) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup); - crypto_stat_ahash_final(req, ret); + crypto_stats_ahash_final(nbytes, ret, alg); return ret; } EXPORT_SYMBOL_GPL(crypto_ahash_finup); @@ -385,13 +393,16 @@ EXPORT_SYMBOL_GPL(crypto_ahash_finup); int crypto_ahash_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else ret = crypto_ahash_op(req, tfm->digest); - crypto_stat_ahash_final(req, ret); + crypto_stats_ahash_final(nbytes, ret, alg); return ret; } EXPORT_SYMBOL_GPL(crypto_ahash_digest); diff --git a/crypto/algapi.c b/crypto/algapi.c index 42fe316f80ee..4c1e6079d271 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -1078,6 +1078,239 @@ int crypto_type_has_alg(const char *name, const struct crypto_type *frontend, } EXPORT_SYMBOL_GPL(crypto_type_has_alg); +#ifdef CONFIG_CRYPTO_STATS +void crypto_stats_get(struct crypto_alg *alg) +{ + crypto_alg_get(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_get); + +void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->cipher_err_cnt); + } else { + atomic64_inc(&alg->encrypt_cnt); + atomic64_add(nbytes, &alg->encrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_ablkcipher_encrypt); + +void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->cipher_err_cnt); + } else { + atomic64_inc(&alg->decrypt_cnt); + atomic64_add(nbytes, 
&alg->decrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_ablkcipher_decrypt); + +void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, + int ret) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->aead_err_cnt); + } else { + atomic64_inc(&alg->encrypt_cnt); + atomic64_add(cryptlen, &alg->encrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_aead_encrypt); + +void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, + int ret) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->aead_err_cnt); + } else { + atomic64_inc(&alg->decrypt_cnt); + atomic64_add(cryptlen, &alg->decrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_aead_decrypt); + +void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->akcipher_err_cnt); + } else { + atomic64_inc(&alg->encrypt_cnt); + atomic64_add(src_len, &alg->encrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_akcipher_encrypt); + +void crypto_stats_akcipher_decrypt(unsigned int src_len, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->akcipher_err_cnt); + } else { + atomic64_inc(&alg->decrypt_cnt); + atomic64_add(src_len, &alg->decrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_akcipher_decrypt); + +void crypto_stats_akcipher_sign(int ret, struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) + atomic64_inc(&alg->akcipher_err_cnt); + else + atomic64_inc(&alg->sign_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_akcipher_sign); + +void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) + atomic64_inc(&alg->akcipher_err_cnt); + else + 
atomic64_inc(&alg->verify_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_akcipher_verify); + +void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->compress_err_cnt); + } else { + atomic64_inc(&alg->compress_cnt); + atomic64_add(slen, &alg->compress_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_compress); + +void crypto_stats_decompress(unsigned int slen, int ret, struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->compress_err_cnt); + } else { + atomic64_inc(&alg->decompress_cnt); + atomic64_add(slen, &alg->decompress_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_decompress); + +void crypto_stats_ahash_update(unsigned int nbytes, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) + atomic64_inc(&alg->hash_err_cnt); + else + atomic64_add(nbytes, &alg->hash_tlen); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_ahash_update); + +void crypto_stats_ahash_final(unsigned int nbytes, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->hash_err_cnt); + } else { + atomic64_inc(&alg->hash_cnt); + atomic64_add(nbytes, &alg->hash_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_ahash_final); + +void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret) +{ + if (ret) + atomic64_inc(&alg->kpp_err_cnt); + else + atomic64_inc(&alg->setsecret_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_kpp_set_secret); + +void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret) +{ + if (ret) + atomic64_inc(&alg->kpp_err_cnt); + else + atomic64_inc(&alg->generate_public_key_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_kpp_generate_public_key); + +void crypto_stats_kpp_compute_shared_secret(struct 
crypto_alg *alg, int ret) +{ + if (ret) + atomic64_inc(&alg->kpp_err_cnt); + else + atomic64_inc(&alg->compute_shared_secret_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_kpp_compute_shared_secret); + +void crypto_stats_rng_seed(struct crypto_alg *alg, int ret) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) + atomic64_inc(&alg->rng_err_cnt); + else + atomic64_inc(&alg->seed_cnt); + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_rng_seed); + +void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, + int ret) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->rng_err_cnt); + } else { + atomic64_inc(&alg->generate_cnt); + atomic64_add(dlen, &alg->generate_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_rng_generate); + +void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->cipher_err_cnt); + } else { + atomic64_inc(&alg->encrypt_cnt); + atomic64_add(cryptlen, &alg->encrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_skcipher_encrypt); + +void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, + struct crypto_alg *alg) +{ + if (ret && ret != -EINPROGRESS && ret != -EBUSY) { + atomic64_inc(&alg->cipher_err_cnt); + } else { + atomic64_inc(&alg->decrypt_cnt); + atomic64_add(cryptlen, &alg->decrypt_tlen); + } + crypto_alg_put(alg); +} +EXPORT_SYMBOL_GPL(crypto_stats_skcipher_decrypt); +#endif + static int __init crypto_algapi_init(void) { crypto_init_proc(); diff --git a/crypto/rng.c b/crypto/rng.c index 2406501b90b7..33c38a72bff5 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -35,9 +35,11 @@ static int crypto_default_rng_refcnt; int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { + struct crypto_alg *alg = tfm->base.__crt_alg; u8 *buf = NULL; int err; + crypto_stats_get(alg); if (!seed && slen) 
{ buf = kmalloc(slen, GFP_KERNEL); if (!buf) @@ -50,7 +52,7 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) } err = crypto_rng_alg(tfm)->seed(tfm, seed, slen); - crypto_stat_rng_seed(tfm, err); + crypto_stats_rng_seed(alg, err); out: kzfree(buf); return err; diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index f79918196811..a3e766dff917 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -234,34 +234,6 @@ static inline void acomp_request_set_params(struct acomp_req *req, req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT; } -static inline void crypto_stat_compress(struct acomp_req *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&tfm->base.__crt_alg->compress_err_cnt); - } else { - atomic64_inc(&tfm->base.__crt_alg->compress_cnt); - atomic64_add(req->slen, &tfm->base.__crt_alg->compress_tlen); - } -#endif -} - -static inline void crypto_stat_decompress(struct acomp_req *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&tfm->base.__crt_alg->compress_err_cnt); - } else { - atomic64_inc(&tfm->base.__crt_alg->decompress_cnt); - atomic64_add(req->slen, &tfm->base.__crt_alg->decompress_tlen); - } -#endif -} - /** * crypto_acomp_compress() -- Invoke asynchronous compress operation * @@ -274,10 +246,13 @@ static inline void crypto_stat_decompress(struct acomp_req *req, int ret) static inline int crypto_acomp_compress(struct acomp_req *req) { struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int slen = req->slen; int ret; + crypto_stats_get(alg); ret = tfm->compress(req); - crypto_stat_compress(req, ret); + crypto_stats_compress(slen, ret, alg); return ret; } @@ -293,10 +268,13 @@ static inline int 
crypto_acomp_compress(struct acomp_req *req) static inline int crypto_acomp_decompress(struct acomp_req *req) { struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int slen = req->slen; int ret; + crypto_stats_get(alg); ret = tfm->decompress(req); - crypto_stat_decompress(req, ret); + crypto_stats_decompress(slen, ret, alg); return ret; } diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 99afd78c665d..b7b8d24cf765 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -306,34 +306,6 @@ static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req) return __crypto_aead_cast(req->base.tfm); } -static inline void crypto_stat_aead_encrypt(struct aead_request *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&tfm->base.__crt_alg->aead_err_cnt); - } else { - atomic64_inc(&tfm->base.__crt_alg->encrypt_cnt); - atomic64_add(req->cryptlen, &tfm->base.__crt_alg->encrypt_tlen); - } -#endif -} - -static inline void crypto_stat_aead_decrypt(struct aead_request *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&tfm->base.__crt_alg->aead_err_cnt); - } else { - atomic64_inc(&tfm->base.__crt_alg->decrypt_cnt); - atomic64_add(req->cryptlen, &tfm->base.__crt_alg->decrypt_tlen); - } -#endif -} - /** * crypto_aead_encrypt() - encrypt plaintext * @req: reference to the aead_request handle that holds all information @@ -356,13 +328,16 @@ static inline void crypto_stat_aead_decrypt(struct aead_request *req, int ret) static inline int crypto_aead_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_alg *alg = aead->base.__crt_alg; + unsigned int cryptlen = req->cryptlen; int ret; + crypto_stats_get(alg); if 
(crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else ret = crypto_aead_alg(aead)->encrypt(req); - crypto_stat_aead_encrypt(req, ret); + crypto_stats_aead_encrypt(cryptlen, alg, ret); return ret; } @@ -391,15 +366,18 @@ static inline int crypto_aead_encrypt(struct aead_request *req) static inline int crypto_aead_decrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_alg *alg = aead->base.__crt_alg; + unsigned int cryptlen = req->cryptlen; int ret; + crypto_stats_get(alg); if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else if (req->cryptlen < crypto_aead_authsize(aead)) ret = -EINVAL; else ret = crypto_aead_alg(aead)->decrypt(req); - crypto_stat_aead_decrypt(req, ret); + crypto_stats_aead_decrypt(cryptlen, alg, ret); return ret; } diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h index 3dc05cf7e0a9..2d690494568c 100644 --- a/include/crypto/akcipher.h +++ b/include/crypto/akcipher.h @@ -271,62 +271,6 @@ static inline unsigned int crypto_akcipher_maxsize(struct crypto_akcipher *tfm) return alg->max_size(tfm); } -static inline void crypto_stat_akcipher_encrypt(struct akcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&tfm->base.__crt_alg->akcipher_err_cnt); - } else { - atomic64_inc(&tfm->base.__crt_alg->encrypt_cnt); - atomic64_add(req->src_len, &tfm->base.__crt_alg->encrypt_tlen); - } -#endif -} - -static inline void crypto_stat_akcipher_decrypt(struct akcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&tfm->base.__crt_alg->akcipher_err_cnt); - } else { - atomic64_inc(&tfm->base.__crt_alg->decrypt_cnt); - atomic64_add(req->src_len, &tfm->base.__crt_alg->decrypt_tlen); - } 
-#endif -} - -static inline void crypto_stat_akcipher_sign(struct akcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&tfm->base.__crt_alg->akcipher_err_cnt); - else - atomic64_inc(&tfm->base.__crt_alg->sign_cnt); -#endif -} - -static inline void crypto_stat_akcipher_verify(struct akcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&tfm->base.__crt_alg->akcipher_err_cnt); - else - atomic64_inc(&tfm->base.__crt_alg->verify_cnt); -#endif -} - /** * crypto_akcipher_encrypt() - Invoke public key encrypt operation * @@ -341,10 +285,13 @@ static inline int crypto_akcipher_encrypt(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct akcipher_alg *alg = crypto_akcipher_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; + unsigned int src_len = req->src_len; int ret; + crypto_stats_get(calg); ret = alg->encrypt(req); - crypto_stat_akcipher_encrypt(req, ret); + crypto_stats_akcipher_encrypt(src_len, ret, calg); return ret; } @@ -362,10 +309,13 @@ static inline int crypto_akcipher_decrypt(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct akcipher_alg *alg = crypto_akcipher_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; + unsigned int src_len = req->src_len; int ret; + crypto_stats_get(calg); ret = alg->decrypt(req); - crypto_stat_akcipher_decrypt(req, ret); + crypto_stats_akcipher_decrypt(src_len, ret, calg); return ret; } @@ -383,10 +333,12 @@ static inline int crypto_akcipher_sign(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct akcipher_alg *alg = crypto_akcipher_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; int ret; + 
crypto_stats_get(calg); ret = alg->sign(req); - crypto_stat_akcipher_sign(req, ret); + crypto_stats_akcipher_sign(ret, calg); return ret; } @@ -404,10 +356,12 @@ static inline int crypto_akcipher_verify(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct akcipher_alg *alg = crypto_akcipher_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; int ret; + crypto_stats_get(calg); ret = alg->verify(req); - crypto_stat_akcipher_verify(req, ret); + crypto_stats_akcipher_verify(ret, calg); return ret; } diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 52920bed05ba..3b31c1b349ae 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -412,32 +412,6 @@ static inline void *ahash_request_ctx(struct ahash_request *req) int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); -static inline void crypto_stat_ahash_update(struct ahash_request *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&tfm->base.__crt_alg->hash_err_cnt); - else - atomic64_add(req->nbytes, &tfm->base.__crt_alg->hash_tlen); -#endif -} - -static inline void crypto_stat_ahash_final(struct ahash_request *req, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&tfm->base.__crt_alg->hash_err_cnt); - } else { - atomic64_inc(&tfm->base.__crt_alg->hash_cnt); - atomic64_add(req->nbytes, &tfm->base.__crt_alg->hash_tlen); - } -#endif -} - /** * crypto_ahash_finup() - update and finalize message digest * @req: reference to the ahash_request handle that holds all information @@ -552,10 +526,14 @@ static inline int crypto_ahash_init(struct ahash_request *req) */ static inline int crypto_ahash_update(struct ahash_request *req) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + 
struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); ret = crypto_ahash_reqtfm(req)->update(req); - crypto_stat_ahash_update(req, ret); + crypto_stats_ahash_update(nbytes, ret, alg); return ret; } diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index bd5103a80919..1a97e1601422 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -268,42 +268,6 @@ struct kpp_secret { unsigned short len; }; -static inline void crypto_stat_kpp_set_secret(struct crypto_kpp *tfm, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - if (ret) - atomic64_inc(&tfm->base.__crt_alg->kpp_err_cnt); - else - atomic64_inc(&tfm->base.__crt_alg->setsecret_cnt); -#endif -} - -static inline void crypto_stat_kpp_generate_public_key(struct kpp_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - - if (ret) - atomic64_inc(&tfm->base.__crt_alg->kpp_err_cnt); - else - atomic64_inc(&tfm->base.__crt_alg->generate_public_key_cnt); -#endif -} - -static inline void crypto_stat_kpp_compute_shared_secret(struct kpp_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - - if (ret) - atomic64_inc(&tfm->base.__crt_alg->kpp_err_cnt); - else - atomic64_inc(&tfm->base.__crt_alg->compute_shared_secret_cnt); -#endif -} - /** * crypto_kpp_set_secret() - Invoke kpp operation * @@ -323,10 +287,12 @@ static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, const void *buffer, unsigned int len) { struct kpp_alg *alg = crypto_kpp_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; int ret; + crypto_stats_get(calg); ret = alg->set_secret(tfm, buffer, len); - crypto_stat_kpp_set_secret(tfm, ret); + crypto_stats_kpp_set_secret(calg, ret); return ret; } @@ -347,10 +313,12 @@ static inline int crypto_kpp_generate_public_key(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); struct kpp_alg *alg = 
crypto_kpp_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; int ret; + crypto_stats_get(calg); ret = alg->generate_public_key(req); - crypto_stat_kpp_generate_public_key(req, ret); + crypto_stats_kpp_generate_public_key(calg, ret); return ret; } @@ -368,10 +336,12 @@ static inline int crypto_kpp_compute_shared_secret(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); struct kpp_alg *alg = crypto_kpp_alg(tfm); + struct crypto_alg *calg = tfm->base.__crt_alg; int ret; + crypto_stats_get(calg); ret = alg->compute_shared_secret(req); - crypto_stat_kpp_compute_shared_secret(req, ret); + crypto_stats_kpp_compute_shared_secret(calg, ret); return ret; } diff --git a/include/crypto/rng.h b/include/crypto/rng.h index 966615bba45e..022a1b896b47 100644 --- a/include/crypto/rng.h +++ b/include/crypto/rng.h @@ -122,29 +122,6 @@ static inline void crypto_free_rng(struct crypto_rng *tfm) crypto_destroy_tfm(tfm, crypto_rng_tfm(tfm)); } -static inline void crypto_stat_rng_seed(struct crypto_rng *tfm, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&tfm->base.__crt_alg->rng_err_cnt); - else - atomic64_inc(&tfm->base.__crt_alg->seed_cnt); -#endif -} - -static inline void crypto_stat_rng_generate(struct crypto_rng *tfm, - unsigned int dlen, int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&tfm->base.__crt_alg->rng_err_cnt); - } else { - atomic64_inc(&tfm->base.__crt_alg->generate_cnt); - atomic64_add(dlen, &tfm->base.__crt_alg->generate_tlen); - } -#endif -} - /** * crypto_rng_generate() - get random number * @tfm: cipher handle @@ -163,10 +140,12 @@ static inline int crypto_rng_generate(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen) { + struct crypto_alg *alg = tfm->base.__crt_alg; int ret; + crypto_stats_get(alg); ret = crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen); - 
crypto_stat_rng_generate(tfm, dlen, ret); + crypto_stats_rng_generate(alg, dlen, ret); return ret; } diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index dff54731ddf4..480f8301a47d 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -486,32 +486,6 @@ static inline struct crypto_sync_skcipher *crypto_sync_skcipher_reqtfm( return container_of(tfm, struct crypto_sync_skcipher, base); } -static inline void crypto_stat_skcipher_encrypt(struct skcipher_request *req, - int ret, struct crypto_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->cipher_err_cnt); - } else { - atomic64_inc(&alg->encrypt_cnt); - atomic64_add(req->cryptlen, &alg->encrypt_tlen); - } -#endif -} - -static inline void crypto_stat_skcipher_decrypt(struct skcipher_request *req, - int ret, struct crypto_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->cipher_err_cnt); - } else { - atomic64_inc(&alg->decrypt_cnt); - atomic64_add(req->cryptlen, &alg->decrypt_tlen); - } -#endif -} - /** * crypto_skcipher_encrypt() - encrypt plaintext * @req: reference to the skcipher_request handle that holds all information @@ -526,13 +500,16 @@ static inline void crypto_stat_skcipher_decrypt(struct skcipher_request *req, static inline int crypto_skcipher_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int cryptlen = req->cryptlen; int ret; + crypto_stats_get(alg); if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else ret = tfm->encrypt(req); - crypto_stat_skcipher_encrypt(req, ret, tfm->base.__crt_alg); + crypto_stats_skcipher_encrypt(cryptlen, ret, alg); return ret; } @@ -550,13 +527,16 @@ static inline int crypto_skcipher_encrypt(struct skcipher_request *req) static inline int crypto_skcipher_decrypt(struct 
skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_alg *alg = tfm->base.__crt_alg; + unsigned int cryptlen = req->cryptlen; int ret; + crypto_stats_get(alg); if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else ret = tfm->decrypt(req); - crypto_stat_skcipher_decrypt(req, ret, tfm->base.__crt_alg); + crypto_stats_skcipher_decrypt(cryptlen, ret, alg); return ret; } diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b109b50906e7..e2fd24714e00 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -557,6 +557,69 @@ struct crypto_alg { } CRYPTO_MINALIGN_ATTR; +#ifdef CONFIG_CRYPTO_STATS +void crypto_stats_get(struct crypto_alg *alg); +void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, struct crypto_alg *alg); +void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, struct crypto_alg *alg); +void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret); +void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret); +void crypto_stats_ahash_update(unsigned int nbytes, int ret, struct crypto_alg *alg); +void crypto_stats_ahash_final(unsigned int nbytes, int ret, struct crypto_alg *alg); +void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret, struct crypto_alg *alg); +void crypto_stats_akcipher_decrypt(unsigned int src_len, int ret, struct crypto_alg *alg); +void crypto_stats_akcipher_sign(int ret, struct crypto_alg *alg); +void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg); +void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg); +void crypto_stats_decompress(unsigned int slen, int ret, struct crypto_alg *alg); +void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret); +void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret); +void crypto_stats_kpp_compute_shared_secret(struct crypto_alg *alg, int ret); +void 
crypto_stats_rng_seed(struct crypto_alg *alg, int ret); +void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, int ret); +void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg); +void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg); +#else +static inline void crypto_stats_get(struct crypto_alg *alg) +{} +static inline void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_ahash_update(unsigned int nbytes, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_ahash_final(unsigned int nbytes, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_akcipher_decrypt(unsigned int src_len, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_akcipher_sign(int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_decompress(unsigned int slen, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_kpp_compute_shared_secret(struct crypto_alg *alg, int ret) +{} +static inline void crypto_stats_rng_seed(struct crypto_alg *alg, int ret) +{} +static inline void 
crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, int ret) +{} +static inline void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg) +{} +static inline void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg) +{} +#endif /* * A helper struct for waiting for completion of async crypto ops */ @@ -975,38 +1038,6 @@ static inline struct crypto_ablkcipher *crypto_ablkcipher_reqtfm( return __crypto_ablkcipher_cast(req->base.tfm); } -static inline void crypto_stat_ablkcipher_encrypt(struct ablkcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct ablkcipher_tfm *crt = - crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&crt->base->base.__crt_alg->cipher_err_cnt); - } else { - atomic64_inc(&crt->base->base.__crt_alg->encrypt_cnt); - atomic64_add(req->nbytes, &crt->base->base.__crt_alg->encrypt_tlen); - } -#endif -} - -static inline void crypto_stat_ablkcipher_decrypt(struct ablkcipher_request *req, - int ret) -{ -#ifdef CONFIG_CRYPTO_STATS - struct ablkcipher_tfm *crt = - crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); - - if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&crt->base->base.__crt_alg->cipher_err_cnt); - } else { - atomic64_inc(&crt->base->base.__crt_alg->decrypt_cnt); - atomic64_add(req->nbytes, &crt->base->base.__crt_alg->decrypt_tlen); - } -#endif -} - /** * crypto_ablkcipher_encrypt() - encrypt plaintext * @req: reference to the ablkcipher_request handle that holds all information @@ -1022,10 +1053,13 @@ static inline int crypto_ablkcipher_encrypt(struct ablkcipher_request *req) { struct ablkcipher_tfm *crt = crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); + struct crypto_alg *alg = crt->base->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); ret = crt->encrypt(req); - crypto_stat_ablkcipher_encrypt(req, ret); + 
crypto_stats_ablkcipher_encrypt(nbytes, ret, alg); return ret; } @@ -1044,10 +1078,13 @@ static inline int crypto_ablkcipher_decrypt(struct ablkcipher_request *req) { struct ablkcipher_tfm *crt = crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); + struct crypto_alg *alg = crt->base->base.__crt_alg; + unsigned int nbytes = req->nbytes; int ret; + crypto_stats_get(alg); ret = crt->decrypt(req); - crypto_stat_ablkcipher_decrypt(req, ret); + crypto_stats_ablkcipher_decrypt(nbytes, ret, alg); return ret; } -- cgit v1.2.3 From 17c18f9e33282a170458cb5ea20759bfcb0da7d8 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:24 +0000 Subject: crypto: user - Split stats in multiple structures Like for userspace, this patch splits stats into multiple structures, one for each algorithm class. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- crypto/algapi.c | 108 +++++++++++++--------------- crypto/crypto_user_stat.c | 82 ++++++++++----------- include/linux/crypto.h | 180 ++++++++++++++++++++++++++++++---------------- 3 files changed, 210 insertions(+), 160 deletions(-) (limited to 'include/linux') diff --git a/crypto/algapi.c b/crypto/algapi.c index 4c1e6079d271..a8cb5aed0069 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -259,13 +259,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) list_add(&larval->alg.cra_list, &crypto_alg_list); #ifdef CONFIG_CRYPTO_STATS - atomic64_set(&alg->encrypt_cnt, 0); - atomic64_set(&alg->decrypt_cnt, 0); - atomic64_set(&alg->encrypt_tlen, 0); - atomic64_set(&alg->decrypt_tlen, 0); - atomic64_set(&alg->verify_cnt, 0); - atomic64_set(&alg->cipher_err_cnt, 0); - atomic64_set(&alg->sign_cnt, 0); + memset(&alg->stats, 0, sizeof(alg->stats)); #endif out: @@ -1089,10 +1083,10 @@ void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->cipher_err_cnt); + 
atomic64_inc(&alg->stats.cipher.cipher_err_cnt); } else { - atomic64_inc(&alg->encrypt_cnt); - atomic64_add(nbytes, &alg->encrypt_tlen); + atomic64_inc(&alg->stats.cipher.encrypt_cnt); + atomic64_add(nbytes, &alg->stats.cipher.encrypt_tlen); } crypto_alg_put(alg); } @@ -1102,10 +1096,10 @@ void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->cipher_err_cnt); + atomic64_inc(&alg->stats.cipher.cipher_err_cnt); } else { - atomic64_inc(&alg->decrypt_cnt); - atomic64_add(nbytes, &alg->decrypt_tlen); + atomic64_inc(&alg->stats.cipher.decrypt_cnt); + atomic64_add(nbytes, &alg->stats.cipher.decrypt_tlen); } crypto_alg_put(alg); } @@ -1115,10 +1109,10 @@ void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->aead_err_cnt); + atomic64_inc(&alg->stats.aead.aead_err_cnt); } else { - atomic64_inc(&alg->encrypt_cnt); - atomic64_add(cryptlen, &alg->encrypt_tlen); + atomic64_inc(&alg->stats.aead.encrypt_cnt); + atomic64_add(cryptlen, &alg->stats.aead.encrypt_tlen); } crypto_alg_put(alg); } @@ -1128,10 +1122,10 @@ void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->aead_err_cnt); + atomic64_inc(&alg->stats.aead.aead_err_cnt); } else { - atomic64_inc(&alg->decrypt_cnt); - atomic64_add(cryptlen, &alg->decrypt_tlen); + atomic64_inc(&alg->stats.aead.decrypt_cnt); + atomic64_add(cryptlen, &alg->stats.aead.decrypt_tlen); } crypto_alg_put(alg); } @@ -1141,10 +1135,10 @@ void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->akcipher_err_cnt); + atomic64_inc(&alg->stats.akcipher.akcipher_err_cnt); } else { - atomic64_inc(&alg->encrypt_cnt); - 
atomic64_add(src_len, &alg->encrypt_tlen); + atomic64_inc(&alg->stats.akcipher.encrypt_cnt); + atomic64_add(src_len, &alg->stats.akcipher.encrypt_tlen); } crypto_alg_put(alg); } @@ -1154,10 +1148,10 @@ void crypto_stats_akcipher_decrypt(unsigned int src_len, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->akcipher_err_cnt); + atomic64_inc(&alg->stats.akcipher.akcipher_err_cnt); } else { - atomic64_inc(&alg->decrypt_cnt); - atomic64_add(src_len, &alg->decrypt_tlen); + atomic64_inc(&alg->stats.akcipher.decrypt_cnt); + atomic64_add(src_len, &alg->stats.akcipher.decrypt_tlen); } crypto_alg_put(alg); } @@ -1166,9 +1160,9 @@ EXPORT_SYMBOL_GPL(crypto_stats_akcipher_decrypt); void crypto_stats_akcipher_sign(int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&alg->akcipher_err_cnt); + atomic64_inc(&alg->stats.akcipher.akcipher_err_cnt); else - atomic64_inc(&alg->sign_cnt); + atomic64_inc(&alg->stats.akcipher.sign_cnt); crypto_alg_put(alg); } EXPORT_SYMBOL_GPL(crypto_stats_akcipher_sign); @@ -1176,9 +1170,9 @@ EXPORT_SYMBOL_GPL(crypto_stats_akcipher_sign); void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&alg->akcipher_err_cnt); + atomic64_inc(&alg->stats.akcipher.akcipher_err_cnt); else - atomic64_inc(&alg->verify_cnt); + atomic64_inc(&alg->stats.akcipher.verify_cnt); crypto_alg_put(alg); } EXPORT_SYMBOL_GPL(crypto_stats_akcipher_verify); @@ -1186,10 +1180,10 @@ EXPORT_SYMBOL_GPL(crypto_stats_akcipher_verify); void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->compress_err_cnt); + atomic64_inc(&alg->stats.compress.compress_err_cnt); } else { - atomic64_inc(&alg->compress_cnt); - atomic64_add(slen, &alg->compress_tlen); + atomic64_inc(&alg->stats.compress.compress_cnt); + 
atomic64_add(slen, &alg->stats.compress.compress_tlen); } crypto_alg_put(alg); } @@ -1198,10 +1192,10 @@ EXPORT_SYMBOL_GPL(crypto_stats_compress); void crypto_stats_decompress(unsigned int slen, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->compress_err_cnt); + atomic64_inc(&alg->stats.compress.compress_err_cnt); } else { - atomic64_inc(&alg->decompress_cnt); - atomic64_add(slen, &alg->decompress_tlen); + atomic64_inc(&alg->stats.compress.decompress_cnt); + atomic64_add(slen, &alg->stats.compress.decompress_tlen); } crypto_alg_put(alg); } @@ -1211,9 +1205,9 @@ void crypto_stats_ahash_update(unsigned int nbytes, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&alg->hash_err_cnt); + atomic64_inc(&alg->stats.hash.hash_err_cnt); else - atomic64_add(nbytes, &alg->hash_tlen); + atomic64_add(nbytes, &alg->stats.hash.hash_tlen); crypto_alg_put(alg); } EXPORT_SYMBOL_GPL(crypto_stats_ahash_update); @@ -1222,10 +1216,10 @@ void crypto_stats_ahash_final(unsigned int nbytes, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->hash_err_cnt); + atomic64_inc(&alg->stats.hash.hash_err_cnt); } else { - atomic64_inc(&alg->hash_cnt); - atomic64_add(nbytes, &alg->hash_tlen); + atomic64_inc(&alg->stats.hash.hash_cnt); + atomic64_add(nbytes, &alg->stats.hash.hash_tlen); } crypto_alg_put(alg); } @@ -1234,9 +1228,9 @@ EXPORT_SYMBOL_GPL(crypto_stats_ahash_final); void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret) { if (ret) - atomic64_inc(&alg->kpp_err_cnt); + atomic64_inc(&alg->stats.kpp.kpp_err_cnt); else - atomic64_inc(&alg->setsecret_cnt); + atomic64_inc(&alg->stats.kpp.setsecret_cnt); crypto_alg_put(alg); } EXPORT_SYMBOL_GPL(crypto_stats_kpp_set_secret); @@ -1244,9 +1238,9 @@ EXPORT_SYMBOL_GPL(crypto_stats_kpp_set_secret); void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret) { if (ret) 
- atomic64_inc(&alg->kpp_err_cnt); + atomic64_inc(&alg->stats.kpp.kpp_err_cnt); else - atomic64_inc(&alg->generate_public_key_cnt); + atomic64_inc(&alg->stats.kpp.generate_public_key_cnt); crypto_alg_put(alg); } EXPORT_SYMBOL_GPL(crypto_stats_kpp_generate_public_key); @@ -1254,9 +1248,9 @@ EXPORT_SYMBOL_GPL(crypto_stats_kpp_generate_public_key); void crypto_stats_kpp_compute_shared_secret(struct crypto_alg *alg, int ret) { if (ret) - atomic64_inc(&alg->kpp_err_cnt); + atomic64_inc(&alg->stats.kpp.kpp_err_cnt); else - atomic64_inc(&alg->compute_shared_secret_cnt); + atomic64_inc(&alg->stats.kpp.compute_shared_secret_cnt); crypto_alg_put(alg); } EXPORT_SYMBOL_GPL(crypto_stats_kpp_compute_shared_secret); @@ -1264,9 +1258,9 @@ EXPORT_SYMBOL_GPL(crypto_stats_kpp_compute_shared_secret); void crypto_stats_rng_seed(struct crypto_alg *alg, int ret) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&alg->rng_err_cnt); + atomic64_inc(&alg->stats.rng.rng_err_cnt); else - atomic64_inc(&alg->seed_cnt); + atomic64_inc(&alg->stats.rng.seed_cnt); crypto_alg_put(alg); } EXPORT_SYMBOL_GPL(crypto_stats_rng_seed); @@ -1275,10 +1269,10 @@ void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, int ret) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->rng_err_cnt); + atomic64_inc(&alg->stats.rng.rng_err_cnt); } else { - atomic64_inc(&alg->generate_cnt); - atomic64_add(dlen, &alg->generate_tlen); + atomic64_inc(&alg->stats.rng.generate_cnt); + atomic64_add(dlen, &alg->stats.rng.generate_tlen); } crypto_alg_put(alg); } @@ -1288,10 +1282,10 @@ void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->cipher_err_cnt); + atomic64_inc(&alg->stats.cipher.cipher_err_cnt); } else { - atomic64_inc(&alg->encrypt_cnt); - atomic64_add(cryptlen, &alg->encrypt_tlen); + atomic64_inc(&alg->stats.cipher.encrypt_cnt); + atomic64_add(cryptlen, 
&alg->stats.cipher.encrypt_tlen); } crypto_alg_put(alg); } @@ -1301,10 +1295,10 @@ void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->cipher_err_cnt); + atomic64_inc(&alg->stats.cipher.cipher_err_cnt); } else { - atomic64_inc(&alg->decrypt_cnt); - atomic64_add(cryptlen, &alg->decrypt_tlen); + atomic64_inc(&alg->stats.cipher.decrypt_cnt); + atomic64_add(cryptlen, &alg->stats.cipher.decrypt_tlen); } crypto_alg_put(alg); } diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c index 7b668c659122..113bf1691560 100644 --- a/crypto/crypto_user_stat.c +++ b/crypto/crypto_user_stat.c @@ -39,11 +39,11 @@ static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg) strscpy(raead.type, "aead", sizeof(raead.type)); - raead.stat_encrypt_cnt = atomic64_read(&alg->encrypt_cnt); - raead.stat_encrypt_tlen = atomic64_read(&alg->encrypt_tlen); - raead.stat_decrypt_cnt = atomic64_read(&alg->decrypt_cnt); - raead.stat_decrypt_tlen = atomic64_read(&alg->decrypt_tlen); - raead.stat_aead_err_cnt = atomic64_read(&alg->aead_err_cnt); + raead.stat_encrypt_cnt = atomic64_read(&alg->stats.aead.encrypt_cnt); + raead.stat_encrypt_tlen = atomic64_read(&alg->stats.aead.encrypt_tlen); + raead.stat_decrypt_cnt = atomic64_read(&alg->stats.aead.decrypt_cnt); + raead.stat_decrypt_tlen = atomic64_read(&alg->stats.aead.decrypt_tlen); + raead.stat_aead_err_cnt = atomic64_read(&alg->stats.aead.aead_err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_AEAD, sizeof(raead), &raead); } @@ -56,11 +56,11 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); - rcipher.stat_encrypt_cnt = atomic64_read(&alg->encrypt_cnt); - rcipher.stat_encrypt_tlen = atomic64_read(&alg->encrypt_tlen); - rcipher.stat_decrypt_cnt = atomic64_read(&alg->decrypt_cnt); - rcipher.stat_decrypt_tlen = 
atomic64_read(&alg->decrypt_tlen); - rcipher.stat_cipher_err_cnt = atomic64_read(&alg->cipher_err_cnt); + rcipher.stat_encrypt_cnt = atomic64_read(&alg->stats.cipher.encrypt_cnt); + rcipher.stat_encrypt_tlen = atomic64_read(&alg->stats.cipher.encrypt_tlen); + rcipher.stat_decrypt_cnt = atomic64_read(&alg->stats.cipher.decrypt_cnt); + rcipher.stat_decrypt_tlen = atomic64_read(&alg->stats.cipher.decrypt_tlen); + rcipher.stat_cipher_err_cnt = atomic64_read(&alg->stats.cipher.cipher_err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); } @@ -72,11 +72,11 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) memset(&rcomp, 0, sizeof(rcomp)); strscpy(rcomp.type, "compression", sizeof(rcomp.type)); - rcomp.stat_compress_cnt = atomic64_read(&alg->compress_cnt); - rcomp.stat_compress_tlen = atomic64_read(&alg->compress_tlen); - rcomp.stat_decompress_cnt = atomic64_read(&alg->decompress_cnt); - rcomp.stat_decompress_tlen = atomic64_read(&alg->decompress_tlen); - rcomp.stat_compress_err_cnt = atomic64_read(&alg->compress_err_cnt); + rcomp.stat_compress_cnt = atomic64_read(&alg->stats.compress.compress_cnt); + rcomp.stat_compress_tlen = atomic64_read(&alg->stats.compress.compress_tlen); + rcomp.stat_decompress_cnt = atomic64_read(&alg->stats.compress.decompress_cnt); + rcomp.stat_decompress_tlen = atomic64_read(&alg->stats.compress.decompress_tlen); + rcomp.stat_compress_err_cnt = atomic64_read(&alg->stats.compress.compress_err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_COMPRESS, sizeof(rcomp), &rcomp); } @@ -88,11 +88,11 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) memset(&racomp, 0, sizeof(racomp)); strscpy(racomp.type, "acomp", sizeof(racomp.type)); - racomp.stat_compress_cnt = atomic64_read(&alg->compress_cnt); - racomp.stat_compress_tlen = atomic64_read(&alg->compress_tlen); - racomp.stat_decompress_cnt = atomic64_read(&alg->decompress_cnt); - racomp.stat_decompress_tlen = 
atomic64_read(&alg->decompress_tlen); - racomp.stat_compress_err_cnt = atomic64_read(&alg->compress_err_cnt); + racomp.stat_compress_cnt = atomic64_read(&alg->stats.compress.compress_cnt); + racomp.stat_compress_tlen = atomic64_read(&alg->stats.compress.compress_tlen); + racomp.stat_decompress_cnt = atomic64_read(&alg->stats.compress.decompress_cnt); + racomp.stat_decompress_tlen = atomic64_read(&alg->stats.compress.decompress_tlen); + racomp.stat_compress_err_cnt = atomic64_read(&alg->stats.compress.compress_err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_ACOMP, sizeof(racomp), &racomp); } @@ -104,13 +104,13 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) memset(&rakcipher, 0, sizeof(rakcipher)); strscpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); - rakcipher.stat_encrypt_cnt = atomic64_read(&alg->encrypt_cnt); - rakcipher.stat_encrypt_tlen = atomic64_read(&alg->encrypt_tlen); - rakcipher.stat_decrypt_cnt = atomic64_read(&alg->decrypt_cnt); - rakcipher.stat_decrypt_tlen = atomic64_read(&alg->decrypt_tlen); - rakcipher.stat_sign_cnt = atomic64_read(&alg->sign_cnt); - rakcipher.stat_verify_cnt = atomic64_read(&alg->verify_cnt); - rakcipher.stat_akcipher_err_cnt = atomic64_read(&alg->akcipher_err_cnt); + rakcipher.stat_encrypt_cnt = atomic64_read(&alg->stats.akcipher.encrypt_cnt); + rakcipher.stat_encrypt_tlen = atomic64_read(&alg->stats.akcipher.encrypt_tlen); + rakcipher.stat_decrypt_cnt = atomic64_read(&alg->stats.akcipher.decrypt_cnt); + rakcipher.stat_decrypt_tlen = atomic64_read(&alg->stats.akcipher.decrypt_tlen); + rakcipher.stat_sign_cnt = atomic64_read(&alg->stats.akcipher.sign_cnt); + rakcipher.stat_verify_cnt = atomic64_read(&alg->stats.akcipher.verify_cnt); + rakcipher.stat_akcipher_err_cnt = atomic64_read(&alg->stats.akcipher.akcipher_err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER, sizeof(rakcipher), &rakcipher); @@ -124,10 +124,10 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg 
*alg) strscpy(rkpp.type, "kpp", sizeof(rkpp.type)); - rkpp.stat_setsecret_cnt = atomic64_read(&alg->setsecret_cnt); - rkpp.stat_generate_public_key_cnt = atomic64_read(&alg->generate_public_key_cnt); - rkpp.stat_compute_shared_secret_cnt = atomic64_read(&alg->compute_shared_secret_cnt); - rkpp.stat_kpp_err_cnt = atomic64_read(&alg->kpp_err_cnt); + rkpp.stat_setsecret_cnt = atomic64_read(&alg->stats.kpp.setsecret_cnt); + rkpp.stat_generate_public_key_cnt = atomic64_read(&alg->stats.kpp.generate_public_key_cnt); + rkpp.stat_compute_shared_secret_cnt = atomic64_read(&alg->stats.kpp.compute_shared_secret_cnt); + rkpp.stat_kpp_err_cnt = atomic64_read(&alg->stats.kpp.kpp_err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_KPP, sizeof(rkpp), &rkpp); } @@ -140,9 +140,9 @@ static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg) strscpy(rhash.type, "ahash", sizeof(rhash.type)); - rhash.stat_hash_cnt = atomic64_read(&alg->hash_cnt); - rhash.stat_hash_tlen = atomic64_read(&alg->hash_tlen); - rhash.stat_hash_err_cnt = atomic64_read(&alg->hash_err_cnt); + rhash.stat_hash_cnt = atomic64_read(&alg->stats.hash.hash_cnt); + rhash.stat_hash_tlen = atomic64_read(&alg->stats.hash.hash_tlen); + rhash.stat_hash_err_cnt = atomic64_read(&alg->stats.hash.hash_err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash); } @@ -155,9 +155,9 @@ static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg) strscpy(rhash.type, "shash", sizeof(rhash.type)); - rhash.stat_hash_cnt = atomic64_read(&alg->hash_cnt); - rhash.stat_hash_tlen = atomic64_read(&alg->hash_tlen); - rhash.stat_hash_err_cnt = atomic64_read(&alg->hash_err_cnt); + rhash.stat_hash_cnt = atomic64_read(&alg->stats.hash.hash_cnt); + rhash.stat_hash_tlen = atomic64_read(&alg->stats.hash.hash_tlen); + rhash.stat_hash_err_cnt = atomic64_read(&alg->stats.hash.hash_err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash); } @@ -170,10 +170,10 @@ static int 
crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg) strscpy(rrng.type, "rng", sizeof(rrng.type)); - rrng.stat_generate_cnt = atomic64_read(&alg->generate_cnt); - rrng.stat_generate_tlen = atomic64_read(&alg->generate_tlen); - rrng.stat_seed_cnt = atomic64_read(&alg->seed_cnt); - rrng.stat_rng_err_cnt = atomic64_read(&alg->rng_err_cnt); + rrng.stat_generate_cnt = atomic64_read(&alg->stats.rng.generate_cnt); + rrng.stat_generate_tlen = atomic64_read(&alg->stats.rng.generate_tlen); + rrng.stat_seed_cnt = atomic64_read(&alg->stats.rng.seed_cnt); + rrng.stat_rng_err_cnt = atomic64_read(&alg->stats.rng.rng_err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_RNG, sizeof(rrng), &rrng); } diff --git a/include/linux/crypto.h b/include/linux/crypto.h index e2fd24714e00..8a46ab35479e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -369,6 +369,115 @@ struct compress_alg { unsigned int slen, u8 *dst, unsigned int *dlen); }; +#ifdef CONFIG_CRYPTO_STATS +/* + * struct crypto_istat_aead - statistics for AEAD algorithm + * @encrypt_cnt: number of encrypt requests + * @encrypt_tlen: total data size handled by encrypt requests + * @decrypt_cnt: number of decrypt requests + * @decrypt_tlen: total data size handled by decrypt requests + * @aead_err_cnt: number of error for AEAD requests + */ +struct crypto_istat_aead { + atomic64_t encrypt_cnt; + atomic64_t encrypt_tlen; + atomic64_t decrypt_cnt; + atomic64_t decrypt_tlen; + atomic64_t aead_err_cnt; +}; + +/* + * struct crypto_istat_akcipher - statistics for akcipher algorithm + * @encrypt_cnt: number of encrypt requests + * @encrypt_tlen: total data size handled by encrypt requests + * @decrypt_cnt: number of decrypt requests + * @decrypt_tlen: total data size handled by decrypt requests + * @verify_cnt: number of verify operation + * @sign_cnt: number of sign requests + * @akcipher_err_cnt: number of error for akcipher requests + */ +struct crypto_istat_akcipher { + atomic64_t encrypt_cnt; + atomic64_t 
encrypt_tlen; + atomic64_t decrypt_cnt; + atomic64_t decrypt_tlen; + atomic64_t verify_cnt; + atomic64_t sign_cnt; + atomic64_t akcipher_err_cnt; +}; + +/* + * struct crypto_istat_cipher - statistics for cipher algorithm + * @encrypt_cnt: number of encrypt requests + * @encrypt_tlen: total data size handled by encrypt requests + * @decrypt_cnt: number of decrypt requests + * @decrypt_tlen: total data size handled by decrypt requests + * @cipher_err_cnt: number of error for cipher requests + */ +struct crypto_istat_cipher { + atomic64_t encrypt_cnt; + atomic64_t encrypt_tlen; + atomic64_t decrypt_cnt; + atomic64_t decrypt_tlen; + atomic64_t cipher_err_cnt; +}; + +/* + * struct crypto_istat_compress - statistics for compress algorithm + * @compress_cnt: number of compress requests + * @compress_tlen: total data size handled by compress requests + * @decompress_cnt: number of decompress requests + * @decompress_tlen: total data size handled by decompress requests + * @compress_err_cnt: number of error for compress requests + */ +struct crypto_istat_compress { + atomic64_t compress_cnt; + atomic64_t compress_tlen; + atomic64_t decompress_cnt; + atomic64_t decompress_tlen; + atomic64_t compress_err_cnt; +}; + +/* + * struct crypto_istat_hash - statistics for has algorithm + * @hash_cnt: number of hash requests + * @hash_tlen: total data size hashed + * @hash_err_cnt: number of error for hash requests + */ +struct crypto_istat_hash { + atomic64_t hash_cnt; + atomic64_t hash_tlen; + atomic64_t hash_err_cnt; +}; + +/* + * struct crypto_istat_kpp - statistics for KPP algorithm + * @setsecret_cnt: number of setsecrey operation + * @generate_public_key_cnt: number of generate_public_key operation + * @compute_shared_secret_cnt: number of compute_shared_secret operation + * @kpp_err_cnt: number of error for KPP requests + */ +struct crypto_istat_kpp { + atomic64_t setsecret_cnt; + atomic64_t generate_public_key_cnt; + atomic64_t compute_shared_secret_cnt; + atomic64_t 
kpp_err_cnt; +}; + +/* + * struct crypto_istat_rng: statistics for RNG algorithm + * @generate_cnt: number of RNG generate requests + * @generate_tlen: total data size of generated data by the RNG + * @seed_cnt: number of times the RNG was seeded + * @rng_err_cnt: number of error for RNG requests + */ +struct crypto_istat_rng { + atomic64_t generate_cnt; + atomic64_t generate_tlen; + atomic64_t seed_cnt; + atomic64_t rng_err_cnt; +}; +#endif /* CONFIG_CRYPTO_STATS */ #define cra_ablkcipher cra_u.ablkcipher #define cra_blkcipher cra_u.blkcipher @@ -454,32 +563,7 @@ struct compress_alg { * @cra_refcnt: internally used * @cra_destroy: internally used * - * All following statistics are for this crypto_alg - * @encrypt_cnt: number of encrypt requests - * @decrypt_cnt: number of decrypt requests - * @compress_cnt: number of compress requests - * @decompress_cnt: number of decompress requests - * @generate_cnt: number of RNG generate requests - * @seed_cnt: number of times the rng was seeded - * @hash_cnt: number of hash requests - * @sign_cnt: number of sign requests - * @setsecret_cnt: number of setsecrey operation - * @generate_public_key_cnt: number of generate_public_key operation - * @verify_cnt: number of verify operation - * @compute_shared_secret_cnt: number of compute_shared_secret operation - * @encrypt_tlen: total data size handled by encrypt requests - * @decrypt_tlen: total data size handled by decrypt requests - * @compress_tlen: total data size handled by compress requests - * @decompress_tlen: total data size handled by decompress requests - * @generate_tlen: total data size of generated data by the RNG - * @hash_tlen: total data size hashed - * @akcipher_err_cnt: number of error for akcipher requests - * @cipher_err_cnt: number of error for akcipher requests - * @compress_err_cnt: number of error for akcipher requests - * @aead_err_cnt: number of error for akcipher requests - * @hash_err_cnt: number of error for akcipher requests - * @rng_err_cnt: number 
of error for akcipher requests - * @kpp_err_cnt: number of error for akcipher requests + * @stats: union of all possible crypto_istat_xxx structures * * The struct crypto_alg describes a generic Crypto API algorithm and is common * for all of the transformations. Any variable not documented here shall not @@ -517,42 +601,14 @@ struct crypto_alg { #ifdef CONFIG_CRYPTO_STATS union { - atomic64_t encrypt_cnt; - atomic64_t compress_cnt; - atomic64_t generate_cnt; - atomic64_t hash_cnt; - atomic64_t setsecret_cnt; - }; - union { - atomic64_t encrypt_tlen; - atomic64_t compress_tlen; - atomic64_t generate_tlen; - atomic64_t hash_tlen; - }; - union { - atomic64_t akcipher_err_cnt; - atomic64_t cipher_err_cnt; - atomic64_t compress_err_cnt; - atomic64_t aead_err_cnt; - atomic64_t hash_err_cnt; - atomic64_t rng_err_cnt; - atomic64_t kpp_err_cnt; - }; - union { - atomic64_t decrypt_cnt; - atomic64_t decompress_cnt; - atomic64_t seed_cnt; - atomic64_t generate_public_key_cnt; - }; - union { - atomic64_t decrypt_tlen; - atomic64_t decompress_tlen; - }; - union { - atomic64_t verify_cnt; - atomic64_t compute_shared_secret_cnt; - }; - atomic64_t sign_cnt; + struct crypto_istat_aead aead; + struct crypto_istat_akcipher akcipher; + struct crypto_istat_cipher cipher; + struct crypto_istat_compress compress; + struct crypto_istat_hash hash; + struct crypto_istat_rng rng; + struct crypto_istat_kpp kpp; + } stats; #endif /* CONFIG_CRYPTO_STATS */ } CRYPTO_MINALIGN_ATTR; -- cgit v1.2.3 From 44f13133cb03ec32fc88a533673248ef5c0617e3 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:25 +0000 Subject: crypto: user - rename err_cnt parameter Since now all crypto stats are on their own structures, it is now useless to have the algorithm name in the err_cnt member. 
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- crypto/algapi.c | 38 +++++++++++++++++++------------------- crypto/crypto_user_stat.c | 18 +++++++++--------- include/linux/crypto.h | 28 ++++++++++++++-------------- include/uapi/linux/cryptouser.h | 14 +++++++------- tools/crypto/getstat.c | 18 +++++++++--------- 5 files changed, 58 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/crypto/algapi.c b/crypto/algapi.c index a8cb5aed0069..c0d4f9ef6b0f 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -1083,7 +1083,7 @@ void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.cipher.cipher_err_cnt); + atomic64_inc(&alg->stats.cipher.err_cnt); } else { atomic64_inc(&alg->stats.cipher.encrypt_cnt); atomic64_add(nbytes, &alg->stats.cipher.encrypt_tlen); @@ -1096,7 +1096,7 @@ void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.cipher.cipher_err_cnt); + atomic64_inc(&alg->stats.cipher.err_cnt); } else { atomic64_inc(&alg->stats.cipher.decrypt_cnt); atomic64_add(nbytes, &alg->stats.cipher.decrypt_tlen); @@ -1109,7 +1109,7 @@ void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.aead.aead_err_cnt); + atomic64_inc(&alg->stats.aead.err_cnt); } else { atomic64_inc(&alg->stats.aead.encrypt_cnt); atomic64_add(cryptlen, &alg->stats.aead.encrypt_tlen); @@ -1122,7 +1122,7 @@ void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.aead.aead_err_cnt); + atomic64_inc(&alg->stats.aead.err_cnt); } else { atomic64_inc(&alg->stats.aead.decrypt_cnt); atomic64_add(cryptlen, &alg->stats.aead.decrypt_tlen); @@ 
-1135,7 +1135,7 @@ void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.akcipher.akcipher_err_cnt); + atomic64_inc(&alg->stats.akcipher.err_cnt); } else { atomic64_inc(&alg->stats.akcipher.encrypt_cnt); atomic64_add(src_len, &alg->stats.akcipher.encrypt_tlen); @@ -1148,7 +1148,7 @@ void crypto_stats_akcipher_decrypt(unsigned int src_len, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.akcipher.akcipher_err_cnt); + atomic64_inc(&alg->stats.akcipher.err_cnt); } else { atomic64_inc(&alg->stats.akcipher.decrypt_cnt); atomic64_add(src_len, &alg->stats.akcipher.decrypt_tlen); @@ -1160,7 +1160,7 @@ EXPORT_SYMBOL_GPL(crypto_stats_akcipher_decrypt); void crypto_stats_akcipher_sign(int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&alg->stats.akcipher.akcipher_err_cnt); + atomic64_inc(&alg->stats.akcipher.err_cnt); else atomic64_inc(&alg->stats.akcipher.sign_cnt); crypto_alg_put(alg); @@ -1170,7 +1170,7 @@ EXPORT_SYMBOL_GPL(crypto_stats_akcipher_sign); void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&alg->stats.akcipher.akcipher_err_cnt); + atomic64_inc(&alg->stats.akcipher.err_cnt); else atomic64_inc(&alg->stats.akcipher.verify_cnt); crypto_alg_put(alg); @@ -1180,7 +1180,7 @@ EXPORT_SYMBOL_GPL(crypto_stats_akcipher_verify); void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.compress.compress_err_cnt); + atomic64_inc(&alg->stats.compress.err_cnt); } else { atomic64_inc(&alg->stats.compress.compress_cnt); atomic64_add(slen, &alg->stats.compress.compress_tlen); @@ -1192,7 +1192,7 @@ EXPORT_SYMBOL_GPL(crypto_stats_compress); void crypto_stats_decompress(unsigned 
int slen, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.compress.compress_err_cnt); + atomic64_inc(&alg->stats.compress.err_cnt); } else { atomic64_inc(&alg->stats.compress.decompress_cnt); atomic64_add(slen, &alg->stats.compress.decompress_tlen); @@ -1205,7 +1205,7 @@ void crypto_stats_ahash_update(unsigned int nbytes, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&alg->stats.hash.hash_err_cnt); + atomic64_inc(&alg->stats.hash.err_cnt); else atomic64_add(nbytes, &alg->stats.hash.hash_tlen); crypto_alg_put(alg); @@ -1216,7 +1216,7 @@ void crypto_stats_ahash_final(unsigned int nbytes, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.hash.hash_err_cnt); + atomic64_inc(&alg->stats.hash.err_cnt); } else { atomic64_inc(&alg->stats.hash.hash_cnt); atomic64_add(nbytes, &alg->stats.hash.hash_tlen); @@ -1228,7 +1228,7 @@ EXPORT_SYMBOL_GPL(crypto_stats_ahash_final); void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret) { if (ret) - atomic64_inc(&alg->stats.kpp.kpp_err_cnt); + atomic64_inc(&alg->stats.kpp.err_cnt); else atomic64_inc(&alg->stats.kpp.setsecret_cnt); crypto_alg_put(alg); @@ -1238,7 +1238,7 @@ EXPORT_SYMBOL_GPL(crypto_stats_kpp_set_secret); void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret) { if (ret) - atomic64_inc(&alg->stats.kpp.kpp_err_cnt); + atomic64_inc(&alg->stats.kpp.err_cnt); else atomic64_inc(&alg->stats.kpp.generate_public_key_cnt); crypto_alg_put(alg); @@ -1248,7 +1248,7 @@ EXPORT_SYMBOL_GPL(crypto_stats_kpp_generate_public_key); void crypto_stats_kpp_compute_shared_secret(struct crypto_alg *alg, int ret) { if (ret) - atomic64_inc(&alg->stats.kpp.kpp_err_cnt); + atomic64_inc(&alg->stats.kpp.err_cnt); else atomic64_inc(&alg->stats.kpp.compute_shared_secret_cnt); crypto_alg_put(alg); @@ -1258,7 +1258,7 @@ 
EXPORT_SYMBOL_GPL(crypto_stats_kpp_compute_shared_secret); void crypto_stats_rng_seed(struct crypto_alg *alg, int ret) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) - atomic64_inc(&alg->stats.rng.rng_err_cnt); + atomic64_inc(&alg->stats.rng.err_cnt); else atomic64_inc(&alg->stats.rng.seed_cnt); crypto_alg_put(alg); @@ -1269,7 +1269,7 @@ void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, int ret) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.rng.rng_err_cnt); + atomic64_inc(&alg->stats.rng.err_cnt); } else { atomic64_inc(&alg->stats.rng.generate_cnt); atomic64_add(dlen, &alg->stats.rng.generate_tlen); @@ -1282,7 +1282,7 @@ void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.cipher.cipher_err_cnt); + atomic64_inc(&alg->stats.cipher.err_cnt); } else { atomic64_inc(&alg->stats.cipher.encrypt_cnt); atomic64_add(cryptlen, &alg->stats.cipher.encrypt_tlen); @@ -1295,7 +1295,7 @@ void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg) { if (ret && ret != -EINPROGRESS && ret != -EBUSY) { - atomic64_inc(&alg->stats.cipher.cipher_err_cnt); + atomic64_inc(&alg->stats.cipher.err_cnt); } else { atomic64_inc(&alg->stats.cipher.decrypt_cnt); atomic64_add(cryptlen, &alg->stats.cipher.decrypt_tlen); diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c index 113bf1691560..0ba00aaeb810 100644 --- a/crypto/crypto_user_stat.c +++ b/crypto/crypto_user_stat.c @@ -43,7 +43,7 @@ static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg) raead.stat_encrypt_tlen = atomic64_read(&alg->stats.aead.encrypt_tlen); raead.stat_decrypt_cnt = atomic64_read(&alg->stats.aead.decrypt_cnt); raead.stat_decrypt_tlen = atomic64_read(&alg->stats.aead.decrypt_tlen); - raead.stat_aead_err_cnt = atomic64_read(&alg->stats.aead.aead_err_cnt); + raead.stat_err_cnt = 
atomic64_read(&alg->stats.aead.err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_AEAD, sizeof(raead), &raead); } @@ -60,7 +60,7 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) rcipher.stat_encrypt_tlen = atomic64_read(&alg->stats.cipher.encrypt_tlen); rcipher.stat_decrypt_cnt = atomic64_read(&alg->stats.cipher.decrypt_cnt); rcipher.stat_decrypt_tlen = atomic64_read(&alg->stats.cipher.decrypt_tlen); - rcipher.stat_cipher_err_cnt = atomic64_read(&alg->stats.cipher.cipher_err_cnt); + rcipher.stat_err_cnt = atomic64_read(&alg->stats.cipher.err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); } @@ -76,7 +76,7 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) rcomp.stat_compress_tlen = atomic64_read(&alg->stats.compress.compress_tlen); rcomp.stat_decompress_cnt = atomic64_read(&alg->stats.compress.decompress_cnt); rcomp.stat_decompress_tlen = atomic64_read(&alg->stats.compress.decompress_tlen); - rcomp.stat_compress_err_cnt = atomic64_read(&alg->stats.compress.compress_err_cnt); + rcomp.stat_err_cnt = atomic64_read(&alg->stats.compress.err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_COMPRESS, sizeof(rcomp), &rcomp); } @@ -92,7 +92,7 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) racomp.stat_compress_tlen = atomic64_read(&alg->stats.compress.compress_tlen); racomp.stat_decompress_cnt = atomic64_read(&alg->stats.compress.decompress_cnt); racomp.stat_decompress_tlen = atomic64_read(&alg->stats.compress.decompress_tlen); - racomp.stat_compress_err_cnt = atomic64_read(&alg->stats.compress.compress_err_cnt); + racomp.stat_err_cnt = atomic64_read(&alg->stats.compress.err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_ACOMP, sizeof(racomp), &racomp); } @@ -110,7 +110,7 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) rakcipher.stat_decrypt_tlen = atomic64_read(&alg->stats.akcipher.decrypt_tlen); rakcipher.stat_sign_cnt = 
atomic64_read(&alg->stats.akcipher.sign_cnt); rakcipher.stat_verify_cnt = atomic64_read(&alg->stats.akcipher.verify_cnt); - rakcipher.stat_akcipher_err_cnt = atomic64_read(&alg->stats.akcipher.akcipher_err_cnt); + rakcipher.stat_err_cnt = atomic64_read(&alg->stats.akcipher.err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER, sizeof(rakcipher), &rakcipher); @@ -127,7 +127,7 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) rkpp.stat_setsecret_cnt = atomic64_read(&alg->stats.kpp.setsecret_cnt); rkpp.stat_generate_public_key_cnt = atomic64_read(&alg->stats.kpp.generate_public_key_cnt); rkpp.stat_compute_shared_secret_cnt = atomic64_read(&alg->stats.kpp.compute_shared_secret_cnt); - rkpp.stat_kpp_err_cnt = atomic64_read(&alg->stats.kpp.kpp_err_cnt); + rkpp.stat_err_cnt = atomic64_read(&alg->stats.kpp.err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_KPP, sizeof(rkpp), &rkpp); } @@ -142,7 +142,7 @@ static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg) rhash.stat_hash_cnt = atomic64_read(&alg->stats.hash.hash_cnt); rhash.stat_hash_tlen = atomic64_read(&alg->stats.hash.hash_tlen); - rhash.stat_hash_err_cnt = atomic64_read(&alg->stats.hash.hash_err_cnt); + rhash.stat_err_cnt = atomic64_read(&alg->stats.hash.err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash); } @@ -157,7 +157,7 @@ static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg) rhash.stat_hash_cnt = atomic64_read(&alg->stats.hash.hash_cnt); rhash.stat_hash_tlen = atomic64_read(&alg->stats.hash.hash_tlen); - rhash.stat_hash_err_cnt = atomic64_read(&alg->stats.hash.hash_err_cnt); + rhash.stat_err_cnt = atomic64_read(&alg->stats.hash.err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash); } @@ -173,7 +173,7 @@ static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg) rrng.stat_generate_cnt = atomic64_read(&alg->stats.rng.generate_cnt); rrng.stat_generate_tlen = 
atomic64_read(&alg->stats.rng.generate_tlen); rrng.stat_seed_cnt = atomic64_read(&alg->stats.rng.seed_cnt); - rrng.stat_rng_err_cnt = atomic64_read(&alg->stats.rng.rng_err_cnt); + rrng.stat_err_cnt = atomic64_read(&alg->stats.rng.err_cnt); return nla_put(skb, CRYPTOCFGA_STAT_RNG, sizeof(rrng), &rrng); } diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 8a46ab35479e..a2967c1a08b1 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -376,14 +376,14 @@ struct compress_alg { * @encrypt_tlen: total data size handled by encrypt requests * @decrypt_cnt: number of decrypt requests * @decrypt_tlen: total data size handled by decrypt requests - * @aead_err_cnt: number of error for AEAD requests + * @err_cnt: number of error for AEAD requests */ struct crypto_istat_aead { atomic64_t encrypt_cnt; atomic64_t encrypt_tlen; atomic64_t decrypt_cnt; atomic64_t decrypt_tlen; - atomic64_t aead_err_cnt; + atomic64_t err_cnt; }; /* @@ -394,7 +394,7 @@ struct crypto_istat_aead { * @decrypt_tlen: total data size handled by decrypt requests * @verify_cnt: number of verify operation * @sign_cnt: number of sign requests - * @akcipher_err_cnt: number of error for akcipher requests + * @err_cnt: number of error for akcipher requests */ struct crypto_istat_akcipher { atomic64_t encrypt_cnt; @@ -403,7 +403,7 @@ struct crypto_istat_akcipher { atomic64_t decrypt_tlen; atomic64_t verify_cnt; atomic64_t sign_cnt; - atomic64_t akcipher_err_cnt; + atomic64_t err_cnt; }; /* @@ -412,14 +412,14 @@ struct crypto_istat_akcipher { * @encrypt_tlen: total data size handled by encrypt requests * @decrypt_cnt: number of decrypt requests * @decrypt_tlen: total data size handled by decrypt requests - * @cipher_err_cnt: number of error for cipher requests + * @err_cnt: number of error for cipher requests */ struct crypto_istat_cipher { atomic64_t encrypt_cnt; atomic64_t encrypt_tlen; atomic64_t decrypt_cnt; atomic64_t decrypt_tlen; - atomic64_t cipher_err_cnt; + atomic64_t err_cnt; 
}; /* @@ -428,26 +428,26 @@ struct crypto_istat_cipher { * @compress_tlen: total data size handled by compress requests * @decompress_cnt: number of decompress requests * @decompress_tlen: total data size handled by decompress requests - * @compress_err_cnt: number of error for compress requests + * @err_cnt: number of error for compress requests */ struct crypto_istat_compress { atomic64_t compress_cnt; atomic64_t compress_tlen; atomic64_t decompress_cnt; atomic64_t decompress_tlen; - atomic64_t compress_err_cnt; + atomic64_t err_cnt; }; /* * struct crypto_istat_hash - statistics for has algorithm * @hash_cnt: number of hash requests * @hash_tlen: total data size hashed - * @hash_err_cnt: number of error for hash requests + * @err_cnt: number of error for hash requests */ struct crypto_istat_hash { atomic64_t hash_cnt; atomic64_t hash_tlen; - atomic64_t hash_err_cnt; + atomic64_t err_cnt; }; /* @@ -455,13 +455,13 @@ struct crypto_istat_hash { * @setsecret_cnt: number of setsecrey operation * @generate_public_key_cnt: number of generate_public_key operation * @compute_shared_secret_cnt: number of compute_shared_secret operation - * @kpp_err_cnt: number of error for KPP requests + * @err_cnt: number of error for KPP requests */ struct crypto_istat_kpp { atomic64_t setsecret_cnt; atomic64_t generate_public_key_cnt; atomic64_t compute_shared_secret_cnt; - atomic64_t kpp_err_cnt; + atomic64_t err_cnt; }; /* @@ -469,13 +469,13 @@ struct crypto_istat_kpp { * @generate_cnt: number of RNG generate requests * @generate_tlen: total data size of generated data by the RNG * @seed_cnt: number of times the RNG was seeded - * @rng_err_cnt: number of error for RNG requests + * @err_cnt: number of error for RNG requests */ struct crypto_istat_rng { atomic64_t generate_cnt; atomic64_t generate_tlen; atomic64_t seed_cnt; - atomic64_t rng_err_cnt; + atomic64_t err_cnt; }; #endif /* CONFIG_CRYPTO_STATS */ diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h 
index 3a70f025e27d..4dc1603919ce 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -82,7 +82,7 @@ struct crypto_stat_aead { __u64 stat_encrypt_tlen; __u64 stat_decrypt_cnt; __u64 stat_decrypt_tlen; - __u64 stat_aead_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_akcipher { @@ -93,7 +93,7 @@ struct crypto_stat_akcipher { __u64 stat_decrypt_tlen; __u64 stat_verify_cnt; __u64 stat_sign_cnt; - __u64 stat_akcipher_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_cipher { @@ -102,7 +102,7 @@ struct crypto_stat_cipher { __u64 stat_encrypt_tlen; __u64 stat_decrypt_cnt; __u64 stat_decrypt_tlen; - __u64 stat_cipher_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_compress { @@ -111,14 +111,14 @@ struct crypto_stat_compress { __u64 stat_compress_tlen; __u64 stat_decompress_cnt; __u64 stat_decompress_tlen; - __u64 stat_compress_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_hash { char type[CRYPTO_MAX_NAME]; __u64 stat_hash_cnt; __u64 stat_hash_tlen; - __u64 stat_hash_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_kpp { @@ -126,7 +126,7 @@ struct crypto_stat_kpp { __u64 stat_setsecret_cnt; __u64 stat_generate_public_key_cnt; __u64 stat_compute_shared_secret_cnt; - __u64 stat_kpp_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_rng { @@ -134,7 +134,7 @@ struct crypto_stat_rng { __u64 stat_generate_cnt; __u64 stat_generate_tlen; __u64 stat_seed_cnt; - __u64 stat_rng_err_cnt; + __u64 stat_err_cnt; }; struct crypto_stat_larval { diff --git a/tools/crypto/getstat.c b/tools/crypto/getstat.c index 57fbb94608d4..9e8ff76420fa 100644 --- a/tools/crypto/getstat.c +++ b/tools/crypto/getstat.c @@ -157,7 +157,7 @@ static int get_stat(const char *drivername) printf("%s\tHash\n\tHash: %llu bytes: %llu\n\tErrors: %llu\n", drivername, rhash->stat_hash_cnt, rhash->stat_hash_tlen, - rhash->stat_hash_err_cnt); + rhash->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_COMPRESS]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_COMPRESS]; struct 
crypto_stat_compress *rblk = @@ -166,7 +166,7 @@ static int get_stat(const char *drivername) drivername, rblk->stat_compress_cnt, rblk->stat_compress_tlen, rblk->stat_decompress_cnt, rblk->stat_decompress_tlen, - rblk->stat_compress_err_cnt); + rblk->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_ACOMP]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_ACOMP]; struct crypto_stat_compress *rcomp = @@ -175,7 +175,7 @@ static int get_stat(const char *drivername) drivername, rcomp->stat_compress_cnt, rcomp->stat_compress_tlen, rcomp->stat_decompress_cnt, rcomp->stat_decompress_tlen, - rcomp->stat_compress_err_cnt); + rcomp->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_AEAD]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_AEAD]; struct crypto_stat_aead *raead = @@ -184,7 +184,7 @@ static int get_stat(const char *drivername) drivername, raead->stat_encrypt_cnt, raead->stat_encrypt_tlen, raead->stat_decrypt_cnt, raead->stat_decrypt_tlen, - raead->stat_aead_err_cnt); + raead->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_BLKCIPHER]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_BLKCIPHER]; struct crypto_stat_cipher *rblk = @@ -193,7 +193,7 @@ static int get_stat(const char *drivername) drivername, rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen, rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen, - rblk->stat_cipher_err_cnt); + rblk->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_AKCIPHER]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_AKCIPHER]; struct crypto_stat_akcipher *rblk = @@ -203,7 +203,7 @@ static int get_stat(const char *drivername) rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen, rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen, rblk->stat_sign_cnt, rblk->stat_verify_cnt, - rblk->stat_akcipher_err_cnt); + rblk->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_CIPHER]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_CIPHER]; struct crypto_stat_cipher *rblk = @@ -212,7 +212,7 @@ static int get_stat(const char *drivername) drivername, rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen, 
rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen, - rblk->stat_cipher_err_cnt); + rblk->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_RNG]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_RNG]; struct crypto_stat_rng *rrng = @@ -221,7 +221,7 @@ static int get_stat(const char *drivername) drivername, rrng->stat_seed_cnt, rrng->stat_generate_cnt, rrng->stat_generate_tlen, - rrng->stat_rng_err_cnt); + rrng->stat_err_cnt); } else if (tb[CRYPTOCFGA_STAT_KPP]) { struct rtattr *rta = tb[CRYPTOCFGA_STAT_KPP]; struct crypto_stat_kpp *rkpp = @@ -231,7 +231,7 @@ static int get_stat(const char *drivername) rkpp->stat_setsecret_cnt, rkpp->stat_generate_public_key_cnt, rkpp->stat_compute_shared_secret_cnt, - rkpp->stat_kpp_err_cnt); + rkpp->stat_err_cnt); } else { fprintf(stderr, "%s is of an unknown algorithm\n", drivername); } -- cgit v1.2.3 From 1f6669b9716c6c98391b0f756e060892b32b8ca7 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 29 Nov 2018 14:42:26 +0000 Subject: crypto: user - Add crypto_stats_init This patch add the crypto_stats_init() function. This will permit to remove some ifdef from __crypto_register_alg(). 
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- crypto/algapi.c | 10 +++++++--- include/linux/crypto.h | 3 +++ 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/crypto/algapi.c b/crypto/algapi.c index c0d4f9ef6b0f..8b65ada33e5d 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -258,9 +258,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) list_add(&alg->cra_list, &crypto_alg_list); list_add(&larval->alg.cra_list, &crypto_alg_list); -#ifdef CONFIG_CRYPTO_STATS - memset(&alg->stats, 0, sizeof(alg->stats)); -#endif + crypto_stats_init(alg); out: return larval; @@ -1073,6 +1071,12 @@ int crypto_type_has_alg(const char *name, const struct crypto_type *frontend, EXPORT_SYMBOL_GPL(crypto_type_has_alg); #ifdef CONFIG_CRYPTO_STATS +void crypto_stats_init(struct crypto_alg *alg) +{ + memset(&alg->stats, 0, sizeof(alg->stats)); +} +EXPORT_SYMBOL_GPL(crypto_stats_init); + void crypto_stats_get(struct crypto_alg *alg) { crypto_alg_get(alg); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index a2967c1a08b1..9850b41e38ae 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -614,6 +614,7 @@ struct crypto_alg { } CRYPTO_MINALIGN_ATTR; #ifdef CONFIG_CRYPTO_STATS +void crypto_stats_init(struct crypto_alg *alg); void crypto_stats_get(struct crypto_alg *alg); void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, struct crypto_alg *alg); void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, struct crypto_alg *alg); @@ -635,6 +636,8 @@ void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen, int re void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg); void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret, struct crypto_alg *alg); #else +static inline void crypto_stats_init(struct crypto_alg *alg) +{} static inline void crypto_stats_get(struct crypto_alg *alg) {} static inline void 
crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) -- cgit v1.2.3 From 10949af1681d5bb5cdbcc012815c6e40eec17d02 Mon Sep 17 00:00:00 2001 From: Schrempf Frieder Date: Thu, 8 Nov 2018 08:32:11 +0000 Subject: mtd: spinand: Add initial support for Toshiba TC58CVG2S0H MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add minimal support for the Toshiba TC58CVG2S0H SPI NAND chip. Signed-off-by: Frieder Schrempf Acked-by: Clément Péron Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/Makefile | 2 +- drivers/mtd/nand/spi/core.c | 1 + drivers/mtd/nand/spi/toshiba.c | 137 +++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 1 + 4 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/spi/toshiba.c (limited to 'include/linux') diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile index b74e074b363a..be5f73512ece 100644 --- a/drivers/mtd/nand/spi/Makefile +++ b/drivers/mtd/nand/spi/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -spinand-objs := core.o macronix.o micron.o winbond.o +spinand-objs := core.o macronix.o micron.o toshiba.o winbond.o obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 30f83649c481..87bdf2a7b724 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -766,6 +766,7 @@ static const struct nand_ops spinand_ops = { static const struct spinand_manufacturer *spinand_manufacturers[] = { &macronix_spinand_manufacturer, &micron_spinand_manufacturer, + &toshiba_spinand_manufacturer, &winbond_spinand_manufacturer, }; diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c new file mode 100644 index 000000000000..081265557e70 --- /dev/null +++ b/drivers/mtd/nand/spi/toshiba.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018 exceet electronics GmbH + * Copyright (c) 2018
Kontron Electronics GmbH + * + * Author: Frieder Schrempf + */ + +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/mtd/spinand.h> + +#define SPINAND_MFR_TOSHIBA 0x98 +#define TOSH_STATUS_ECC_HAS_BITFLIPS_T (3 << 4) + +static SPINAND_OP_VARIANTS(read_cache_variants, + SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0)); + +static SPINAND_OP_VARIANTS(write_cache_variants, + SPINAND_PROG_LOAD(true, 0, NULL, 0)); + +static SPINAND_OP_VARIANTS(update_cache_variants, + SPINAND_PROG_LOAD(false, 0, NULL, 0)); + +static int tc58cvg2s0h_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section > 7) + return -ERANGE; + + region->offset = 128 + 16 * section; + region->length = 16; + + return 0; +} + +static int tc58cvg2s0h_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section > 0) + return -ERANGE; + + /* 2 bytes reserved for BBM */ + region->offset = 2; + region->length = 126; + + return 0; +} + +static const struct mtd_ooblayout_ops tc58cvg2s0h_ooblayout = { + .ecc = tc58cvg2s0h_ooblayout_ecc, + .free = tc58cvg2s0h_ooblayout_free, +}; + +static int tc58cvg2s0h_ecc_get_status(struct spinand_device *spinand, + u8 status) +{ + struct nand_device *nand = spinand_to_nand(spinand); + u8 mbf = 0; + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf); + + switch (status & STATUS_ECC_MASK) { + case STATUS_ECC_NO_BITFLIPS: + return 0; + + case STATUS_ECC_UNCOR_ERROR: + return -EBADMSG; + + case STATUS_ECC_HAS_BITFLIPS: + case TOSH_STATUS_ECC_HAS_BITFLIPS_T: + /* + * Let's try to retrieve the real maximum number of bitflips + * in order to avoid forcing the wear-leveling layer to move + * data around if it's not necessary.
+ */ + if (spi_mem_exec_op(spinand->spimem, &op)) + return nand->eccreq.strength; + + mbf >>= 4; + + if (WARN_ON(mbf > nand->eccreq.strength || !mbf)) + return nand->eccreq.strength; + + return mbf; + + default: + break; + } + + return -EINVAL; +} + +static const struct spinand_info toshiba_spinand_table[] = { + SPINAND_INFO("TC58CVG2S0H", 0xCD, + NAND_MEMORG(1, 4096, 256, 64, 2048, 1, 1, 1), + NAND_ECCREQ(8, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + SPINAND_HAS_QE_BIT, + SPINAND_ECCINFO(&tc58cvg2s0h_ooblayout, + tc58cvg2s0h_ecc_get_status)), +}; + +static int toshiba_spinand_detect(struct spinand_device *spinand) +{ + u8 *id = spinand->id.data; + int ret; + + /* + * Toshiba SPI NAND read ID needs a dummy byte, + * so the first byte in id is garbage. + */ + if (id[1] != SPINAND_MFR_TOSHIBA) + return 0; + + ret = spinand_match_and_init(spinand, toshiba_spinand_table, + ARRAY_SIZE(toshiba_spinand_table), + id[2]); + if (ret) + return ret; + + return 1; +} + +static const struct spinand_manufacturer_ops toshiba_spinand_manuf_ops = { + .detect = toshiba_spinand_detect, +}; + +const struct spinand_manufacturer toshiba_spinand_manufacturer = { + .id = SPINAND_MFR_TOSHIBA, + .name = "Toshiba", + .ops = &toshiba_spinand_manuf_ops, +}; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 088ff96c3eb6..816c4b00abca 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -196,6 +196,7 @@ struct spinand_manufacturer { /* SPI NAND manufacturers */ extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; +extern const struct spinand_manufacturer toshiba_spinand_manufacturer; extern const struct spinand_manufacturer winbond_spinand_manufacturer; /** -- cgit v1.2.3 From 1f2d29e634b3e7abc7b62adf6bb4a676615c02ef Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 11 Nov 2018 
08:55:06 +0100 Subject: mtd: rawnand: Move nand_exec_op() to internal.h nand_exec_op() is only used by core code (nand_xxx.c files). Let's move this inline function in drivers/mtd/nand/raw/internals.h. Signed-off-by: Boris Brezillon Tested-by: Janusz Krzysztofik Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/internals.h | 9 +++++++++ include/linux/mtd/rawnand.h | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/internals.h b/drivers/mtd/nand/raw/internals.h index 04c2cf74eff3..6e2f61fbc5f0 100644 --- a/drivers/mtd/nand/raw/internals.h +++ b/drivers/mtd/nand/raw/internals.h @@ -95,6 +95,15 @@ void nand_decode_ext_id(struct nand_chip *chip); void panic_nand_wait(struct nand_chip *chip, unsigned long timeo); void sanitize_string(uint8_t *s, size_t len); +static inline int nand_exec_op(struct nand_chip *chip, + const struct nand_operation *op) +{ + if (!chip->exec_op) + return -ENOTSUPP; + + return chip->exec_op(chip, op, false); +} + /* BBT functions */ int nand_markbad_bbt(struct nand_chip *chip, loff_t offs); int nand_isreserved_bbt(struct nand_chip *chip, loff_t offs); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 4e91a70ede10..85dd89abcd22 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1098,15 +1098,6 @@ struct nand_chip { } manufacturer; }; -static inline int nand_exec_op(struct nand_chip *chip, - const struct nand_operation *op) -{ - if (!chip->exec_op) - return -ENOTSUPP; - - return chip->exec_op(chip, op, false); -} - extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; extern const struct mtd_ooblayout_ops nand_ooblayout_lp_ops; -- cgit v1.2.3 From 336058c8f4c2c7991427304c8bde05acef156054 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 11 Nov 2018 08:55:07 +0100 Subject: mtd: rawnand: Remove unused NAND_CONTROLLER_ALLOC flag Looks like NAND_CONTROLLER_ALLOC has been introduced a long time ago back 
when the dummy nand_hw_ctrl object was dynamically allocated instead of being embedded in nand_chip. We can safely get rid of this unused flag. Signed-off-by: Boris Brezillon Tested-by: Janusz Krzysztofik Signed-off-by: Miquel Raynal --- include/linux/mtd/rawnand.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 85dd89abcd22..2a3dd3e633f1 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -203,10 +203,6 @@ enum nand_ecc_algo { */ #define NAND_IS_BOOT_MEDIUM 0x00400000 -/* Options set by nand scan */ -/* Nand scan has allocated controller struct */ -#define NAND_CONTROLLER_ALLOC 0x80000000 - /* Cell info constants */ #define NAND_CI_CHIPNR_MSK 0x03 #define NAND_CI_CELLTYPE_MSK 0x0C -- cgit v1.2.3 From 1d0178593d148e88d2ac1e3f09c7f7eb1c20796b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 11 Nov 2018 08:55:14 +0100 Subject: mtd: rawnand: Add nand_[de]select_target() helpers Add a wrapper to prevent drivers and core code from directly calling the ->select_chip hook which we are about to deprecate. 
Signed-off-by: Boris Brezillon Tested-by: Janusz Krzysztofik Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 23 ++++-- drivers/mtd/nand/raw/jz4740_nand.c | 4 +- drivers/mtd/nand/raw/nand_base.c | 120 +++++++++++++++++++---------- drivers/mtd/nand/raw/r852.c | 4 +- include/linux/mtd/rawnand.h | 5 +- 5 files changed, 104 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 94c2b7525c85..302ddd3d4a5f 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -1549,7 +1549,7 @@ static int gpmi_block_markbad(struct nand_chip *chip, loff_t ofs) int column, page, chipnr; chipnr = (int)(ofs >> chip->chip_shift); - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); column = !GPMI_IS_MX23(this) ? mtd->writesize : 0; @@ -1562,7 +1562,7 @@ static int gpmi_block_markbad(struct nand_chip *chip, loff_t ofs) ret = nand_prog_page_op(chip, page, column, block_mark, 1); - chip->select_chip(chip, -1); + nand_deselect_target(chip); return ret; } @@ -1610,7 +1610,7 @@ static int mx23_check_transcription_stamp(struct gpmi_nand_data *this) search_area_size_in_strides = 1 << rom_geo->search_area_stride_exponent; saved_chip_number = this->current_chip; - chip->select_chip(chip, 0); + nand_select_target(chip, 0); /* * Loop through the first search area, looking for the NCB fingerprint. @@ -1638,7 +1638,10 @@ static int mx23_check_transcription_stamp(struct gpmi_nand_data *this) } - chip->select_chip(chip, saved_chip_number); + if (saved_chip_number >= 0) + nand_select_target(chip, saved_chip_number); + else + nand_deselect_target(chip); if (found_an_ncb_fingerprint) dev_dbg(dev, "\tFound a fingerprint\n"); @@ -1681,7 +1684,7 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this) /* Select chip 0. 
*/ saved_chip_number = this->current_chip; - chip->select_chip(chip, 0); + nand_select_target(chip, 0); /* Loop over blocks in the first search area, erasing them. */ dev_dbg(dev, "Erasing the search area...\n"); @@ -1713,7 +1716,11 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this) } /* Deselect chip 0. */ - chip->select_chip(chip, saved_chip_number); + if (saved_chip_number >= 0) + nand_select_target(chip, saved_chip_number); + else + nand_deselect_target(chip); + return 0; } @@ -1762,10 +1769,10 @@ static int mx23_boot_init(struct gpmi_nand_data *this) byte = block << chip->phys_erase_shift; /* Send the command to read the conventional block mark. */ - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); nand_read_page_op(chip, page, mtd->writesize, NULL, 0); block_mark = chip->legacy.read_byte(chip); - chip->select_chip(chip, -1); + nand_deselect_target(chip); /* * Check if the block is marked bad. If so, we need to mark it diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c index fb59cfca11a7..d271004f16b0 100644 --- a/drivers/mtd/nand/raw/jz4740_nand.c +++ b/drivers/mtd/nand/raw/jz4740_nand.c @@ -335,14 +335,14 @@ static int jz_nand_detect_bank(struct platform_device *pdev, goto notfound_id; /* Retrieve the IDs from the first chip. */ - chip->select_chip(chip, 0); + nand_select_target(chip, 0); nand_reset_op(chip); nand_readid_op(chip, 0, id, sizeof(id)); *nand_maf_id = id[0]; *nand_dev_id = id[1]; } else { /* Detect additional chip. 
*/ - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); nand_reset_op(chip); nand_readid_op(chip, 0, id, sizeof(id)); if (*nand_maf_id != id[0] || *nand_dev_id != id[1]) { diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 6d9de6949366..f85e6f3b1b2f 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -228,6 +228,41 @@ static int check_offs_len(struct nand_chip *chip, loff_t ofs, uint64_t len) return ret; } +/** + * nand_select_target() - Select a NAND target (A.K.A. die) + * @chip: NAND chip object + * @cs: the CS line to select. Note that this CS id is always from the chip + * PoV, not the controller one + * + * Select a NAND target so that further operations executed on @chip go to the + * selected NAND target. + */ +void nand_select_target(struct nand_chip *chip, unsigned int cs) +{ + /* + * cs should always lie between 0 and chip->numchips, when that's not + * the case it's a bug and the caller should be fixed. + */ + if (WARN_ON(cs > chip->numchips)) + return; + + chip->select_chip(chip, cs); +} +EXPORT_SYMBOL_GPL(nand_select_target); + +/** + * nand_deselect_target() - Deselect the currently selected target + * @chip: NAND chip object + * + * Deselect the currently selected NAND target. The result of operations + * executed on @chip after the target has been deselected is undefined. 
+ */ +void nand_deselect_target(struct nand_chip *chip) +{ + chip->select_chip(chip, -1); +} +EXPORT_SYMBOL_GPL(nand_deselect_target); + /** * nand_release_device - [GENERIC] release chip * @chip: NAND chip object @@ -440,14 +475,14 @@ static int nand_do_write_oob(struct nand_chip *chip, loff_t to, */ nand_reset(chip, chipnr); - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); /* Shift to get page */ page = (int)(to >> chip->page_shift); /* Check, if it is write protected */ if (nand_check_wp(chip)) { - chip->select_chip(chip, -1); + nand_deselect_target(chip); return -EROFS; } @@ -462,7 +497,7 @@ static int nand_do_write_oob(struct nand_chip *chip, loff_t to, else status = chip->ecc.write_oob(chip, page & chip->pagemask); - chip->select_chip(chip, -1); + nand_deselect_target(chip); if (status) return status; @@ -816,10 +851,10 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr) /* Change the mode on the chip side (if supported by the NAND chip) */ if (nand_supports_set_features(chip, ONFI_FEATURE_ADDR_TIMING_MODE)) { - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); ret = nand_set_features(chip, ONFI_FEATURE_ADDR_TIMING_MODE, tmode_param); - chip->select_chip(chip, -1); + nand_deselect_target(chip); if (ret) return ret; } @@ -834,10 +869,10 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr) return 0; memset(tmode_param, 0, ONFI_SUBFEATURE_PARAM_LEN); - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); ret = nand_get_features(chip, ONFI_FEATURE_ADDR_TIMING_MODE, tmode_param); - chip->select_chip(chip, -1); + nand_deselect_target(chip); if (ret) goto err_reset_chip; @@ -855,9 +890,9 @@ err_reset_chip: * timing mode. 
*/ nand_reset_data_interface(chip, chipnr); - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); nand_reset_op(chip); - chip->select_chip(chip, -1); + nand_deselect_target(chip); return ret; } @@ -2345,11 +2380,12 @@ int nand_reset(struct nand_chip *chip, int chipnr) /* * The CS line has to be released before we can apply the new NAND - * interface settings, hence this weird ->select_chip() dance. + * interface settings, hence this weird nand_select_target() + * nand_deselect_target() dance. */ - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); ret = nand_reset_op(chip); - chip->select_chip(chip, -1); + nand_deselect_target(chip); if (ret) return ret; @@ -3133,7 +3169,7 @@ static int nand_do_read_ops(struct nand_chip *chip, loff_t from, bool ecc_fail = false; chipnr = (int)(from >> chip->chip_shift); - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); realpage = (int)(from >> chip->page_shift); page = realpage & chip->pagemask; @@ -3264,11 +3300,11 @@ read_retry: /* Check, if we cross a chip boundary */ if (!page) { chipnr++; - chip->select_chip(chip, -1); - chip->select_chip(chip, chipnr); + nand_deselect_target(chip); + nand_select_target(chip, chipnr); } } - chip->select_chip(chip, -1); + nand_deselect_target(chip); ops->retlen = ops->len - (size_t) readlen; if (oob) @@ -3465,7 +3501,7 @@ static int nand_do_read_oob(struct nand_chip *chip, loff_t from, len = mtd_oobavail(mtd, ops); chipnr = (int)(from >> chip->chip_shift); - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); /* Shift to get page */ realpage = (int)(from >> chip->page_shift); @@ -3498,11 +3534,11 @@ static int nand_do_read_oob(struct nand_chip *chip, loff_t from, /* Check, if we cross a chip boundary */ if (!page) { chipnr++; - chip->select_chip(chip, -1); - chip->select_chip(chip, chipnr); + nand_deselect_target(chip); + nand_select_target(chip, chipnr); } } - chip->select_chip(chip, -1); + nand_deselect_target(chip); 
ops->oobretlen = ops->ooblen - readlen; @@ -3946,7 +3982,7 @@ static int nand_do_write_ops(struct nand_chip *chip, loff_t to, column = to & (mtd->writesize - 1); chipnr = (int)(to >> chip->chip_shift); - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); /* Check, if it is write protected */ if (nand_check_wp(chip)) { @@ -4022,8 +4058,8 @@ static int nand_do_write_ops(struct nand_chip *chip, loff_t to, /* Check, if we cross a chip boundary */ if (!page) { chipnr++; - chip->select_chip(chip, -1); - chip->select_chip(chip, chipnr); + nand_deselect_target(chip); + nand_select_target(chip, chipnr); } } @@ -4032,7 +4068,7 @@ static int nand_do_write_ops(struct nand_chip *chip, loff_t to, ops->oobretlen = ops->ooblen; err_out: - chip->select_chip(chip, -1); + nand_deselect_target(chip); return ret; } @@ -4058,7 +4094,7 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len, /* Grab the device */ panic_nand_get_device(chip, FL_WRITING); - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); /* Wait for the device to get ready */ panic_nand_wait(chip, 400); @@ -4171,7 +4207,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr, pages_per_block = 1 << (chip->phys_erase_shift - chip->page_shift); /* Select the NAND device */ - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); /* Check, if it is write protected */ if (nand_check_wp(chip)) { @@ -4225,8 +4261,8 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr, /* Check, if we cross a chip boundary */ if (len && !(page & chip->pagemask)) { chipnr++; - chip->select_chip(chip, -1); - chip->select_chip(chip, chipnr); + nand_deselect_target(chip); + nand_select_target(chip, chipnr); } } @@ -4234,7 +4270,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr, erase_exit: /* Deselect and wake up anyone waiting on the device */ - chip->select_chip(chip, -1); + nand_deselect_target(chip); 
nand_release_device(chip); /* Return more or less happy */ @@ -4272,11 +4308,11 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs) /* Select the NAND device */ nand_get_device(chip, FL_READING); - chip->select_chip(chip, chipnr); + nand_select_target(chip, chipnr); ret = nand_block_checkbad(chip, offs, 0); - chip->select_chip(chip, -1); + nand_deselect_target(chip); nand_release_device(chip); return ret; @@ -4645,7 +4681,7 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type) return ret; /* Select the device */ - chip->select_chip(chip, 0); + nand_select_target(chip, 0); /* Send the command for reading device ID */ ret = nand_readid_op(chip, 0, id_data, 2); @@ -4989,6 +5025,12 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, if (ret) return ret; + /* + * Start with chips->numchips = maxchips to let nand_select_target() do + * its job. chip->numchips will be adjusted after. + */ + chip->numchips = maxchips; + /* Set the default functions */ nand_set_defaults(chip); @@ -4997,14 +5039,14 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, if (ret) { if (!(chip->options & NAND_SCAN_SILENT_NODEV)) pr_warn("No NAND device found\n"); - chip->select_chip(chip, -1); + nand_deselect_target(chip); return ret; } nand_maf_id = chip->id.data[0]; nand_dev_id = chip->id.data[1]; - chip->select_chip(chip, -1); + nand_deselect_target(chip); /* Check for a chip array */ for (i = 1; i < maxchips; i++) { @@ -5013,15 +5055,15 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, /* See comment in nand_get_flash_type for reset */ nand_reset(chip, i); - chip->select_chip(chip, i); + nand_select_target(chip, i); /* Send the command for reading device ID */ nand_readid_op(chip, 0, id, sizeof(id)); /* Read manufacturer and device IDs */ if (nand_maf_id != id[0] || nand_dev_id != id[1]) { - chip->select_chip(chip, -1); + nand_deselect_target(chip); break; } - 
chip->select_chip(chip, -1); + nand_deselect_target(chip); } if (i > 1) pr_info("%d chips detected\n", i); @@ -5447,9 +5489,9 @@ static int nand_scan_tail(struct nand_chip *chip) * to explictly select the relevant die when interacting with the NAND * chip. */ - chip->select_chip(chip, 0); + nand_select_target(chip, 0); ret = nand_manufacturer_init(chip); - chip->select_chip(chip, -1); + nand_deselect_target(chip); if (ret) goto err_free_buf; diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c index 35f0b343cf90..c01422d953dd 100644 --- a/drivers/mtd/nand/raw/r852.c +++ b/drivers/mtd/nand/raw/r852.c @@ -1045,9 +1045,9 @@ static int r852_resume(struct device *device) /* Otherwise, initialize the card */ if (dev->card_registered) { r852_engine_enable(dev); - dev->chip->select_chip(dev->chip, 0); + nand_select_target(dev->chip, 0); nand_reset_op(dev->chip); - dev->chip->select_chip(dev->chip, -1); + nand_deselect_target(dev->chip); } /* Program card detection IRQ */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 2a3dd3e633f1..def6dff11e8b 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1332,9 +1332,12 @@ void nand_release(struct nand_chip *chip); * instruction and have no physical pin to check it. */ int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms); - struct gpio_desc; int nand_gpio_waitrdy(struct nand_chip *chip, struct gpio_desc *gpiod, unsigned long timeout_ms); +/* Select/deselect a NAND target. 
*/ +void nand_select_target(struct nand_chip *chip, unsigned int cs); +void nand_deselect_target(struct nand_chip *chip); + #endif /* __LINUX_MTD_RAWNAND_H */ -- cgit v1.2.3 From ae2294b10b0f066ef500954b36c94ee11c4ef20f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 11 Nov 2018 08:55:15 +0100 Subject: mtd: rawnand: Pass the CS line to be selected in struct nand_operation In order to deprecate the ->select_chip hook we need to pass the CS line a NAND operation is targeting. This is done through the addition of a cs field to the nand_operation struct. We also need to keep track of the currently selected target to properly initialize op->cs, hence the ->cur_cs field addition to the nand_chip struct. Note that op->cs is not assigned in nand_exec_op() because we might rework the way we execute NAND operations in the future (adopt a queuing mechanism instead of the serialization we have right now). Signed-off-by: Boris Brezillon Tested-by: Janusz Krzysztofik Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/internals.h | 3 +++ drivers/mtd/nand/raw/nand_base.c | 39 ++++++++++++++++++++++----------------- drivers/mtd/nand/raw/nand_hynix.c | 4 ++-- include/linux/mtd/rawnand.h | 11 ++++++++++- 4 files changed, 37 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/internals.h b/drivers/mtd/nand/raw/internals.h index 6e2f61fbc5f0..b62728d5884b 100644 --- a/drivers/mtd/nand/raw/internals.h +++ b/drivers/mtd/nand/raw/internals.h @@ -101,6 +101,9 @@ static inline int nand_exec_op(struct nand_chip *chip, if (!chip->exec_op) return -ENOTSUPP; + if (WARN_ON(op->cs >= chip->numchips)) + return -EINVAL; + return chip->exec_op(chip, op, false); } diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index f85e6f3b1b2f..7aa661f76891 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -246,6 +246,7 @@ void nand_select_target(struct nand_chip *chip, unsigned int cs)
if (WARN_ON(cs > chip->numchips)) return; + chip->cur_cs = cs; chip->select_chip(chip, cs); } EXPORT_SYMBOL_GPL(nand_select_target); @@ -260,6 +261,7 @@ EXPORT_SYMBOL_GPL(nand_select_target); void nand_deselect_target(struct nand_chip *chip) { chip->select_chip(chip, -1); + chip->cur_cs = -1; } EXPORT_SYMBOL_GPL(nand_deselect_target); @@ -1022,7 +1024,7 @@ static int nand_sp_exec_read_page_op(struct nand_chip *chip, unsigned int page, PSEC_TO_NSEC(sdr->tRR_min)), NAND_OP_DATA_IN(len, buf, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); int ret; /* Drop the DATA_IN instruction if len is set to 0. */ @@ -1065,7 +1067,7 @@ static int nand_lp_exec_read_page_op(struct nand_chip *chip, unsigned int page, PSEC_TO_NSEC(sdr->tRR_min)), NAND_OP_DATA_IN(len, buf, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); int ret; /* Drop the DATA_IN instruction if len is set to 0. */ @@ -1160,7 +1162,7 @@ int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf, PSEC_TO_NSEC(sdr->tRR_min)), NAND_OP_8BIT_DATA_IN(len, buf, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); /* Drop the DATA_IN instruction if len is set to 0. 
*/ if (!len) @@ -1216,7 +1218,7 @@ int nand_change_read_column_op(struct nand_chip *chip, PSEC_TO_NSEC(sdr->tCCS_min)), NAND_OP_DATA_IN(len, buf, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); int ret; ret = nand_fill_column_cycles(chip, addrs, offset_in_page); @@ -1298,7 +1300,7 @@ static int nand_exec_prog_page_op(struct nand_chip *chip, unsigned int page, NAND_OP_CMD(NAND_CMD_PAGEPROG, PSEC_TO_NSEC(sdr->tWB_max)), NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tPROG_max), 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); int naddrs = nand_fill_column_cycles(chip, addrs, offset_in_page); int ret; u8 status; @@ -1412,7 +1414,7 @@ int nand_prog_page_end_op(struct nand_chip *chip) PSEC_TO_NSEC(sdr->tWB_max)), NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tPROG_max), 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); ret = nand_exec_op(chip, &op); if (ret) @@ -1520,7 +1522,7 @@ int nand_change_write_column_op(struct nand_chip *chip, NAND_OP_ADDR(2, addrs, PSEC_TO_NSEC(sdr->tCCS_min)), NAND_OP_DATA_OUT(len, buf, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); int ret; ret = nand_fill_column_cycles(chip, addrs, offset_in_page); @@ -1574,7 +1576,7 @@ int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf, NAND_OP_ADDR(1, &addr, PSEC_TO_NSEC(sdr->tADL_min)), NAND_OP_8BIT_DATA_IN(len, buf, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); /* Drop the DATA_IN instruction if len is set to 0. 
*/ if (!len) @@ -1613,7 +1615,7 @@ int nand_status_op(struct nand_chip *chip, u8 *status) PSEC_TO_NSEC(sdr->tADL_min)), NAND_OP_8BIT_DATA_IN(1, status, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); if (!status) op.ninstrs--; @@ -1646,7 +1648,7 @@ int nand_exit_status_op(struct nand_chip *chip) struct nand_op_instr instrs[] = { NAND_OP_CMD(NAND_CMD_READ0, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); return nand_exec_op(chip, &op); } @@ -1685,7 +1687,7 @@ int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock) PSEC_TO_MSEC(sdr->tWB_max)), NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tBERS_max), 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); if (chip->options & NAND_ROW_ADDR_3) instrs[1].ctx.addr.naddrs++; @@ -1743,7 +1745,7 @@ static int nand_set_features_op(struct nand_chip *chip, u8 feature, PSEC_TO_NSEC(sdr->tWB_max)), NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tFEAT_max), 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); return nand_exec_op(chip, &op); } @@ -1791,7 +1793,7 @@ static int nand_get_features_op(struct nand_chip *chip, u8 feature, NAND_OP_8BIT_DATA_IN(ONFI_SUBFEATURE_PARAM_LEN, data, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); return nand_exec_op(chip, &op); } @@ -1811,7 +1813,7 @@ static int nand_wait_rdy_op(struct nand_chip *chip, unsigned int timeout_ms, NAND_OP_WAIT_RDY(PSEC_TO_MSEC(timeout_ms), PSEC_TO_NSEC(delay_ns)), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); return nand_exec_op(chip, &op); } @@ -1844,7 +1846,7 @@ int nand_reset_op(struct nand_chip *chip) 
NAND_OP_CMD(NAND_CMD_RESET, PSEC_TO_NSEC(sdr->tWB_max)), NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tRST_max), 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); return nand_exec_op(chip, &op); } @@ -1878,7 +1880,7 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, struct nand_op_instr instrs[] = { NAND_OP_DATA_IN(len, buf, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); instrs[0].ctx.data.force_8bit = force_8bit; @@ -1922,7 +1924,7 @@ int nand_write_data_op(struct nand_chip *chip, const void *buf, struct nand_op_instr instrs[] = { NAND_OP_DATA_OUT(len, buf, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); instrs[0].ctx.data.force_8bit = force_8bit; @@ -5006,6 +5008,9 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, unsigned int i; int ret; + /* Assume all dies are deselected when we enter nand_scan_ident(). 
*/ + chip->cur_cs = -1; + /* Enforce the right timings for reset/detection */ onfi_fill_data_interface(chip, NAND_SDR_IFACE, 0); diff --git a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c index ac1b5c103968..1e4499d01e14 100644 --- a/drivers/mtd/nand/raw/nand_hynix.c +++ b/drivers/mtd/nand/raw/nand_hynix.c @@ -84,7 +84,7 @@ static int hynix_nand_cmd_op(struct nand_chip *chip, u8 cmd) struct nand_op_instr instrs[] = { NAND_OP_CMD(cmd, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); return nand_exec_op(chip, &op); } @@ -103,7 +103,7 @@ static int hynix_nand_reg_write_op(struct nand_chip *chip, u8 addr, u8 val) NAND_OP_ADDR(1, &addr, 0), NAND_OP_8BIT_DATA_OUT(1, &val, 0), }; - struct nand_operation op = NAND_OPERATION(instrs); + struct nand_operation op = NAND_OPERATION(chip->cur_cs, instrs); return nand_exec_op(chip, &op); } diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index def6dff11e8b..aa1512df38a9 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -875,18 +875,21 @@ struct nand_op_parser { /** * struct nand_operation - NAND operation descriptor + * @cs: the CS line to select for this NAND operation * @instrs: array of instructions to execute * @ninstrs: length of the @instrs array * * The actual operation structure that will be passed to chip->exec_op(). */ struct nand_operation { + unsigned int cs; const struct nand_op_instr *instrs; unsigned int ninstrs; }; -#define NAND_OPERATION(_instrs) \ +#define NAND_OPERATION(_cs, _instrs) \ { \ + .cs = _cs, \ .instrs = _instrs, \ .ninstrs = ARRAY_SIZE(_instrs), \ } @@ -1008,6 +1011,10 @@ struct nand_legacy { * this nand device will encounter their life times. * @blocks_per_die: [INTERN] The number of PEBs in a die * @data_interface: [INTERN] NAND interface timing information + * @cur_cs: currently selected target. 
-1 means no target selected, + * otherwise we should always have cur_cs >= 0 && + * cur_cs < numchips. NAND Controller drivers should not + * modify this value, but they're allowed to read it. * @read_retries: [INTERN] the number of read retry modes supported * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If * chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this @@ -1069,6 +1076,8 @@ struct nand_chip { struct nand_data_interface data_interface; + int cur_cs; + int read_retries; flstate_t state; -- cgit v1.2.3 From 7d6c37e90cf9013bd18240cd861b9ae7b006f91f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 11 Nov 2018 08:55:22 +0100 Subject: mtd: rawnand: Deprecate the ->select_chip() hook Now that the CS line to be selected is passed to ->exec_op() and stored in chip->cur_cs and after patching all drivers implementing ->exec_op() to stop implementing this method, we can deprecate it by moving it to the nand_legacy structure. Signed-off-by: Boris Brezillon Tested-by: Janusz Krzysztofik Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/atmel/nand-controller.c | 4 ++-- drivers/mtd/nand/raw/au1550nd.c | 2 +- drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c | 2 +- drivers/mtd/nand/raw/cafe_nand.c | 2 +- drivers/mtd/nand/raw/davinci_nand.c | 2 +- drivers/mtd/nand/raw/denali.c | 2 +- drivers/mtd/nand/raw/diskonchip.c | 4 ++-- drivers/mtd/nand/raw/fsl_elbc_nand.c | 2 +- drivers/mtd/nand/raw/fsl_ifc_nand.c | 2 +- drivers/mtd/nand/raw/fsl_upm.c | 2 +- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +- drivers/mtd/nand/raw/hisi504_nand.c | 2 +- drivers/mtd/nand/raw/jz4740_nand.c | 2 +- drivers/mtd/nand/raw/jz4780_nand.c | 2 +- drivers/mtd/nand/raw/mpc5121_nfc.c | 4 ++-- drivers/mtd/nand/raw/mtk_nand.c | 2 +- drivers/mtd/nand/raw/mxc_nand.c | 2 +- drivers/mtd/nand/raw/nand_base.c | 8 ++++---- drivers/mtd/nand/raw/nand_legacy.c | 9 +++++---- drivers/mtd/nand/raw/ndfc.c | 2 +- drivers/mtd/nand/raw/plat_nand.c | 2 +- 
drivers/mtd/nand/raw/qcom_nandc.c | 2 +- drivers/mtd/nand/raw/s3c2410.c | 2 +- drivers/mtd/nand/raw/sh_flctl.c | 2 +- drivers/mtd/nand/raw/sunxi_nand.c | 2 +- drivers/mtd/nand/raw/tango_nand.c | 2 +- drivers/mtd/nand/raw/xway_nand.c | 2 +- include/linux/mtd/rawnand.h | 4 ++-- 28 files changed, 39 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c index fb33f6be7c4f..d5c58eb040d8 100644 --- a/drivers/mtd/nand/raw/atmel/nand-controller.c +++ b/drivers/mtd/nand/raw/atmel/nand-controller.c @@ -1477,7 +1477,7 @@ static void atmel_nand_init(struct atmel_nand_controller *nc, chip->legacy.write_byte = atmel_nand_write_byte; chip->legacy.read_buf = atmel_nand_read_buf; chip->legacy.write_buf = atmel_nand_write_buf; - chip->select_chip = atmel_nand_select_chip; + chip->legacy.select_chip = atmel_nand_select_chip; if (nc->mck && nc->caps->ops->setup_data_interface) chip->setup_data_interface = atmel_nand_setup_data_interface; @@ -1525,7 +1525,7 @@ static void atmel_hsmc_nand_init(struct atmel_nand_controller *nc, /* Overload some methods for the HSMC controller. 
*/ chip->legacy.cmd_ctrl = atmel_hsmc_nand_cmd_ctrl; - chip->select_chip = atmel_hsmc_nand_select_chip; + chip->legacy.select_chip = atmel_hsmc_nand_select_chip; } static int atmel_nand_controller_remove_nand(struct atmel_nand *nand) diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c index 9731c1c487f6..a963002663ed 100644 --- a/drivers/mtd/nand/raw/au1550nd.c +++ b/drivers/mtd/nand/raw/au1550nd.c @@ -430,7 +430,7 @@ static int au1550nd_probe(struct platform_device *pdev) ctx->cs = cs; this->legacy.dev_ready = au1550_device_ready; - this->select_chip = au1550_select_chip; + this->legacy.select_chip = au1550_select_chip; this->legacy.cmdfunc = au1550_command; /* 30 us command delay time */ diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c index 9095a79ebc7d..a37cbfe56567 100644 --- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c +++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c @@ -383,7 +383,7 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n) u8 tbits, col_bits, col_size, row_bits, row_bsize; u32 val; - b47n->nand_chip.select_chip = bcm47xxnflash_ops_bcm4706_select_chip; + nand_chip->legacy.select_chip = bcm47xxnflash_ops_bcm4706_select_chip; nand_chip->legacy.cmd_ctrl = bcm47xxnflash_ops_bcm4706_cmd_ctrl; nand_chip->legacy.dev_ready = bcm47xxnflash_ops_bcm4706_dev_ready; b47n->nand_chip.legacy.cmdfunc = bcm47xxnflash_ops_bcm4706_cmdfunc; diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c index c1a745940d12..a85f5fa5c66d 100644 --- a/drivers/mtd/nand/raw/cafe_nand.c +++ b/drivers/mtd/nand/raw/cafe_nand.c @@ -708,7 +708,7 @@ static int cafe_nand_probe(struct pci_dev *pdev, cafe->nand.legacy.read_byte = cafe_read_byte; cafe->nand.legacy.read_buf = cafe_read_buf; cafe->nand.legacy.write_buf = cafe_write_buf; - cafe->nand.select_chip = cafe_select_chip; + cafe->nand.legacy.select_chip = cafe_select_chip; 
cafe->nand.legacy.set_features = nand_get_set_features_notsupp; cafe->nand.legacy.get_features = nand_get_set_features_notsupp; diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c index 80f228d23cd2..f430aeb917e8 100644 --- a/drivers/mtd/nand/raw/davinci_nand.c +++ b/drivers/mtd/nand/raw/davinci_nand.c @@ -762,7 +762,7 @@ static int nand_davinci_probe(struct platform_device *pdev) info->chip.legacy.IO_ADDR_R = vaddr; info->chip.legacy.IO_ADDR_W = vaddr; info->chip.legacy.chip_delay = 0; - info->chip.select_chip = nand_davinci_select_chip; + info->chip.legacy.select_chip = nand_davinci_select_chip; /* options such as NAND_BBT_USE_FLASH */ info->chip.bbt_options = pdata->bbt_options; diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c index 830ea247277b..64895ca68c8d 100644 --- a/drivers/mtd/nand/raw/denali.c +++ b/drivers/mtd/nand/raw/denali.c @@ -1355,7 +1355,7 @@ int denali_init(struct denali_nand_info *denali) if (!mtd->name) mtd->name = "denali-nand"; - chip->select_chip = denali_select_chip; + chip->legacy.select_chip = denali_select_chip; chip->legacy.read_byte = denali_read_byte; chip->legacy.write_byte = denali_write_byte; chip->legacy.cmd_ctrl = denali_cmd_ctrl; diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c index 3a4c373affab..53f57e0f007e 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -1390,7 +1390,7 @@ static inline int __init doc2001plus_init(struct mtd_info *mtd) this->legacy.read_buf = doc2001plus_readbuf; doc->late_init = inftl_scan_bbt; this->legacy.cmd_ctrl = NULL; - this->select_chip = doc2001plus_select_chip; + this->legacy.select_chip = doc2001plus_select_chip; this->legacy.cmdfunc = doc2001plus_command; this->ecc.hwctl = doc2001plus_enable_hwecc; @@ -1568,7 +1568,7 @@ static int __init doc_probe(unsigned long physadr) mtd_set_ooblayout(mtd, &doc200x_ooblayout_ops); nand_set_controller_data(nand, doc); - 
nand->select_chip = doc200x_select_chip; + nand->legacy.select_chip = doc200x_select_chip; nand->legacy.cmd_ctrl = doc200x_hwcontrol; nand->legacy.dev_ready = doc200x_dev_ready; nand->legacy.waitfunc = doc200x_wait; diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c index d6ed697fcfe6..70f0d2b450ea 100644 --- a/drivers/mtd/nand/raw/fsl_elbc_nand.c +++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c @@ -779,7 +779,7 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv) chip->legacy.read_byte = fsl_elbc_read_byte; chip->legacy.write_buf = fsl_elbc_write_buf; chip->legacy.read_buf = fsl_elbc_read_buf; - chip->select_chip = fsl_elbc_select_chip; + chip->legacy.select_chip = fsl_elbc_select_chip; chip->legacy.cmdfunc = fsl_elbc_cmdfunc; chip->legacy.waitfunc = fsl_elbc_wait; chip->legacy.set_features = nand_get_set_features_notsupp; diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c index 6f4afc44381a..e65d274399f9 100644 --- a/drivers/mtd/nand/raw/fsl_ifc_nand.c +++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c @@ -864,7 +864,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->legacy.write_buf = fsl_ifc_write_buf; chip->legacy.read_buf = fsl_ifc_read_buf; - chip->select_chip = fsl_ifc_select_chip; + chip->legacy.select_chip = fsl_ifc_select_chip; chip->legacy.cmdfunc = fsl_ifc_cmdfunc; chip->legacy.waitfunc = fsl_ifc_wait; chip->legacy.set_features = nand_get_set_features_notsupp; diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c index 673c5a0c9345..5ccc28ec0985 100644 --- a/drivers/mtd/nand/raw/fsl_upm.c +++ b/drivers/mtd/nand/raw/fsl_upm.c @@ -170,7 +170,7 @@ static int fun_chip_init(struct fsl_upm_nand *fun, fun->chip.ecc.mode = NAND_ECC_SOFT; fun->chip.ecc.algo = NAND_ECC_HAMMING; if (fun->mchip_count > 1) - fun->chip.select_chip = fun_select_chip; + fun->chip.legacy.select_chip = fun_select_chip; if (fun->rnb_gpio[0] >= 0) fun->chip.legacy.dev_ready = 
fun_chip_ready; diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 302ddd3d4a5f..c461d5efabc0 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -1907,7 +1907,7 @@ static int gpmi_nand_init(struct gpmi_nand_data *this) /* init the nand_chip{}, we don't support a 16-bit NAND Flash bus. */ nand_set_controller_data(chip, this); nand_set_flash_node(chip, this->pdev->dev.of_node); - chip->select_chip = gpmi_select_chip; + chip->legacy.select_chip = gpmi_select_chip; chip->setup_data_interface = gpmi_setup_data_interface; chip->legacy.cmd_ctrl = gpmi_cmd_ctrl; chip->legacy.dev_ready = gpmi_dev_ready; diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c index f043938ee36b..e41c13499fd5 100644 --- a/drivers/mtd/nand/raw/hisi504_nand.c +++ b/drivers/mtd/nand/raw/hisi504_nand.c @@ -783,7 +783,7 @@ static int hisi_nfc_probe(struct platform_device *pdev) nand_set_controller_data(chip, host); nand_set_flash_node(chip, np); chip->legacy.cmdfunc = hisi_nfc_cmdfunc; - chip->select_chip = hisi_nfc_select_chip; + chip->legacy.select_chip = hisi_nfc_select_chip; chip->legacy.read_byte = hisi_nfc_read_byte; chip->legacy.write_buf = hisi_nfc_write_buf; chip->legacy.read_buf = hisi_nfc_read_buf; diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c index d271004f16b0..0bcfdd3d66a8 100644 --- a/drivers/mtd/nand/raw/jz4740_nand.c +++ b/drivers/mtd/nand/raw/jz4740_nand.c @@ -427,7 +427,7 @@ static int jz_nand_probe(struct platform_device *pdev) chip->legacy.chip_delay = 50; chip->legacy.cmd_ctrl = jz_nand_cmd_ctrl; - chip->select_chip = jz_nand_select_chip; + chip->legacy.select_chip = jz_nand_select_chip; chip->dummy_controller.ops = &jz_nand_controller_ops; if (nand->busy_gpio) diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c index cdf22100ab77..22e58975f0d5 100644 --- 
a/drivers/mtd/nand/raw/jz4780_nand.c +++ b/drivers/mtd/nand/raw/jz4780_nand.c @@ -279,7 +279,7 @@ static int jz4780_nand_init_chip(struct platform_device *pdev, chip->legacy.IO_ADDR_W = cs->base + OFFSET_DATA; chip->legacy.chip_delay = RB_DELAY_US; chip->options = NAND_NO_SUBPAGE_WRITE; - chip->select_chip = jz4780_nand_select_chip; + chip->legacy.select_chip = jz4780_nand_select_chip; chip->legacy.cmd_ctrl = jz4780_nand_cmd_ctrl; chip->ecc.mode = NAND_ECC_HW; chip->controller = &nfc->controller; diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c index 86a0aabe08df..062cd1eb2861 100644 --- a/drivers/mtd/nand/raw/mpc5121_nfc.c +++ b/drivers/mtd/nand/raw/mpc5121_nfc.c @@ -697,7 +697,7 @@ static int mpc5121_nfc_probe(struct platform_device *op) chip->legacy.read_byte = mpc5121_nfc_read_byte; chip->legacy.read_buf = mpc5121_nfc_read_buf; chip->legacy.write_buf = mpc5121_nfc_write_buf; - chip->select_chip = mpc5121_nfc_select_chip; + chip->legacy.select_chip = mpc5121_nfc_select_chip; chip->legacy.set_features = nand_get_set_features_notsupp; chip->legacy.get_features = nand_get_set_features_notsupp; chip->bbt_options = NAND_BBT_USE_FLASH; @@ -712,7 +712,7 @@ static int mpc5121_nfc_probe(struct platform_device *op) return retval; } - chip->select_chip = ads5121_select_chip; + chip->legacy.select_chip = ads5121_select_chip; } /* Enable NFC clock */ diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c index 2bb0df1b7244..ce124f8c02cd 100644 --- a/drivers/mtd/nand/raw/mtk_nand.c +++ b/drivers/mtd/nand/raw/mtk_nand.c @@ -1333,7 +1333,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc, nand->options |= NAND_USE_BOUNCE_BUFFER | NAND_SUBPAGE_READ; nand->legacy.dev_ready = mtk_nfc_dev_ready; - nand->select_chip = mtk_nfc_select_chip; + nand->legacy.select_chip = mtk_nfc_select_chip; nand->legacy.write_byte = mtk_nfc_write_byte; nand->legacy.write_buf = mtk_nfc_write_buf; 
nand->legacy.read_byte = mtk_nfc_read_byte; diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c index 88bd3f6a499c..c00b1d408a04 100644 --- a/drivers/mtd/nand/raw/mxc_nand.c +++ b/drivers/mtd/nand/raw/mxc_nand.c @@ -1828,7 +1828,7 @@ static int mxcnd_probe(struct platform_device *pdev) this->ecc.bytes = host->devtype_data->eccbytes; host->eccsize = host->devtype_data->eccsize; - this->select_chip = host->devtype_data->select_chip; + this->legacy.select_chip = host->devtype_data->select_chip; this->ecc.size = 512; mtd_set_ooblayout(mtd, host->devtype_data->ooblayout); diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 93a19f551796..cef6633fdce9 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -248,8 +248,8 @@ void nand_select_target(struct nand_chip *chip, unsigned int cs) chip->cur_cs = cs; - if (chip->select_chip) - chip->select_chip(chip, cs); + if (chip->legacy.select_chip) + chip->legacy.select_chip(chip, cs); } EXPORT_SYMBOL_GPL(nand_select_target); @@ -262,8 +262,8 @@ EXPORT_SYMBOL_GPL(nand_select_target); */ void nand_deselect_target(struct nand_chip *chip) { - if (chip->select_chip) - chip->select_chip(chip, -1); + if (chip->legacy.select_chip) + chip->legacy.select_chip(chip, -1); chip->cur_cs = -1; } diff --git a/drivers/mtd/nand/raw/nand_legacy.c b/drivers/mtd/nand/raw/nand_legacy.c index f76b9356ba9c..4596a538b967 100644 --- a/drivers/mtd/nand/raw/nand_legacy.c +++ b/drivers/mtd/nand/raw/nand_legacy.c @@ -592,8 +592,8 @@ void nand_legacy_set_defaults(struct nand_chip *chip) if (chip->legacy.waitfunc == NULL) chip->legacy.waitfunc = nand_wait; - if (!chip->select_chip) - chip->select_chip = nand_select_chip; + if (!chip->legacy.select_chip) + chip->legacy.select_chip = nand_select_chip; /* If called twice, pointers that depend on busw may need to be reset */ if (!chip->legacy.read_byte || chip->legacy.read_byte == nand_read_byte) @@ -626,9 +626,10 @@ int 
nand_legacy_check_hooks(struct nand_chip *chip) /* * Default functions assigned for ->legacy.cmdfunc() and - * ->select_chip() both expect ->legacy.cmd_ctrl() to be populated. + * ->legacy.select_chip() both expect ->legacy.cmd_ctrl() to be + * populated. */ - if ((!chip->legacy.cmdfunc || !chip->select_chip) && + if ((!chip->legacy.cmdfunc || !chip->legacy.select_chip) && !chip->legacy.cmd_ctrl) { pr_err("->legacy.cmd_ctrl() should be provided\n"); return -EINVAL; diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c index d49a7a17146c..9857e0e5acd4 100644 --- a/drivers/mtd/nand/raw/ndfc.c +++ b/drivers/mtd/nand/raw/ndfc.c @@ -146,7 +146,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc, chip->legacy.IO_ADDR_W = ndfc->ndfcbase + NDFC_DATA; chip->legacy.cmd_ctrl = ndfc_hwcontrol; chip->legacy.dev_ready = ndfc_ready; - chip->select_chip = ndfc_select_chip; + chip->legacy.select_chip = ndfc_select_chip; chip->legacy.chip_delay = 50; chip->controller = &ndfc->ndfc_control; chip->legacy.read_buf = ndfc_read_buf; diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c index 86c536ddaf24..a994b76daa50 100644 --- a/drivers/mtd/nand/raw/plat_nand.c +++ b/drivers/mtd/nand/raw/plat_nand.c @@ -63,7 +63,7 @@ static int plat_nand_probe(struct platform_device *pdev) data->chip.legacy.IO_ADDR_W = data->io_base; data->chip.legacy.cmd_ctrl = pdata->ctrl.cmd_ctrl; data->chip.legacy.dev_ready = pdata->ctrl.dev_ready; - data->chip.select_chip = pdata->ctrl.select_chip; + data->chip.legacy.select_chip = pdata->ctrl.select_chip; data->chip.legacy.write_buf = pdata->ctrl.write_buf; data->chip.legacy.read_buf = pdata->ctrl.read_buf; data->chip.legacy.chip_delay = pdata->chip.chip_delay; diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index ef75dfa62a4f..6b76fb5c0aed 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2804,7 +2804,7 @@ static int 
qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc, mtd->dev.parent = dev; chip->legacy.cmdfunc = qcom_nandc_command; - chip->select_chip = qcom_nandc_select_chip; + chip->legacy.select_chip = qcom_nandc_select_chip; chip->legacy.read_byte = qcom_nandc_read_byte; chip->legacy.read_buf = qcom_nandc_read_buf; chip->legacy.write_buf = qcom_nandc_write_buf; diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c index d2e42e9d0e8c..a8905463701a 100644 --- a/drivers/mtd/nand/raw/s3c2410.c +++ b/drivers/mtd/nand/raw/s3c2410.c @@ -866,7 +866,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, chip->legacy.write_buf = s3c2410_nand_write_buf; chip->legacy.read_buf = s3c2410_nand_read_buf; - chip->select_chip = s3c2410_nand_select_chip; + chip->legacy.select_chip = s3c2410_nand_select_chip; chip->legacy.chip_delay = 50; nand_set_controller_data(chip, nmtd); chip->options = set->options; diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c index 30edcc77b111..7ab50bc6ad3a 100644 --- a/drivers/mtd/nand/raw/sh_flctl.c +++ b/drivers/mtd/nand/raw/sh_flctl.c @@ -1170,7 +1170,7 @@ static int flctl_probe(struct platform_device *pdev) nand->legacy.read_byte = flctl_read_byte; nand->legacy.write_buf = flctl_write_buf; nand->legacy.read_buf = flctl_read_buf; - nand->select_chip = flctl_select_chip; + nand->legacy.select_chip = flctl_select_chip; nand->legacy.cmdfunc = flctl_cmdfunc; nand->legacy.set_features = nand_get_set_features_notsupp; nand->legacy.get_features = nand_get_set_features_notsupp; diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c index 51b1a548064b..e489a6ff57d7 100644 --- a/drivers/mtd/nand/raw/sunxi_nand.c +++ b/drivers/mtd/nand/raw/sunxi_nand.c @@ -1922,7 +1922,7 @@ static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc, */ nand->ecc.mode = NAND_ECC_HW; nand_set_flash_node(nand, np); - nand->select_chip = sunxi_nfc_select_chip; + 
nand->legacy.select_chip = sunxi_nfc_select_chip; nand->legacy.cmd_ctrl = sunxi_nfc_cmd_ctrl; nand->legacy.read_buf = sunxi_nfc_read_buf; nand->legacy.write_buf = sunxi_nfc_write_buf; diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c index 8818f893f300..ebca4579c033 100644 --- a/drivers/mtd/nand/raw/tango_nand.c +++ b/drivers/mtd/nand/raw/tango_nand.c @@ -567,7 +567,7 @@ static int chip_init(struct device *dev, struct device_node *np) chip->legacy.read_byte = tango_read_byte; chip->legacy.write_buf = tango_write_buf; chip->legacy.read_buf = tango_read_buf; - chip->select_chip = tango_select_chip; + chip->legacy.select_chip = tango_select_chip; chip->legacy.cmd_ctrl = tango_cmd_ctrl; chip->legacy.dev_ready = tango_dev_ready; chip->setup_data_interface = tango_set_timings; diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c index a234a5cb4868..4cb78106af14 100644 --- a/drivers/mtd/nand/raw/xway_nand.c +++ b/drivers/mtd/nand/raw/xway_nand.c @@ -176,7 +176,7 @@ static int xway_nand_probe(struct platform_device *pdev) data->chip.legacy.cmd_ctrl = xway_cmd_ctrl; data->chip.legacy.dev_ready = xway_dev_ready; - data->chip.select_chip = xway_select_chip; + data->chip.legacy.select_chip = xway_select_chip; data->chip.legacy.write_buf = xway_write_buf; data->chip.legacy.read_buf = xway_read_buf; data->chip.legacy.read_byte = xway_read_byte; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index aa1512df38a9..40b74fb1792d 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -902,6 +902,7 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * struct nand_legacy - NAND chip legacy fields/hooks * @IO_ADDR_R: address to read the 8 I/O lines of the flash device * @IO_ADDR_W: address to write the 8 I/O lines of the flash device + * @select_chip: select/deselect a specific target/die * @read_byte: read one byte from the chip * @write_byte: write a single byte to the chip on 
the low 8 I/O lines * @write_buf: write data from the buffer to the chip @@ -927,6 +928,7 @@ int nand_op_parser_exec_op(struct nand_chip *chip, struct nand_legacy { void __iomem *IO_ADDR_R; void __iomem *IO_ADDR_W; + void (*select_chip)(struct nand_chip *chip, int cs); u8 (*read_byte)(struct nand_chip *chip); void (*write_byte)(struct nand_chip *chip, u8 byte); void (*write_buf)(struct nand_chip *chip, const u8 *buf, int len); @@ -954,7 +956,6 @@ struct nand_legacy { * you're modifying an existing driver that is using those * fields/hooks, you should consider reworking the driver * avoid using them. - * @select_chip: [REPLACEABLE] select chip nr * @exec_op: controller specific method to execute NAND operations. * This method replaces ->cmdfunc(), * ->legacy.{read,write}_{buf,byte,word}(), @@ -1040,7 +1041,6 @@ struct nand_chip { struct nand_legacy legacy; - void (*select_chip)(struct nand_chip *chip, int cs); int (*exec_op)(struct nand_chip *chip, const struct nand_operation *op, bool check_only); -- cgit v1.2.3 From f2abfeb2078b9682bfeb77f91816fcf2177b3051 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 11 Nov 2018 08:55:23 +0100 Subject: mtd: rawnand: Move the ->exec_op() method to nand_controller_ops ->exec_op() is a controller method and has nothing to do in the nand_chip struct. Let's move it to the nand_controller_ops struct and adjust the core and drivers accordingly. 
Signed-off-by: Boris Brezillon Tested-by: Janusz Krzysztofik Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/ams-delta.c | 7 ++- drivers/mtd/nand/raw/fsmc_nand.c | 2 +- drivers/mtd/nand/raw/internals.h | 13 ++++- drivers/mtd/nand/raw/marvell_nand.c | 2 +- drivers/mtd/nand/raw/nand_base.c | 51 +++++++++---------- drivers/mtd/nand/raw/nand_hynix.c | 4 +- drivers/mtd/nand/raw/nand_legacy.c | 4 +- drivers/mtd/nand/raw/tegra_nand.c | 2 +- drivers/mtd/nand/raw/vf610_nfc.c | 4 +- include/linux/mtd/rawnand.h | 99 ++++++++++++++++++------------------- 10 files changed, 100 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c index 611c822e967f..f8eb4a419e77 100644 --- a/drivers/mtd/nand/raw/ams-delta.c +++ b/drivers/mtd/nand/raw/ams-delta.c @@ -176,6 +176,10 @@ static int ams_delta_exec_op(struct nand_chip *this, return ret; } +static const struct nand_controller_ops ams_delta_ops = { + .exec_op = ams_delta_exec_op, +}; + /* * Main initialization routine */ @@ -216,8 +220,6 @@ static int ams_delta_init(struct platform_device *pdev) priv->io_base = io_base; nand_set_controller_data(this, priv); - this->exec_op = ams_delta_exec_op; - priv->gpiod_rdy = devm_gpiod_get_optional(&pdev->dev, "rdy", GPIOD_IN); if (IS_ERR(priv->gpiod_rdy)) { err = PTR_ERR(priv->gpiod_rdy); @@ -277,6 +279,7 @@ static int ams_delta_init(struct platform_device *pdev) ams_delta_dir_input(priv, true); /* Initialize the NAND controller object embedded in ams_delta_nand. 
*/ + priv->base.ops = &ams_delta_ops; nand_controller_init(&priv->base); this->controller = &priv->base; diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index ea69ac6e6d7a..1eb5008e7453 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -995,6 +995,7 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand) static const struct nand_controller_ops fsmc_nand_controller_ops = { .attach_chip = fsmc_nand_attach_chip, + .exec_op = fsmc_exec_op, }; /* @@ -1082,7 +1083,6 @@ static int __init fsmc_nand_probe(struct platform_device *pdev) nand_set_flash_node(nand, pdev->dev.of_node); mtd->dev.parent = &pdev->dev; - nand->exec_op = fsmc_exec_op; /* * Setup default ECC mode. nand_dt_init() called from nand_scan_ident() diff --git a/drivers/mtd/nand/raw/internals.h b/drivers/mtd/nand/raw/internals.h index b62728d5884b..ac66b458566f 100644 --- a/drivers/mtd/nand/raw/internals.h +++ b/drivers/mtd/nand/raw/internals.h @@ -95,16 +95,25 @@ void nand_decode_ext_id(struct nand_chip *chip); void panic_nand_wait(struct nand_chip *chip, unsigned long timeo); void sanitize_string(uint8_t *s, size_t len); +static inline bool nand_has_exec_op(struct nand_chip *chip) +{ + if (!chip->controller || !chip->controller->ops || + !chip->controller->ops->exec_op) + return false; + + return true; +} + static inline int nand_exec_op(struct nand_chip *chip, const struct nand_operation *op) { - if (!chip->exec_op) + if (!nand_has_exec_op(chip)) return -ENOTSUPP; if (WARN_ON(op->cs >= chip->numchips)) return -EINVAL; - return chip->exec_op(chip, op, false); + return chip->controller->ops->exec_op(chip, op, false); } /* BBT functions */ diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index ba7a45fb1905..2e8257fe7d00 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -2505,6 +2505,7 @@ static int marvell_nand_attach_chip(struct nand_chip *chip) static 
const struct nand_controller_ops marvell_nand_controller_ops = { .attach_chip = marvell_nand_attach_chip, + .exec_op = marvell_nfc_exec_op, }; static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc, @@ -2627,7 +2628,6 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc, chip->controller = &nfc->controller; nand_set_flash_node(chip, np); - chip->exec_op = marvell_nfc_exec_op; if (!of_property_read_bool(np, "marvell,nand-keep-config")) chip->setup_data_interface = marvell_nfc_setup_data_interface; diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index cef6633fdce9..eabef6a3857e 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -678,7 +678,7 @@ int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms) u8 status = 0; int ret; - if (!chip->exec_op) + if (!nand_has_exec_op(chip)) return -ENOTSUPP; /* Wait tWB before polling the STATUS reg. */ @@ -1117,7 +1117,7 @@ int nand_read_page_op(struct nand_chip *chip, unsigned int page, if (offset_in_page + len > mtd->writesize + mtd->oobsize) return -EINVAL; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { if (mtd->writesize > 512) return nand_lp_exec_read_page_op(chip, page, offset_in_page, buf, @@ -1156,7 +1156,7 @@ int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf, if (len && !buf) return -EINVAL; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { const struct nand_sdr_timings *sdr = nand_get_sdr_timings(&chip->data_interface); struct nand_op_instr instrs[] = { @@ -1211,7 +1211,7 @@ int nand_change_read_column_op(struct nand_chip *chip, if (mtd->writesize <= 512) return -ENOTSUPP; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { const struct nand_sdr_timings *sdr = nand_get_sdr_timings(&chip->data_interface); u8 addrs[2] = {}; @@ -1270,7 +1270,7 @@ int nand_read_oob_op(struct nand_chip *chip, unsigned int page, if (offset_in_oob + len > mtd->oobsize) 
return -EINVAL; - if (chip->exec_op) + if (nand_has_exec_op(chip)) return nand_read_page_op(chip, page, mtd->writesize + offset_in_oob, buf, len); @@ -1383,7 +1383,7 @@ int nand_prog_page_begin_op(struct nand_chip *chip, unsigned int page, if (offset_in_page + len > mtd->writesize + mtd->oobsize) return -EINVAL; - if (chip->exec_op) + if (nand_has_exec_op(chip)) return nand_exec_prog_page_op(chip, page, offset_in_page, buf, len, false); @@ -1410,7 +1410,7 @@ int nand_prog_page_end_op(struct nand_chip *chip) int ret; u8 status; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { const struct nand_sdr_timings *sdr = nand_get_sdr_timings(&chip->data_interface); struct nand_op_instr instrs[] = { @@ -1469,7 +1469,7 @@ int nand_prog_page_op(struct nand_chip *chip, unsigned int page, if (offset_in_page + len > mtd->writesize + mtd->oobsize) return -EINVAL; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { status = nand_exec_prog_page_op(chip, page, offset_in_page, buf, len, true); } else { @@ -1517,7 +1517,7 @@ int nand_change_write_column_op(struct nand_chip *chip, if (mtd->writesize <= 512) return -ENOTSUPP; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { const struct nand_sdr_timings *sdr = nand_get_sdr_timings(&chip->data_interface); u8 addrs[2]; @@ -1572,7 +1572,7 @@ int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf, if (len && !buf) return -EINVAL; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { const struct nand_sdr_timings *sdr = nand_get_sdr_timings(&chip->data_interface); struct nand_op_instr instrs[] = { @@ -1611,7 +1611,7 @@ EXPORT_SYMBOL_GPL(nand_readid_op); */ int nand_status_op(struct nand_chip *chip, u8 *status) { - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { const struct nand_sdr_timings *sdr = nand_get_sdr_timings(&chip->data_interface); struct nand_op_instr instrs[] = { @@ -1648,7 +1648,7 @@ EXPORT_SYMBOL_GPL(nand_status_op); */ int nand_exit_status_op(struct nand_chip *chip) { - if (chip->exec_op) { + if 
(nand_has_exec_op(chip)) { struct nand_op_instr instrs[] = { NAND_OP_CMD(NAND_CMD_READ0, 0), }; @@ -1680,7 +1680,7 @@ int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock) int ret; u8 status; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { const struct nand_sdr_timings *sdr = nand_get_sdr_timings(&chip->data_interface); u8 addrs[3] = { page, page >> 8, page >> 16 }; @@ -1739,7 +1739,7 @@ static int nand_set_features_op(struct nand_chip *chip, u8 feature, const u8 *params = data; int i, ret; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { const struct nand_sdr_timings *sdr = nand_get_sdr_timings(&chip->data_interface); struct nand_op_instr instrs[] = { @@ -1786,7 +1786,7 @@ static int nand_get_features_op(struct nand_chip *chip, u8 feature, u8 *params = data; int i; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { const struct nand_sdr_timings *sdr = nand_get_sdr_timings(&chip->data_interface); struct nand_op_instr instrs[] = { @@ -1812,7 +1812,7 @@ static int nand_get_features_op(struct nand_chip *chip, u8 feature, static int nand_wait_rdy_op(struct nand_chip *chip, unsigned int timeout_ms, unsigned int delay_ns) { - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { struct nand_op_instr instrs[] = { NAND_OP_WAIT_RDY(PSEC_TO_MSEC(timeout_ms), PSEC_TO_NSEC(delay_ns)), @@ -1843,7 +1843,7 @@ static int nand_wait_rdy_op(struct nand_chip *chip, unsigned int timeout_ms, */ int nand_reset_op(struct nand_chip *chip) { - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { const struct nand_sdr_timings *sdr = nand_get_sdr_timings(&chip->data_interface); struct nand_op_instr instrs[] = { @@ -1880,7 +1880,7 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, if (!len || !buf) return -EINVAL; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { struct nand_op_instr instrs[] = { NAND_OP_DATA_IN(len, buf, 0), }; @@ -1924,7 +1924,7 @@ int nand_write_data_op(struct nand_chip *chip, const void *buf, if (!len || !buf) 
return -EINVAL; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { struct nand_op_instr instrs[] = { NAND_OP_DATA_OUT(len, buf, 0), }; @@ -4417,13 +4417,14 @@ static void nand_shutdown(struct mtd_info *mtd) /* Set default functions */ static void nand_set_defaults(struct nand_chip *chip) { - nand_legacy_set_defaults(chip); - + /* If no controller is provided, use the dummy one. */ if (!chip->controller) { chip->controller = &chip->dummy_controller; nand_controller_init(chip->controller); } + nand_legacy_set_defaults(chip); + if (!chip->buf_align) chip->buf_align = 1; } @@ -5025,10 +5026,6 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, if (!mtd->name && mtd->dev.parent) mtd->name = dev_name(mtd->dev.parent); - ret = nand_legacy_check_hooks(chip); - if (ret) - return ret; - /* * Start with chips->numchips = maxchips to let nand_select_target() do * its job. chip->numchips will be adjusted after. @@ -5038,6 +5035,10 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, /* Set the default functions */ nand_set_defaults(chip); + ret = nand_legacy_check_hooks(chip); + if (ret) + return ret; + /* Read the flash type */ ret = nand_detect(chip, table); if (ret) { diff --git a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c index 1e4499d01e14..343f477362d1 100644 --- a/drivers/mtd/nand/raw/nand_hynix.c +++ b/drivers/mtd/nand/raw/nand_hynix.c @@ -80,7 +80,7 @@ static bool hynix_nand_has_valid_jedecid(struct nand_chip *chip) static int hynix_nand_cmd_op(struct nand_chip *chip, u8 cmd) { - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { struct nand_op_instr instrs[] = { NAND_OP_CMD(cmd, 0), }; @@ -98,7 +98,7 @@ static int hynix_nand_reg_write_op(struct nand_chip *chip, u8 addr, u8 val) { u16 column = ((u16)addr << 8) | addr; - if (chip->exec_op) { + if (nand_has_exec_op(chip)) { struct nand_op_instr instrs[] = { NAND_OP_ADDR(1, &addr, 0), NAND_OP_8BIT_DATA_OUT(1, &val, 0), diff --git 
a/drivers/mtd/nand/raw/nand_legacy.c b/drivers/mtd/nand/raw/nand_legacy.c index 4596a538b967..47364237861e 100644 --- a/drivers/mtd/nand/raw/nand_legacy.c +++ b/drivers/mtd/nand/raw/nand_legacy.c @@ -577,7 +577,7 @@ void nand_legacy_set_defaults(struct nand_chip *chip) { unsigned int busw = chip->options & NAND_BUSWIDTH_16; - if (chip->exec_op) + if (nand_has_exec_op(chip)) return; /* check for proper chip_delay setup, set 20us if not */ @@ -621,7 +621,7 @@ int nand_legacy_check_hooks(struct nand_chip *chip) * ->legacy.cmdfunc() is legacy and will only be used if ->exec_op() is * not populated. */ - if (chip->exec_op) + if (nand_has_exec_op(chip)) return 0; /* diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c index 590393d93ffc..2fe6de09f4ff 100644 --- a/drivers/mtd/nand/raw/tegra_nand.c +++ b/drivers/mtd/nand/raw/tegra_nand.c @@ -1050,6 +1050,7 @@ static int tegra_nand_attach_chip(struct nand_chip *chip) static const struct nand_controller_ops tegra_nand_controller_ops = { .attach_chip = &tegra_nand_attach_chip, + .exec_op = tegra_nand_exec_op, }; static int tegra_nand_chips_init(struct device *dev, @@ -1112,7 +1113,6 @@ static int tegra_nand_chips_init(struct device *dev, mtd->name = "tegra_nand"; chip->options = NAND_NO_SUBPAGE_WRITE | NAND_USE_BOUNCE_BUFFER; - chip->exec_op = tegra_nand_exec_op; chip->setup_data_interface = tegra_nand_setup_data_interface; ret = nand_scan(chip, 1); diff --git a/drivers/mtd/nand/raw/vf610_nfc.c b/drivers/mtd/nand/raw/vf610_nfc.c index 49a174e30211..0fa7cac4ce14 100644 --- a/drivers/mtd/nand/raw/vf610_nfc.c +++ b/drivers/mtd/nand/raw/vf610_nfc.c @@ -812,6 +812,8 @@ static int vf610_nfc_attach_chip(struct nand_chip *chip) static const struct nand_controller_ops vf610_nfc_controller_ops = { .attach_chip = vf610_nfc_attach_chip, + .exec_op = vf610_nfc_exec_op, + }; static int vf610_nfc_probe(struct platform_device *pdev) @@ -879,8 +881,6 @@ static int vf610_nfc_probe(struct platform_device *pdev) 
goto err_disable_clk; } - chip->exec_op = vf610_nfc_exec_op; - chip->options |= NAND_NO_SUBPAGE_WRITE; init_completion(&nfc->cmd_done); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 40b74fb1792d..297b40c56403 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -240,49 +240,6 @@ struct nand_id { int len; }; -/** - * struct nand_controller_ops - Controller operations - * - * @attach_chip: this method is called after the NAND detection phase after - * flash ID and MTD fields such as erase size, page size and OOB - * size have been set up. ECC requirements are available if - * provided by the NAND chip or device tree. Typically used to - * choose the appropriate ECC configuration and allocate - * associated resources. - * This hook is optional. - * @detach_chip: free all resources allocated/claimed in - * nand_controller_ops->attach_chip(). - * This hook is optional. - */ -struct nand_controller_ops { - int (*attach_chip)(struct nand_chip *chip); - void (*detach_chip)(struct nand_chip *chip); -}; - -/** - * struct nand_controller - Structure used to describe a NAND controller - * - * @lock: protection lock - * @active: the mtd device which holds the controller currently - * @wq: wait queue to sleep on if a NAND operation is in - * progress used instead of the per chip wait queue - * when a hw controller is available. - * @ops: NAND controller operations. 
- */ -struct nand_controller { - spinlock_t lock; - struct nand_chip *active; - wait_queue_head_t wq; - const struct nand_controller_ops *ops; -}; - -static inline void nand_controller_init(struct nand_controller *nfc) -{ - nfc->active = NULL; - spin_lock_init(&nfc->lock); - init_waitqueue_head(&nfc->wq); -} - /** * struct nand_ecc_step_info - ECC step information of ECC engine * @stepsize: data bytes per ECC step @@ -897,6 +854,55 @@ struct nand_operation { int nand_op_parser_exec_op(struct nand_chip *chip, const struct nand_op_parser *parser, const struct nand_operation *op, bool check_only); +/** + * struct nand_controller_ops - Controller operations + * + * @attach_chip: this method is called after the NAND detection phase after + * flash ID and MTD fields such as erase size, page size and OOB + * size have been set up. ECC requirements are available if + * provided by the NAND chip or device tree. Typically used to + * choose the appropriate ECC configuration and allocate + * associated resources. + * This hook is optional. + * @detach_chip: free all resources allocated/claimed in + * nand_controller_ops->attach_chip(). + * This hook is optional. + * @exec_op: controller specific method to execute NAND operations. + * This method replaces chip->legacy.cmdfunc(), + * chip->legacy.{read,write}_{buf,byte,word}(), + * chip->legacy.dev_ready() and chip->legacy.waifunc(). + */ +struct nand_controller_ops { + int (*attach_chip)(struct nand_chip *chip); + void (*detach_chip)(struct nand_chip *chip); + int (*exec_op)(struct nand_chip *chip, + const struct nand_operation *op, + bool check_only); +}; + +/** + * struct nand_controller - Structure used to describe a NAND controller + * + * @lock: protection lock + * @active: the mtd device which holds the controller currently + * @wq: wait queue to sleep on if a NAND operation is in + * progress used instead of the per chip wait queue + * when a hw controller is available. + * @ops: NAND controller operations. 
+ */ +struct nand_controller { + spinlock_t lock; + struct nand_chip *active; + wait_queue_head_t wq; + const struct nand_controller_ops *ops; +}; + +static inline void nand_controller_init(struct nand_controller *nfc) +{ + nfc->active = NULL; + spin_lock_init(&nfc->lock); + init_waitqueue_head(&nfc->wq); +} /** * struct nand_legacy - NAND chip legacy fields/hooks @@ -956,10 +962,6 @@ struct nand_legacy { * you're modifying an existing driver that is using those * fields/hooks, you should consider reworking the driver * avoid using them. - * @exec_op: controller specific method to execute NAND operations. - * This method replaces ->cmdfunc(), - * ->legacy.{read,write}_{buf,byte,word}(), - * ->legacy.dev_ready() and ->waifunc(). * @setup_read_retry: [FLASHSPECIFIC] flash (vendor) specific function for * setting the read-retry mode. Mostly needed for MLC NAND. * @ecc: [BOARDSPECIFIC] ECC control structure @@ -1041,9 +1043,6 @@ struct nand_chip { struct nand_legacy legacy; - int (*exec_op)(struct nand_chip *chip, - const struct nand_operation *op, - bool check_only); int (*setup_read_retry)(struct nand_chip *chip, int retry_mode); int (*setup_data_interface)(struct nand_chip *chip, int chipnr, const struct nand_data_interface *conf); -- cgit v1.2.3 From 7a08dbaedd365fa4eb7c9cd504c075e3336eb0c6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 11 Nov 2018 08:55:24 +0100 Subject: mtd: rawnand: Move ->setup_data_interface() to nand_controller_ops ->setup_data_interface() is a controller specific method and should thus be placed in nand_controller_ops. In order to make that work with controllers that support keeping pre-configured timings we need to add a new NAND_KEEP_TIMINGS flag to inform the core it should skip the timings selection step. 
Signed-off-by: Boris Brezillon Tested-by: Janusz Krzysztofik Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/atmel/nand-controller.c | 5 +++-- drivers/mtd/nand/raw/denali.c | 3 ++- drivers/mtd/nand/raw/fsmc_nand.c | 7 ++++--- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +- drivers/mtd/nand/raw/internals.h | 12 ++++++++++++ drivers/mtd/nand/raw/marvell_nand.c | 3 ++- drivers/mtd/nand/raw/mtk_nand.c | 2 +- drivers/mtd/nand/raw/mxc_nand.c | 12 +++++++++++- drivers/mtd/nand/raw/nand_base.c | 14 ++++++++------ drivers/mtd/nand/raw/nand_legacy.c | 2 +- drivers/mtd/nand/raw/s3c2410.c | 5 +++-- drivers/mtd/nand/raw/sunxi_nand.c | 2 +- drivers/mtd/nand/raw/tango_nand.c | 2 +- drivers/mtd/nand/raw/tegra_nand.c | 2 +- include/linux/mtd/rawnand.h | 20 ++++++++++++++------ 15 files changed, 65 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c index d5c58eb040d8..dcd3bd73e549 100644 --- a/drivers/mtd/nand/raw/atmel/nand-controller.c +++ b/drivers/mtd/nand/raw/atmel/nand-controller.c @@ -1479,8 +1479,8 @@ static void atmel_nand_init(struct atmel_nand_controller *nc, chip->legacy.write_buf = atmel_nand_write_buf; chip->legacy.select_chip = atmel_nand_select_chip; - if (nc->mck && nc->caps->ops->setup_data_interface) - chip->setup_data_interface = atmel_nand_setup_data_interface; + if (!nc->mck || !nc->caps->ops->setup_data_interface) + chip->options |= NAND_KEEP_TIMINGS; /* Some NANDs require a longer delay than the default one (20us). 
*/ chip->legacy.chip_delay = 40; @@ -1908,6 +1908,7 @@ static int atmel_nand_attach_chip(struct nand_chip *chip) static const struct nand_controller_ops atmel_nand_controller_ops = { .attach_chip = atmel_nand_attach_chip, + .setup_data_interface = atmel_nand_setup_data_interface, }; static int atmel_nand_controller_init(struct atmel_nand_controller *nc, diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c index 64895ca68c8d..bad3b8ad5e0a 100644 --- a/drivers/mtd/nand/raw/denali.c +++ b/drivers/mtd/nand/raw/denali.c @@ -1316,6 +1316,7 @@ static void denali_detach_chip(struct nand_chip *chip) static const struct nand_controller_ops denali_controller_ops = { .attach_chip = denali_attach_chip, .detach_chip = denali_detach_chip, + .setup_data_interface = denali_setup_data_interface, }; int denali_init(struct denali_nand_info *denali) @@ -1372,7 +1373,7 @@ int denali_init(struct denali_nand_info *denali) /* clk rate info is needed for setup_data_interface */ if (denali->clk_rate && denali->clk_x_rate) - chip->setup_data_interface = denali_setup_data_interface; + chip->options |= NAND_KEEP_TIMINGS; chip->dummy_controller.ops = &denali_controller_ops; ret = nand_scan(chip, denali->max_banks); diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index 1eb5008e7453..61927c4c2650 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -996,6 +996,7 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand) static const struct nand_controller_ops fsmc_nand_controller_ops = { .attach_chip = fsmc_nand_attach_chip, .exec_op = fsmc_exec_op, + .setup_data_interface = fsmc_setup_data_interface, }; /* @@ -1108,10 +1109,10 @@ static int __init fsmc_nand_probe(struct platform_device *pdev) } } - if (host->dev_timings) + if (host->dev_timings) { fsmc_nand_setup(host, host->dev_timings); - else - nand->setup_data_interface = fsmc_setup_data_interface; + nand->options |= NAND_KEEP_TIMINGS; + } if 
(AMBA_REV_BITS(host->pid) >= 8) { nand->ecc.read_page = fsmc_read_page_hwecc; diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index c461d5efabc0..25f9fe79796a 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -1889,6 +1889,7 @@ static int gpmi_nand_attach_chip(struct nand_chip *chip) static const struct nand_controller_ops gpmi_nand_controller_ops = { .attach_chip = gpmi_nand_attach_chip, + .setup_data_interface = gpmi_setup_data_interface, }; static int gpmi_nand_init(struct gpmi_nand_data *this) @@ -1908,7 +1909,6 @@ static int gpmi_nand_init(struct gpmi_nand_data *this) nand_set_controller_data(chip, this); nand_set_flash_node(chip, this->pdev->dev.of_node); chip->legacy.select_chip = gpmi_select_chip; - chip->setup_data_interface = gpmi_setup_data_interface; chip->legacy.cmd_ctrl = gpmi_cmd_ctrl; chip->legacy.dev_ready = gpmi_dev_ready; chip->legacy.read_byte = gpmi_read_byte; diff --git a/drivers/mtd/nand/raw/internals.h b/drivers/mtd/nand/raw/internals.h index ac66b458566f..fbf6ca015cd7 100644 --- a/drivers/mtd/nand/raw/internals.h +++ b/drivers/mtd/nand/raw/internals.h @@ -116,6 +116,18 @@ static inline int nand_exec_op(struct nand_chip *chip, return chip->controller->ops->exec_op(chip, op, false); } +static inline bool nand_has_setup_data_iface(struct nand_chip *chip) +{ + if (!chip->controller || !chip->controller->ops || + !chip->controller->ops->setup_data_interface) + return false; + + if (chip->options & NAND_KEEP_TIMINGS) + return false; + + return true; +} + /* BBT functions */ int nand_markbad_bbt(struct nand_chip *chip, loff_t offs); int nand_isreserved_bbt(struct nand_chip *chip, loff_t offs); diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 2e8257fe7d00..b7b4d9b14da1 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -2506,6 +2506,7 @@ static int 
marvell_nand_attach_chip(struct nand_chip *chip) static const struct nand_controller_ops marvell_nand_controller_ops = { .attach_chip = marvell_nand_attach_chip, .exec_op = marvell_nfc_exec_op, + .setup_data_interface = marvell_nfc_setup_data_interface, }; static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc, @@ -2629,7 +2630,7 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc, nand_set_flash_node(chip, np); if (!of_property_read_bool(np, "marvell,nand-keep-config")) - chip->setup_data_interface = marvell_nfc_setup_data_interface; + chip->options |= NAND_KEEP_TIMINGS; mtd = nand_to_mtd(chip); mtd->dev.parent = dev; diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c index ce124f8c02cd..b6b4602f5132 100644 --- a/drivers/mtd/nand/raw/mtk_nand.c +++ b/drivers/mtd/nand/raw/mtk_nand.c @@ -1288,6 +1288,7 @@ static int mtk_nfc_attach_chip(struct nand_chip *chip) static const struct nand_controller_ops mtk_nfc_controller_ops = { .attach_chip = mtk_nfc_attach_chip, + .setup_data_interface = mtk_nfc_setup_data_interface, }; static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc, @@ -1339,7 +1340,6 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc, nand->legacy.read_byte = mtk_nfc_read_byte; nand->legacy.read_buf = mtk_nfc_read_buf; nand->legacy.cmd_ctrl = mtk_nfc_cmd_ctrl; - nand->setup_data_interface = mtk_nfc_setup_data_interface; /* set default mode in case dt entry is missing */ nand->ecc.mode = NAND_ECC_HW; diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c index c00b1d408a04..9b75d894cb74 100644 --- a/drivers/mtd/nand/raw/mxc_nand.c +++ b/drivers/mtd/nand/raw/mxc_nand.c @@ -1738,8 +1738,17 @@ static int mxcnd_attach_chip(struct nand_chip *chip) return 0; } +static int mxcnd_setup_data_interface(struct nand_chip *chip, int chipnr, + const struct nand_data_interface *conf) +{ + struct mxc_nand_host *host = 
nand_get_controller_data(chip); + + return host->devtype_data->setup_data_interface(chip, chipnr, conf); +} + static const struct nand_controller_ops mxcnd_controller_ops = { .attach_chip = mxcnd_attach_chip, + .setup_data_interface = mxcnd_setup_data_interface, }; static int mxcnd_probe(struct platform_device *pdev) @@ -1800,7 +1809,8 @@ static int mxcnd_probe(struct platform_device *pdev) if (err < 0) return err; - this->setup_data_interface = host->devtype_data->setup_data_interface; + if (!host->devtype_data->setup_data_interface) + this->options |= NAND_KEEP_TIMINGS; if (host->devtype_data->needs_ip) { res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index eabef6a3857e..3fc5c00f8dba 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -807,7 +807,7 @@ static int nand_reset_data_interface(struct nand_chip *chip, int chipnr) { int ret; - if (!chip->setup_data_interface) + if (!nand_has_setup_data_iface(chip)) return 0; /* @@ -825,7 +825,8 @@ static int nand_reset_data_interface(struct nand_chip *chip, int chipnr) */ onfi_fill_data_interface(chip, NAND_SDR_IFACE, 0); - ret = chip->setup_data_interface(chip, chipnr, &chip->data_interface); + ret = chip->controller->ops->setup_data_interface(chip, chipnr, + &chip->data_interface); if (ret) pr_err("Failed to configure data interface to SDR timing mode 0\n"); @@ -852,7 +853,7 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr) }; int ret; - if (!chip->setup_data_interface) + if (!nand_has_setup_data_iface(chip)) return 0; /* Change the mode on the chip side (if supported by the NAND chip) */ @@ -866,7 +867,8 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr) } /* Change the mode on the controller side */ - ret = chip->setup_data_interface(chip, chipnr, &chip->data_interface); + ret = chip->controller->ops->setup_data_interface(chip, chipnr, + 
&chip->data_interface); if (ret) return ret; @@ -921,7 +923,7 @@ static int nand_init_data_interface(struct nand_chip *chip) { int modes, mode, ret; - if (!chip->setup_data_interface) + if (!nand_has_setup_data_iface(chip)) return 0; /* @@ -947,7 +949,7 @@ static int nand_init_data_interface(struct nand_chip *chip) * Pass NAND_DATA_IFACE_CHECK_ONLY to only check if the * controller supports the requested timings. */ - ret = chip->setup_data_interface(chip, + ret = chip->controller->ops->setup_data_interface(chip, NAND_DATA_IFACE_CHECK_ONLY, &chip->data_interface); if (!ret) { diff --git a/drivers/mtd/nand/raw/nand_legacy.c b/drivers/mtd/nand/raw/nand_legacy.c index 47364237861e..43575943f13b 100644 --- a/drivers/mtd/nand/raw/nand_legacy.c +++ b/drivers/mtd/nand/raw/nand_legacy.c @@ -364,7 +364,7 @@ static void nand_ccs_delay(struct nand_chip *chip) * Wait tCCS_min if it is correctly defined, otherwise wait 500ns * (which should be safe for all NANDs). */ - if (chip->setup_data_interface) + if (nand_has_setup_data_iface(chip)) ndelay(chip->data_interface.timings.sdr.tCCS_min / 1000); else ndelay(500); diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c index a8905463701a..adc7a196e383 100644 --- a/drivers/mtd/nand/raw/s3c2410.c +++ b/drivers/mtd/nand/raw/s3c2410.c @@ -876,8 +876,8 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, * let's keep behavior unchanged for legacy boards booting via pdata and * auto-detect timings only when booting with a device tree. 
*/ - if (np) - chip->setup_data_interface = s3c2410_nand_setup_data_interface; + if (!np) + chip->options |= NAND_KEEP_TIMINGS; switch (info->cpu_type) { case TYPE_S3C2410: @@ -1011,6 +1011,7 @@ static int s3c2410_nand_attach_chip(struct nand_chip *chip) static const struct nand_controller_ops s3c24xx_nand_controller_ops = { .attach_chip = s3c2410_nand_attach_chip, + .setup_data_interface = s3c2410_nand_setup_data_interface, }; static const struct of_device_id s3c24xx_nand_dt_ids[] = { diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c index e489a6ff57d7..a5c83cbe4897 100644 --- a/drivers/mtd/nand/raw/sunxi_nand.c +++ b/drivers/mtd/nand/raw/sunxi_nand.c @@ -1847,6 +1847,7 @@ static int sunxi_nand_attach_chip(struct nand_chip *nand) static const struct nand_controller_ops sunxi_nand_controller_ops = { .attach_chip = sunxi_nand_attach_chip, + .setup_data_interface = sunxi_nfc_setup_data_interface, }; static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc, @@ -1927,7 +1928,6 @@ static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc, nand->legacy.read_buf = sunxi_nfc_read_buf; nand->legacy.write_buf = sunxi_nfc_write_buf; nand->legacy.read_byte = sunxi_nfc_read_byte; - nand->setup_data_interface = sunxi_nfc_setup_data_interface; mtd = nand_to_mtd(nand); mtd->dev.parent = dev; diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c index ebca4579c033..cb3beda88789 100644 --- a/drivers/mtd/nand/raw/tango_nand.c +++ b/drivers/mtd/nand/raw/tango_nand.c @@ -530,6 +530,7 @@ static int tango_attach_chip(struct nand_chip *chip) static const struct nand_controller_ops tango_controller_ops = { .attach_chip = tango_attach_chip, + .setup_data_interface = tango_set_timings, }; static int chip_init(struct device *dev, struct device_node *np) @@ -570,7 +571,6 @@ static int chip_init(struct device *dev, struct device_node *np) chip->legacy.select_chip = tango_select_chip; 
chip->legacy.cmd_ctrl = tango_cmd_ctrl; chip->legacy.dev_ready = tango_dev_ready; - chip->setup_data_interface = tango_set_timings; chip->options = NAND_USE_BOUNCE_BUFFER | NAND_NO_SUBPAGE_WRITE | NAND_WAIT_TCCS; diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c index 2fe6de09f4ff..13be32c38194 100644 --- a/drivers/mtd/nand/raw/tegra_nand.c +++ b/drivers/mtd/nand/raw/tegra_nand.c @@ -1051,6 +1051,7 @@ static int tegra_nand_attach_chip(struct nand_chip *chip) static const struct nand_controller_ops tegra_nand_controller_ops = { .attach_chip = &tegra_nand_attach_chip, .exec_op = tegra_nand_exec_op, + .setup_data_interface = tegra_nand_setup_data_interface, }; static int tegra_nand_chips_init(struct device *dev, @@ -1113,7 +1114,6 @@ static int tegra_nand_chips_init(struct device *dev, mtd->name = "tegra_nand"; chip->options = NAND_NO_SUBPAGE_WRITE | NAND_USE_BOUNCE_BUFFER; - chip->setup_data_interface = tegra_nand_setup_data_interface; ret = nand_scan(chip, 1); if (ret) diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 297b40c56403..f50f40643895 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -203,6 +203,13 @@ enum nand_ecc_algo { */ #define NAND_IS_BOOT_MEDIUM 0x00400000 +/* + * Do not try to tweak the timings at runtime. This is needed when the + * controller initializes the timings on itself or when it relies on + * configuration done by the bootloader. + */ +#define NAND_KEEP_TIMINGS 0x00800000 + /* Cell info constants */ #define NAND_CI_CHIPNR_MSK 0x03 #define NAND_CI_CELLTYPE_MSK 0x0C @@ -871,6 +878,11 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * This method replaces chip->legacy.cmdfunc(), * chip->legacy.{read,write}_{buf,byte,word}(), * chip->legacy.dev_ready() and chip->legacy.waifunc(). + * @setup_data_interface: setup the data interface and timing. 
If + * chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this + * means the configuration should not be applied but + * only checked. + * This hook is optional. */ struct nand_controller_ops { int (*attach_chip)(struct nand_chip *chip); @@ -878,6 +890,8 @@ struct nand_controller_ops { int (*exec_op)(struct nand_chip *chip, const struct nand_operation *op, bool check_only); + int (*setup_data_interface)(struct nand_chip *chip, int chipnr, + const struct nand_data_interface *conf); }; /** @@ -1019,10 +1033,6 @@ struct nand_legacy { * cur_cs < numchips. NAND Controller drivers should not * modify this value, but they're allowed to read it. * @read_retries: [INTERN] the number of read retry modes supported - * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If - * chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this - * means the configuration should not be applied but - * only checked. * @bbt: [INTERN] bad block table pointer * @bbt_td: [REPLACEABLE] bad block table descriptor for flash * lookup. @@ -1044,8 +1054,6 @@ struct nand_chip { struct nand_legacy legacy; int (*setup_read_retry)(struct nand_chip *chip, int retry_mode); - int (*setup_data_interface)(struct nand_chip *chip, int chipnr, - const struct nand_data_interface *conf); unsigned int options; unsigned int bbt_options; -- cgit v1.2.3 From 7b6a9b28ecf2fd2e2f5dcdb6d4fa8044b48bdb74 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 20 Nov 2018 10:02:39 +0100 Subject: mtd: rawnand: Deprecate the dummy_controller field We try to force NAND controller drivers to properly separate the NAND controller object from the NAND chip one, so let's deprecate the dummy controller object embedded in nand_chip to encourage them to create their own instance. 
Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/cafe_nand.c | 2 +- drivers/mtd/nand/raw/davinci_nand.c | 2 +- drivers/mtd/nand/raw/denali.c | 2 +- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +- drivers/mtd/nand/raw/hisi504_nand.c | 2 +- drivers/mtd/nand/raw/jz4740_nand.c | 2 +- drivers/mtd/nand/raw/lpc32xx_mlc.c | 2 +- drivers/mtd/nand/raw/lpc32xx_slc.c | 2 +- drivers/mtd/nand/raw/mxc_nand.c | 2 +- drivers/mtd/nand/raw/nand_base.c | 4 ++-- drivers/mtd/nand/raw/nandsim.c | 2 +- drivers/mtd/nand/raw/sh_flctl.c | 2 +- drivers/mtd/nand/raw/sm_common.c | 2 +- include/linux/mtd/rawnand.h | 6 +++--- 14 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c index a85f5fa5c66d..b1c0cd6b49da 100644 --- a/drivers/mtd/nand/raw/cafe_nand.c +++ b/drivers/mtd/nand/raw/cafe_nand.c @@ -780,7 +780,7 @@ static int cafe_nand_probe(struct pci_dev *pdev, cafe->usedma = 0; /* Scan to find existence of the device */ - cafe->nand.dummy_controller.ops = &cafe_nand_controller_ops; + cafe->nand.legacy.dummy_controller.ops = &cafe_nand_controller_ops; err = nand_scan(&cafe->nand, 2); if (err) goto out_irq; diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c index f430aeb917e8..27bafa5e1ca1 100644 --- a/drivers/mtd/nand/raw/davinci_nand.c +++ b/drivers/mtd/nand/raw/davinci_nand.c @@ -801,7 +801,7 @@ static int nand_davinci_probe(struct platform_device *pdev) spin_unlock_irq(&davinci_nand_lock); /* Scan to find existence of the device(s) */ - info->chip.dummy_controller.ops = &davinci_nand_controller_ops; + info->chip.legacy.dummy_controller.ops = &davinci_nand_controller_ops; ret = nand_scan(&info->chip, pdata->mask_chipsel ? 
2 : 1); if (ret < 0) { dev_dbg(&pdev->dev, "no NAND chip(s) found\n"); diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c index e1c3099d705a..eebac35304c6 100644 --- a/drivers/mtd/nand/raw/denali.c +++ b/drivers/mtd/nand/raw/denali.c @@ -1325,7 +1325,7 @@ int denali_init(struct denali_nand_info *denali) if (denali->clk_rate && denali->clk_x_rate) chip->options |= NAND_KEEP_TIMINGS; - chip->dummy_controller.ops = &denali_controller_ops; + chip->legacy.dummy_controller.ops = &denali_controller_ops; ret = nand_scan(chip, denali->max_banks); if (ret) goto disable_irq; diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 25f9fe79796a..ed405c9434fe 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -1931,7 +1931,7 @@ static int gpmi_nand_init(struct gpmi_nand_data *this) if (ret) goto err_out; - chip->dummy_controller.ops = &gpmi_nand_controller_ops; + chip->legacy.dummy_controller.ops = &gpmi_nand_controller_ops; ret = nand_scan(chip, GPMI_IS_MX6(this) ? 
2 : 1); if (ret) goto err_out; diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c index e41c13499fd5..f3f9aa160cff 100644 --- a/drivers/mtd/nand/raw/hisi504_nand.c +++ b/drivers/mtd/nand/raw/hisi504_nand.c @@ -799,7 +799,7 @@ static int hisi_nfc_probe(struct platform_device *pdev) return ret; } - chip->dummy_controller.ops = &hisi_nfc_controller_ops; + chip->legacy.dummy_controller.ops = &hisi_nfc_controller_ops; ret = nand_scan(chip, max_chips); if (ret) return ret; diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c index 0bcfdd3d66a8..f92ae5aa2a54 100644 --- a/drivers/mtd/nand/raw/jz4740_nand.c +++ b/drivers/mtd/nand/raw/jz4740_nand.c @@ -428,7 +428,7 @@ static int jz_nand_probe(struct platform_device *pdev) chip->legacy.chip_delay = 50; chip->legacy.cmd_ctrl = jz_nand_cmd_ctrl; chip->legacy.select_chip = jz_nand_select_chip; - chip->dummy_controller.ops = &jz_nand_controller_ops; + chip->legacy.dummy_controller.ops = &jz_nand_controller_ops; if (nand->busy_gpio) chip->legacy.dev_ready = jz_nand_dev_ready; diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c index abbb655fe154..086964f8d424 100644 --- a/drivers/mtd/nand/raw/lpc32xx_mlc.c +++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c @@ -799,7 +799,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev) * Scan to find existence of the device and get the type of NAND device: * SMALL block or LARGE block. 
*/ - nand_chip->dummy_controller.ops = &lpc32xx_nand_controller_ops; + nand_chip->legacy.dummy_controller.ops = &lpc32xx_nand_controller_ops; res = nand_scan(nand_chip, 1); if (res) goto free_irq; diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c index f2f2cdbb9d04..a2c5fdc875bd 100644 --- a/drivers/mtd/nand/raw/lpc32xx_slc.c +++ b/drivers/mtd/nand/raw/lpc32xx_slc.c @@ -924,7 +924,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev) } /* Find NAND device */ - chip->dummy_controller.ops = &lpc32xx_nand_controller_ops; + chip->legacy.dummy_controller.ops = &lpc32xx_nand_controller_ops; res = nand_scan(chip, 1); if (res) goto release_dma; diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c index 9b75d894cb74..59554c187e01 100644 --- a/drivers/mtd/nand/raw/mxc_nand.c +++ b/drivers/mtd/nand/raw/mxc_nand.c @@ -1891,7 +1891,7 @@ static int mxcnd_probe(struct platform_device *pdev) } /* Scan the NAND device */ - this->dummy_controller.ops = &mxcnd_controller_ops; + this->legacy.dummy_controller.ops = &mxcnd_controller_ops; err = nand_scan(this, is_imx25_nfc(host) ? 4 : 1); if (err) goto escan; diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 3fc5c00f8dba..cca4b24d2ffa 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -4419,9 +4419,9 @@ static void nand_shutdown(struct mtd_info *mtd) /* Set default functions */ static void nand_set_defaults(struct nand_chip *chip) { - /* If no controller is provided, use the dummy one. */ + /* If no controller is provided, use the dummy, legacy one. 
*/ if (!chip->controller) { - chip->controller = &chip->dummy_controller; + chip->controller = &chip->legacy.dummy_controller; nand_controller_init(chip->controller); } diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c index c452819f6123..2b3047d53558 100644 --- a/drivers/mtd/nand/raw/nandsim.c +++ b/drivers/mtd/nand/raw/nandsim.c @@ -2304,7 +2304,7 @@ static int __init ns_init_module(void) if ((retval = parse_gravepages()) != 0) goto error; - chip->dummy_controller.ops = &ns_controller_ops; + chip->legacy.dummy_controller.ops = &ns_controller_ops; retval = nand_scan(chip, 1); if (retval) { NS_ERR("Could not scan NAND Simulator device\n"); diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c index 7ab50bc6ad3a..cf6b1be1cf9c 100644 --- a/drivers/mtd/nand/raw/sh_flctl.c +++ b/drivers/mtd/nand/raw/sh_flctl.c @@ -1183,7 +1183,7 @@ static int flctl_probe(struct platform_device *pdev) flctl_setup_dma(flctl); - nand->dummy_controller.ops = &flctl_nand_controller_ops; + nand->legacy.dummy_controller.ops = &flctl_nand_controller_ops; ret = nand_scan(nand, 1); if (ret) goto err_chip; diff --git a/drivers/mtd/nand/raw/sm_common.c b/drivers/mtd/nand/raw/sm_common.c index 6f063ef57640..409d036858dc 100644 --- a/drivers/mtd/nand/raw/sm_common.c +++ b/drivers/mtd/nand/raw/sm_common.c @@ -194,7 +194,7 @@ int sm_register_device(struct mtd_info *mtd, int smartmedia) chip->options |= NAND_SKIP_BBTSCAN; /* Scan for card properties */ - chip->dummy_controller.ops = &sm_controller_ops; + chip->legacy.dummy_controller.ops = &sm_controller_ops; flash_ids = smartmedia ? 
nand_smartmedia_flash_ids : nand_xd_flash_ids; ret = nand_scan_with_ids(chip, 1, flash_ids); if (ret) diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index f50f40643895..33e240acdc6d 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -941,6 +941,8 @@ static inline void nand_controller_init(struct nand_controller *nfc) * @get_features: get the NAND chip features * @chip_delay: chip dependent delay for transferring data from array to read * regs (tR). + * @dummy_controller: dummy controller implementation for drivers that can + * only control a single chip * * If you look at this structure you're already wrong. These fields/hooks are * all deprecated. @@ -966,6 +968,7 @@ struct nand_legacy { int (*get_features)(struct nand_chip *chip, int feature_addr, u8 *subfeature_para); int chip_delay; + struct nand_controller dummy_controller; }; /** @@ -980,8 +983,6 @@ struct nand_legacy { * setting the read-retry mode. Mostly needed for MLC NAND. * @ecc: [BOARDSPECIFIC] ECC control structure * @buf_align: minimum buffer alignment required by a platform - * @dummy_controller: dummy controller implementation for drivers that can - * only control a single chip * @state: [INTERN] the current state of the NAND device * @oob_poi: "poison value buffer," used for laying out OOB data * before writing @@ -1094,7 +1095,6 @@ struct nand_chip { struct nand_ecc_ctrl ecc; unsigned long buf_align; - struct nand_controller dummy_controller; uint8_t *bbt; struct nand_bbt_descr *bbt_td; -- cgit v1.2.3 From c93c613214ac70c87beab5422a60077bf126b855 Mon Sep 17 00:00:00 2001 From: Chuanhong Guo Date: Wed, 28 Nov 2018 21:07:25 +0800 Subject: mtd: spinand: add support for GigaDevice GD5FxGQ4xA Add support for GigaDevice GD5F1G/2G/4GQ4xA SPI NAND. 
Signed-off-by: Chuanhong Guo Reviewed-by: Frieder Schrempf Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/Makefile | 2 +- drivers/mtd/nand/spi/core.c | 1 + drivers/mtd/nand/spi/gigadevice.c | 148 ++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 1 + 4 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/spi/gigadevice.c (limited to 'include/linux') diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile index be5f73512ece..753125082640 100644 --- a/drivers/mtd/nand/spi/Makefile +++ b/drivers/mtd/nand/spi/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -spinand-objs := core.o macronix.o micron.o toshiba.o winbond.o +spinand-objs := core.o gigadevice.o macronix.o micron.o toshiba.o winbond.o obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 87bdf2a7b724..479c2f2cf17f 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -764,6 +764,7 @@ static const struct nand_ops spinand_ops = { }; static const struct spinand_manufacturer *spinand_manufacturers[] = { + &gigadevice_spinand_manufacturer, &macronix_spinand_manufacturer, &micron_spinand_manufacturer, &toshiba_spinand_manufacturer, diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c new file mode 100644 index 000000000000..e4141c20947a --- /dev/null +++ b/drivers/mtd/nand/spi/gigadevice.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: + * Chuanhong Guo + */ + +#include +#include +#include + +#define SPINAND_MFR_GIGADEVICE 0xC8 +#define GD5FXGQ4XA_STATUS_ECC_1_7_BITFLIPS (1 << 4) +#define GD5FXGQ4XA_STATUS_ECC_8_BITFLIPS (3 << 4) + +static SPINAND_OP_VARIANTS(read_cache_variants, + SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0), + 
SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0)); + +static SPINAND_OP_VARIANTS(write_cache_variants, + SPINAND_PROG_LOAD_X4(true, 0, NULL, 0), + SPINAND_PROG_LOAD(true, 0, NULL, 0)); + +static SPINAND_OP_VARIANTS(update_cache_variants, + SPINAND_PROG_LOAD_X4(false, 0, NULL, 0), + SPINAND_PROG_LOAD(false, 0, NULL, 0)); + +static int gd5fxgq4xa_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section > 3) + return -ERANGE; + + region->offset = (16 * section) + 8; + region->length = 8; + + return 0; +} + +static int gd5fxgq4xa_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section > 3) + return -ERANGE; + + if (section) { + region->offset = 16 * section; + region->length = 8; + } else { + /* section 0 has one byte reserved for bad block mark */ + region->offset = 1; + region->length = 7; + } + return 0; +} + +static int gd5fxgq4xa_ecc_get_status(struct spinand_device *spinand, + u8 status) +{ + switch (status & STATUS_ECC_MASK) { + case STATUS_ECC_NO_BITFLIPS: + return 0; + + case GD5FXGQ4XA_STATUS_ECC_1_7_BITFLIPS: + /* 1-7 bits are flipped. return the maximum. 
*/ + return 7; + + case GD5FXGQ4XA_STATUS_ECC_8_BITFLIPS: + return 8; + + case STATUS_ECC_UNCOR_ERROR: + return -EBADMSG; + + default: + break; + } + + return -EINVAL; +} + +static const struct mtd_ooblayout_ops gd5fxgq4xa_ooblayout = { + .ecc = gd5fxgq4xa_ooblayout_ecc, + .free = gd5fxgq4xa_ooblayout_free, +}; + +static const struct spinand_info gigadevice_spinand_table[] = { + SPINAND_INFO("GD5F1GQ4xA", 0xF1, + NAND_MEMORG(1, 2048, 64, 64, 1024, 1, 1, 1), + NAND_ECCREQ(8, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + 0, + SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout, + gd5fxgq4xa_ecc_get_status)), + SPINAND_INFO("GD5F2GQ4xA", 0xF2, + NAND_MEMORG(1, 2048, 64, 64, 2048, 1, 1, 1), + NAND_ECCREQ(8, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + 0, + SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout, + gd5fxgq4xa_ecc_get_status)), + SPINAND_INFO("GD5F4GQ4xA", 0xF4, + NAND_MEMORG(1, 2048, 64, 64, 4096, 1, 1, 1), + NAND_ECCREQ(8, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + 0, + SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout, + gd5fxgq4xa_ecc_get_status)), +}; + +static int gigadevice_spinand_detect(struct spinand_device *spinand) +{ + u8 *id = spinand->id.data; + int ret; + + /* + * For GD NANDs, There is an address byte needed to shift in before IDs + * are read out, so the first byte in raw_id is dummy. 
+ */ + if (id[1] != SPINAND_MFR_GIGADEVICE) + return 0; + + ret = spinand_match_and_init(spinand, gigadevice_spinand_table, + ARRAY_SIZE(gigadevice_spinand_table), + id[2]); + if (ret) + return ret; + + return 1; +} + +static const struct spinand_manufacturer_ops gigadevice_spinand_manuf_ops = { + .detect = gigadevice_spinand_detect, +}; + +const struct spinand_manufacturer gigadevice_spinand_manufacturer = { + .id = SPINAND_MFR_GIGADEVICE, + .name = "GigaDevice", + .ops = &gigadevice_spinand_manuf_ops, +}; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 816c4b00abca..b92e2aa955b6 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -194,6 +194,7 @@ struct spinand_manufacturer { }; /* SPI NAND manufacturers */ +extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; extern const struct spinand_manufacturer toshiba_spinand_manufacturer; -- cgit v1.2.3 From b312d8ca3a7cebe19941d969a51f2b7f899b81e2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 14 Nov 2018 16:11:06 +0100 Subject: dma-buf: make fence sequence numbers 64 bit v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a lot of use cases we need 64bit sequence numbers. Currently drivers overload the dma_fence structure to store the additional bits. Stop doing that and make the sequence number in the dma_fence always 64bit. For compatibility with hardware which can do only 32bit sequences the comparisons in __dma_fence_is_later only takes the lower 32bits as significant when the upper 32bits are all zero. 
v2: change the logic in __dma_fence_is_later Signed-off-by: Christian König Reviewed-by: Chunming Zhou Link: https://patchwork.freedesktop.org/patch/266927/ --- drivers/dma-buf/dma-fence.c | 2 +- drivers/dma-buf/sw_sync.c | 2 +- drivers/dma-buf/sync_file.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 2 +- drivers/gpu/drm/i915/i915_sw_fence.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/vgem/vgem_fence.c | 4 ++-- include/linux/dma-fence.h | 22 +++++++++++++++------- 8 files changed, 24 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 136ec04d683f..3aa8733f832a 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -649,7 +649,7 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout); */ void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, - spinlock_t *lock, u64 context, unsigned seqno) + spinlock_t *lock, u64 context, u64 seqno) { BUG_ON(!lock); BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name); diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 53c1d6d36a64..32dcf7b4c935 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -172,7 +172,7 @@ static bool timeline_fence_enable_signaling(struct dma_fence *fence) static void timeline_fence_value_str(struct dma_fence *fence, char *str, int size) { - snprintf(str, size, "%d", fence->seqno); + snprintf(str, size, "%lld", fence->seqno); } static void timeline_fence_timeline_value_str(struct dma_fence *fence, diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 35dd06479867..4f6305ca52c8 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -144,7 +144,7 @@ char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len) } else { struct dma_fence *fence = sync_file->fence; - snprintf(buf, len, "%s-%s%llu-%d", + snprintf(buf, len, "%s-%s%llu-%lld", 
fence->ops->get_driver_name(fence), fence->ops->get_timeline_name(fence), fence->context, @@ -258,7 +258,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, i_b++; } else { - if (pt_a->seqno - pt_b->seqno <= INT_MAX) + if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno)) add_fence(fences, &i, pt_a); else add_fence(fences, &i, pt_b); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 12f2bf97611f..bfaf5c6323be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -388,7 +388,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, soffset, eoffset, eoffset - soffset); if (i->fence) - seq_printf(m, " protected by 0x%08x on context %llu", + seq_printf(m, " protected by 0x%016llx on context %llu", i->fence->seqno, i->fence->context); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 6dbeed079ae5..11bcdabd5177 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -393,7 +393,7 @@ static void timer_i915_sw_fence_wake(struct timer_list *t) if (!fence) return; - pr_notice("Asynchronous wait on fence %s:%s:%x timed out (hint:%pS)\n", + pr_notice("Asynchronous wait on fence %s:%s:%llx timed out (hint:%pS)\n", cb->dma->ops->get_driver_name(cb->dma), cb->dma->ops->get_timeline_name(cb->dma), cb->dma->seqno, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 759c0fd58f8c..dfafa79171df 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1239,7 +1239,7 @@ static void print_request(struct drm_printer *m, x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s%x%s [%llx:%x]%s @ %dms: %s\n", + drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n", prefix, rq->global_seqno, i915_request_completed(rq) ? "!" 
: "", diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index c1c420afe2dd..eb17c0cd3727 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -53,13 +53,13 @@ static void vgem_fence_release(struct dma_fence *base) static void vgem_fence_value_str(struct dma_fence *fence, char *str, int size) { - snprintf(str, size, "%u", fence->seqno); + snprintf(str, size, "%llu", fence->seqno); } static void vgem_fence_timeline_value_str(struct dma_fence *fence, char *str, int size) { - snprintf(str, size, "%u", + snprintf(str, size, "%llu", dma_fence_is_signaled(fence) ? fence->seqno : 0); } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 999e4b104410..6b788467b2e3 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -77,7 +77,7 @@ struct dma_fence { struct list_head cb_list; spinlock_t *lock; u64 context; - unsigned seqno; + u64 seqno; unsigned long flags; ktime_t timestamp; int error; @@ -244,7 +244,7 @@ struct dma_fence_ops { }; void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, - spinlock_t *lock, u64 context, unsigned seqno); + spinlock_t *lock, u64 context, u64 seqno); void dma_fence_release(struct kref *kref); void dma_fence_free(struct dma_fence *fence); @@ -414,9 +414,17 @@ dma_fence_is_signaled(struct dma_fence *fence) * Returns true if f1 is chronologically later than f2. Both fences must be * from the same context, since a seqno is not common across contexts. */ -static inline bool __dma_fence_is_later(u32 f1, u32 f2) +static inline bool __dma_fence_is_later(u64 f1, u64 f2) { - return (int)(f1 - f2) > 0; + /* This is for backward compatibility with drivers which can only handle + * 32bit sequence numbers. Use a 64bit compare when any of the higher + * bits are none zero, otherwise use a 32bit compare with wrap around + * handling. 
+ */ + if (upper_32_bits(f1) || upper_32_bits(f2)) + return f1 > f2; + + return (int)(lower_32_bits(f1) - lower_32_bits(f2)) > 0; } /** @@ -548,21 +556,21 @@ u64 dma_fence_context_alloc(unsigned num); do { \ struct dma_fence *__ff = (f); \ if (IS_ENABLED(CONFIG_DMA_FENCE_TRACE)) \ - pr_info("f %llu#%u: " fmt, \ + pr_info("f %llu#%llu: " fmt, \ __ff->context, __ff->seqno, ##args); \ } while (0) #define DMA_FENCE_WARN(f, fmt, args...) \ do { \ struct dma_fence *__ff = (f); \ - pr_warn("f %llu#%u: " fmt, __ff->context, __ff->seqno, \ + pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\ ##args); \ } while (0) #define DMA_FENCE_ERR(f, fmt, args...) \ do { \ struct dma_fence *__ff = (f); \ - pr_err("f %llu#%u: " fmt, __ff->context, __ff->seqno, \ + pr_err("f %llu#%llu: " fmt, __ff->context, __ff->seqno, \ ##args); \ } while (0) -- cgit v1.2.3 From cb03f94ffb070b13bc0fa58b4ef4fdb558418d27 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 30 Nov 2018 10:04:08 +1100 Subject: fs/locks: merge posix_unblock_lock() and locks_delete_block() posix_unblock_lock() is not specific to posix locks, and behaves nearly identically to locks_delete_block() - the former returning a status while the later doesn't. So discard posix_unblock_lock() and use locks_delete_block() instead, after giving that function an appropriate return value. Signed-off-by: NeilBrown Reviewed-by: J. 
Bruce Fields Signed-off-by: Jeff Layton --- fs/cifs/file.c | 2 +- fs/lockd/svclock.c | 2 +- fs/locks.c | 38 ++++++++++++++------------------------ fs/nfsd/nfs4state.c | 6 +++--- include/linux/fs.h | 4 ++-- 5 files changed, 21 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d7ed895e05d1..94c3575e850c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1106,7 +1106,7 @@ try_again: rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker); if (!rc) goto try_again; - posix_unblock_lock(flock); + locks_delete_block(flock); } return rc; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 74330daeab71..ea719cdd6a36 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -276,7 +276,7 @@ static int nlmsvc_unlink_block(struct nlm_block *block) dprintk("lockd: unlinking block %p...\n", block); /* Remove block from list */ - status = posix_unblock_lock(&block->b_call->a_args.lock.fl); + status = locks_delete_block(&block->b_call->a_args.lock.fl); nlmsvc_remove_block(block); return status; } diff --git a/fs/locks.c b/fs/locks.c index 4d6a5a3f903a..75a03a9d666e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -748,8 +748,16 @@ static void __locks_wake_up_blocks(struct file_lock *blocker) } } -static void locks_delete_block(struct file_lock *waiter) +/** + * locks_delete_block - stop waiting for a file lock + * @waiter: the lock which was waiting + * + * lockd/nfsd need to disconnect the lock while working on it. 
+ */ +int locks_delete_block(struct file_lock *waiter) { + int status = -ENOENT; + /* * If fl_blocker is NULL, it won't be set again as this thread * "owns" the lock and is the only one that might try to claim @@ -763,12 +771,16 @@ static void locks_delete_block(struct file_lock *waiter) */ if (waiter->fl_blocker == NULL && list_empty(&waiter->fl_blocked_requests)) - return; + return status; spin_lock(&blocked_lock_lock); + if (waiter->fl_blocker) + status = 0; __locks_wake_up_blocks(waiter); __locks_delete_block(waiter); spin_unlock(&blocked_lock_lock); + return status; } +EXPORT_SYMBOL(locks_delete_block); /* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in @@ -2675,28 +2687,6 @@ void locks_remove_file(struct file *filp) spin_unlock(&ctx->flc_lock); } -/** - * posix_unblock_lock - stop waiting for a file lock - * @waiter: the lock which was waiting - * - * lockd needs to block waiting for locks. - */ -int -posix_unblock_lock(struct file_lock *waiter) -{ - int status = -ENOENT; - - spin_lock(&blocked_lock_lock); - if (waiter->fl_blocker) { - __locks_wake_up_blocks(waiter); - __locks_delete_block(waiter); - status = 0; - } - spin_unlock(&blocked_lock_lock); - return status; -} -EXPORT_SYMBOL(posix_unblock_lock); - /** * vfs_cancel_lock - file byte range unblock lock * @filp: The file to apply the unblock to diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f093fbe47133..a334828723fa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -238,7 +238,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, } spin_unlock(&nn->blocked_locks_lock); if (found) - posix_unblock_lock(&found->nbl_lock); + locks_delete_block(&found->nbl_lock); return found; } @@ -293,7 +293,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo) nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); - posix_unblock_lock(&nbl->nbl_lock); + 
locks_delete_block(&nbl->nbl_lock); free_blocked_lock(nbl); } } @@ -4863,7 +4863,7 @@ nfs4_laundromat(struct nfsd_net *nn) nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); - posix_unblock_lock(&nbl->nbl_lock); + locks_delete_block(&nbl->nbl_lock); free_blocked_lock(nbl); } out: diff --git a/include/linux/fs.h b/include/linux/fs.h index 16df3a7df378..26a8607b3c3c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1124,7 +1124,7 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); -extern int posix_unblock_lock(struct file_lock *); +extern int locks_delete_block(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); @@ -1214,7 +1214,7 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } -static inline int posix_unblock_lock(struct file_lock *waiter) +static inline int locks_delete_block(struct file_lock *waiter) { return -ENOENT; } -- cgit v1.2.3 From 08861d33d680838753f1f9d3ba9480d3651b764d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 19 Sep 2018 13:39:26 +0100 Subject: preempt: Move PREEMPT_NEED_RESCHED definition into arch code PREEMPT_NEED_RESCHED is never used directly, so move it into the arch code where it can potentially be implemented using either a different bit in the preempt count or as an entirely separate entity. 
Cc: Robert Love Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Martin Schwidefsky Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon --- arch/s390/include/asm/preempt.h | 2 ++ arch/x86/include/asm/preempt.h | 3 +++ include/linux/preempt.h | 3 --- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 23a14d187fb1..b5ea9e14c017 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -8,6 +8,8 @@ #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES +/* We use the MSB mostly because its available */ +#define PREEMPT_NEED_RESCHED 0x80000000 #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) static inline int preempt_count(void) diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 90cb2f36c042..99a7fa9ab0a3 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -8,6 +8,9 @@ DECLARE_PER_CPU(int, __preempt_count); +/* We use the MSB mostly because its available */ +#define PREEMPT_NEED_RESCHED 0x80000000 + /* * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such * that a decrement hitting 0 means we can and should reschedule. diff --git a/include/linux/preempt.h b/include/linux/preempt.h index c01813c3fbe9..dd92b1a93919 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -53,9 +53,6 @@ #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) -/* We use the MSB mostly because its available */ -#define PREEMPT_NEED_RESCHED 0x80000000 - #define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) /* -- cgit v1.2.3 From c53431eb696f3c64c12c00afb81048af54b61532 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 5 Dec 2018 10:42:22 +1000 Subject: HID: core: store the collections as a basic tree For each collection parsed, store a pointer to the parent collection (if any). 
This makes it a lot easier to look up which collection(s) any given item is part of Signed-off-by: Peter Hutterer Verified-by: Harry Cutts Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-core.c | 4 ++++ include/linux/hid.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 5bec9244c45b..43d488a45120 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -172,6 +172,8 @@ static int open_collection(struct hid_parser *parser, unsigned type) collection->type = type; collection->usage = usage; collection->level = parser->collection_stack_ptr - 1; + collection->parent = parser->active_collection; + parser->active_collection = collection; if (type == HID_COLLECTION_APPLICATION) parser->device->maxapplication++; @@ -190,6 +192,8 @@ static int close_collection(struct hid_parser *parser) return -EINVAL; } parser->collection_stack_ptr--; + if (parser->active_collection) + parser->active_collection = parser->active_collection->parent; return 0; } diff --git a/include/linux/hid.h b/include/linux/hid.h index a355d61940f2..fdfda898656c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -427,6 +427,7 @@ struct hid_local { */ struct hid_collection { + struct hid_collection *parent; unsigned type; unsigned usage; unsigned level; @@ -650,6 +651,7 @@ struct hid_parser { unsigned int *collection_stack; unsigned int collection_stack_ptr; unsigned int collection_stack_size; + struct hid_collection *active_collection; struct hid_device *device; unsigned int scan_flags; }; -- cgit v1.2.3 From 5a4abb36f312cf83206b1b7d1308ba47cba0b3cc Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 5 Dec 2018 10:42:23 +1000 Subject: HID: core: process the Resolution Multiplier The Resolution Multiplier is a feature report that modifies the value of Usages within the same Logical Collection. 
If the multiplier is set to anything but 1, the hardware reports (value * multiplier) for the same amount of physical movement, i.e. the value we receive in the kernel is pre-multiplied. The hardware may either send a single (value * multiplier), or by sending multiplier as many reports with the same value, or a combination of these two options. For example, when the Microsoft Sculpt Ergonomic mouse Resolution Multiplier is set to 12, the Wheel sends out 12 for every detent but AC Pan sends out a value of 3 at 4 times the frequency. The effective multiplier is based on the physical min/max of the multiplier field, a logical min/max of [0,1] with a physical min/max of [1,8] means the multiplier is either 1 or 8. The Resolution Multiplier was introduced for high-resolution scrolling in Windows Vista and is commonly used on Microsoft mice. The recommendation for the Resolution Multiplier is to default to 1 for backwards compatibility. This patch adds an arbitrary upper limit at 255. The only known use case for the Resolution Multiplier is for scroll wheels where the multiplier has to be a fraction of 120 to work with Windows. 
Signed-off-by: Peter Hutterer Verified-by: Harry Cutts Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-core.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/hid.h | 5 ++ 2 files changed, 175 insertions(+) (limited to 'include/linux') diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 43d488a45120..f41d5fe51abe 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -294,6 +294,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign field->usage[i].collection_index = parser->local.collection_index[j]; field->usage[i].usage_index = i; + field->usage[i].resolution_multiplier = 1; } field->maxusage = usages; @@ -947,6 +948,167 @@ struct hid_report *hid_validate_values(struct hid_device *hid, } EXPORT_SYMBOL_GPL(hid_validate_values); +static int hid_calculate_multiplier(struct hid_device *hid, + struct hid_field *multiplier) +{ + int m; + __s32 v = *multiplier->value; + __s32 lmin = multiplier->logical_minimum; + __s32 lmax = multiplier->logical_maximum; + __s32 pmin = multiplier->physical_minimum; + __s32 pmax = multiplier->physical_maximum; + + /* + * "Because OS implementations will generally divide the control's + * reported count by the Effective Resolution Multiplier, designers + * should take care not to establish a potential Effective + * Resolution Multiplier of zero." + * HID Usage Table, v1.12, Section 4.3.1, p31 + */ + if (lmax - lmin == 0) + return 1; + /* + * Handling the unit exponent is left as an exercise to whoever + * finds a device where that exponent is not 0. 
+ */ + m = ((v - lmin)/(lmax - lmin) * (pmax - pmin) + pmin); + if (unlikely(multiplier->unit_exponent != 0)) { + hid_warn(hid, + "unsupported Resolution Multiplier unit exponent %d\n", + multiplier->unit_exponent); + } + + /* There are no devices with an effective multiplier > 255 */ + if (unlikely(m == 0 || m > 255 || m < -255)) { + hid_warn(hid, "unsupported Resolution Multiplier %d\n", m); + m = 1; + } + + return m; +} + +static void hid_apply_multiplier_to_field(struct hid_device *hid, + struct hid_field *field, + struct hid_collection *multiplier_collection, + int effective_multiplier) +{ + struct hid_collection *collection; + struct hid_usage *usage; + int i; + + /* + * If multiplier_collection is NULL, the multiplier applies + * to all fields in the report. + * Otherwise, it is the Logical Collection the multiplier applies to + * but our field may be in a subcollection of that collection. + */ + for (i = 0; i < field->maxusage; i++) { + usage = &field->usage[i]; + + collection = &hid->collection[usage->collection_index]; + while (collection && collection != multiplier_collection) + collection = collection->parent; + + if (collection || multiplier_collection == NULL) + usage->resolution_multiplier = effective_multiplier; + + } +} + +static void hid_apply_multiplier(struct hid_device *hid, + struct hid_field *multiplier) +{ + struct hid_report_enum *rep_enum; + struct hid_report *rep; + struct hid_field *field; + struct hid_collection *multiplier_collection; + int effective_multiplier; + int i; + + /* + * "The Resolution Multiplier control must be contained in the same + * Logical Collection as the control(s) to which it is to be applied. + * If no Resolution Multiplier is defined, then the Resolution + * Multiplier defaults to 1. If more than one control exists in a + * Logical Collection, the Resolution Multiplier is associated with + * all controls in the collection. 
If no Logical Collection is + * defined, the Resolution Multiplier is associated with all + * controls in the report." + * HID Usage Table, v1.12, Section 4.3.1, p30 + * + * Thus, search from the current collection upwards until we find a + * logical collection. Then search all fields for that same parent + * collection. Those are the fields the multiplier applies to. + * + * If we have more than one multiplier, it will overwrite the + * applicable fields later. + */ + multiplier_collection = &hid->collection[multiplier->usage->collection_index]; + while (multiplier_collection && + multiplier_collection->type != HID_COLLECTION_LOGICAL) + multiplier_collection = multiplier_collection->parent; + + effective_multiplier = hid_calculate_multiplier(hid, multiplier); + + rep_enum = &hid->report_enum[HID_INPUT_REPORT]; + list_for_each_entry(rep, &rep_enum->report_list, list) { + for (i = 0; i < rep->maxfield; i++) { + field = rep->field[i]; + hid_apply_multiplier_to_field(hid, field, + multiplier_collection, + effective_multiplier); + } + } +} + +/* + * hid_setup_resolution_multiplier - set up all resolution multipliers + * + * @device: hid device + * + * Search for all Resolution Multiplier Feature Reports and apply their + * value to all matching Input items. This only updates the internal struct + * fields. + * + * The Resolution Multiplier is applied by the hardware. If the multiplier + * is anything other than 1, the hardware will send pre-multiplied events + * so that the same physical interaction generates an accumulated + * accumulated_value = value * * multiplier + * This may be achieved by sending + * - "value * multiplier" for each event, or + * - "value" but "multiplier" times as frequently, or + * - a combination of the above + * The only guarantee is that the same physical interaction always generates + * an accumulated 'value * multiplier'. 
+ * + * This function must be called before any event processing and after + * any SetRequest to the Resolution Multiplier. + */ +void hid_setup_resolution_multiplier(struct hid_device *hid) +{ + struct hid_report_enum *rep_enum; + struct hid_report *rep; + struct hid_usage *usage; + int i, j; + + rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; + list_for_each_entry(rep, &rep_enum->report_list, list) { + for (i = 0; i < rep->maxfield; i++) { + /* Ignore if report count is out of bounds. */ + if (rep->field[i]->report_count < 1) + continue; + + for (j = 0; j < rep->field[i]->maxusage; j++) { + usage = &rep->field[i]->usage[j]; + if (usage->hid == HID_GD_RESOLUTION_MULTIPLIER) + hid_apply_multiplier(hid, + rep->field[i]); + } + } + } +} +EXPORT_SYMBOL_GPL(hid_setup_resolution_multiplier); + /** * hid_open_report - open a driver-specific device report * @@ -1043,9 +1205,17 @@ int hid_open_report(struct hid_device *device) hid_err(device, "unbalanced delimiter at end of report description\n"); goto err; } + + /* + * fetch initial values in case the device's + * default multiplier isn't the recommended 1 + */ + hid_setup_resolution_multiplier(device); + kfree(parser->collection_stack); vfree(parser); device->status |= HID_STAT_PARSED; + return 0; } } diff --git a/include/linux/hid.h b/include/linux/hid.h index fdfda898656c..fd8d860365a4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -219,6 +219,7 @@ struct hid_item { #define HID_GD_VBRZ 0x00010045 #define HID_GD_VNO 0x00010046 #define HID_GD_FEATURE 0x00010047 +#define HID_GD_RESOLUTION_MULTIPLIER 0x00010048 #define HID_GD_SYSTEM_CONTROL 0x00010080 #define HID_GD_UP 0x00010090 #define HID_GD_DOWN 0x00010091 @@ -437,6 +438,8 @@ struct hid_usage { unsigned hid; /* hid usage code */ unsigned collection_index; /* index into collection array */ unsigned usage_index; /* index into usage array */ + __s8 resolution_multiplier;/* Effective Resolution Multiplier + (HUT v1.12, 4.3.1), default: 1 */ /* hidinput 
data */ __u16 code; /* input driver code */ __u8 type; /* input driver type */ @@ -894,6 +897,8 @@ struct hid_report *hid_validate_values(struct hid_device *hid, unsigned int type, unsigned int id, unsigned int field_index, unsigned int report_counts); + +void hid_setup_resolution_multiplier(struct hid_device *hid); int hid_open_report(struct hid_device *device); int hid_check_keys_pressed(struct hid_device *hid); int hid_connect(struct hid_device *hid, unsigned int connect_mask); -- cgit v1.2.3 From 2dc702c991e3774af9d7ce410eef410ca9e2357e Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 5 Dec 2018 10:42:24 +1000 Subject: HID: input: use the Resolution Multiplier for high-resolution scrolling Windows uses a magic number of 120 for a wheel click. High-resolution scroll wheels are supposed to use a fraction of 120 to signal smaller scroll steps. This is implemented by the Resolution Multiplier in the device itself. If the multiplier is present in the report descriptor, set it to the logical max and then use the resolution multiplier to calculate the high-resolution events. This is the recommendation by Microsoft, see http://msdn.microsoft.com/en-us/windows/hardware/gg487477.aspx Note that all mice encountered so far have a logical min/max of 0/1, so it's a binary "yes or no" to high-res scrolling anyway. To make userspace simpler, always enable the REL_WHEEL_HI_RES bit. Where the device doesn't support high-resolution scrolling, the value for the high-res data will simply be a multiple of 120 every time. For userspace, if REL_WHEEL_HI_RES is available that is the one to be used. Potential side-effect: a device with a Resolution Multiplier applying to other Input items will have those items set to the logical max as well. This cannot easily be worked around but it is doubtful such devices exist. 
Signed-off-by: Peter Hutterer Verified-by: Harry Cutts Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-input.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/hid.h | 3 ++ 2 files changed, 108 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d6fab5798487..59a5608b8dc0 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -712,7 +712,15 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel map_abs_clear(usage->hid & 0xf); break; - case HID_GD_SLIDER: case HID_GD_DIAL: case HID_GD_WHEEL: + case HID_GD_WHEEL: + if (field->flags & HID_MAIN_ITEM_RELATIVE) { + set_bit(REL_WHEEL, input->relbit); + map_rel(REL_WHEEL_HI_RES); + } else { + map_abs(usage->hid & 0xf); + } + break; + case HID_GD_SLIDER: case HID_GD_DIAL: if (field->flags & HID_MAIN_ITEM_RELATIVE) map_rel(usage->hid & 0xf); else @@ -1012,7 +1020,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x22f: map_key_clear(KEY_ZOOMRESET); break; case 0x233: map_key_clear(KEY_SCROLLUP); break; case 0x234: map_key_clear(KEY_SCROLLDOWN); break; - case 0x238: map_rel(REL_HWHEEL); break; + case 0x238: /* AC Pan */ + set_bit(REL_HWHEEL, input->relbit); + map_rel(REL_HWHEEL_HI_RES); + break; case 0x23d: map_key_clear(KEY_EDIT); break; case 0x25f: map_key_clear(KEY_CANCEL); break; case 0x269: map_key_clear(KEY_INSERT); break; @@ -1200,6 +1211,38 @@ ignore: } +static void hidinput_handle_scroll(struct hid_usage *usage, + struct input_dev *input, + __s32 value) +{ + int code; + int hi_res, lo_res; + + if (value == 0) + return; + + if (usage->code == REL_WHEEL_HI_RES) + code = REL_WHEEL; + else + code = REL_HWHEEL; + + /* + * Windows reports one wheel click as value 120. Where a high-res + * scroll wheel is present, a fraction of 120 is reported instead. 
+ * Our REL_WHEEL_HI_RES axis does the same because all HW must + * adhere to the 120 expectation. + */ + hi_res = value * 120/usage->resolution_multiplier; + + usage->wheel_accumulated += hi_res; + lo_res = usage->wheel_accumulated/120; + if (lo_res) + usage->wheel_accumulated -= lo_res * 120; + + input_event(input, EV_REL, code, lo_res); + input_event(input, EV_REL, usage->code, hi_res); +} + void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct input_dev *input; @@ -1262,6 +1305,12 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct if ((usage->type == EV_KEY) && (usage->code == 0)) /* Key 0 is "unassigned", not KEY_UNKNOWN */ return; + if ((usage->type == EV_REL) && (usage->code == REL_WHEEL_HI_RES || + usage->code == REL_HWHEEL_HI_RES)) { + hidinput_handle_scroll(usage, input, value); + return; + } + if ((usage->type == EV_ABS) && (field->flags & HID_MAIN_ITEM_RELATIVE) && (usage->code == ABS_VOLUME)) { int count = abs(value); @@ -1489,6 +1538,58 @@ static void hidinput_close(struct input_dev *dev) hid_hw_close(hid); } +static void hidinput_change_resolution_multipliers(struct hid_device *hid) +{ + struct hid_report_enum *rep_enum; + struct hid_report *rep; + struct hid_usage *usage; + int i, j; + + rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; + list_for_each_entry(rep, &rep_enum->report_list, list) { + bool update_needed = false; + + if (rep->maxfield == 0) + continue; + + /* + * If we have more than one feature within this report we + * need to fill in the bits from the others before we can + * overwrite the ones for the Resolution Multiplier. + */ + if (rep->maxfield > 1) { + hid_hw_request(hid, rep, HID_REQ_GET_REPORT); + hid_hw_wait(hid); + } + + for (i = 0; i < rep->maxfield; i++) { + __s32 logical_max = rep->field[i]->logical_maximum; + + /* There is no good reason for a Resolution + * Multiplier to have a count other than 1. + * Ignore that case. 
+ */ + if (rep->field[i]->report_count != 1) + continue; + + for (j = 0; j < rep->field[i]->maxusage; j++) { + usage = &rep->field[i]->usage[j]; + + if (usage->hid != HID_GD_RESOLUTION_MULTIPLIER) + continue; + + *rep->field[i]->value = logical_max; + update_needed = true; + } + } + if (update_needed) + hid_hw_request(hid, rep, HID_REQ_SET_REPORT); + } + + /* refresh our structs */ + hid_setup_resolution_multiplier(hid); +} + static void report_features(struct hid_device *hid) { struct hid_driver *drv = hid->driver; @@ -1782,6 +1883,8 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) } } + hidinput_change_resolution_multipliers(hid); + list_for_each_entry_safe(hidinput, next, &hid->inputs, list) { if (drv->input_configured && drv->input_configured(hid, hidinput)) @@ -1840,4 +1943,3 @@ void hidinput_disconnect(struct hid_device *hid) cancel_work_sync(&hid->led_work); } EXPORT_SYMBOL_GPL(hidinput_disconnect); - diff --git a/include/linux/hid.h b/include/linux/hid.h index fd8d860365a4..93db548f8761 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -233,6 +233,7 @@ struct hid_item { #define HID_DC_BATTERYSTRENGTH 0x00060020 #define HID_CP_CONSUMER_CONTROL 0x000c0001 +#define HID_CP_AC_PAN 0x000c0238 #define HID_DG_DIGITIZER 0x000d0001 #define HID_DG_PEN 0x000d0002 @@ -441,11 +442,13 @@ struct hid_usage { __s8 resolution_multiplier;/* Effective Resolution Multiplier (HUT v1.12, 4.3.1), default: 1 */ /* hidinput data */ + __s8 wheel_factor; /* 120/resolution_multiplier */ __u16 code; /* input driver code */ __u8 type; /* input driver type */ __s8 hat_min; /* hat switch fun */ __s8 hat_max; /* ditto */ __s8 hat_dir; /* ditto */ + __s16 wheel_accumulated; /* hi-res wheel */ }; struct hid_input; -- cgit v1.2.3 From 43920edf3b24b0a3d136019c816e84ffcbef83ab Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 7 Dec 2018 19:55:07 +0000 Subject: bridge: Add br_fdb_clear_offload() When a driver unoffloads all FDB entries en bloc, it's inefficient 
to send the switchdev notification one by one. Add a helper that unsets the offload flag on FDB entries on a given bridge port and VLAN. Signed-off-by: Petr Machata Acked-by: Nikolay Aleksandrov Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 6 ++++++ net/bridge/br_fdb.c | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index ef7c3d376b21..627b788ba0ff 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -119,6 +119,7 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid, struct net_device *br_fdb_find_port(const struct net_device *br_dev, const unsigned char *addr, __u16 vid); +void br_fdb_clear_offload(const struct net_device *dev, u16 vid); bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag); #else static inline struct net_device * @@ -128,6 +129,11 @@ br_fdb_find_port(const struct net_device *br_dev, { return NULL; } + +static inline void br_fdb_clear_offload(const struct net_device *dev, u16 vid) +{ +} + static inline bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag) { diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e56ba3912a90..38b1d0dd0529 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1164,3 +1164,23 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, spin_unlock_bh(&br->hash_lock); } + +void br_fdb_clear_offload(const struct net_device *dev, u16 vid) +{ + struct net_bridge_fdb_entry *f; + struct net_bridge_port *p; + + ASSERT_RTNL(); + + p = br_port_get_rtnl(dev); + if (!p) + return; + + spin_lock_bh(&p->br->hash_lock); + hlist_for_each_entry(f, &p->br->fdb_list, fdb_node) { + if (f->dst == p && f->key.vlan_id == vid) + f->offloaded = 0; + } + spin_unlock_bh(&p->br->hash_lock); +} +EXPORT_SYMBOL_GPL(br_fdb_clear_offload); -- cgit v1.2.3 From 
04e7712f4460585e5eed5b853fd8b82a9943958f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Apr 2018 16:31:07 +0200 Subject: y2038: futex: Move compat implementation into futex.c We are going to share the compat_sys_futex() handler between 64-bit architectures and 32-bit architectures that need to deal with both 32-bit and 64-bit time_t, and this is easier if both entry points are in the same file. In fact, most other system call handlers do the same thing these days, so let's follow the trend here and merge all of futex_compat.c into futex.c. In the process, a few minor changes have to be done to make sure everything still makes sense: handle_futex_death() and futex_cmpxchg_enabled() become local symbol, and the compat version of the fetch_robust_entry() function gets renamed to compat_fetch_robust_entry() to avoid a symbol clash. This is intended as a purely cosmetic patch, no behavior should change. Signed-off-by: Arnd Bergmann --- include/linux/futex.h | 8 -- kernel/Makefile | 3 - kernel/futex.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++- kernel/futex_compat.c | 202 -------------------------------------------------- 4 files changed, 192 insertions(+), 216 deletions(-) delete mode 100644 kernel/futex_compat.c (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 821ae502d3d8..ccaef0097785 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -9,9 +9,6 @@ struct inode; struct mm_struct; struct task_struct; -extern int -handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); - /* * Futexes are matched on equal values of this key. * The key type depends on whether it's a shared or private mapping. 
@@ -55,11 +52,6 @@ extern void exit_robust_list(struct task_struct *curr); long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -#ifdef CONFIG_HAVE_FUTEX_CMPXCHG -#define futex_cmpxchg_enabled 1 -#else -extern int futex_cmpxchg_enabled; -#endif #else static inline void exit_robust_list(struct task_struct *curr) { diff --git a/kernel/Makefile b/kernel/Makefile index 7343b3a9bff0..8e40a6742d23 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -49,9 +49,6 @@ obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_FUTEX) += futex.o -ifeq ($(CONFIG_COMPAT),y) -obj-$(CONFIG_FUTEX) += futex_compat.o -endif obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += smp.o ifneq ($(CONFIG_SMP),y) diff --git a/kernel/futex.c b/kernel/futex.c index f423f9b6577e..5cc7c3b098e9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -44,6 +44,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include @@ -173,8 +174,10 @@ * double_lock_hb() and double_unlock_hb(), respectively. */ -#ifndef CONFIG_HAVE_FUTEX_CMPXCHG -int __read_mostly futex_cmpxchg_enabled; +#ifdef CONFIG_HAVE_FUTEX_CMPXCHG +#define futex_cmpxchg_enabled 1 +#else +static int __read_mostly futex_cmpxchg_enabled; #endif /* @@ -3360,7 +3363,7 @@ err_unlock: * Process a futex-list entry, check whether it's owned by the * dying task, and do notification if so: */ -int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) +static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { u32 uval, uninitialized_var(nval), mval; @@ -3589,6 +3592,192 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); } +#ifdef CONFIG_COMPAT +/* + * Fetch a robust-list pointer. 
Bit 0 signals PI futexes: + */ +static inline int +compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, + compat_uptr_t __user *head, unsigned int *pi) +{ + if (get_user(*uentry, head)) + return -EFAULT; + + *entry = compat_ptr((*uentry) & ~1); + *pi = (unsigned int)(*uentry) & 1; + + return 0; +} + +static void __user *futex_uaddr(struct robust_list __user *entry, + compat_long_t futex_offset) +{ + compat_uptr_t base = ptr_to_compat(entry); + void __user *uaddr = compat_ptr(base + futex_offset); + + return uaddr; +} + +/* + * Walk curr->robust_list (very carefully, it's a userspace list!) + * and mark any locks found there dead, and notify any waiters. + * + * We silently return on any sign of list-walking problem. + */ +void compat_exit_robust_list(struct task_struct *curr) +{ + struct compat_robust_list_head __user *head = curr->compat_robust_list; + struct robust_list __user *entry, *next_entry, *pending; + unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; + unsigned int uninitialized_var(next_pi); + compat_uptr_t uentry, next_uentry, upending; + compat_long_t futex_offset; + int rc; + + if (!futex_cmpxchg_enabled) + return; + + /* + * Fetch the list head (which was registered earlier, via + * sys_set_robust_list()): + */ + if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) + return; + /* + * Fetch the relative futex offset: + */ + if (get_user(futex_offset, &head->futex_offset)) + return; + /* + * Fetch any possibly pending lock-add first, and handle it + * if it exists: + */ + if (compat_fetch_robust_entry(&upending, &pending, + &head->list_op_pending, &pip)) + return; + + next_entry = NULL; /* avoid warning with gcc */ + while (entry != (struct robust_list __user *) &head->list) { + /* + * Fetch the next entry in the list before calling + * handle_futex_death: + */ + rc = compat_fetch_robust_entry(&next_uentry, &next_entry, + (compat_uptr_t __user *)&entry->next, &next_pi); + /* + * A pending lock might 
already be on the list, so + * dont process it twice: + */ + if (entry != pending) { + void __user *uaddr = futex_uaddr(entry, futex_offset); + + if (handle_futex_death(uaddr, curr, pi)) + return; + } + if (rc) + return; + uentry = next_uentry; + entry = next_entry; + pi = next_pi; + /* + * Avoid excessively long or circular lists: + */ + if (!--limit) + break; + + cond_resched(); + } + if (pending) { + void __user *uaddr = futex_uaddr(pending, futex_offset); + + handle_futex_death(uaddr, curr, pip); + } +} + +COMPAT_SYSCALL_DEFINE2(set_robust_list, + struct compat_robust_list_head __user *, head, + compat_size_t, len) +{ + if (!futex_cmpxchg_enabled) + return -ENOSYS; + + if (unlikely(len != sizeof(*head))) + return -EINVAL; + + current->compat_robust_list = head; + + return 0; +} + +COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + compat_uptr_t __user *, head_ptr, + compat_size_t __user *, len_ptr) +{ + struct compat_robust_list_head __user *head; + unsigned long ret; + struct task_struct *p; + + if (!futex_cmpxchg_enabled) + return -ENOSYS; + + rcu_read_lock(); + + ret = -ESRCH; + if (!pid) + p = current; + else { + p = find_task_by_vpid(pid); + if (!p) + goto err_unlock; + } + + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) + goto err_unlock; + + head = p->compat_robust_list; + rcu_read_unlock(); + + if (put_user(sizeof(*head), len_ptr)) + return -EFAULT; + return put_user(ptr_to_compat(head), head_ptr); + +err_unlock: + rcu_read_unlock(); + + return ret; +} + +COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct old_timespec32 __user *, utime, u32 __user *, uaddr2, + u32, val3) +{ + struct timespec ts; + ktime_t t, *tp = NULL; + int val2 = 0; + int cmd = op & FUTEX_CMD_MASK; + + if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || + cmd == FUTEX_WAIT_BITSET || + cmd == FUTEX_WAIT_REQUEUE_PI)) { + if (compat_get_timespec(&ts, utime)) + return -EFAULT; + if (!timespec_valid(&ts)) + return -EINVAL; + + t 
= timespec_to_ktime(ts); + if (cmd == FUTEX_WAIT) + t = ktime_add_safe(ktime_get(), t); + tp = &t; + } + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (int) (unsigned long) utime; + + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); +} +#endif /* CONFIG_COMPAT */ + static void __init futex_detect_cmpxchg(void) { #ifndef CONFIG_HAVE_FUTEX_CMPXCHG diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c deleted file mode 100644 index 410a77a8f6e2..000000000000 --- a/kernel/futex_compat.c +++ /dev/null @@ -1,202 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/kernel/futex_compat.c - * - * Futex compatibililty routines. - * - * Copyright 2006, Red Hat, Inc., Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include - -#include - - -/* - * Fetch a robust-list pointer. Bit 0 signals PI futexes: - */ -static inline int -fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, - compat_uptr_t __user *head, unsigned int *pi) -{ - if (get_user(*uentry, head)) - return -EFAULT; - - *entry = compat_ptr((*uentry) & ~1); - *pi = (unsigned int)(*uentry) & 1; - - return 0; -} - -static void __user *futex_uaddr(struct robust_list __user *entry, - compat_long_t futex_offset) -{ - compat_uptr_t base = ptr_to_compat(entry); - void __user *uaddr = compat_ptr(base + futex_offset); - - return uaddr; -} - -/* - * Walk curr->robust_list (very carefully, it's a userspace list!) - * and mark any locks found there dead, and notify any waiters. - * - * We silently return on any sign of list-walking problem. 
- */ -void compat_exit_robust_list(struct task_struct *curr) -{ - struct compat_robust_list_head __user *head = curr->compat_robust_list; - struct robust_list __user *entry, *next_entry, *pending; - unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; - unsigned int uninitialized_var(next_pi); - compat_uptr_t uentry, next_uentry, upending; - compat_long_t futex_offset; - int rc; - - if (!futex_cmpxchg_enabled) - return; - - /* - * Fetch the list head (which was registered earlier, via - * sys_set_robust_list()): - */ - if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) - return; - /* - * Fetch the relative futex offset: - */ - if (get_user(futex_offset, &head->futex_offset)) - return; - /* - * Fetch any possibly pending lock-add first, and handle it - * if it exists: - */ - if (fetch_robust_entry(&upending, &pending, - &head->list_op_pending, &pip)) - return; - - next_entry = NULL; /* avoid warning with gcc */ - while (entry != (struct robust_list __user *) &head->list) { - /* - * Fetch the next entry in the list before calling - * handle_futex_death: - */ - rc = fetch_robust_entry(&next_uentry, &next_entry, - (compat_uptr_t __user *)&entry->next, &next_pi); - /* - * A pending lock might already be on the list, so - * dont process it twice: - */ - if (entry != pending) { - void __user *uaddr = futex_uaddr(entry, futex_offset); - - if (handle_futex_death(uaddr, curr, pi)) - return; - } - if (rc) - return; - uentry = next_uentry; - entry = next_entry; - pi = next_pi; - /* - * Avoid excessively long or circular lists: - */ - if (!--limit) - break; - - cond_resched(); - } - if (pending) { - void __user *uaddr = futex_uaddr(pending, futex_offset); - - handle_futex_death(uaddr, curr, pip); - } -} - -COMPAT_SYSCALL_DEFINE2(set_robust_list, - struct compat_robust_list_head __user *, head, - compat_size_t, len) -{ - if (!futex_cmpxchg_enabled) - return -ENOSYS; - - if (unlikely(len != sizeof(*head))) - return -EINVAL; - - current->compat_robust_list = head; - - 
return 0; -} - -COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - compat_uptr_t __user *, head_ptr, - compat_size_t __user *, len_ptr) -{ - struct compat_robust_list_head __user *head; - unsigned long ret; - struct task_struct *p; - - if (!futex_cmpxchg_enabled) - return -ENOSYS; - - rcu_read_lock(); - - ret = -ESRCH; - if (!pid) - p = current; - else { - p = find_task_by_vpid(pid); - if (!p) - goto err_unlock; - } - - ret = -EPERM; - if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) - goto err_unlock; - - head = p->compat_robust_list; - rcu_read_unlock(); - - if (put_user(sizeof(*head), len_ptr)) - return -EFAULT; - return put_user(ptr_to_compat(head), head_ptr); - -err_unlock: - rcu_read_unlock(); - - return ret; -} - -COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct old_timespec32 __user *, utime, u32 __user *, uaddr2, - u32, val3) -{ - struct timespec ts; - ktime_t t, *tp = NULL; - int val2 = 0; - int cmd = op & FUTEX_CMD_MASK; - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || - cmd == FUTEX_WAIT_REQUEUE_PI)) { - if (compat_get_timespec(&ts, utime)) - return -EFAULT; - if (!timespec_valid(&ts)) - return -EINVAL; - - t = timespec_to_ktime(ts); - if (cmd == FUTEX_WAIT) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } - if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (int) (unsigned long) utime; - - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); -} -- cgit v1.2.3 From bec2f7cbb73eadf5e1cc7d54ecb0980ede244257 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Apr 2018 17:23:35 +0200 Subject: y2038: futex: Add support for __kernel_timespec This prepares sys_futex for y2038 safe calling: the native syscall is changed to receive a __kernel_timespec argument, which will be switched to 64-bit time_t in the future. 
All the internal time handling gets changed to timespec64, and the compat_sys_futex entry point is moved under the CONFIG_COMPAT_32BIT_TIME check to provide compatibility for existing 32-bit architectures. Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 2 +- kernel/futex.c | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a27cf407de92..247ad9eca955 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -553,7 +553,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags); /* kernel/futex.c */ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, - struct timespec __user *utime, u32 __user *uaddr2, + struct __kernel_timespec __user *utime, u32 __user *uaddr2, u32 val3); asmlinkage long sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, diff --git a/kernel/futex.c b/kernel/futex.c index 5cc7c3b098e9..b305beaab739 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3558,10 +3558,10 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct timespec __user *, utime, u32 __user *, uaddr2, + struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, u32, val3) { - struct timespec ts; + struct timespec64 ts; ktime_t t, *tp = NULL; u32 val2 = 0; int cmd = op & FUTEX_CMD_MASK; @@ -3571,12 +3571,12 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, cmd == FUTEX_WAIT_REQUEUE_PI)) { if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) return -EFAULT; - if (copy_from_user(&ts, utime, sizeof(ts)) != 0) + if (get_timespec64(&ts, utime)) return -EFAULT; - if (!timespec_valid(&ts)) + if (!timespec64_valid(&ts)) return -EINVAL; - t = timespec_to_ktime(ts); + t = timespec64_to_ktime(ts); if (cmd == FUTEX_WAIT) t = ktime_add_safe(ktime_get(), t); tp = &t; @@ -3747,12 +3747,14 @@ 
err_unlock: return ret; } +#endif /* CONFIG_COMPAT */ +#ifdef CONFIG_COMPAT_32BIT_TIME COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, struct old_timespec32 __user *, utime, u32 __user *, uaddr2, u32, val3) { - struct timespec ts; + struct timespec64 ts; ktime_t t, *tp = NULL; int val2 = 0; int cmd = op & FUTEX_CMD_MASK; @@ -3760,12 +3762,12 @@ COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || cmd == FUTEX_WAIT_REQUEUE_PI)) { - if (compat_get_timespec(&ts, utime)) + if (get_old_timespec32(&ts, utime)) return -EFAULT; - if (!timespec_valid(&ts)) + if (!timespec64_valid(&ts)) return -EINVAL; - t = timespec_to_ktime(ts); + t = timespec64_to_ktime(ts); if (cmd == FUTEX_WAIT) t = ktime_add_safe(ktime_get(), t); tp = &t; @@ -3776,7 +3778,7 @@ COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); } -#endif /* CONFIG_COMPAT */ +#endif /* CONFIG_COMPAT_32BIT_TIME */ static void __init futex_detect_cmpxchg(void) { -- cgit v1.2.3 From 6e0de61107f03c3222550d9b548cd331d31d82d1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Dec 2018 06:50:40 -0700 Subject: blk-mq: remove QUEUE_FLAG_POLL from default MQ flags We only support polling if we have poll queues now, but the flag is being set by default. Remove the default QUEUE_FLAG_POLL setting, we'll set it in blk_mq_init_allocated_queue() if we have poll queues available for this device. 
Fixes: 6544d229bf43 ("block: enable polling by default if a poll map is initalized") Reported-by: Kirill Tkhai Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0b3874bdbc6a..81f1b105946b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -606,8 +606,7 @@ struct request_queue { (1 << QUEUE_FLAG_ADD_RANDOM)) #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_SAME_COMP) | \ - (1 << QUEUE_FLAG_POLL)) + (1 << QUEUE_FLAG_SAME_COMP)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -- cgit v1.2.3 From 0fe061b9f03c27d0370888efc22d4b3ac7af90cf Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:26 -0500 Subject: blkcg: fix ref count issue with bio_blkcg() using task_css The bio_blkcg() function turns out to be inconsistent and consequently dangerous to use. The first part returns a blkcg where a reference is owned by the bio meaning it does not need to be rcu protected. However, the third case, the last line, is problematic: return css_to_blkcg(task_css(current, io_cgrp_id)); This can race against task migration and the cgroup dying. It is also semantically different as it must be called rcu protected and is susceptible to failure when trying to get a reference to it. This patch adds association ahead of calling bio_blkcg() rather than after. This makes association a required and explicit step along the code paths for calling bio_blkcg(). In blk-iolatency, association is moved above the bio_blkcg() call to ensure it will not return %NULL. BFQ uses the old bio_blkcg() function, but I do not want to address it in this series due to the complexity. I have created a private version documenting the inconsistency and noting not to use it. 
Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 4 +- block/bfq-iosched.c | 2 +- block/bio.c | 10 ++++- block/blk-iolatency.c | 2 +- include/linux/blk-cgroup.h | 98 ++++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 102 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index a7a1712632b0..c6113af31960 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = bio_blkcg(bio)->css.serial_nr; + serial_nr = __bio_blkcg(bio)->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger @@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) goto out; - bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); + bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 67b22c924aee..3d1f319fe977 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, rcu_read_lock(); - bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio)); + bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); if (!bfqg) { bfqq = &bfqd->oom_bfqq; goto out; diff --git a/block/bio.c b/block/bio.c index 03895cc0d74a..346a7f5cb2dd 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1990,13 +1990,19 @@ int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) * * This function takes an extra reference of @blkcg_css which will be put * when @bio is released. The caller must own @bio and is responsible for - * synchronizing calls to this function. 
+ * synchronizing calls to this function. If @blkcg_css is %NULL, a call to + * blkcg_get_css() finds the current css from the kthread or task. */ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { if (unlikely(bio->bi_css)) return -EBUSY; - css_get(blkcg_css); + + if (blkcg_css) + css_get(blkcg_css); + else + blkcg_css = blkcg_get_css(); + bio->bi_css = blkcg_css; return 0; } diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 5f7f1773be61..fe0c4ca312ff 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -481,8 +481,8 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) return; rcu_read_lock(); + bio_associate_blkcg(bio, NULL); blkcg = bio_blkcg(bio); - bio_associate_blkcg(bio, &blkcg->css); blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { spin_lock_irq(&q->queue_lock); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index a9e2e2037129..f619307171a6 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -227,22 +227,103 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); +/** + * blkcg_css - find the current css + * + * Find the css associated with either the kthread or the current task. + * This may return a dying css, so it is up to the caller to use tryget logic + * to confirm it is alive and well. + */ +static inline struct cgroup_subsys_state *blkcg_css(void) +{ + struct cgroup_subsys_state *css; + + css = kthread_blkcg(); + if (css) + return css; + return task_css(current, io_cgrp_id); +} + +/** + * blkcg_get_css - find and get a reference to the css + * + * Find the css associated with either the kthread or the current task. + * This takes a reference on the blkcg which will need to be managed by the + * caller. 
+ */ +static inline struct cgroup_subsys_state *blkcg_get_css(void) +{ + struct cgroup_subsys_state *css; + + rcu_read_lock(); + + css = kthread_blkcg(); + if (css) { + css_get(css); + } else { + /* + * This is a bit complicated. It is possible task_css() is + * seeing an old css pointer here. This is caused by the + * current thread migrating away from this cgroup and this + * cgroup dying. css_tryget() will fail when trying to take a + * ref on a cgroup that's ref count has hit 0. + * + * Therefore, if it does fail, this means current must have + * been swapped away already and this is waiting for it to + * propagate on the polling cpu. Hence the use of cpu_relax(). + */ + while (true) { + css = task_css(current, io_cgrp_id); + if (likely(css_tryget(css))) + break; + cpu_relax(); + } + } + + rcu_read_unlock(); + + return css; +} static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -static inline struct blkcg *bio_blkcg(struct bio *bio) +/** + * __bio_blkcg - internal, inconsistent version to get blkcg + * + * DO NOT USE. + * This function is inconsistent and consequently is dangerous to use. The + * first part of the function returns a blkcg where a reference is owned by the + * bio. This means it does not need to be rcu protected as it cannot go away + * with the bio owning a reference to it. However, the latter potentially gets + * it from task_css(). This can race against task migration and the cgroup + * dying. It is also semantically different as it must be called rcu protected + * and is susceptible to failure when trying to get a reference to it. + * Therefore, it is not ok to assume that *_get() will always succeed on the + * blkcg returned here. 
+ */ +static inline struct blkcg *__bio_blkcg(struct bio *bio) { - struct cgroup_subsys_state *css; + if (bio && bio->bi_css) + return css_to_blkcg(bio->bi_css); + return css_to_blkcg(blkcg_css()); +} +/** + * bio_blkcg - grab the blkcg associated with a bio + * @bio: target bio + * + * This returns the blkcg associated with a bio, %NULL if not associated. + * Callers are expected to either handle %NULL or know association has been + * done prior to calling this. + */ +static inline struct blkcg *bio_blkcg(struct bio *bio) +{ if (bio && bio->bi_css) return css_to_blkcg(bio->bi_css); - css = kthread_blkcg(); - if (css) - return css_to_blkcg(css); - return css_to_blkcg(task_css(current, io_cgrp_id)); + return NULL; } static inline bool blk_cgroup_congested(void) @@ -710,10 +791,10 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, bool throtl = false; rcu_read_lock(); - blkcg = bio_blkcg(bio); /* associate blkcg if bio hasn't attached one */ - bio_associate_blkcg(bio, &blkcg->css); + bio_associate_blkcg(bio, NULL); + blkcg = bio_blkcg(bio); blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { @@ -835,6 +916,7 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } +static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, -- cgit v1.2.3 From b978962ad4f7f9c06e5aa07b2a9b22f6d600456c Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:27 -0500 Subject: blkcg: update blkg_lookup_create() to do locking To know when to create a blkg, the general pattern is to do a blkg_lookup() and if that fails, lock and do the lookup again, and if that fails finally create. It doesn't make much sense for everyone who wants to do creation to write this themselves. 
This changes blkg_lookup_create() to do locking and implement this pattern. The old blkg_lookup_create() is renamed to __blkg_lookup_create(). If a call site wants to do its own error handling or already owns the queue lock, they can use __blkg_lookup_create(). This will be used in upcoming patches. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Reviewed-by: Liu Bo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 28 +++++++++++++++++++++++++--- block/blk-iolatency.c | 2 +- include/linux/blk-cgroup.h | 4 +++- 3 files changed, 29 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 63d226a084cd..b421a9457e05 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -249,7 +249,7 @@ err_free_blkg: } /** - * blkg_lookup_create - lookup blkg, try to create one if not there + * __blkg_lookup_create - lookup blkg, try to create one if not there * @blkcg: blkcg of interest * @q: request_queue of interest * @@ -262,8 +262,8 @@ err_free_blkg: * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not * dead and bypassing, returns ERR_PTR(-EBUSY). */ -struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q) +struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) { struct blkcg_gq *blkg; @@ -293,6 +293,28 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, } } +/** + * blkg_lookup_create - find or create a blkg + * @blkcg: target block cgroup + * @q: target request_queue + * + * This looks up or creates the blkg representing the unique pair + * of the blkcg and the request_queue. 
+ */ +struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) +{ + struct blkcg_gq *blkg = blkg_lookup(blkcg, q); + + if (unlikely(!blkg)) { + spin_lock_irq(&q->queue_lock); + blkg = __blkg_lookup_create(blkcg, q); + spin_unlock_irq(&q->queue_lock); + } + + return blkg; +} + static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index fe0c4ca312ff..e6f68f15dee9 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -486,7 +486,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { spin_lock_irq(&q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); + blkg = __blkg_lookup_create(blkcg, q); if (IS_ERR(blkg)) blkg = NULL; spin_unlock_irq(&q->queue_lock); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index f619307171a6..b3b1a8187d23 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -181,6 +181,8 @@ extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); +struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q); int blkcg_init_queue(struct request_queue *q); @@ -799,7 +801,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { spin_lock_irq(&q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); + blkg = __blkg_lookup_create(blkcg, q); if (IS_ERR(blkg)) blkg = NULL; spin_unlock_irq(&q->queue_lock); -- cgit v1.2.3 From beea9da07d8a6228a7e4a31a83f9478d513bf03f Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:28 -0500 Subject: blkcg: convert blkg_lookup_create() to find closest blkg There are several scenarios where 
blkg_lookup_create() can fail such as the blkcg dying, request_queue is dying, or simply being OOM. Most handle this by simply falling back to the q->root_blkg and calling it a day. This patch implements the notion of closest blkg. During blkg_lookup_create(), if it fails to create, return the closest blkg found or the q->root_blkg. blkg_try_get_closest() is introduced and used during association so a bio is always attached to a blkg. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- block/bio.c | 17 ++++++++++------- block/blk-cgroup.c | 23 ++++++++++++++++------- block/blk-iolatency.c | 14 ++------------ block/blk-throttle.c | 4 +--- include/linux/blk-cgroup.h | 24 +++++++++++++++--------- 5 files changed, 44 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 346a7f5cb2dd..5c9828524adc 100644 --- a/block/bio.c +++ b/block/bio.c @@ -2009,21 +2009,24 @@ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) EXPORT_SYMBOL_GPL(bio_associate_blkcg); /** - * bio_associate_blkg - associate a bio with the specified blkg + * bio_associate_blkg - associate a bio with the a blkg * @bio: target bio * @blkg: the blkg to associate * - * Associate @bio with the blkg specified by @blkg. This is the queue specific - * blkcg information associated with the @bio, a reference will be taken on the - * @blkg and will be freed when the bio is freed. + * This tries to associate @bio with the specified @blkg. Association failure + * is handled by walking up the blkg tree. Therefore, the blkg associated can + * be anything between @blkg and the root_blkg. This situation only happens + * when a cgroup is dying and then the remaining bios will spill to the closest + * alive blkg. + * + * A reference will be taken on the @blkg and will be released when @bio is + * freed. 
*/ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) { if (unlikely(bio->bi_blkg)) return -EBUSY; - if (!blkg_try_get(blkg)) - return -ENODEV; - bio->bi_blkg = blkg; + bio->bi_blkg = blkg_try_get_closest(blkg); return 0; } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b421a9457e05..120f2e2835fb 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -258,9 +258,8 @@ err_free_blkg: * that all non-root blkg's have access to the parent blkg. This function * should be called under RCU read lock and @q->queue_lock. * - * Returns pointer to the looked up or created blkg on success, ERR_PTR() - * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not - * dead and bypassing, returns ERR_PTR(-EBUSY). + * Returns the blkg or the closest blkg if blkg_create() fails as it walks + * down from root. */ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q) @@ -276,19 +275,29 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, /* * Create blkgs walking down from blkcg_root to @blkcg, so that all - * non-root blkgs have access to their parents. + * non-root blkgs have access to their parents. Returns the closest + * blkg to the intended blkg should blkg_create() fail. 
*/ while (true) { struct blkcg *pos = blkcg; struct blkcg *parent = blkcg_parent(blkcg); - - while (parent && !__blkg_lookup(parent, q, false)) { + struct blkcg_gq *ret_blkg = q->root_blkg; + + while (parent) { + blkg = __blkg_lookup(parent, q, false); + if (blkg) { + /* remember closest blkg */ + ret_blkg = blkg; + break; + } pos = parent; parent = blkcg_parent(parent); } blkg = blkg_create(pos, q, NULL); - if (pos == blkcg || IS_ERR(blkg)) + if (IS_ERR(blkg)) + return ret_blkg; + if (pos == blkcg) return blkg; } } diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index e6f68f15dee9..46e86c34cf79 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -483,21 +483,11 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) rcu_read_lock(); bio_associate_blkcg(bio, NULL); blkcg = bio_blkcg(bio); - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) { - spin_lock_irq(&q->queue_lock); - blkg = __blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) - blkg = NULL; - spin_unlock_irq(&q->queue_lock); - } - if (!blkg) - goto out; - + blkg = blkg_lookup_create(blkcg, q); bio_issue_init(&bio->bi_issue, bio_sectors(bio)); bio_associate_blkg(bio, blkg); -out: rcu_read_unlock(); + while (blkg && blkg->parent) { struct iolatency_grp *iolat = blkg_to_lat(blkg); if (!iolat) { diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 8f0a104770ee..d648d6720f46 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2118,9 +2118,7 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) { #ifdef CONFIG_BLK_DEV_THROTTLING_LOW - /* fallback to root_blkg if we fail to get a blkg ref */ - if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV)) - bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg); + bio_associate_blkg(bio, tg_to_blkg(tg)); bio_issue_init(&bio->bi_issue, bio_sectors(bio)); #endif } diff --git 
a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index b3b1a8187d23..c08e96e521ed 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -545,6 +545,20 @@ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) return NULL; } +/** + * blkg_try_get_closest - try and get a blkg ref on the closet blkg + * @blkg: blkg to get + * + * This walks up the blkg tree to find the closest non-dying blkg and returns + * the blkg that it did association with as it may not be the passed in blkg. + */ +static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg) +{ + while (!atomic_inc_not_zero(&blkg->refcnt)) + blkg = blkg->parent; + + return blkg; +} void __blkg_release_rcu(struct rcu_head *rcu); @@ -797,15 +811,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, /* associate blkcg if bio hasn't attached one */ bio_associate_blkcg(bio, NULL); blkcg = bio_blkcg(bio); - - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) { - spin_lock_irq(&q->queue_lock); - blkg = __blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) - blkg = NULL; - spin_unlock_irq(&q->queue_lock); - } + blkg = blkg_lookup_create(blkcg, q); throtl = blk_throtl_bio(q, blkg, bio); -- cgit v1.2.3 From 2268c0feb0ffb1c1bb6e1d4d5505d30f485aa77b Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:29 -0500 Subject: blkcg: introduce common blkg association logic There are 3 ways blkg association can happen: association with the current css, with the page css (swap), or from the wbc css (writeback). This patch handles how association is done for the first case where we are associating based on the current css. If there is already a blkg associated, the css will be reused and association will be redone as the request_queue may have changed. 
Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bio.c | 62 +++++++++++++++++++++++++++++++++++++++++++-------- block/blk-iolatency.c | 10 ++------- block/blk-throttle.c | 6 ++--- include/linux/bio.h | 5 ++++- 4 files changed, 62 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 5c9828524adc..452b8e79b998 100644 --- a/block/bio.c +++ b/block/bio.c @@ -2009,7 +2009,21 @@ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) EXPORT_SYMBOL_GPL(bio_associate_blkcg); /** - * bio_associate_blkg - associate a bio with the a blkg + * bio_disassociate_blkg - puts back the blkg reference if associated + * @bio: target bio + * + * Helper to disassociate the blkg from @bio if a blkg is associated. + */ +void bio_disassociate_blkg(struct bio *bio) +{ + if (bio->bi_blkg) { + blkg_put(bio->bi_blkg); + bio->bi_blkg = NULL; + } +} + +/** + * __bio_associate_blkg - associate a bio with the a blkg * @bio: target bio * @blkg: the blkg to associate * @@ -2022,12 +2036,42 @@ EXPORT_SYMBOL_GPL(bio_associate_blkcg); * A reference will be taken on the @blkg and will be released when @bio is * freed. */ -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) +static void __bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) { - if (unlikely(bio->bi_blkg)) - return -EBUSY; + bio_disassociate_blkg(bio); + bio->bi_blkg = blkg_try_get_closest(blkg); - return 0; +} + +/** + * bio_associate_blkg - associate a bio with a blkg + * @bio: target bio + * + * Associate @bio with the blkg found from the bio's css and request_queue. + * If one is not found, bio_lookup_blkg() creates the blkg. If a blkg is + * already associated, the css is reused and association redone as the + * request_queue may have changed. 
+ */ +void bio_associate_blkg(struct bio *bio) +{ + struct request_queue *q = bio->bi_disk->queue; + struct blkcg *blkcg; + struct blkcg_gq *blkg; + + rcu_read_lock(); + + bio_associate_blkcg(bio, NULL); + blkcg = bio_blkcg(bio); + + if (!blkcg->css.parent) { + __bio_associate_blkg(bio, q->root_blkg); + } else { + blkg = blkg_lookup_create(blkcg, q); + + __bio_associate_blkg(bio, blkg); + } + + rcu_read_unlock(); } /** @@ -2040,10 +2084,7 @@ void bio_disassociate_task(struct bio *bio) css_put(bio->bi_css); bio->bi_css = NULL; } - if (bio->bi_blkg) { - blkg_put(bio->bi_blkg); - bio->bi_blkg = NULL; - } + bio_disassociate_blkg(bio); } /** @@ -2055,6 +2096,9 @@ void bio_clone_blkcg_association(struct bio *dst, struct bio *src) { if (src->bi_css) WARN_ON(bio_associate_blkcg(dst, src->bi_css)); + + if (src->bi_blkg) + __bio_associate_blkg(dst, src->bi_blkg); } EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); #endif /* CONFIG_BLK_CGROUP */ diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 46e86c34cf79..cdbd10564e66 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -472,21 +472,15 @@ static void check_scale_change(struct iolatency_grp *iolat) static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) { struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); - struct blkcg *blkcg; struct blkcg_gq *blkg; - struct request_queue *q = rqos->q; bool issue_as_root = bio_issue_as_root_blkg(bio); if (!blk_iolatency_enabled(blkiolat)) return; - rcu_read_lock(); - bio_associate_blkcg(bio, NULL); - blkcg = bio_blkcg(bio); - blkg = blkg_lookup_create(blkcg, q); + bio_associate_blkg(bio); + blkg = bio->bi_blkg; bio_issue_init(&bio->bi_issue, bio_sectors(bio)); - bio_associate_blkg(bio, blkg); - rcu_read_unlock(); while (blkg && blkg->parent) { struct iolatency_grp *iolat = blkg_to_lat(blkg); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index d648d6720f46..228c3a007ebc 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ 
-2115,10 +2115,10 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) } #endif -static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) +static void blk_throtl_assoc_bio(struct bio *bio) { #ifdef CONFIG_BLK_DEV_THROTTLING_LOW - bio_associate_blkg(bio, tg_to_blkg(tg)); + bio_associate_blkg(bio); bio_issue_init(&bio->bi_issue, bio_sectors(bio)); #endif } @@ -2143,7 +2143,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, throtl_update_latency_buckets(td); - blk_throtl_assoc_bio(tg, bio); + blk_throtl_assoc_bio(bio); blk_throtl_update_idletime(tg); sq = &tg->service_queue; diff --git a/include/linux/bio.h b/include/linux/bio.h index 056fb627edb3..62715a5a4f32 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -511,12 +511,15 @@ static inline int bio_associate_blkcg_from_page(struct bio *bio, #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); +void bio_disassociate_blkg(struct bio *bio); +void bio_associate_blkg(struct bio *bio); void bio_disassociate_task(struct bio *bio); void bio_clone_blkcg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ static inline int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { return 0; } +static inline void bio_disassociate_blkg(struct bio *bio) { } +static inline void bio_associate_blkg(struct bio *bio) { } static inline void bio_disassociate_task(struct bio *bio) { } static inline void bio_clone_blkcg_association(struct bio *dst, struct bio *src) { } -- cgit v1.2.3 From 5cdf2e3fea5ee37b66842d76a9b06e6dac0b933d Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:31 -0500 Subject: blkcg: associate blkg when associating a device Previously, blkg association was handled by controller specific code in blk-throttle and blk-iolatency. 
However, because a blkg represents a relationship between a blkcg and a request_queue, it makes sense to keep the blkg->q and bio->bi_disk->queue consistent. This patch moves association into the bio_set_dev() macro. This should cover the majority of cases where the device is set/changed keeping the two pointers consistent. Fallback code is added to blkcg_bio_issue_check() to catch any missing paths. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- block/bio.c | 1 + block/blk-iolatency.c | 4 +--- block/blk-throttle.c | 1 - include/linux/bio.h | 2 ++ include/linux/blk-cgroup.h | 18 ++++++++++-------- 5 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 41ebb3f8e2fc..1e852ab904aa 100644 --- a/block/bio.c +++ b/block/bio.c @@ -2074,6 +2074,7 @@ void bio_associate_blkg(struct bio *bio) rcu_read_unlock(); } +EXPORT_SYMBOL_GPL(bio_associate_blkg); /** * bio_disassociate_task - undo bio_associate_current() diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index cdbd10564e66..e6b47c255521 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -472,14 +472,12 @@ static void check_scale_change(struct iolatency_grp *iolat) static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) { struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); - struct blkcg_gq *blkg; + struct blkcg_gq *blkg = bio->bi_blkg; bool issue_as_root = bio_issue_as_root_blkg(bio); if (!blk_iolatency_enabled(blkiolat)) return; - bio_associate_blkg(bio); - blkg = bio->bi_blkg; bio_issue_init(&bio->bi_issue, bio_sectors(bio)); while (blkg && blkg->parent) { diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 228c3a007ebc..1c6529df2002 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2118,7 +2118,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) static void blk_throtl_assoc_bio(struct bio *bio) { #ifdef 
CONFIG_BLK_DEV_THROTTLING_LOW - bio_associate_blkg(bio); bio_issue_init(&bio->bi_issue, bio_sectors(bio)); #endif } diff --git a/include/linux/bio.h b/include/linux/bio.h index 62715a5a4f32..6ee2ea8b378a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -491,12 +491,14 @@ do { \ bio_clear_flag(bio, BIO_THROTTLED);\ (bio)->bi_disk = (bdev)->bd_disk; \ (bio)->bi_partno = (bdev)->bd_partno; \ + bio_associate_blkg(bio); \ } while (0) #define bio_copy_dev(dst, src) \ do { \ (dst)->bi_disk = (src)->bi_disk; \ (dst)->bi_partno = (src)->bi_partno; \ + bio_clone_blkcg_association(dst, src); \ } while (0) #define bio_dev(bio) \ diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c08e96e521ed..f09752968c2a 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -21,6 +21,7 @@ #include #include #include +#include /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) @@ -802,21 +803,23 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { - struct blkcg *blkcg; struct blkcg_gq *blkg; bool throtl = false; - rcu_read_lock(); + if (!bio->bi_blkg) { + char b[BDEVNAME_SIZE]; + + WARN_ONCE(1, + "no blkg associated for bio on block-device: %s\n", + bio_devname(bio, b)); + bio_associate_blkg(bio); + } - /* associate blkcg if bio hasn't attached one */ - bio_associate_blkcg(bio, NULL); - blkcg = bio_blkcg(bio); - blkg = blkg_lookup_create(blkcg, q); + blkg = bio->bi_blkg; throtl = blk_throtl_bio(q, blkg, bio); if (!throtl) { - blkg = blkg ?: q->root_blkg; /* * If the bio is flagged with BIO_QUEUE_ENTERED it means this * is a split bio and we would have already accounted for the @@ -828,7 +831,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } - rcu_read_unlock(); return !throtl; } -- 
cgit v1.2.3 From e439bedf6b24264f620cc05627e23a90054bde41 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:32 -0500 Subject: blkcg: consolidate bio_issue_init() to be a part of core bio_issue_init among other things initializes the timestamp for an IO. Rather than have this logic handled by policies, this consolidates it to be on the init paths (normal, clone, bounce clone). Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- block/bio.c | 1 + block/blk-iolatency.c | 2 -- block/blk-throttle.c | 8 -------- block/bounce.c | 1 + include/linux/blk-cgroup.h | 9 +++++++++ 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 1e852ab904aa..90089124b512 100644 --- a/block/bio.c +++ b/block/bio.c @@ -611,6 +611,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_io_vec = bio_src->bi_io_vec; bio_clone_blkcg_association(bio, bio_src); + blkcg_bio_issue_init(bio); } EXPORT_SYMBOL(__bio_clone_fast); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index e6b47c255521..5a79f06a730d 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -478,8 +478,6 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) if (!blk_iolatency_enabled(blkiolat)) return; - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); - while (blkg && blkg->parent) { struct iolatency_grp *iolat = blkg_to_lat(blkg); if (!iolat) { diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 1c6529df2002..1b97a73d2fb1 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2115,13 +2115,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) } #endif -static void blk_throtl_assoc_bio(struct bio *bio) -{ -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); -#endif -} - bool blk_throtl_bio(struct request_queue *q, struct 
blkcg_gq *blkg, struct bio *bio) { @@ -2142,7 +2135,6 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, throtl_update_latency_buckets(td); - blk_throtl_assoc_bio(bio); blk_throtl_update_idletime(tg); sq = &tg->service_queue; diff --git a/block/bounce.c b/block/bounce.c index 559c55bda040..cfb96d5170d0 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -278,6 +278,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, } bio_clone_blkcg_association(bio, bio_src); + blkcg_bio_issue_init(bio); return bio; } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index f09752968c2a..8b069c3775ee 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -800,6 +800,12 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg struct bio *bio) { return false; } #endif + +static inline void blkcg_bio_issue_init(struct bio *bio) +{ + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); +} + static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { @@ -831,6 +837,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } + blkcg_bio_issue_init(bio); + return !throtl; } @@ -936,6 +944,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } +static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } -- cgit v1.2.3 From 6a7f6d86a561473032287c8e4583eac5853c6efa Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:33 -0500 Subject: blkcg: associate a blkg for pages being evicted by swap A prior patch in this series added blkg association to bios issued by cgroups. 
There are two other paths that we want to attribute work back to the appropriate cgroup: swap and writeback. Here we modify the way swap tags bios to include the blkg. Writeback will be tackled in the next patch. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bio.c | 62 ++++++++++++++++++++++++++++++++--------------------- include/linux/bio.h | 6 +++--- mm/page_io.c | 2 +- 3 files changed, 42 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 90089124b512..f0f069c1823c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1957,30 +1957,6 @@ EXPORT_SYMBOL(bioset_init_from_src); #ifdef CONFIG_BLK_CGROUP -#ifdef CONFIG_MEMCG -/** - * bio_associate_blkcg_from_page - associate a bio with the page's blkcg - * @bio: target bio - * @page: the page to lookup the blkcg from - * - * Associate @bio with the blkcg from @page's owning memcg. This works like - * every other associate function wrt references. 
- */ -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) -{ - struct cgroup_subsys_state *blkcg_css; - - if (unlikely(bio->bi_css)) - return -EBUSY; - if (!page->mem_cgroup) - return 0; - blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, - &io_cgrp_subsys); - bio->bi_css = blkcg_css; - return 0; -} -#endif /* CONFIG_MEMCG */ - /** * bio_associate_blkcg - associate a bio with the specified blkcg * @bio: target bio @@ -2045,6 +2021,44 @@ static void __bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) bio->bi_blkg = blkg_try_get_closest(blkg); } +static void __bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ + struct blkcg_gq *blkg; + + rcu_read_lock(); + + blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue); + __bio_associate_blkg(bio, blkg); + + rcu_read_unlock(); +} + +#ifdef CONFIG_MEMCG +/** + * bio_associate_blkg_from_page - associate a bio with the page's blkg + * @bio: target bio + * @page: the page to lookup the blkcg from + * + * Associate @bio with the blkg from @page's owning memcg and the respective + * request_queue. This works like every other associate function wrt + * references. 
+ */ +void bio_associate_blkg_from_page(struct bio *bio, struct page *page) +{ + struct cgroup_subsys_state *css; + + if (unlikely(bio->bi_css)) + return; + if (!page->mem_cgroup) + return; + + css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); + bio->bi_css = css; + __bio_associate_blkg_from_css(bio, css); +} +#endif /* CONFIG_MEMCG */ + /** * bio_associate_blkg - associate a bio with a blkg * @bio: target bio diff --git a/include/linux/bio.h b/include/linux/bio.h index 6ee2ea8b378a..f13572c254a7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -505,10 +505,10 @@ do { \ disk_devt((bio)->bi_disk) #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page); +void bio_associate_blkg_from_page(struct bio *bio, struct page *page); #else -static inline int bio_associate_blkcg_from_page(struct bio *bio, - struct page *page) { return 0; } +static inline void bio_associate_blkg_from_page(struct bio *bio, + struct page *page) { } #endif #ifdef CONFIG_BLK_CGROUP diff --git a/mm/page_io.c b/mm/page_io.c index 5bdfd21c1bd9..3475733b1926 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, goto out; } bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); - bio_associate_blkcg_from_page(bio, page); + bio_associate_blkg_from_page(bio, page); count_swpout_vm_event(page); set_page_writeback(page); unlock_page(page); -- cgit v1.2.3 From fd42df305f804ddc0d5ac028e944784283b2f92d Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:34 -0500 Subject: blkcg: associate writeback bios with a blkg One of the goals of this series is to remove a separate reference to the css of the bio. This can and should be accessed via bio_blkcg(). In this patch, wbc_init_bio() now requires a bio to have a device associated with it. 
Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- Documentation/admin-guide/cgroup-v2.rst | 8 +++++--- block/bio.c | 18 ++++++++++++++++++ fs/buffer.c | 10 +++++----- fs/ext4/page-io.c | 2 +- include/linux/bio.h | 5 +++++ include/linux/writeback.h | 5 +++-- 6 files changed, 37 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 476722b7b636..baf19bf28385 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1879,8 +1879,10 @@ following two functions. wbc_init_bio(@wbc, @bio) Should be called for each bio carrying writeback data and - associates the bio with the inode's owner cgroup. Can be - called anytime between bio allocation and submission. + associates the bio with the inode's owner cgroup and the + corresponding request queue. This must be called after + a queue (device) has been associated with the bio and + before submission. wbc_account_io(@wbc, @page, @bytes) Should be called for each data segment being written out. @@ -1899,7 +1901,7 @@ the configuration, the bio may be executed at a lower priority and if the writeback session is holding shared resources, e.g. a journal entry, may lead to priority inversion. There is no one easy solution for the problem. Filesystems can try to work around specific problem -cases by skipping wbc_init_bio() or using bio_associate_blkcg() +cases by skipping wbc_init_bio() and using bio_associate_blkg() directly. 
diff --git a/block/bio.c b/block/bio.c index f0f069c1823c..b42477b6a225 100644 --- a/block/bio.c +++ b/block/bio.c @@ -2034,6 +2034,24 @@ static void __bio_associate_blkg_from_css(struct bio *bio, rcu_read_unlock(); } +/** + * bio_associate_blkg_from_css - associate a bio with a specified css + * @bio: target bio + * @css: target css + * + * Associate @bio with the blkg found by combining the css's blkg and the + * request_queue of the @bio. This takes a reference on the css that will + * be put upon freeing of @bio. + */ +void bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ + css_get(css); + bio->bi_css = css; + __bio_associate_blkg_from_css(bio, css); +} +EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); + #ifdef CONFIG_MEMCG /** * bio_associate_blkg_from_page - associate a bio with the page's blkg diff --git a/fs/buffer.c b/fs/buffer.c index 1286c2b95498..d60d61e8ed7d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3060,11 +3060,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, */ bio = bio_alloc(GFP_NOIO, 1); - if (wbc) { - wbc_init_bio(wbc, bio); - wbc_account_io(wbc, bh->b_page, bh->b_size); - } - bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); bio->bi_write_hint = write_hint; @@ -3084,6 +3079,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, op_flags |= REQ_PRIO; bio_set_op_attrs(bio, op, op_flags); + if (wbc) { + wbc_init_bio(wbc, bio); + wbc_account_io(wbc, bh->b_page, bh->b_size); + } + submit_bio(bio); return 0; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index db7590178dfc..2aa62d58d8dd 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io, bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); if (!bio) return -ENOMEM; - wbc_init_bio(io->io_wbc, bio); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); bio->bi_end_io 
= ext4_end_bio; bio->bi_private = ext4_get_io_end(io->io_end); io->io_bio = bio; io->io_next_block = bh->b_blocknr; + wbc_init_bio(io->io_wbc, bio); return 0; } diff --git a/include/linux/bio.h b/include/linux/bio.h index f13572c254a7..f0438061a5a3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -515,6 +515,8 @@ static inline void bio_associate_blkg_from_page(struct bio *bio, int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); void bio_disassociate_blkg(struct bio *bio); void bio_associate_blkg(struct bio *bio); +void bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css); void bio_disassociate_task(struct bio *bio); void bio_clone_blkcg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ @@ -522,6 +524,9 @@ static inline int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { return 0; } static inline void bio_disassociate_blkg(struct bio *bio) { } static inline void bio_associate_blkg(struct bio *bio) { } +static inline void bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ } static inline void bio_disassociate_task(struct bio *bio) { } static inline void bio_clone_blkcg_association(struct bio *dst, struct bio *src) { } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fdfd04e348f6..738a0c24874f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -246,7 +246,8 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup - * writeback context. + * writeback context. Must be called after the bio has been associated with + * a device. 
*/ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { @@ -257,7 +258,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) * regular writeback instead of writing things out itself. */ if (wbc->wb) - bio_associate_blkcg(bio, wbc->wb->blkcg_css); + bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ -- cgit v1.2.3 From db6638d7d177a8bc74c9e539e2e0d7d061c767b1 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:35 -0500 Subject: blkcg: remove bio->bi_css and instead use bio->bi_blkg Prior patches ensured that any bio that interacts with a request_queue is properly associated with a blkg. This makes bio->bi_css unnecessary as blkg maintains a reference to blkcg already. This removes the bio field bi_css and transfers corresponding uses to access via bi_blkg. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bio.c | 59 ++++++++++------------------------------------ block/bounce.c | 2 +- drivers/block/loop.c | 5 ++-- drivers/md/raid0.c | 2 +- include/linux/bio.h | 11 ++++----- include/linux/blk-cgroup.h | 8 +++---- include/linux/blk_types.h | 7 +++--- kernel/trace/blktrace.c | 4 ++-- 8 files changed, 32 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index b42477b6a225..2b6bc7b805ec 100644 --- a/block/bio.c +++ b/block/bio.c @@ -610,7 +610,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; - bio_clone_blkcg_association(bio, bio_src); + bio_clone_blkg_association(bio, bio_src); blkcg_bio_issue_init(bio); } EXPORT_SYMBOL(__bio_clone_fast); @@ -1957,34 +1957,6 @@ EXPORT_SYMBOL(bioset_init_from_src); #ifdef CONFIG_BLK_CGROUP -/** - * bio_associate_blkcg - associate a bio with the specified blkcg - * @bio: target bio - * @blkcg_css: css of the blkcg to associate - * - 
* Associate @bio with the blkcg specified by @blkcg_css. Block layer will - * treat @bio as if it were issued by a task which belongs to the blkcg. - * - * This function takes an extra reference of @blkcg_css which will be put - * when @bio is released. The caller must own @bio and is responsible for - * synchronizing calls to this function. If @blkcg_css is %NULL, a call to - * blkcg_get_css() finds the current css from the kthread or task. - */ -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) -{ - if (unlikely(bio->bi_css)) - return -EBUSY; - - if (blkcg_css) - css_get(blkcg_css); - else - blkcg_css = blkcg_get_css(); - - bio->bi_css = blkcg_css; - return 0; -} -EXPORT_SYMBOL_GPL(bio_associate_blkcg); - /** * bio_disassociate_blkg - puts back the blkg reference if associated * @bio: target bio @@ -1994,6 +1966,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkcg); void bio_disassociate_blkg(struct bio *bio) { if (bio->bi_blkg) { + /* a ref is always taken on css */ + css_put(&bio_blkcg(bio)->css); blkg_put(bio->bi_blkg); bio->bi_blkg = NULL; } @@ -2047,7 +2021,6 @@ void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { css_get(css); - bio->bi_css = css; __bio_associate_blkg_from_css(bio, css); } EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); @@ -2066,13 +2039,10 @@ void bio_associate_blkg_from_page(struct bio *bio, struct page *page) { struct cgroup_subsys_state *css; - if (unlikely(bio->bi_css)) - return; if (!page->mem_cgroup) return; css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); - bio->bi_css = css; __bio_associate_blkg_from_css(bio, css); } #endif /* CONFIG_MEMCG */ @@ -2094,8 +2064,10 @@ void bio_associate_blkg(struct bio *bio) rcu_read_lock(); - bio_associate_blkcg(bio, NULL); - blkcg = bio_blkcg(bio); + if (bio->bi_blkg) + blkcg = bio->bi_blkg->blkcg; + else + blkcg = css_to_blkcg(blkcg_get_css()); if (!blkcg->css.parent) { __bio_associate_blkg(bio, q->root_blkg); @@ -2115,27 
+2087,22 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg); */ void bio_disassociate_task(struct bio *bio) { - if (bio->bi_css) { - css_put(bio->bi_css); - bio->bi_css = NULL; - } bio_disassociate_blkg(bio); } /** - * bio_clone_blkcg_association - clone blkcg association from src to dst bio + * bio_clone_blkg_association - clone blkg association from src to dst bio * @dst: destination bio * @src: source bio */ -void bio_clone_blkcg_association(struct bio *dst, struct bio *src) +void bio_clone_blkg_association(struct bio *dst, struct bio *src) { - if (src->bi_css) - WARN_ON(bio_associate_blkcg(dst, src->bi_css)); - - if (src->bi_blkg) + if (src->bi_blkg) { + css_get(&bio_blkcg(src)->css); __bio_associate_blkg(dst, src->bi_blkg); + } } -EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); +EXPORT_SYMBOL_GPL(bio_clone_blkg_association); #endif /* CONFIG_BLK_CGROUP */ static void __init biovec_init_slabs(void) diff --git a/block/bounce.c b/block/bounce.c index cfb96d5170d0..ffb9e9ecfa7e 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -277,7 +277,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, } } - bio_clone_blkcg_association(bio, bio_src); + bio_clone_blkg_association(bio, bio_src); blkcg_bio_issue_init(bio); return bio; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 176ab1f28eca..0770004616de 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -77,6 +77,7 @@ #include #include #include +#include #include "loop.h" @@ -1820,8 +1821,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, /* always use the first bio's css */ #ifdef CONFIG_BLK_CGROUP - if (cmd->use_aio && rq->bio && rq->bio->bi_css) { - cmd->css = rq->bio->bi_css; + if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { + cmd->css = &bio_blkcg(rq->bio)->css; css_get(cmd->css); } else #endif diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ac1cffd2a09b..f3fb5bb8c82a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -542,7 +542,7 @@ 
static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) !discard_bio) continue; bio_chain(discard_bio, bio); - bio_clone_blkcg_association(discard_bio, bio); + bio_clone_blkg_association(discard_bio, bio); if (mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rdev->bdev), discard_bio, disk_devt(mddev->gendisk), diff --git a/include/linux/bio.h b/include/linux/bio.h index f0438061a5a3..84e1c4dc703a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -498,7 +498,7 @@ do { \ do { \ (dst)->bi_disk = (src)->bi_disk; \ (dst)->bi_partno = (src)->bi_partno; \ - bio_clone_blkcg_association(dst, src); \ + bio_clone_blkg_association(dst, src); \ } while (0) #define bio_dev(bio) \ @@ -512,24 +512,21 @@ static inline void bio_associate_blkg_from_page(struct bio *bio, #endif #ifdef CONFIG_BLK_CGROUP -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); void bio_disassociate_blkg(struct bio *bio); void bio_associate_blkg(struct bio *bio); void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css); void bio_disassociate_task(struct bio *bio); -void bio_clone_blkcg_association(struct bio *dst, struct bio *src); +void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ -static inline int bio_associate_blkcg(struct bio *bio, - struct cgroup_subsys_state *blkcg_css) { return 0; } static inline void bio_disassociate_blkg(struct bio *bio) { } static inline void bio_associate_blkg(struct bio *bio) { } static inline void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { } static inline void bio_disassociate_task(struct bio *bio) { } -static inline void bio_clone_blkcg_association(struct bio *dst, - struct bio *src) { } +static inline void bio_clone_blkg_association(struct bio *dst, + struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_HIGHMEM diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 
8b069c3775ee..f11c37f8ce09 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -309,8 +309,8 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) */ static inline struct blkcg *__bio_blkcg(struct bio *bio) { - if (bio && bio->bi_css) - return css_to_blkcg(bio->bi_css); + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; return css_to_blkcg(blkcg_css()); } @@ -324,8 +324,8 @@ static inline struct blkcg *__bio_blkcg(struct bio *bio) */ static inline struct blkcg *bio_blkcg(struct bio *bio) { - if (bio && bio->bi_css) - return css_to_blkcg(bio->bi_css); + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; return NULL; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c0ba1a038ff3..46c005d601ac 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -174,10 +174,11 @@ struct bio { void *bi_private; #ifdef CONFIG_BLK_CGROUP /* - * Optional css associated with this bio. Put on bio - * release. Read comment on top of bio_associate_current(). + * Represents the association of the css and request_queue for the bio. + * If a bio goes direct to device, it will not have a blkg as it will + * not have a request_queue associated with it. The reference is put + * on release of the bio. 
*/ - struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2868d85f1fb1..fac0ddf8a8e2 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return NULL; - if (!bio->bi_css) + if (!bio->bi_blkg) return NULL; - return cgroup_get_kernfs_id(bio->bi_css->cgroup); + return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup); } #else static union kernfs_node_id * -- cgit v1.2.3 From fc5a828bfad628c1092194f2814604943561c52d Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:36 -0500 Subject: blkcg: remove additional reference to the css The previous patch in this series removed carrying around a pointer to the css in blkg. However, the blkg association logic still relied on taking a reference on the css to ensure we wouldn't fail in getting a reference for the blkg. Here the implicit dependency on the css is removed. The association continues to rely on the tryget logic walking up the blkg tree. This streamlines the three ways that association can happen: normal, swap, and writeback. 
Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- block/bio.c | 66 ++++++++++++++++++++-------------------------- include/linux/blk-cgroup.h | 41 ---------------------------- include/linux/cgroup.h | 2 ++ kernel/cgroup/cgroup.c | 48 ++++++++++++++++++++++++++------- 4 files changed, 69 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 2b6bc7b805ec..ce1e512dca5a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1966,8 +1966,6 @@ EXPORT_SYMBOL(bioset_init_from_src); void bio_disassociate_blkg(struct bio *bio) { if (bio->bi_blkg) { - /* a ref is always taken on css */ - css_put(&bio_blkcg(bio)->css); blkg_put(bio->bi_blkg); bio->bi_blkg = NULL; } @@ -1995,33 +1993,31 @@ static void __bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) bio->bi_blkg = blkg_try_get_closest(blkg); } -static void __bio_associate_blkg_from_css(struct bio *bio, - struct cgroup_subsys_state *css) -{ - struct blkcg_gq *blkg; - - rcu_read_lock(); - - blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue); - __bio_associate_blkg(bio, blkg); - - rcu_read_unlock(); -} - /** * bio_associate_blkg_from_css - associate a bio with a specified css * @bio: target bio * @css: target css * * Associate @bio with the blkg found by combining the css's blkg and the - * request_queue of the @bio. This takes a reference on the css that will - * be put upon freeing of @bio. + * request_queue of the @bio. This falls back to the queue's root_blkg if + * the association fails with the css. 
*/ void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { - css_get(css); - __bio_associate_blkg_from_css(bio, css); + struct request_queue *q = bio->bi_disk->queue; + struct blkcg_gq *blkg; + + rcu_read_lock(); + + if (!css || !css->parent) + blkg = q->root_blkg; + else + blkg = blkg_lookup_create(css_to_blkcg(css), q); + + __bio_associate_blkg(bio, blkg); + + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); @@ -2032,8 +2028,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); * @page: the page to lookup the blkcg from * * Associate @bio with the blkg from @page's owning memcg and the respective - * request_queue. This works like every other associate function wrt - * references. + * request_queue. If cgroup_e_css returns %NULL, fall back to the queue's + * root_blkg. */ void bio_associate_blkg_from_page(struct bio *bio, struct page *page) { @@ -2042,8 +2038,12 @@ void bio_associate_blkg_from_page(struct bio *bio, struct page *page) if (!page->mem_cgroup) return; - css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); - __bio_associate_blkg_from_css(bio, css); + rcu_read_lock(); + + css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); + bio_associate_blkg_from_css(bio, css); + + rcu_read_unlock(); } #endif /* CONFIG_MEMCG */ @@ -2058,24 +2058,16 @@ void bio_associate_blkg_from_page(struct bio *bio, struct page *page) */ void bio_associate_blkg(struct bio *bio) { - struct request_queue *q = bio->bi_disk->queue; - struct blkcg *blkcg; - struct blkcg_gq *blkg; + struct cgroup_subsys_state *css; rcu_read_lock(); if (bio->bi_blkg) - blkcg = bio->bi_blkg->blkcg; + css = &bio_blkcg(bio)->css; else - blkcg = css_to_blkcg(blkcg_get_css()); + css = blkcg_css(); - if (!blkcg->css.parent) { - __bio_associate_blkg(bio, q->root_blkg); - } else { - blkg = blkg_lookup_create(blkcg, q); - - __bio_associate_blkg(bio, blkg); - } + bio_associate_blkg_from_css(bio, css); rcu_read_unlock(); } @@ -2097,10 
+2089,8 @@ void bio_disassociate_task(struct bio *bio) */ void bio_clone_blkg_association(struct bio *dst, struct bio *src) { - if (src->bi_blkg) { - css_get(&bio_blkcg(src)->css); + if (src->bi_blkg) __bio_associate_blkg(dst, src->bi_blkg); - } } EXPORT_SYMBOL_GPL(bio_clone_blkg_association); #endif /* CONFIG_BLK_CGROUP */ diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index f11c37f8ce09..284819a4d122 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -247,47 +247,6 @@ static inline struct cgroup_subsys_state *blkcg_css(void) return task_css(current, io_cgrp_id); } -/** - * blkcg_get_css - find and get a reference to the css - * - * Find the css associated with either the kthread or the current task. - * This takes a reference on the blkcg which will need to be managed by the - * caller. - */ -static inline struct cgroup_subsys_state *blkcg_get_css(void) -{ - struct cgroup_subsys_state *css; - - rcu_read_lock(); - - css = kthread_blkcg(); - if (css) { - css_get(css); - } else { - /* - * This is a bit complicated. It is possible task_css() is - * seeing an old css pointer here. This is caused by the - * current thread migrating away from this cgroup and this - * cgroup dying. css_tryget() will fail when trying to take a - * ref on a cgroup that's ref count has hit 0. - * - * Therefore, if it does fail, this means current must have - * been swapped away already and this is waiting for it to - * propagate on the polling cpu. Hence the use of cpu_relax(). - */ - while (true) { - css = task_css(current, io_cgrp_id); - if (likely(css_tryget(css))) - break; - cpu_relax(); - } - } - - rcu_read_unlock(); - - return css; -} - static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? 
container_of(css, struct blkcg, css) : NULL; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9d12757a65b0..9968332cceed 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -93,6 +93,8 @@ extern struct css_set init_css_set; bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); +struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, + struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6aaf5dd5383b..8b79318810ad 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, } /** - * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem + * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss * @cgrp: the cgroup of interest * @ss: the subsystem of interest (%NULL returns @cgrp->self) * @@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, * enabled. If @ss is associated with the hierarchy @cgrp is on, this * function is guaranteed to return non-NULL css. */ -static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, - struct cgroup_subsys *ss) +static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, + struct cgroup_subsys *ss) { lockdep_assert_held(&cgroup_mutex); @@ -523,6 +523,35 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, return cgroup_css(cgrp, ss); } +/** + * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem + * @cgrp: the cgroup of interest + * @ss: the subsystem of interest + * + * Find and get the effective css of @cgrp for @ss. 
The effective css is + * defined as the matching css of the nearest ancestor including self which + * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on, + * the root css is returned, so this function always returns a valid css. + * + * The returned css is not guaranteed to be online, and therefore it is the + * callers responsiblity to tryget a reference for it. + */ +struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, + struct cgroup_subsys *ss) +{ + struct cgroup_subsys_state *css; + + do { + css = cgroup_css(cgrp, ss); + + if (css) + return css; + cgrp = cgroup_parent(cgrp); + } while (cgrp); + + return init_css_set.subsys[ss->id]; +} + /** * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem * @cgrp: the cgroup of interest @@ -605,10 +634,11 @@ EXPORT_SYMBOL_GPL(of_css); * * Should be called under cgroup_[tree_]mutex. */ -#define for_each_e_css(css, ssid, cgrp) \ - for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ - if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \ - ; \ +#define for_each_e_css(css, ssid, cgrp) \ + for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ + if (!((css) = cgroup_e_css_by_mask(cgrp, \ + cgroup_subsys[(ssid)]))) \ + ; \ else /** @@ -1007,7 +1037,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, * @ss is in this hierarchy, so we want the * effective css from @cgrp. */ - template[i] = cgroup_e_css(cgrp, ss); + template[i] = cgroup_e_css_by_mask(cgrp, ss); } else { /* * @ss is not in this hierarchy, so we don't want @@ -3024,7 +3054,7 @@ static int cgroup_apply_control(struct cgroup *cgrp) return ret; /* - * At this point, cgroup_e_css() results reflect the new csses + * At this point, cgroup_e_css_by_mask() results reflect the new csses * making the following cgroup_update_dfl_csses() properly update * css associations of all tasks in the subtree. 
*/ -- cgit v1.2.3 From 6f70fb66182b02e50deea65e9a3a86b7bf659a39 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:37 -0500 Subject: blkcg: remove bio_disassociate_task() Now that a bio only holds a blkg reference, clean up is simply putting back that reference. Remove bio_disassociate_task() as it just calls bio_disassociate_blkg(), and call the latter directly. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- block/bio.c | 11 +---------- include/linux/bio.h | 2 -- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index ce1e512dca5a..7ec5316e6ecc 100644 --- a/block/bio.c +++ b/block/bio.c @@ -244,7 +244,7 @@ fallback: void bio_uninit(struct bio *bio) { - bio_disassociate_task(bio); + bio_disassociate_blkg(bio); } EXPORT_SYMBOL(bio_uninit); @@ -2073,15 +2073,6 @@ void bio_associate_blkg(struct bio *bio) } EXPORT_SYMBOL_GPL(bio_associate_blkg); -/** - * bio_disassociate_task - undo bio_associate_current() - * @bio: target bio - */ -void bio_disassociate_task(struct bio *bio) -{ - bio_disassociate_blkg(bio); -} - /** * bio_clone_blkg_association - clone blkg association from src to dst bio * @dst: destination bio diff --git a/include/linux/bio.h b/include/linux/bio.h index 84e1c4dc703a..7380b094dcca 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -516,7 +516,6 @@ void bio_disassociate_blkg(struct bio *bio); void bio_associate_blkg(struct bio *bio); void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css); -void bio_disassociate_task(struct bio *bio); void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ static inline void bio_disassociate_blkg(struct bio *bio) { } @@ -524,7 +523,6 @@ static inline void bio_associate_blkg(struct bio *bio) { } static inline void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { } 
-static inline void bio_disassociate_task(struct bio *bio) { } static inline void bio_clone_blkg_association(struct bio *dst, struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ -- cgit v1.2.3 From 7fcf2b033b84e261dca283bc2911aaea4b07b525 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:38 -0500 Subject: blkcg: change blkg reference counting to use percpu_ref Every bio is now associated with a blkg putting blkg_get, blkg_try_get, and blkg_put on the hot path. Switch over the refcnt in blkg to use percpu_ref. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 41 +++++++++++++++++++++++++++++++++++++++-- include/linux/blk-cgroup.h | 15 +++++---------- 2 files changed, 44 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 120f2e2835fb..2ca7611fe274 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -81,6 +81,37 @@ static void blkg_free(struct blkcg_gq *blkg) kfree(blkg); } +static void __blkg_release(struct rcu_head *rcu) +{ + struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); + + percpu_ref_exit(&blkg->refcnt); + + /* release the blkcg and parent blkg refs this blkg has been holding */ + css_put(&blkg->blkcg->css); + if (blkg->parent) + blkg_put(blkg->parent); + + wb_congested_put(blkg->wb_congested); + + blkg_free(blkg); +} + +/* + * A group is RCU protected, but having an rcu lock does not mean that one + * can access all the fields of blkg and assume these are valid. For + * example, don't try to follow throtl_data and request queue links. + * + * Having a reference to blkg under an rcu allows accesses to only values + * local to groups like group stats and group rate limits. 
+ */ +static void blkg_release(struct percpu_ref *ref) +{ + struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt); + + call_rcu(&blkg->rcu_head, __blkg_release); +} + /** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with @@ -107,7 +138,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; - atomic_set(&blkg->refcnt, 1); for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -207,6 +237,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_get(blkg->parent); } + ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0, + GFP_NOWAIT | __GFP_NOWARN); + if (ret) + goto err_cancel_ref; + /* invoke per-policy init */ for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -239,6 +274,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_put(blkg); return ERR_PTR(ret); +err_cancel_ref: + percpu_ref_exit(&blkg->refcnt); err_put_congested: wb_congested_put(wb_congested); err_put_css: @@ -367,7 +404,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ - blkg_put(blkg); + percpu_ref_kill(&blkg->refcnt); } /** diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 284819a4d122..d19ef15a673d 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -124,7 +124,7 @@ struct blkcg_gq { struct blkcg_gq *parent; /* reference count */ - atomic_t refcnt; + struct percpu_ref refcnt; /* is this blkg online? 
protected by both blkcg and q locks */ bool online; @@ -487,8 +487,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) */ static inline void blkg_get(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - atomic_inc(&blkg->refcnt); + percpu_ref_get(&blkg->refcnt); } /** @@ -500,7 +499,7 @@ static inline void blkg_get(struct blkcg_gq *blkg) */ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) { - if (atomic_inc_not_zero(&blkg->refcnt)) + if (percpu_ref_tryget(&blkg->refcnt)) return blkg; return NULL; } @@ -514,23 +513,19 @@ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) */ static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg) { - while (!atomic_inc_not_zero(&blkg->refcnt)) + while (!percpu_ref_tryget(&blkg->refcnt)) blkg = blkg->parent; return blkg; } -void __blkg_release_rcu(struct rcu_head *rcu); - /** * blkg_put - put a blkg reference * @blkg: blkg to put */ static inline void blkg_put(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - if (atomic_dec_and_test(&blkg->refcnt)) - call_rcu(&blkg->rcu_head, __blkg_release_rcu); + percpu_ref_put(&blkg->refcnt); } /** -- cgit v1.2.3 From 7754f669ffde3919e398a9e591cd7510d6cf4e73 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:39 -0500 Subject: blkcg: rename blkg_try_get() to blkg_tryget() blkg reference counting now uses percpu_ref rather than atomic_t. Let's make this consistent with css_tryget. This renames blkg_try_get to blkg_tryget and now returns a bool rather than the blkg or %NULL. 
Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bio.c | 2 +- block/blk-cgroup.c | 3 +-- block/blk-iolatency.c | 2 +- include/linux/blk-cgroup.h | 12 +++++------- 4 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 7ec5316e6ecc..06760543ec81 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1990,7 +1990,7 @@ static void __bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) { bio_disassociate_blkg(bio); - bio->bi_blkg = blkg_try_get_closest(blkg); + bio->bi_blkg = blkg_tryget_closest(blkg); } /** diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2ca7611fe274..6bd0619a7d6e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1736,8 +1736,7 @@ void blkcg_maybe_throttle_current(void) blkg = blkg_lookup(blkcg, q); if (!blkg) goto out; - blkg = blkg_try_get(blkg); - if (!blkg) + if (!blkg_tryget(blkg)) goto out; rcu_read_unlock(); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 5a79f06a730d..0b14c3d57769 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -698,7 +698,7 @@ static void blkiolatency_timer_fn(struct timer_list *t) * We could be exiting, don't access the pd unless we have a * ref on the blkg. */ - if (!blkg_try_get(blkg)) + if (!blkg_tryget(blkg)) continue; iolat = blkg_to_lat(blkg); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index d19ef15a673d..752de1becb5c 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -491,27 +491,25 @@ static inline void blkg_get(struct blkcg_gq *blkg) } /** - * blkg_try_get - try and get a blkg reference + * blkg_tryget - try and get a blkg reference * @blkg: blkg to get * * This is for use when doing an RCU lookup of the blkg. We may be in the midst * of freeing this blkg, so we can only use it if the refcnt is not zero. 
*/ -static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) +static inline bool blkg_tryget(struct blkcg_gq *blkg) { - if (percpu_ref_tryget(&blkg->refcnt)) - return blkg; - return NULL; + return percpu_ref_tryget(&blkg->refcnt); } /** - * blkg_try_get_closest - try and get a blkg ref on the closet blkg + * blkg_tryget_closest - try and get a blkg ref on the closet blkg * @blkg: blkg to get * * This walks up the blkg tree to find the closest non-dying blkg and returns * the blkg that it did association with as it may not be the passed in blkg. */ -static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg) +static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) { while (!percpu_ref_tryget(&blkg->refcnt)) blkg = blkg->parent; -- cgit v1.2.3 From 4705de735b3383792c84a92e57508d6865caa85f Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Thu, 6 Dec 2018 12:49:38 -0500 Subject: blkcg: put back rcu lock in blkcg_bio_issue_check() I was a little overzealous in removing the rcu_read_lock() call from blkcg_bio_issue_check() and it broke blk-throttle. Put it back. 
Fixes: e35403a034bf ("blkcg: associate blkg when associating a device") Signed-off-by: Dennis Zhou Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 752de1becb5c..bf13ecb0fe4f 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -764,6 +764,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, struct blkcg_gq *blkg; bool throtl = false; + rcu_read_lock(); + if (!bio->bi_blkg) { char b[BDEVNAME_SIZE]; @@ -791,6 +793,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkcg_bio_issue_init(bio); + rcu_read_unlock(); return !throtl; } -- cgit v1.2.3 From 12b2117161ddbdcdb69777404c5aa2a9fe6ad7d5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Nov 2018 10:28:12 -0700 Subject: nvme: introduce ctrl attributes enumeration We are growing more controller attributes, so use a proper enumeration for it. For now just add the 128-bit hostid which we support. 
Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/admin-cmd.c | 2 +- include/linux/nvme.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 1179f6314323..30778ffc46f5 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -304,7 +304,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) /* XXX: figure out what to do about RTD3R/RTD3 */ id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL); - id->ctratt = cpu_to_le32(1 << 0); + id->ctratt = cpu_to_le32(NVME_CTRL_ATTR_HID_128_BIT); id->oacs = 0; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 818dbe9331be..753c83a5c01f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -198,6 +198,10 @@ enum { NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, }; +enum nvme_ctrl_attr { + NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), +}; + struct nvme_id_ctrl { __le16 vid; __le16 ssvid; -- cgit v1.2.3 From 6e3ca03ee934572d5de4fb2224c01e12c4d422c8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Nov 2018 10:28:15 -0700 Subject: nvme: support traffic based keep-alive If the controller supports traffic based keep alive, we restart the keep alive timer if any admin or io commands were completed during the kato period. This prevents a possible starvation of keep alive commands in the presence of heavy traffic as in such case, we already have a health indication from the host perspective. Only set a comp_seen indicator in case the controller supports keep alive to minimize the overhead for pci controllers.
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 13 +++++++++++++ drivers/nvme/host/nvme.h | 1 + include/linux/nvme.h | 1 + 3 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9de6244a345c..48ffb1d685c2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -251,6 +251,9 @@ void nvme_complete_rq(struct request *req) trace_nvme_complete_rq(req); + if (nvme_req(req)->ctrl->kas) + nvme_req(req)->ctrl->comp_seen = true; + if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { if ((req->cmd_flags & REQ_NVME_MPATH) && blk_path_error(status)) { @@ -839,6 +842,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) return; } + ctrl->comp_seen = false; schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); } @@ -863,6 +867,15 @@ static void nvme_keep_alive_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_ctrl, ka_work); + bool comp_seen = ctrl->comp_seen; + + if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) { + dev_dbg(ctrl->device, + "reschedule traffic based keep-alive timer\n"); + ctrl->comp_seen = false; + schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + return; + } if (nvme_keep_alive(ctrl)) { /* allocation failure, reset the controller */ diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 4be7bbcfe66d..f2594d468f29 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -145,6 +145,7 @@ enum nvme_ctrl_state { }; struct nvme_ctrl { + bool comp_seen; enum nvme_ctrl_state state; bool identified; spinlock_t lock; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 753c83a5c01f..429c4cf90899 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -200,6 +200,7 @@ enum { enum nvme_ctrl_attr { NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), + 
NVME_CTRL_ATTR_TBKAS = (1 << 6), }; struct nvme_id_ctrl { -- cgit v1.2.3 From 7114ddeb40c0ccc584d86df598da4054ca4cd79f Mon Sep 17 00:00:00 2001 From: Jay Sternberg Date: Mon, 12 Nov 2018 13:56:34 -0800 Subject: nvmet: change aen mask functions to use bit numbers Functions nvmet_aen_disabled and nvmet_clear_aen were using values not bit numbers ie 1 << 9 not 9 for bit function clear_bit and test_and_set_bit. Signed-off-by: Jay Sternberg Reviewed-by: Phil Cayton Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/admin-cmd.c | 4 ++-- drivers/nvme/target/core.c | 4 ++-- drivers/nvme/target/nvmet.h | 10 +++++----- include/linux/nvme.h | 12 +++++++++--- 4 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index e82262c988f1..2e89f4e3364b 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -176,7 +176,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req) if (!status) status = nvmet_zero_sgl(req, len, req->data_len - len); ctrl->nr_changed_ns = 0; - nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR); + nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR); mutex_unlock(&ctrl->lock); out: nvmet_req_complete(req, status); @@ -239,7 +239,7 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req) hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt); hdr.ngrps = cpu_to_le16(ngrps); - nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE); + nvmet_clear_aen_bit(req, NVME_AEN_BIT_ANA_CHANGE); up_read(&nvmet_ana_sem); kfree(desc); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index f33c4a20b572..f42a105ef17f 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -180,7 +180,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { nvmet_add_to_changed_ns_log(ctrl, 
cpu_to_le32(nsid)); - if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR)) + if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) continue; nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, NVME_AER_NOTICE_NS_CHANGED, @@ -197,7 +197,7 @@ void nvmet_send_ana_event(struct nvmet_subsys *subsys, list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { if (port && ctrl->port != port) continue; - if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE)) + if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE)) continue; nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, NVME_AER_NOTICE_ANA, NVME_LOG_ANA); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7efee345d467..8ddc54fa98c7 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -342,19 +342,19 @@ struct nvmet_async_event { u8 log_page; }; -static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit) +static inline void nvmet_clear_aen_bit(struct nvmet_req *req, u32 bn) { int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & 1 << 15; if (!rae) - clear_bit(aen_bit, &req->sq->ctrl->aen_masked); + clear_bit(bn, &req->sq->ctrl->aen_masked); } -static inline bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen) +static inline bool nvmet_aen_bit_disabled(struct nvmet_ctrl *ctrl, u32 bn) { - if (!(READ_ONCE(ctrl->aen_enabled) & aen)) + if (!(READ_ONCE(ctrl->aen_enabled) & (1 << bn))) return true; - return test_and_set_bit(aen, &ctrl->aen_masked); + return test_and_set_bit(bn, &ctrl->aen_masked); } u16 nvmet_parse_connect_cmd(struct nvmet_req *req); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 429c4cf90899..d6cfa194be80 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -489,9 +489,15 @@ enum { }; enum { - NVME_AEN_CFG_NS_ATTR = 1 << 8, - NVME_AEN_CFG_FW_ACT = 1 << 9, - NVME_AEN_CFG_ANA_CHANGE = 1 << 11, + NVME_AEN_BIT_NS_ATTR = 8, + NVME_AEN_BIT_FW_ACT = 9, + NVME_AEN_BIT_ANA_CHANGE = 11, +}; + +enum { + NVME_AEN_CFG_NS_ATTR = 1 
<< NVME_AEN_BIT_NS_ATTR, + NVME_AEN_CFG_FW_ACT = 1 << NVME_AEN_BIT_FW_ACT, + NVME_AEN_CFG_ANA_CHANGE = 1 << NVME_AEN_BIT_ANA_CHANGE, }; struct nvme_lba_range_type { -- cgit v1.2.3 From f301c2b1368905340133ff8ef4485befdd0b7e2d Mon Sep 17 00:00:00 2001 From: Jay Sternberg Date: Mon, 12 Nov 2018 13:56:37 -0800 Subject: nvmet: add defines for discovery change async events Add AEN/AER values as defined by the specification Signed-off-by: Jay Sternberg Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/nvmet.h | 2 ++ include/linux/nvme.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index a8ee265a3806..bc99c700a583 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -36,6 +36,8 @@ */ #define NVMET_AEN_CFG_OPTIONAL \ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE) +#define NVMET_DISC_AEN_CFG_OPTIONAL \ + (NVME_AEN_CFG_DISC_CHANGE) /* * Plus mandatory SMART AENs (we'll never send them, but allow enabling them): diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d6cfa194be80..77d320d32ee5 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -486,18 +486,21 @@ enum { NVME_AER_NOTICE_NS_CHANGED = 0x00, NVME_AER_NOTICE_FW_ACT_STARTING = 0x01, NVME_AER_NOTICE_ANA = 0x03, + NVME_AER_NOTICE_DISC_CHANGED = 0xf0, }; enum { NVME_AEN_BIT_NS_ATTR = 8, NVME_AEN_BIT_FW_ACT = 9, NVME_AEN_BIT_ANA_CHANGE = 11, + NVME_AEN_BIT_DISC_CHANGE = 31, }; enum { NVME_AEN_CFG_NS_ATTR = 1 << NVME_AEN_BIT_NS_ATTR, NVME_AEN_CFG_FW_ACT = 1 << NVME_AEN_BIT_FW_ACT, NVME_AEN_CFG_ANA_CHANGE = 1 << NVME_AEN_BIT_ANA_CHANGE, + NVME_AEN_CFG_DISC_CHANGE = 1 << NVME_AEN_BIT_DISC_CHANGE, }; struct nvme_lba_range_type { -- cgit v1.2.3 From 6e2e312ea7ff73acfafaa5c9851e151e9483c761 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 14 Nov 2018 15:57:46 -0800 Subject: nvmet-fc: remove 
the IN_ISR deferred scheduling options All target lldd's call the cmd receive and op completions in non-isr thread contexts. As such the IN_ISR options are not necessary. Remove the functionality and flags, which also removes cpu assignments to queues. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 66 ++---------------------------------------- include/linux/nvme-fc-driver.h | 16 ---------- 2 files changed, 2 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 409081a03b24..f98f5c5bea26 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -86,8 +86,6 @@ struct nvmet_fc_fcp_iod { spinlock_t flock; struct nvmet_req req; - struct work_struct work; - struct work_struct done_work; struct work_struct defer_work; struct nvmet_fc_tgtport *tgtport; @@ -134,7 +132,6 @@ struct nvmet_fc_tgt_queue { u16 sqsize; u16 ersp_ratio; __le16 sqhd; - int cpu; atomic_t connected; atomic_t sqtail; atomic_t zrspcnt; @@ -232,8 +229,6 @@ static LIST_HEAD(nvmet_fc_portentry_list); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); -static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); -static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work); static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work); static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc); static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); @@ -438,8 +433,6 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, int i; for (i = 0; i < queue->sqsize; fod++, i++) { - INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); - INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work); INIT_WORK(&fod->defer_work, nvmet_fc_fcp_rqst_op_defer_work); fod->tgtport = tgtport; fod->queue = queue; @@ -517,10 +510,7 @@ nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport, fcpreq->hwqid = 
queue->qid ? ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; - if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) - queue_work_on(queue->cpu, queue->work_q, &fod->work); - else - nvmet_fc_handle_fcp_rqst(tgtport, fod); + nvmet_fc_handle_fcp_rqst(tgtport, fod); } static void @@ -599,30 +589,6 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, queue_work(queue->work_q, &fod->defer_work); } -static int -nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid) -{ - int cpu, idx, cnt; - - if (tgtport->ops->max_hw_queues == 1) - return WORK_CPU_UNBOUND; - - /* Simple cpu selection based on qid modulo active cpu count */ - idx = !qid ? 0 : (qid - 1) % num_active_cpus(); - - /* find the n'th active cpu */ - for (cpu = 0, cnt = 0; ; ) { - if (cpu_active(cpu)) { - if (cnt == idx) - break; - cnt++; - } - cpu = (cpu + 1) % num_possible_cpus(); - } - - return cpu; -} - static struct nvmet_fc_tgt_queue * nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, u16 qid, u16 sqsize) @@ -653,7 +619,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, queue->qid = qid; queue->sqsize = sqsize; queue->assoc = assoc; - queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); INIT_LIST_HEAD(&queue->fod_list); INIT_LIST_HEAD(&queue->avail_defer_list); INIT_LIST_HEAD(&queue->pending_cmd_list); @@ -2145,26 +2110,12 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) } } -static void -nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work) -{ - struct nvmet_fc_fcp_iod *fod = - container_of(work, struct nvmet_fc_fcp_iod, done_work); - - nvmet_fc_fod_op_done(fod); -} - static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) { struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; - struct nvmet_fc_tgt_queue *queue = fod->queue; - if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR) - /* context switch so completion is not in ISR context */ - queue_work_on(queue->cpu, queue->work_q, &fod->done_work); 
- else - nvmet_fc_fod_op_done(fod); + nvmet_fc_fod_op_done(fod); } /* @@ -2332,19 +2283,6 @@ transport_error: nvmet_fc_abort_op(tgtport, fod); } -/* - * Actual processing routine for received FC-NVME LS Requests from the LLD - */ -static void -nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) -{ - struct nvmet_fc_fcp_iod *fod = - container_of(work, struct nvmet_fc_fcp_iod, work); - struct nvmet_fc_tgtport *tgtport = fod->tgtport; - - nvmet_fc_handle_fcp_rqst(tgtport, fod); -} - /** * nvmet_fc_rcv_fcp_req - transport entry point called by an LLDD * upon the reception of a NVME FCP CMD IU. diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index f4ab3b1925ac..91745cc3704c 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -648,22 +648,6 @@ enum { * sequence in one LLDD operation. Errors during Data * sequence transmit must not allow RSP sequence to be sent. */ - NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1), - /* Bit 2: When 0, the LLDD is calling the cmd rcv handler - * in a non-isr context, allowing the transport to finish - * op completion in the calling context. When 1, the LLDD - * is calling the cmd rcv handler in an ISR context, - * requiring the transport to transition to a workqueue - * for op completion. - */ - NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2), - /* Bit 3: When 0, the LLDD is calling the op done handler - * in a non-isr context, allowing the transport to finish - * op completion in the calling context. When 1, the LLDD - * is calling the op done handler in an ISR context, - * requiring the transport to transition to a workqueue - * for op completion. 
- */ }; -- cgit v1.2.3 From e6a622fd6d66b83779357e3400f487fc159a7d83 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 19 Nov 2018 14:11:12 -0800 Subject: nvmet: support fabrics sq flow control Technical proposal 8005 "fabrics SQ flow control" introduces a mode where a host and controller agree to omit sq_head pointer updates when sending nvme completions. In case the host indicated desire to operate in this mode (connect attribute) the controller will return back a connect completion with sq_head value of 0xffff as indication that it will omit sq_head pointer updates. This mode saves us an atomic update in the I/O path. Reviewed-by: Hannes Reinecke [hch: suggested better implementation] Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/core.c | 23 +++++++++++++---------- drivers/nvme/target/fabrics-cmd.c | 6 ++++++ drivers/nvme/target/nvmet.h | 1 + include/linux/nvme.h | 4 ++++ 4 files changed, 24 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 5aa5a3cc5395..2df70010e9f2 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -597,26 +597,28 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) return ns; } -static void __nvmet_req_complete(struct nvmet_req *req, u16 status) +static void nvmet_update_sq_head(struct nvmet_req *req) { - u32 old_sqhd, new_sqhd; - u16 sqhd; - - if (status) - nvmet_set_status(req, status); - if (req->sq->size) { + u32 old_sqhd, new_sqhd; + do { old_sqhd = req->sq->sqhd; new_sqhd = (old_sqhd + 1) % req->sq->size; } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != old_sqhd); } - sqhd = req->sq->sqhd & 0x0000FFFF; - req->rsp->sq_head = cpu_to_le16(sqhd); + req->rsp->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); +} + +static void __nvmet_req_complete(struct nvmet_req *req, u16 status) +{ + if (!req->sq->sqhd_disabled) + 
nvmet_update_sq_head(req); req->rsp->sq_id = cpu_to_le16(req->sq->qid); req->rsp->command_id = req->cmd->common.command_id; - + if (status) + nvmet_set_status(req, status); if (req->ns) nvmet_put_namespace(req->ns); req->ops->queue_response(req); @@ -765,6 +767,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->sg_cnt = 0; req->transfer_len = 0; req->rsp->status = 0; + req->rsp->sq_head = 0; req->ns = NULL; /* no support for fused commands yet */ diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index d84ae004cb85..328ae46d8344 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -115,6 +115,12 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) /* note: convert queue size from 0's-based value to 1's-based value */ nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1); nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1); + + if (c->cattr & NVME_CONNECT_DISABLE_SQFLOW) { + req->sq->sqhd_disabled = true; + req->rsp->sq_head = cpu_to_le16(0xffff); + } + return 0; } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 03988fe9d915..547108c41ce9 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -106,6 +106,7 @@ struct nvmet_sq { u16 qid; u16 size; u32 sqhd; + bool sqhd_disabled; struct completion free_done; struct completion confirm_done; }; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 77d320d32ee5..e7d731776f62 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1044,6 +1044,10 @@ struct nvmf_disc_rsp_page_hdr { struct nvmf_disc_rsp_page_entry entries[0]; }; +enum { + NVME_CONNECT_DISABLE_SQFLOW = (1 << 2), +}; + struct nvmf_connect_command { __u8 opcode; __u8 resv1; -- cgit v1.2.3 From 0445e1b5a2fed4612b7f72d9a56889c026b60aa9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 19 Nov 2018 14:11:13 -0800 Subject: nvmet: don't override treq upon modification. 
Only override the allowed parts of it. Reviewed-by: Hannes Reinecke Signed-off-by: Sagi Grimberg [hch: slight tweak to the NVME_TREQ_SECURE_CHANNEL_MASK definition] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 11 +++++++---- include/linux/nvme.h | 2 ++ 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index d37fd7713bbc..260a401db01c 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -153,7 +153,8 @@ CONFIGFS_ATTR(nvmet_, addr_traddr); static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) { - switch (to_nvmet_port(item)->disc_addr.treq) { + switch (to_nvmet_port(item)->disc_addr.treq & + NVME_TREQ_SECURE_CHANNEL_MASK) { case NVMF_TREQ_NOT_SPECIFIED: return sprintf(page, "not specified\n"); case NVMF_TREQ_REQUIRED: @@ -169,6 +170,7 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); + u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK; if (port->enabled) { pr_err("Cannot modify address while enabled\n"); @@ -177,15 +179,16 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, } if (sysfs_streq(page, "not specified")) { - port->disc_addr.treq = NVMF_TREQ_NOT_SPECIFIED; + treq |= NVMF_TREQ_NOT_SPECIFIED; } else if (sysfs_streq(page, "required")) { - port->disc_addr.treq = NVMF_TREQ_REQUIRED; + treq |= NVMF_TREQ_REQUIRED; } else if (sysfs_streq(page, "not required")) { - port->disc_addr.treq = NVMF_TREQ_NOT_REQUIRED; + treq |= NVMF_TREQ_NOT_REQUIRED; } else { pr_err("Invalid value '%s' for treq\n", page); return -EINVAL; } + port->disc_addr.treq = treq; return count; } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index e7d731776f62..4fc48071e5ea 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -61,6 +61,8 @@ enum { 
NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ NVMF_TREQ_REQUIRED = 1, /* Required */ NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ +#define NVME_TREQ_SECURE_CHANNEL_MASK \ + (NVMF_TREQ_REQUIRED | NVMF_TREQ_NOT_REQUIRED) }; /* RDMA QP Service Type codes for Discovery Log Page entry TSAS -- cgit v1.2.3 From 9b95d2fb857f242aacbf4e205656818b0ef067e1 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 20 Nov 2018 10:34:19 +0100 Subject: nvmet: expose support for fabrics SQ flow control disable in treq Technical Proposal introduces an indication for SQ flow control disable support. Expose it since we are able to operate in this mode. Reviewed-by: Hannes Reinecke Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 1 + include/linux/nvme.h | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 260a401db01c..db2cb64be7ba 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1214,6 +1214,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, port->inline_data_size = -1; /* < 0 == let the transport choose */ port->disc_addr.portid = cpu_to_le16(portid); + port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW; config_group_init_type_name(&port->group, name, &nvmet_port_type); config_group_init_type_name(&port->subsys_group, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4fc48071e5ea..c03973c215ad 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -58,11 +58,13 @@ enum { /* Transport Requirements codes for Discovery Log Page entry TREQ field */ enum { - NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ - NVMF_TREQ_REQUIRED = 1, /* Required */ - NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ + NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ + NVMF_TREQ_REQUIRED = 1, /* Required */ + NVMF_TREQ_NOT_REQUIRED = 
2, /* Not Required */ #define NVME_TREQ_SECURE_CHANNEL_MASK \ (NVMF_TREQ_REQUIRED | NVMF_TREQ_NOT_REQUIRED) + + NVMF_TREQ_DISABLE_SQFLOW = (1 << 2), /* Supports SQ flow control disable */ }; /* RDMA QP Service Type codes for Discovery Log Page entry TSAS -- cgit v1.2.3 From 49cd84b6f8b677ef45731ed56ddb802cdbb94c9e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 27 Nov 2018 09:40:57 -0700 Subject: nvme: implement Enhanced Command Retry A controller may have an internal state that is not able to successfully process commands for a short duration. In such states, an immediate command requeue is expected to fail. The driver may exceed its max retry count, which permanently ends the command in failure when the same command would succeed after waiting for the controller to be ready. NVMe ratified TP 4033 provides a delay hint in the completion status code for failed commands. Implement the retry delay based on the command completion status and the controller's requested delay. Note that requeued commands are handled per request_queue, not per individual request. If multiple commands fail, the controller should consistently report the desired delay time for retryable commands in all CQEs, otherwise the requeue list may be kicked too soon. 
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- drivers/nvme/host/nvme.h | 1 + include/linux/nvme.h | 17 ++++++++++++++++- 3 files changed, 62 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 71d2a89bbd1d..f90576862736 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -244,6 +244,22 @@ static inline bool nvme_req_needs_retry(struct request *req) return true; } +static void nvme_retry_req(struct request *req) +{ + struct nvme_ns *ns = req->q->queuedata; + unsigned long delay = 0; + u16 crd; + + /* The mask and shift result must be <= 3 */ + crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11; + if (ns && crd) + delay = ns->ctrl->crdt[crd - 1] * 100; + + nvme_req(req)->retries++; + blk_mq_requeue_request(req, false); + blk_mq_delay_kick_requeue_list(req->q, delay); +} + void nvme_complete_rq(struct request *req) { blk_status_t status = nvme_error_status(req); @@ -261,8 +277,7 @@ void nvme_complete_rq(struct request *req) } if (!blk_queue_dying(req->q)) { - nvme_req(req)->retries++; - blk_mq_requeue_request(req, true); + nvme_retry_req(req); return; } } @@ -1883,6 +1898,26 @@ static int nvme_configure_timestamp(struct nvme_ctrl *ctrl) return ret; } +static int nvme_configure_acre(struct nvme_ctrl *ctrl) +{ + struct nvme_feat_host_behavior *host; + int ret; + + /* Don't bother enabling the feature if retry delay is not reported */ + if (!ctrl->crdt[0]) + return 0; + + host = kzalloc(sizeof(*host), GFP_KERNEL); + if (!host) + return 0; + + host->acre = NVME_ENABLE_ACRE; + ret = nvme_set_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0, + host, sizeof(*host), NULL); + kfree(host); + return ret; +} + static int nvme_configure_apst(struct nvme_ctrl *ctrl) { /* @@ -2404,6 +2439,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) 
ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS; } + ctrl->crdt[0] = le16_to_cpu(id->crdt1); + ctrl->crdt[1] = le16_to_cpu(id->crdt2); + ctrl->crdt[2] = le16_to_cpu(id->crdt3); + ctrl->oacs = le16_to_cpu(id->oacs); ctrl->oncs = le16_to_cpup(&id->oncs); ctrl->oaes = le32_to_cpu(id->oaes); @@ -2504,6 +2543,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; + ret = nvme_configure_acre(ctrl); + if (ret < 0) + return ret; + ctrl->identified = true; return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f2594d468f29..79e621f5b326 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -181,6 +181,7 @@ struct nvme_ctrl { u32 page_size; u32 max_hw_sectors; u32 max_segments; + u16 crdt[3]; u16 oncs; u16 oacs; u16 nssa; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c03973c215ad..88812cb15be0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -223,7 +223,11 @@ struct nvme_id_ctrl { __le32 rtd3e; __le32 oaes; __le32 ctratt; - __u8 rsvd100[156]; + __u8 rsvd100[28]; + __le16 crdt1; + __le16 crdt2; + __le16 crdt3; + __u8 rsvd134[122]; __le16 oacs; __u8 acl; __u8 aerl; @@ -756,6 +760,15 @@ enum { NVME_HOST_MEM_RETURN = (1 << 1), }; +struct nvme_feat_host_behavior { + __u8 acre; + __u8 resv1[511]; +}; + +enum { + NVME_ENABLE_ACRE = 1, +}; + /* Admin commands */ enum nvme_admin_opcode { @@ -810,6 +823,7 @@ enum { NVME_FEAT_RRL = 0x12, NVME_FEAT_PLM_CONFIG = 0x13, NVME_FEAT_PLM_WINDOW = 0x14, + NVME_FEAT_HOST_BEHAVIOR = 0x16, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, @@ -1265,6 +1279,7 @@ enum { NVME_SC_ANA_TRANSITION = 0x303, NVME_SC_HOST_PATH_ERROR = 0x370, + NVME_SC_CRD = 0x1800, NVME_SC_DNR = 0x4000, }; -- cgit v1.2.3 From ad3bc25a320742f42b3015115384f5aec69c7ce2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 5 Dec 2018 00:34:56 +0100 Subject: x86/kernel: Fix more -Wmissing-prototypes warnings ... 
with the goal of eventually enabling -Wmissing-prototypes by default. At least on x86. Make functions static where possible, otherwise add prototypes or make them visible through includes. asm/trace/ changes courtesy of Steven Rostedt . Signed-off-by: Borislav Petkov Reviewed-by: Masami Hiramatsu Reviewed-by: Ingo Molnar Acked-by: Rafael J. Wysocki # ACPI + cpufreq bits Cc: Andrew Banman Cc: Dimitri Sivanich Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Mike Travis Cc: "Steven Rostedt (VMware)" Cc: Thomas Gleixner Cc: Yi Wang Cc: linux-acpi@vger.kernel.org --- arch/x86/include/asm/setup.h | 3 +++ arch/x86/include/asm/trace/exceptions.h | 1 + arch/x86/include/asm/trace/irq_vectors.h | 1 + arch/x86/include/asm/traps.h | 5 +++++ arch/x86/kernel/apic/apic.c | 1 + arch/x86/kernel/apic/apic_flat_64.c | 7 ++++--- arch/x86/kernel/apic/vector.c | 1 + arch/x86/kernel/apic/x2apic_uv_x.c | 4 ++-- arch/x86/kernel/asm-offsets.c | 3 ++- arch/x86/kernel/cpu/amd.c | 1 + arch/x86/kernel/cpu/aperfmperf.c | 1 + arch/x86/kernel/cpu/bugs.c | 2 ++ arch/x86/kernel/cpu/cacheinfo.c | 1 + arch/x86/kernel/cpu/scattered.c | 3 ++- arch/x86/kernel/cpu/topology.c | 2 ++ arch/x86/kernel/fpu/xstate.c | 2 +- arch/x86/kernel/kprobes/core.c | 2 ++ arch/x86/kernel/sysfb_efi.c | 3 +++ arch/x86/kernel/tracepoint.c | 1 + include/acpi/cppc_acpi.h | 3 +++ include/linux/kprobes.h | 3 +++ 21 files changed, 42 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ae13bc974416..ed8ec011a9fd 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -46,6 +46,9 @@ extern unsigned long saved_video_mode; extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); +extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp); +extern unsigned long __startup_secondary_64(void); +extern int early_make_pgtable(unsigned long address); #ifdef 
CONFIG_X86_INTEL_MID extern void x86_intel_mid_early_setup(void); diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h index 69615e387973..e0e6d7f21399 100644 --- a/arch/x86/include/asm/trace/exceptions.h +++ b/arch/x86/include/asm/trace/exceptions.h @@ -45,6 +45,7 @@ DEFINE_PAGE_FAULT_EVENT(page_fault_user); DEFINE_PAGE_FAULT_EVENT(page_fault_kernel); #undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE exceptions #endif /* _TRACE_PAGE_FAULT_H */ diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 0af81b590a0c..33b9d0f0aafe 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -389,6 +389,7 @@ TRACE_EVENT(vector_free_moved, #endif /* CONFIG_X86_LOCAL_APIC */ #undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE irq_vectors #endif /* _TRACE_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 5fcdf5687406..7d6f3f3fad78 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -113,6 +113,11 @@ asmlinkage void smp_threshold_interrupt(struct pt_regs *regs); asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); #endif +void smp_apic_timer_interrupt(struct pt_regs *regs); +void smp_spurious_interrupt(struct pt_regs *regs); +void smp_error_interrupt(struct pt_regs *regs); +asmlinkage void smp_irq_move_cleanup_interrupt(void); + extern void ist_enter(struct pt_regs *regs); extern void ist_exit(struct pt_regs *regs); extern void ist_begin_non_atomic(struct pt_regs *regs); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 32b2b7a41ef5..b7bcdd781651 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git 
a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index e84c9eb4e5b4..0005c284a5c5 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -8,6 +8,7 @@ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. */ +#include #include #include #include @@ -16,13 +17,13 @@ #include #include #include + #include -#include #include +#include +#include #include -#include - static struct apic apic_physflat; static struct apic apic_flat; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 652e7ffa9b9d..3173e07d3791 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 391f358ebb4c..a555da094157 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1079,7 +1079,7 @@ late_initcall(uv_init_heartbeat); #endif /* !CONFIG_HOTPLUG_CPU */ /* Direct Legacy VGA I/O traffic to designated IOH */ -int uv_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags) +static int uv_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags) { int domain, bus, rc; @@ -1148,7 +1148,7 @@ static void get_mn(struct mn *mnp) mnp->m_shift = mnp->m_val ? 
64 - mnp->m_val : 0; } -void __init uv_init_hub_info(struct uv_hub_info_s *hi) +static void __init uv_init_hub_info(struct uv_hub_info_s *hi) { union uvh_node_id_u node_id; struct mn mn; diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 72adf6c335dc..168543d077d7 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -29,7 +29,8 @@ # include "asm-offsets_64.c" #endif -void common(void) { +static void __used common(void) +{ BLANK(); OFFSET(TASK_threadsp, task_struct, thread.sp); #ifdef CONFIG_STACKPROTECTOR diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index eeea634bee0a..69f6bbb41be0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 # include diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 7eba34df54c3..804c49493938 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 500278f5308e..923e954a0075 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -32,6 +32,8 @@ #include #include +#include "cpu.h" + static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index dc1b9342e9c4..c4d1023fb0ab 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -17,6 +17,7 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 772c219b6889..389168fa6e24 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -5,9 +5,10 @@ #include #include +#include #include 
-#include +#include "cpu.h" struct cpuid_bit { u16 feature; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 71ca064e3794..8f6c784141d1 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -10,6 +10,8 @@ #include #include +#include "cpu.h" + /* leaf 0xb SMT level */ #define SMT_LEVEL 0 diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 87a57b7642d3..cd3956fc8158 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -811,7 +811,7 @@ void fpu__resume_cpu(void) * * Note: does not work for compacted buffers. */ -void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) +static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) { int feature_nr = fls64(xstate_feature_mask) - 1; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index c33b06f5faa4..6480056d370f 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -66,6 +66,8 @@ #include "common.h" +void *trampoline_handler(struct pt_regs *regs); + DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c index 623965e86b65..fa51723571c8 100644 --- a/arch/x86/kernel/sysfb_efi.c +++ b/arch/x86/kernel/sysfb_efi.c @@ -19,12 +19,15 @@ #include #include +#include #include #include #include #include #include #include