From 97c2b5cba2044f1c0bc3f14d7102176dbcf81af0 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Mon, 21 May 2018 10:59:54 +0100
Subject: mfd: madera: Add register definitions for Cirrus Logic Madera codecs

This patch adds a header file of register definitions for Cirrus
Logic "Madera" class codecs. These codecs are all based off a common
set of hardware IP so have a common register map (with a few minor
device-to-device variations).

The registers.h file is tool-generated directly from the hardware design
but has been manually stripped down to reduce size (full register
map is >44000 lines). All names are kept the same as datasheet names
so that they can be cross-referenced between source and datasheet without
confusion.

The register map layout is kept fully-defined rather than factored into
macros and/or block-indexing code. The major reasons for this are:

 - #1 is that it makes the source highly greppable, which is important.
   "What does the driver do with register bits XYZ" or "Where does it use
   register bits XYZ" are commonly types of questions. These can be quickly
   answered by a grep. Squashing definitions into generator macros or block-
   indexing code is a way of defeating grep.

 - most of the register definitions are used in tables, so a constant value
   is required. Using generator macros make the table definition clunky and
   obscure.

 - the code is clearer when it's there in the source exactly what register
   and field it is using

 - it is easier to diff the register map of a new (unsupported) codec against
   what is already supported and merge in differences

 - it makes the register map available in source for maintenance/debugging
   instead of having to refer back to the datasheet for a register map

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/madera/registers.h | 3917 ++++++++++++++++++++++++++++++++++
 1 file changed, 3917 insertions(+)
 create mode 100644 include/linux/mfd/madera/registers.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/madera/registers.h b/include/linux/mfd/madera/registers.h
new file mode 100644
index 000000000000..a6b7c4222c5e
--- /dev/null
+++ b/include/linux/mfd/madera/registers.h
@@ -0,0 +1,3917 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Madera register definitions
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#ifndef MADERA_REGISTERS_H
+#define MADERA_REGISTERS_H
+
+/*
+ * Register Addresses.
+ */
+#define MADERA_SOFTWARE_RESET				0x00
+#define MADERA_HARDWARE_REVISION			0x01
+#define MADERA_CTRL_IF_CFG_1				0x08
+#define MADERA_CTRL_IF_CFG_2				0x09
+#define MADERA_CTRL_IF_CFG_3				0x0A
+#define MADERA_WRITE_SEQUENCER_CTRL_0			0x16
+#define MADERA_WRITE_SEQUENCER_CTRL_1			0x17
+#define MADERA_WRITE_SEQUENCER_CTRL_2			0x18
+#define MADERA_TONE_GENERATOR_1				0x20
+#define MADERA_TONE_GENERATOR_2				0x21
+#define MADERA_TONE_GENERATOR_3				0x22
+#define MADERA_TONE_GENERATOR_4				0x23
+#define MADERA_TONE_GENERATOR_5				0x24
+#define MADERA_PWM_DRIVE_1				0x30
+#define MADERA_PWM_DRIVE_2				0x31
+#define MADERA_PWM_DRIVE_3				0x32
+#define MADERA_SEQUENCE_CONTROL				0x41
+#define MADERA_SAMPLE_RATE_SEQUENCE_SELECT_1		0x61
+#define MADERA_SAMPLE_RATE_SEQUENCE_SELECT_2		0x62
+#define MADERA_SAMPLE_RATE_SEQUENCE_SELECT_3		0x63
+#define MADERA_SAMPLE_RATE_SEQUENCE_SELECT_4		0x64
+#define MADERA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_1	0x66
+#define MADERA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_2	0x67
+#define MADERA_HAPTICS_CONTROL_1			0x90
+#define MADERA_HAPTICS_CONTROL_2			0x91
+#define MADERA_HAPTICS_PHASE_1_INTENSITY		0x92
+#define MADERA_HAPTICS_PHASE_1_DURATION			0x93
+#define MADERA_HAPTICS_PHASE_2_INTENSITY		0x94
+#define MADERA_HAPTICS_PHASE_2_DURATION			0x95
+#define MADERA_HAPTICS_PHASE_3_INTENSITY		0x96
+#define MADERA_HAPTICS_PHASE_3_DURATION			0x97
+#define MADERA_HAPTICS_STATUS				0x98
+#define MADERA_COMFORT_NOISE_GENERATOR			0xA0
+#define MADERA_CLOCK_32K_1				0x100
+#define MADERA_SYSTEM_CLOCK_1				0x101
+#define MADERA_SAMPLE_RATE_1				0x102
+#define MADERA_SAMPLE_RATE_2				0x103
+#define MADERA_SAMPLE_RATE_3				0x104
+#define MADERA_SAMPLE_RATE_1_STATUS			0x10A
+#define MADERA_SAMPLE_RATE_2_STATUS			0x10B
+#define MADERA_SAMPLE_RATE_3_STATUS			0x10C
+#define MADERA_ASYNC_CLOCK_1				0x112
+#define MADERA_ASYNC_SAMPLE_RATE_1			0x113
+#define MADERA_ASYNC_SAMPLE_RATE_2			0x114
+#define MADERA_ASYNC_SAMPLE_RATE_1_STATUS		0x11B
+#define MADERA_ASYNC_SAMPLE_RATE_2_STATUS		0x11C
+#define MADERA_DSP_CLOCK_1				0x120
+#define MADERA_DSP_CLOCK_2				0x122
+#define MADERA_OUTPUT_SYSTEM_CLOCK			0x149
+#define MADERA_OUTPUT_ASYNC_CLOCK			0x14A
+#define MADERA_RATE_ESTIMATOR_1				0x152
+#define MADERA_RATE_ESTIMATOR_2				0x153
+#define MADERA_RATE_ESTIMATOR_3				0x154
+#define MADERA_RATE_ESTIMATOR_4				0x155
+#define MADERA_RATE_ESTIMATOR_5				0x156
+#define MADERA_FLL1_CONTROL_1				0x171
+#define MADERA_FLL1_CONTROL_2				0x172
+#define MADERA_FLL1_CONTROL_3				0x173
+#define MADERA_FLL1_CONTROL_4				0x174
+#define MADERA_FLL1_CONTROL_5				0x175
+#define MADERA_FLL1_CONTROL_6				0x176
+#define MADERA_FLL1_LOOP_FILTER_TEST_1			0x177
+#define MADERA_FLL1_NCO_TEST_0				0x178
+#define MADERA_FLL1_CONTROL_7				0x179
+#define MADERA_FLL1_EFS_2				0x17A
+#define CS47L35_FLL1_SYNCHRONISER_1			0x17F
+#define CS47L35_FLL1_SYNCHRONISER_2			0x180
+#define CS47L35_FLL1_SYNCHRONISER_3			0x181
+#define CS47L35_FLL1_SYNCHRONISER_4			0x182
+#define CS47L35_FLL1_SYNCHRONISER_5			0x183
+#define CS47L35_FLL1_SYNCHRONISER_6			0x184
+#define CS47L35_FLL1_SYNCHRONISER_7			0x185
+#define CS47L35_FLL1_SPREAD_SPECTRUM			0x187
+#define CS47L35_FLL1_GPIO_CLOCK				0x188
+#define MADERA_FLL1_SYNCHRONISER_1			0x181
+#define MADERA_FLL1_SYNCHRONISER_2			0x182
+#define MADERA_FLL1_SYNCHRONISER_3			0x183
+#define MADERA_FLL1_SYNCHRONISER_4			0x184
+#define MADERA_FLL1_SYNCHRONISER_5			0x185
+#define MADERA_FLL1_SYNCHRONISER_6			0x186
+#define MADERA_FLL1_SYNCHRONISER_7			0x187
+#define MADERA_FLL1_SPREAD_SPECTRUM			0x189
+#define MADERA_FLL1_GPIO_CLOCK				0x18A
+#define MADERA_FLL2_CONTROL_1				0x191
+#define MADERA_FLL2_CONTROL_2				0x192
+#define MADERA_FLL2_CONTROL_3				0x193
+#define MADERA_FLL2_CONTROL_4				0x194
+#define MADERA_FLL2_CONTROL_5				0x195
+#define MADERA_FLL2_CONTROL_6				0x196
+#define MADERA_FLL2_LOOP_FILTER_TEST_1			0x197
+#define MADERA_FLL2_NCO_TEST_0				0x198
+#define MADERA_FLL2_CONTROL_7				0x199
+#define MADERA_FLL2_EFS_2				0x19A
+#define MADERA_FLL2_SYNCHRONISER_1			0x1A1
+#define MADERA_FLL2_SYNCHRONISER_2			0x1A2
+#define MADERA_FLL2_SYNCHRONISER_3			0x1A3
+#define MADERA_FLL2_SYNCHRONISER_4			0x1A4
+#define MADERA_FLL2_SYNCHRONISER_5			0x1A5
+#define MADERA_FLL2_SYNCHRONISER_6			0x1A6
+#define MADERA_FLL2_SYNCHRONISER_7			0x1A7
+#define MADERA_FLL2_SPREAD_SPECTRUM			0x1A9
+#define MADERA_FLL2_GPIO_CLOCK				0x1AA
+#define MADERA_FLL3_CONTROL_1				0x1B1
+#define MADERA_FLL3_CONTROL_2				0x1B2
+#define MADERA_FLL3_CONTROL_3				0x1B3
+#define MADERA_FLL3_CONTROL_4				0x1B4
+#define MADERA_FLL3_CONTROL_5				0x1B5
+#define MADERA_FLL3_CONTROL_6				0x1B6
+#define MADERA_FLL3_LOOP_FILTER_TEST_1			0x1B7
+#define MADERA_FLL3_NCO_TEST_0				0x1B8
+#define MADERA_FLL3_CONTROL_7				0x1B9
+#define MADERA_FLL3_SYNCHRONISER_1			0x1C1
+#define MADERA_FLL3_SYNCHRONISER_2			0x1C2
+#define MADERA_FLL3_SYNCHRONISER_3			0x1C3
+#define MADERA_FLL3_SYNCHRONISER_4			0x1C4
+#define MADERA_FLL3_SYNCHRONISER_5			0x1C5
+#define MADERA_FLL3_SYNCHRONISER_6			0x1C6
+#define MADERA_FLL3_SYNCHRONISER_7			0x1C7
+#define MADERA_FLL3_SPREAD_SPECTRUM			0x1C9
+#define MADERA_FLL3_GPIO_CLOCK				0x1CA
+#define MADERA_FLLAO_CONTROL_1				0x1D1
+#define MADERA_FLLAO_CONTROL_2				0x1D2
+#define MADERA_FLLAO_CONTROL_3				0x1D3
+#define MADERA_FLLAO_CONTROL_4				0x1D4
+#define MADERA_FLLAO_CONTROL_5				0x1D5
+#define MADERA_FLLAO_CONTROL_6				0x1D6
+#define MADERA_FLLAO_CONTROL_7				0x1D8
+#define MADERA_FLLAO_CONTROL_8				0x1DA
+#define MADERA_FLLAO_CONTROL_9				0x1DB
+#define MADERA_FLLAO_CONTROL_10				0x1DC
+#define MADERA_FLLAO_CONTROL_11				0x1DD
+#define MADERA_MIC_CHARGE_PUMP_1			0x200
+#define MADERA_HP_CHARGE_PUMP_8				0x20B
+#define MADERA_LDO1_CONTROL_1				0x210
+#define MADERA_LDO2_CONTROL_1				0x213
+#define MADERA_MIC_BIAS_CTRL_1				0x218
+#define MADERA_MIC_BIAS_CTRL_2				0x219
+#define MADERA_MIC_BIAS_CTRL_3				0x21A
+#define MADERA_MIC_BIAS_CTRL_4				0x21B
+#define MADERA_MIC_BIAS_CTRL_5				0x21C
+#define MADERA_MIC_BIAS_CTRL_6				0x21E
+#define MADERA_HP_CTRL_1L				0x225
+#define MADERA_HP_CTRL_1R				0x226
+#define MADERA_HP_CTRL_2L				0x227
+#define MADERA_HP_CTRL_2R				0x228
+#define MADERA_HP_CTRL_3L				0x229
+#define MADERA_HP_CTRL_3R				0x22A
+#define MADERA_DCS_HP1L_CONTROL				0x232
+#define MADERA_DCS_HP1R_CONTROL				0x238
+#define MADERA_EDRE_HP_STEREO_CONTROL			0x27E
+#define MADERA_ACCESSORY_DETECT_MODE_1			0x293
+#define MADERA_HEADPHONE_DETECT_0			0x299
+#define MADERA_HEADPHONE_DETECT_1			0x29B
+#define MADERA_HEADPHONE_DETECT_2			0x29C
+#define MADERA_HEADPHONE_DETECT_3			0x29D
+#define MADERA_HEADPHONE_DETECT_4			0x29E
+#define MADERA_HEADPHONE_DETECT_5			0x29F
+#define MADERA_MIC_DETECT_1_CONTROL_0			0x2A2
+#define MADERA_MIC_DETECT_1_CONTROL_1			0x2A3
+#define MADERA_MIC_DETECT_1_CONTROL_2			0x2A4
+#define MADERA_MIC_DETECT_1_CONTROL_3			0x2A5
+#define MADERA_MIC_DETECT_1_LEVEL_1			0x2A6
+#define MADERA_MIC_DETECT_1_LEVEL_2			0x2A7
+#define MADERA_MIC_DETECT_1_LEVEL_3			0x2A8
+#define MADERA_MIC_DETECT_1_LEVEL_4			0x2A9
+#define MADERA_MIC_DETECT_1_CONTROL_4			0x2AB
+#define MADERA_MIC_DETECT_2_CONTROL_0			0x2B2
+#define MADERA_MIC_DETECT_2_CONTROL_1			0x2B3
+#define MADERA_MIC_DETECT_2_CONTROL_2			0x2B4
+#define MADERA_MIC_DETECT_2_CONTROL_3			0x2B5
+#define MADERA_MIC_DETECT_2_LEVEL_1			0x2B6
+#define MADERA_MIC_DETECT_2_LEVEL_2			0x2B7
+#define MADERA_MIC_DETECT_2_LEVEL_3			0x2B8
+#define MADERA_MIC_DETECT_2_LEVEL_4			0x2B9
+#define MADERA_MIC_DETECT_2_CONTROL_4			0x2BB
+#define MADERA_MICD_CLAMP_CONTROL			0x2C6
+#define MADERA_GP_SWITCH_1				0x2C8
+#define MADERA_JACK_DETECT_ANALOGUE			0x2D3
+#define MADERA_INPUT_ENABLES				0x300
+#define MADERA_INPUT_ENABLES_STATUS			0x301
+#define MADERA_INPUT_RATE				0x308
+#define MADERA_INPUT_VOLUME_RAMP			0x309
+#define MADERA_HPF_CONTROL				0x30C
+#define MADERA_IN1L_CONTROL				0x310
+#define MADERA_ADC_DIGITAL_VOLUME_1L			0x311
+#define MADERA_DMIC1L_CONTROL				0x312
+#define MADERA_IN1L_RATE_CONTROL			0x313
+#define MADERA_IN1R_CONTROL				0x314
+#define MADERA_ADC_DIGITAL_VOLUME_1R			0x315
+#define MADERA_DMIC1R_CONTROL				0x316
+#define MADERA_IN1R_RATE_CONTROL			0x317
+#define MADERA_IN2L_CONTROL				0x318
+#define MADERA_ADC_DIGITAL_VOLUME_2L			0x319
+#define MADERA_DMIC2L_CONTROL				0x31A
+#define MADERA_IN2L_RATE_CONTROL			0x31B
+#define MADERA_IN2R_CONTROL				0x31C
+#define MADERA_ADC_DIGITAL_VOLUME_2R			0x31D
+#define MADERA_DMIC2R_CONTROL				0x31E
+#define MADERA_IN2R_RATE_CONTROL			0x31F
+#define MADERA_IN3L_CONTROL				0x320
+#define MADERA_ADC_DIGITAL_VOLUME_3L			0x321
+#define MADERA_DMIC3L_CONTROL				0x322
+#define MADERA_IN3L_RATE_CONTROL			0x323
+#define MADERA_IN3R_CONTROL				0x324
+#define MADERA_ADC_DIGITAL_VOLUME_3R			0x325
+#define MADERA_DMIC3R_CONTROL				0x326
+#define MADERA_IN3R_RATE_CONTROL			0x327
+#define MADERA_IN4L_CONTROL				0x328
+#define MADERA_ADC_DIGITAL_VOLUME_4L			0x329
+#define MADERA_DMIC4L_CONTROL				0x32A
+#define MADERA_IN4L_RATE_CONTROL			0x32B
+#define MADERA_IN4R_CONTROL				0x32C
+#define MADERA_ADC_DIGITAL_VOLUME_4R			0x32D
+#define MADERA_DMIC4R_CONTROL				0x32E
+#define MADERA_IN4R_RATE_CONTROL			0x32F
+#define MADERA_IN5L_CONTROL				0x330
+#define MADERA_ADC_DIGITAL_VOLUME_5L			0x331
+#define MADERA_DMIC5L_CONTROL				0x332
+#define MADERA_IN5L_RATE_CONTROL			0x333
+#define MADERA_IN5R_CONTROL				0x334
+#define MADERA_ADC_DIGITAL_VOLUME_5R			0x335
+#define MADERA_DMIC5R_CONTROL				0x336
+#define MADERA_IN5R_RATE_CONTROL			0x337
+#define MADERA_IN6L_CONTROL				0x338
+#define MADERA_ADC_DIGITAL_VOLUME_6L			0x339
+#define MADERA_DMIC6L_CONTROL				0x33A
+#define MADERA_IN6R_CONTROL				0x33C
+#define MADERA_ADC_DIGITAL_VOLUME_6R			0x33D
+#define MADERA_DMIC6R_CONTROL				0x33E
+#define MADERA_OUTPUT_ENABLES_1				0x400
+#define MADERA_OUTPUT_STATUS_1				0x401
+#define MADERA_RAW_OUTPUT_STATUS_1			0x406
+#define MADERA_OUTPUT_RATE_1				0x408
+#define MADERA_OUTPUT_VOLUME_RAMP			0x409
+#define MADERA_OUTPUT_PATH_CONFIG_1L			0x410
+#define MADERA_DAC_DIGITAL_VOLUME_1L			0x411
+#define MADERA_OUTPUT_PATH_CONFIG_1			0x412
+#define MADERA_NOISE_GATE_SELECT_1L			0x413
+#define MADERA_OUTPUT_PATH_CONFIG_1R			0x414
+#define MADERA_DAC_DIGITAL_VOLUME_1R			0x415
+#define MADERA_NOISE_GATE_SELECT_1R			0x417
+#define MADERA_OUTPUT_PATH_CONFIG_2L			0x418
+#define MADERA_DAC_DIGITAL_VOLUME_2L			0x419
+#define MADERA_OUTPUT_PATH_CONFIG_2			0x41A
+#define MADERA_NOISE_GATE_SELECT_2L			0x41B
+#define MADERA_OUTPUT_PATH_CONFIG_2R			0x41C
+#define MADERA_DAC_DIGITAL_VOLUME_2R			0x41D
+#define MADERA_NOISE_GATE_SELECT_2R			0x41F
+#define MADERA_OUTPUT_PATH_CONFIG_3L			0x420
+#define MADERA_DAC_DIGITAL_VOLUME_3L			0x421
+#define MADERA_NOISE_GATE_SELECT_3L			0x423
+#define MADERA_OUTPUT_PATH_CONFIG_3R			0x424
+#define MADERA_DAC_DIGITAL_VOLUME_3R			0x425
+#define MADERA_NOISE_GATE_SELECT_3R			0x427
+#define MADERA_OUTPUT_PATH_CONFIG_4L			0x428
+#define MADERA_DAC_DIGITAL_VOLUME_4L			0x429
+#define MADERA_NOISE_GATE_SELECT_4L			0x42B
+#define MADERA_OUTPUT_PATH_CONFIG_4R			0x42C
+#define MADERA_DAC_DIGITAL_VOLUME_4R			0x42D
+#define MADERA_NOISE_GATE_SELECT_4R			0x42F
+#define MADERA_OUTPUT_PATH_CONFIG_5L			0x430
+#define MADERA_DAC_DIGITAL_VOLUME_5L			0x431
+#define MADERA_NOISE_GATE_SELECT_5L			0x433
+#define MADERA_OUTPUT_PATH_CONFIG_5R			0x434
+#define MADERA_DAC_DIGITAL_VOLUME_5R			0x435
+#define MADERA_NOISE_GATE_SELECT_5R			0x437
+#define MADERA_OUTPUT_PATH_CONFIG_6L			0x438
+#define MADERA_DAC_DIGITAL_VOLUME_6L			0x439
+#define MADERA_NOISE_GATE_SELECT_6L			0x43B
+#define MADERA_OUTPUT_PATH_CONFIG_6R			0x43C
+#define MADERA_DAC_DIGITAL_VOLUME_6R			0x43D
+#define MADERA_NOISE_GATE_SELECT_6R			0x43F
+#define MADERA_DRE_ENABLE				0x440
+#define MADERA_EDRE_ENABLE				0x448
+#define MADERA_EDRE_MANUAL				0x44A
+#define MADERA_DAC_AEC_CONTROL_1			0x450
+#define MADERA_DAC_AEC_CONTROL_2			0x451
+#define MADERA_NOISE_GATE_CONTROL			0x458
+#define MADERA_PDM_SPK1_CTRL_1				0x490
+#define MADERA_PDM_SPK1_CTRL_2				0x491
+#define MADERA_PDM_SPK2_CTRL_1				0x492
+#define MADERA_PDM_SPK2_CTRL_2				0x493
+#define MADERA_HP1_SHORT_CIRCUIT_CTRL			0x4A0
+#define MADERA_HP2_SHORT_CIRCUIT_CTRL			0x4A1
+#define MADERA_HP3_SHORT_CIRCUIT_CTRL			0x4A2
+#define MADERA_HP_TEST_CTRL_1				0x4A4
+#define MADERA_HP_TEST_CTRL_5				0x4A8
+#define MADERA_HP_TEST_CTRL_6				0x4A9
+#define MADERA_AIF1_BCLK_CTRL				0x500
+#define MADERA_AIF1_TX_PIN_CTRL				0x501
+#define MADERA_AIF1_RX_PIN_CTRL				0x502
+#define MADERA_AIF1_RATE_CTRL				0x503
+#define MADERA_AIF1_FORMAT				0x504
+#define MADERA_AIF1_RX_BCLK_RATE			0x506
+#define MADERA_AIF1_FRAME_CTRL_1			0x507
+#define MADERA_AIF1_FRAME_CTRL_2			0x508
+#define MADERA_AIF1_FRAME_CTRL_3			0x509
+#define MADERA_AIF1_FRAME_CTRL_4			0x50A
+#define MADERA_AIF1_FRAME_CTRL_5			0x50B
+#define MADERA_AIF1_FRAME_CTRL_6			0x50C
+#define MADERA_AIF1_FRAME_CTRL_7			0x50D
+#define MADERA_AIF1_FRAME_CTRL_8			0x50E
+#define MADERA_AIF1_FRAME_CTRL_9			0x50F
+#define MADERA_AIF1_FRAME_CTRL_10			0x510
+#define MADERA_AIF1_FRAME_CTRL_11			0x511
+#define MADERA_AIF1_FRAME_CTRL_12			0x512
+#define MADERA_AIF1_FRAME_CTRL_13			0x513
+#define MADERA_AIF1_FRAME_CTRL_14			0x514
+#define MADERA_AIF1_FRAME_CTRL_15			0x515
+#define MADERA_AIF1_FRAME_CTRL_16			0x516
+#define MADERA_AIF1_FRAME_CTRL_17			0x517
+#define MADERA_AIF1_FRAME_CTRL_18			0x518
+#define MADERA_AIF1_TX_ENABLES				0x519
+#define MADERA_AIF1_RX_ENABLES				0x51A
+#define MADERA_AIF1_FORCE_WRITE				0x51B
+#define MADERA_AIF2_BCLK_CTRL				0x540
+#define MADERA_AIF2_TX_PIN_CTRL				0x541
+#define MADERA_AIF2_RX_PIN_CTRL				0x542
+#define MADERA_AIF2_RATE_CTRL				0x543
+#define MADERA_AIF2_FORMAT				0x544
+#define MADERA_AIF2_RX_BCLK_RATE			0x546
+#define MADERA_AIF2_FRAME_CTRL_1			0x547
+#define MADERA_AIF2_FRAME_CTRL_2			0x548
+#define MADERA_AIF2_FRAME_CTRL_3			0x549
+#define MADERA_AIF2_FRAME_CTRL_4			0x54A
+#define MADERA_AIF2_FRAME_CTRL_5			0x54B
+#define MADERA_AIF2_FRAME_CTRL_6			0x54C
+#define MADERA_AIF2_FRAME_CTRL_7			0x54D
+#define MADERA_AIF2_FRAME_CTRL_8			0x54E
+#define MADERA_AIF2_FRAME_CTRL_9			0x54F
+#define MADERA_AIF2_FRAME_CTRL_10			0x550
+#define MADERA_AIF2_FRAME_CTRL_11			0x551
+#define MADERA_AIF2_FRAME_CTRL_12			0x552
+#define MADERA_AIF2_FRAME_CTRL_13			0x553
+#define MADERA_AIF2_FRAME_CTRL_14			0x554
+#define MADERA_AIF2_FRAME_CTRL_15			0x555
+#define MADERA_AIF2_FRAME_CTRL_16			0x556
+#define MADERA_AIF2_FRAME_CTRL_17			0x557
+#define MADERA_AIF2_FRAME_CTRL_18			0x558
+#define MADERA_AIF2_TX_ENABLES				0x559
+#define MADERA_AIF2_RX_ENABLES				0x55A
+#define MADERA_AIF2_FORCE_WRITE				0x55B
+#define MADERA_AIF3_BCLK_CTRL				0x580
+#define MADERA_AIF3_TX_PIN_CTRL				0x581
+#define MADERA_AIF3_RX_PIN_CTRL				0x582
+#define MADERA_AIF3_RATE_CTRL				0x583
+#define MADERA_AIF3_FORMAT				0x584
+#define MADERA_AIF3_RX_BCLK_RATE			0x586
+#define MADERA_AIF3_FRAME_CTRL_1			0x587
+#define MADERA_AIF3_FRAME_CTRL_2			0x588
+#define MADERA_AIF3_FRAME_CTRL_3			0x589
+#define MADERA_AIF3_FRAME_CTRL_4			0x58A
+#define MADERA_AIF3_FRAME_CTRL_11			0x591
+#define MADERA_AIF3_FRAME_CTRL_12			0x592
+#define MADERA_AIF3_TX_ENABLES				0x599
+#define MADERA_AIF3_RX_ENABLES				0x59A
+#define MADERA_AIF3_FORCE_WRITE				0x59B
+#define MADERA_AIF4_BCLK_CTRL				0x5A0
+#define MADERA_AIF4_TX_PIN_CTRL				0x5A1
+#define MADERA_AIF4_RX_PIN_CTRL				0x5A2
+#define MADERA_AIF4_RATE_CTRL				0x5A3
+#define MADERA_AIF4_FORMAT				0x5A4
+#define MADERA_AIF4_RX_BCLK_RATE			0x5A6
+#define MADERA_AIF4_FRAME_CTRL_1			0x5A7
+#define MADERA_AIF4_FRAME_CTRL_2			0x5A8
+#define MADERA_AIF4_FRAME_CTRL_3			0x5A9
+#define MADERA_AIF4_FRAME_CTRL_4			0x5AA
+#define MADERA_AIF4_FRAME_CTRL_11			0x5B1
+#define MADERA_AIF4_FRAME_CTRL_12			0x5B2
+#define MADERA_AIF4_TX_ENABLES				0x5B9
+#define MADERA_AIF4_RX_ENABLES				0x5BA
+#define MADERA_AIF4_FORCE_WRITE				0x5BB
+#define MADERA_SPD1_TX_CONTROL				0x5C2
+#define MADERA_SPD1_TX_CHANNEL_STATUS_1			0x5C3
+#define MADERA_SPD1_TX_CHANNEL_STATUS_2			0x5C4
+#define MADERA_SPD1_TX_CHANNEL_STATUS_3			0x5C5
+#define MADERA_SLIMBUS_FRAMER_REF_GEAR			0x5E3
+#define MADERA_SLIMBUS_RATES_1				0x5E5
+#define MADERA_SLIMBUS_RATES_2				0x5E6
+#define MADERA_SLIMBUS_RATES_3				0x5E7
+#define MADERA_SLIMBUS_RATES_4				0x5E8
+#define MADERA_SLIMBUS_RATES_5				0x5E9
+#define MADERA_SLIMBUS_RATES_6				0x5EA
+#define MADERA_SLIMBUS_RATES_7				0x5EB
+#define MADERA_SLIMBUS_RATES_8				0x5EC
+#define MADERA_SLIMBUS_RX_CHANNEL_ENABLE		0x5F5
+#define MADERA_SLIMBUS_TX_CHANNEL_ENABLE		0x5F6
+#define MADERA_SLIMBUS_RX_PORT_STATUS			0x5F7
+#define MADERA_SLIMBUS_TX_PORT_STATUS			0x5F8
+#define MADERA_PWM1MIX_INPUT_1_SOURCE			0x640
+#define MADERA_PWM1MIX_INPUT_1_VOLUME			0x641
+#define MADERA_PWM1MIX_INPUT_2_SOURCE			0x642
+#define MADERA_PWM1MIX_INPUT_2_VOLUME			0x643
+#define MADERA_PWM1MIX_INPUT_3_SOURCE			0x644
+#define MADERA_PWM1MIX_INPUT_3_VOLUME			0x645
+#define MADERA_PWM1MIX_INPUT_4_SOURCE			0x646
+#define MADERA_PWM1MIX_INPUT_4_VOLUME			0x647
+#define MADERA_PWM2MIX_INPUT_1_SOURCE			0x648
+#define MADERA_PWM2MIX_INPUT_1_VOLUME			0x649
+#define MADERA_PWM2MIX_INPUT_2_SOURCE			0x64A
+#define MADERA_PWM2MIX_INPUT_2_VOLUME			0x64B
+#define MADERA_PWM2MIX_INPUT_3_SOURCE			0x64C
+#define MADERA_PWM2MIX_INPUT_3_VOLUME			0x64D
+#define MADERA_PWM2MIX_INPUT_4_SOURCE			0x64E
+#define MADERA_PWM2MIX_INPUT_4_VOLUME			0x64F
+#define MADERA_OUT1LMIX_INPUT_1_SOURCE			0x680
+#define MADERA_OUT1LMIX_INPUT_1_VOLUME			0x681
+#define MADERA_OUT1LMIX_INPUT_2_SOURCE			0x682
+#define MADERA_OUT1LMIX_INPUT_2_VOLUME			0x683
+#define MADERA_OUT1LMIX_INPUT_3_SOURCE			0x684
+#define MADERA_OUT1LMIX_INPUT_3_VOLUME			0x685
+#define MADERA_OUT1LMIX_INPUT_4_SOURCE			0x686
+#define MADERA_OUT1LMIX_INPUT_4_VOLUME			0x687
+#define MADERA_OUT1RMIX_INPUT_1_SOURCE			0x688
+#define MADERA_OUT1RMIX_INPUT_1_VOLUME			0x689
+#define MADERA_OUT1RMIX_INPUT_2_SOURCE			0x68A
+#define MADERA_OUT1RMIX_INPUT_2_VOLUME			0x68B
+#define MADERA_OUT1RMIX_INPUT_3_SOURCE			0x68C
+#define MADERA_OUT1RMIX_INPUT_3_VOLUME			0x68D
+#define MADERA_OUT1RMIX_INPUT_4_SOURCE			0x68E
+#define MADERA_OUT1RMIX_INPUT_4_VOLUME			0x68F
+#define MADERA_OUT2LMIX_INPUT_1_SOURCE			0x690
+#define MADERA_OUT2LMIX_INPUT_1_VOLUME			0x691
+#define MADERA_OUT2LMIX_INPUT_2_SOURCE			0x692
+#define MADERA_OUT2LMIX_INPUT_2_VOLUME			0x693
+#define MADERA_OUT2LMIX_INPUT_3_SOURCE			0x694
+#define MADERA_OUT2LMIX_INPUT_3_VOLUME			0x695
+#define MADERA_OUT2LMIX_INPUT_4_SOURCE			0x696
+#define MADERA_OUT2LMIX_INPUT_4_VOLUME			0x697
+#define MADERA_OUT2RMIX_INPUT_1_SOURCE			0x698
+#define MADERA_OUT2RMIX_INPUT_1_VOLUME			0x699
+#define MADERA_OUT2RMIX_INPUT_2_SOURCE			0x69A
+#define MADERA_OUT2RMIX_INPUT_2_VOLUME			0x69B
+#define MADERA_OUT2RMIX_INPUT_3_SOURCE			0x69C
+#define MADERA_OUT2RMIX_INPUT_3_VOLUME			0x69D
+#define MADERA_OUT2RMIX_INPUT_4_SOURCE			0x69E
+#define MADERA_OUT2RMIX_INPUT_4_VOLUME			0x69F
+#define MADERA_OUT3LMIX_INPUT_1_SOURCE			0x6A0
+#define MADERA_OUT3LMIX_INPUT_1_VOLUME			0x6A1
+#define MADERA_OUT3LMIX_INPUT_2_SOURCE			0x6A2
+#define MADERA_OUT3LMIX_INPUT_2_VOLUME			0x6A3
+#define MADERA_OUT3LMIX_INPUT_3_SOURCE			0x6A4
+#define MADERA_OUT3LMIX_INPUT_3_VOLUME			0x6A5
+#define MADERA_OUT3LMIX_INPUT_4_SOURCE			0x6A6
+#define MADERA_OUT3LMIX_INPUT_4_VOLUME			0x6A7
+#define MADERA_OUT3RMIX_INPUT_1_SOURCE			0x6A8
+#define MADERA_OUT3RMIX_INPUT_1_VOLUME			0x6A9
+#define MADERA_OUT3RMIX_INPUT_2_SOURCE			0x6AA
+#define MADERA_OUT3RMIX_INPUT_2_VOLUME			0x6AB
+#define MADERA_OUT3RMIX_INPUT_3_SOURCE			0x6AC
+#define MADERA_OUT3RMIX_INPUT_3_VOLUME			0x6AD
+#define MADERA_OUT3RMIX_INPUT_4_SOURCE			0x6AE
+#define MADERA_OUT3RMIX_INPUT_4_VOLUME			0x6AF
+#define MADERA_OUT4LMIX_INPUT_1_SOURCE			0x6B0
+#define MADERA_OUT4LMIX_INPUT_1_VOLUME			0x6B1
+#define MADERA_OUT4LMIX_INPUT_2_SOURCE			0x6B2
+#define MADERA_OUT4LMIX_INPUT_2_VOLUME			0x6B3
+#define MADERA_OUT4LMIX_INPUT_3_SOURCE			0x6B4
+#define MADERA_OUT4LMIX_INPUT_3_VOLUME			0x6B5
+#define MADERA_OUT4LMIX_INPUT_4_SOURCE			0x6B6
+#define MADERA_OUT4LMIX_INPUT_4_VOLUME			0x6B7
+#define MADERA_OUT4RMIX_INPUT_1_SOURCE			0x6B8
+#define MADERA_OUT4RMIX_INPUT_1_VOLUME			0x6B9
+#define MADERA_OUT4RMIX_INPUT_2_SOURCE			0x6BA
+#define MADERA_OUT4RMIX_INPUT_2_VOLUME			0x6BB
+#define MADERA_OUT4RMIX_INPUT_3_SOURCE			0x6BC
+#define MADERA_OUT4RMIX_INPUT_3_VOLUME			0x6BD
+#define MADERA_OUT4RMIX_INPUT_4_SOURCE			0x6BE
+#define MADERA_OUT4RMIX_INPUT_4_VOLUME			0x6BF
+#define MADERA_OUT5LMIX_INPUT_1_SOURCE			0x6C0
+#define MADERA_OUT5LMIX_INPUT_1_VOLUME			0x6C1
+#define MADERA_OUT5LMIX_INPUT_2_SOURCE			0x6C2
+#define MADERA_OUT5LMIX_INPUT_2_VOLUME			0x6C3
+#define MADERA_OUT5LMIX_INPUT_3_SOURCE			0x6C4
+#define MADERA_OUT5LMIX_INPUT_3_VOLUME			0x6C5
+#define MADERA_OUT5LMIX_INPUT_4_SOURCE			0x6C6
+#define MADERA_OUT5LMIX_INPUT_4_VOLUME			0x6C7
+#define MADERA_OUT5RMIX_INPUT_1_SOURCE			0x6C8
+#define MADERA_OUT5RMIX_INPUT_1_VOLUME			0x6C9
+#define MADERA_OUT5RMIX_INPUT_2_SOURCE			0x6CA
+#define MADERA_OUT5RMIX_INPUT_2_VOLUME			0x6CB
+#define MADERA_OUT5RMIX_INPUT_3_SOURCE			0x6CC
+#define MADERA_OUT5RMIX_INPUT_3_VOLUME			0x6CD
+#define MADERA_OUT5RMIX_INPUT_4_SOURCE			0x6CE
+#define MADERA_OUT5RMIX_INPUT_4_VOLUME			0x6CF
+#define MADERA_OUT6LMIX_INPUT_1_SOURCE			0x6D0
+#define MADERA_OUT6LMIX_INPUT_1_VOLUME			0x6D1
+#define MADERA_OUT6LMIX_INPUT_2_SOURCE			0x6D2
+#define MADERA_OUT6LMIX_INPUT_2_VOLUME			0x6D3
+#define MADERA_OUT6LMIX_INPUT_3_SOURCE			0x6D4
+#define MADERA_OUT6LMIX_INPUT_3_VOLUME			0x6D5
+#define MADERA_OUT6LMIX_INPUT_4_SOURCE			0x6D6
+#define MADERA_OUT6LMIX_INPUT_4_VOLUME			0x6D7
+#define MADERA_OUT6RMIX_INPUT_1_SOURCE			0x6D8
+#define MADERA_OUT6RMIX_INPUT_1_VOLUME			0x6D9
+#define MADERA_OUT6RMIX_INPUT_2_SOURCE			0x6DA
+#define MADERA_OUT6RMIX_INPUT_2_VOLUME			0x6DB
+#define MADERA_OUT6RMIX_INPUT_3_SOURCE			0x6DC
+#define MADERA_OUT6RMIX_INPUT_3_VOLUME			0x6DD
+#define MADERA_OUT6RMIX_INPUT_4_SOURCE			0x6DE
+#define MADERA_OUT6RMIX_INPUT_4_VOLUME			0x6DF
+#define MADERA_AIF1TX1MIX_INPUT_1_SOURCE		0x700
+#define MADERA_AIF1TX1MIX_INPUT_1_VOLUME		0x701
+#define MADERA_AIF1TX1MIX_INPUT_2_SOURCE		0x702
+#define MADERA_AIF1TX1MIX_INPUT_2_VOLUME		0x703
+#define MADERA_AIF1TX1MIX_INPUT_3_SOURCE		0x704
+#define MADERA_AIF1TX1MIX_INPUT_3_VOLUME		0x705
+#define MADERA_AIF1TX1MIX_INPUT_4_SOURCE		0x706
+#define MADERA_AIF1TX1MIX_INPUT_4_VOLUME		0x707
+#define MADERA_AIF1TX2MIX_INPUT_1_SOURCE		0x708
+#define MADERA_AIF1TX2MIX_INPUT_1_VOLUME		0x709
+#define MADERA_AIF1TX2MIX_INPUT_2_SOURCE		0x70A
+#define MADERA_AIF1TX2MIX_INPUT_2_VOLUME		0x70B
+#define MADERA_AIF1TX2MIX_INPUT_3_SOURCE		0x70C
+#define MADERA_AIF1TX2MIX_INPUT_3_VOLUME		0x70D
+#define MADERA_AIF1TX2MIX_INPUT_4_SOURCE		0x70E
+#define MADERA_AIF1TX2MIX_INPUT_4_VOLUME		0x70F
+#define MADERA_AIF1TX3MIX_INPUT_1_SOURCE		0x710
+#define MADERA_AIF1TX3MIX_INPUT_1_VOLUME		0x711
+#define MADERA_AIF1TX3MIX_INPUT_2_SOURCE		0x712
+#define MADERA_AIF1TX3MIX_INPUT_2_VOLUME		0x713
+#define MADERA_AIF1TX3MIX_INPUT_3_SOURCE		0x714
+#define MADERA_AIF1TX3MIX_INPUT_3_VOLUME		0x715
+#define MADERA_AIF1TX3MIX_INPUT_4_SOURCE		0x716
+#define MADERA_AIF1TX3MIX_INPUT_4_VOLUME		0x717
+#define MADERA_AIF1TX4MIX_INPUT_1_SOURCE		0x718
+#define MADERA_AIF1TX4MIX_INPUT_1_VOLUME		0x719
+#define MADERA_AIF1TX4MIX_INPUT_2_SOURCE		0x71A
+#define MADERA_AIF1TX4MIX_INPUT_2_VOLUME		0x71B
+#define MADERA_AIF1TX4MIX_INPUT_3_SOURCE		0x71C
+#define MADERA_AIF1TX4MIX_INPUT_3_VOLUME		0x71D
+#define MADERA_AIF1TX4MIX_INPUT_4_SOURCE		0x71E
+#define MADERA_AIF1TX4MIX_INPUT_4_VOLUME		0x71F
+#define MADERA_AIF1TX5MIX_INPUT_1_SOURCE		0x720
+#define MADERA_AIF1TX5MIX_INPUT_1_VOLUME		0x721
+#define MADERA_AIF1TX5MIX_INPUT_2_SOURCE		0x722
+#define MADERA_AIF1TX5MIX_INPUT_2_VOLUME		0x723
+#define MADERA_AIF1TX5MIX_INPUT_3_SOURCE		0x724
+#define MADERA_AIF1TX5MIX_INPUT_3_VOLUME		0x725
+#define MADERA_AIF1TX5MIX_INPUT_4_SOURCE		0x726
+#define MADERA_AIF1TX5MIX_INPUT_4_VOLUME		0x727
+#define MADERA_AIF1TX6MIX_INPUT_1_SOURCE		0x728
+#define MADERA_AIF1TX6MIX_INPUT_1_VOLUME		0x729
+#define MADERA_AIF1TX6MIX_INPUT_2_SOURCE		0x72A
+#define MADERA_AIF1TX6MIX_INPUT_2_VOLUME		0x72B
+#define MADERA_AIF1TX6MIX_INPUT_3_SOURCE		0x72C
+#define MADERA_AIF1TX6MIX_INPUT_3_VOLUME		0x72D
+#define MADERA_AIF1TX6MIX_INPUT_4_SOURCE		0x72E
+#define MADERA_AIF1TX6MIX_INPUT_4_VOLUME		0x72F
+#define MADERA_AIF1TX7MIX_INPUT_1_SOURCE		0x730
+#define MADERA_AIF1TX7MIX_INPUT_1_VOLUME		0x731
+#define MADERA_AIF1TX7MIX_INPUT_2_SOURCE		0x732
+#define MADERA_AIF1TX7MIX_INPUT_2_VOLUME		0x733
+#define MADERA_AIF1TX7MIX_INPUT_3_SOURCE		0x734
+#define MADERA_AIF1TX7MIX_INPUT_3_VOLUME		0x735
+#define MADERA_AIF1TX7MIX_INPUT_4_SOURCE		0x736
+#define MADERA_AIF1TX7MIX_INPUT_4_VOLUME		0x737
+#define MADERA_AIF1TX8MIX_INPUT_1_SOURCE		0x738
+#define MADERA_AIF1TX8MIX_INPUT_1_VOLUME		0x739
+#define MADERA_AIF1TX8MIX_INPUT_2_SOURCE		0x73A
+#define MADERA_AIF1TX8MIX_INPUT_2_VOLUME		0x73B
+#define MADERA_AIF1TX8MIX_INPUT_3_SOURCE		0x73C
+#define MADERA_AIF1TX8MIX_INPUT_3_VOLUME		0x73D
+#define MADERA_AIF1TX8MIX_INPUT_4_SOURCE		0x73E
+#define MADERA_AIF1TX8MIX_INPUT_4_VOLUME		0x73F
+#define MADERA_AIF2TX1MIX_INPUT_1_SOURCE		0x740
+#define MADERA_AIF2TX1MIX_INPUT_1_VOLUME		0x741
+#define MADERA_AIF2TX1MIX_INPUT_2_SOURCE		0x742
+#define MADERA_AIF2TX1MIX_INPUT_2_VOLUME		0x743
+#define MADERA_AIF2TX1MIX_INPUT_3_SOURCE		0x744
+#define MADERA_AIF2TX1MIX_INPUT_3_VOLUME		0x745
+#define MADERA_AIF2TX1MIX_INPUT_4_SOURCE		0x746
+#define MADERA_AIF2TX1MIX_INPUT_4_VOLUME		0x747
+#define MADERA_AIF2TX2MIX_INPUT_1_SOURCE		0x748
+#define MADERA_AIF2TX2MIX_INPUT_1_VOLUME		0x749
+#define MADERA_AIF2TX2MIX_INPUT_2_SOURCE		0x74A
+#define MADERA_AIF2TX2MIX_INPUT_2_VOLUME		0x74B
+#define MADERA_AIF2TX2MIX_INPUT_3_SOURCE		0x74C
+#define MADERA_AIF2TX2MIX_INPUT_3_VOLUME		0x74D
+#define MADERA_AIF2TX2MIX_INPUT_4_SOURCE		0x74E
+#define MADERA_AIF2TX2MIX_INPUT_4_VOLUME		0x74F
+#define MADERA_AIF2TX3MIX_INPUT_1_SOURCE		0x750
+#define MADERA_AIF2TX3MIX_INPUT_1_VOLUME		0x751
+#define MADERA_AIF2TX3MIX_INPUT_2_SOURCE		0x752
+#define MADERA_AIF2TX3MIX_INPUT_2_VOLUME		0x753
+#define MADERA_AIF2TX3MIX_INPUT_3_SOURCE		0x754
+#define MADERA_AIF2TX3MIX_INPUT_3_VOLUME		0x755
+#define MADERA_AIF2TX3MIX_INPUT_4_SOURCE		0x756
+#define MADERA_AIF2TX3MIX_INPUT_4_VOLUME		0x757
+#define MADERA_AIF2TX4MIX_INPUT_1_SOURCE		0x758
+#define MADERA_AIF2TX4MIX_INPUT_1_VOLUME		0x759
+#define MADERA_AIF2TX4MIX_INPUT_2_SOURCE		0x75A
+#define MADERA_AIF2TX4MIX_INPUT_2_VOLUME		0x75B
+#define MADERA_AIF2TX4MIX_INPUT_3_SOURCE		0x75C
+#define MADERA_AIF2TX4MIX_INPUT_3_VOLUME		0x75D
+#define MADERA_AIF2TX4MIX_INPUT_4_SOURCE		0x75E
+#define MADERA_AIF2TX4MIX_INPUT_4_VOLUME		0x75F
+#define MADERA_AIF2TX5MIX_INPUT_1_SOURCE		0x760
+#define MADERA_AIF2TX5MIX_INPUT_1_VOLUME		0x761
+#define MADERA_AIF2TX5MIX_INPUT_2_SOURCE		0x762
+#define MADERA_AIF2TX5MIX_INPUT_2_VOLUME		0x763
+#define MADERA_AIF2TX5MIX_INPUT_3_SOURCE		0x764
+#define MADERA_AIF2TX5MIX_INPUT_3_VOLUME		0x765
+#define MADERA_AIF2TX5MIX_INPUT_4_SOURCE		0x766
+#define MADERA_AIF2TX5MIX_INPUT_4_VOLUME		0x767
+#define MADERA_AIF2TX6MIX_INPUT_1_SOURCE		0x768
+#define MADERA_AIF2TX6MIX_INPUT_1_VOLUME		0x769
+#define MADERA_AIF2TX6MIX_INPUT_2_SOURCE		0x76A
+#define MADERA_AIF2TX6MIX_INPUT_2_VOLUME		0x76B
+#define MADERA_AIF2TX6MIX_INPUT_3_SOURCE		0x76C
+#define MADERA_AIF2TX6MIX_INPUT_3_VOLUME		0x76D
+#define MADERA_AIF2TX6MIX_INPUT_4_SOURCE		0x76E
+#define MADERA_AIF2TX6MIX_INPUT_4_VOLUME		0x76F
+#define MADERA_AIF2TX7MIX_INPUT_1_SOURCE		0x770
+#define MADERA_AIF2TX7MIX_INPUT_1_VOLUME		0x771
+#define MADERA_AIF2TX7MIX_INPUT_2_SOURCE		0x772
+#define MADERA_AIF2TX7MIX_INPUT_2_VOLUME		0x773
+#define MADERA_AIF2TX7MIX_INPUT_3_SOURCE		0x774
+#define MADERA_AIF2TX7MIX_INPUT_3_VOLUME		0x775
+#define MADERA_AIF2TX7MIX_INPUT_4_SOURCE		0x776
+#define MADERA_AIF2TX7MIX_INPUT_4_VOLUME		0x777
+#define MADERA_AIF2TX8MIX_INPUT_1_SOURCE		0x778
+#define MADERA_AIF2TX8MIX_INPUT_1_VOLUME		0x779
+#define MADERA_AIF2TX8MIX_INPUT_2_SOURCE		0x77A
+#define MADERA_AIF2TX8MIX_INPUT_2_VOLUME		0x77B
+#define MADERA_AIF2TX8MIX_INPUT_3_SOURCE		0x77C
+#define MADERA_AIF2TX8MIX_INPUT_3_VOLUME		0x77D
+#define MADERA_AIF2TX8MIX_INPUT_4_SOURCE		0x77E
+#define MADERA_AIF2TX8MIX_INPUT_4_VOLUME		0x77F
+#define MADERA_AIF3TX1MIX_INPUT_1_SOURCE		0x780
+#define MADERA_AIF3TX1MIX_INPUT_1_VOLUME		0x781
+#define MADERA_AIF3TX1MIX_INPUT_2_SOURCE		0x782
+#define MADERA_AIF3TX1MIX_INPUT_2_VOLUME		0x783
+#define MADERA_AIF3TX1MIX_INPUT_3_SOURCE		0x784
+#define MADERA_AIF3TX1MIX_INPUT_3_VOLUME		0x785
+#define MADERA_AIF3TX1MIX_INPUT_4_SOURCE		0x786
+#define MADERA_AIF3TX1MIX_INPUT_4_VOLUME		0x787
+#define MADERA_AIF3TX2MIX_INPUT_1_SOURCE		0x788
+#define MADERA_AIF3TX2MIX_INPUT_1_VOLUME		0x789
+#define MADERA_AIF3TX2MIX_INPUT_2_SOURCE		0x78A
+#define MADERA_AIF3TX2MIX_INPUT_2_VOLUME		0x78B
+#define MADERA_AIF3TX2MIX_INPUT_3_SOURCE		0x78C
+#define MADERA_AIF3TX2MIX_INPUT_3_VOLUME		0x78D
+#define MADERA_AIF3TX2MIX_INPUT_4_SOURCE		0x78E
+#define MADERA_AIF3TX2MIX_INPUT_4_VOLUME		0x78F
+#define MADERA_AIF4TX1MIX_INPUT_1_SOURCE		0x7A0
+#define MADERA_AIF4TX1MIX_INPUT_1_VOLUME		0x7A1
+#define MADERA_AIF4TX1MIX_INPUT_2_SOURCE		0x7A2
+#define MADERA_AIF4TX1MIX_INPUT_2_VOLUME		0x7A3
+#define MADERA_AIF4TX1MIX_INPUT_3_SOURCE		0x7A4
+#define MADERA_AIF4TX1MIX_INPUT_3_VOLUME		0x7A5
+#define MADERA_AIF4TX1MIX_INPUT_4_SOURCE		0x7A6
+#define MADERA_AIF4TX1MIX_INPUT_4_VOLUME		0x7A7
+#define MADERA_AIF4TX2MIX_INPUT_1_SOURCE		0x7A8
+#define MADERA_AIF4TX2MIX_INPUT_1_VOLUME		0x7A9
+#define MADERA_AIF4TX2MIX_INPUT_2_SOURCE		0x7AA
+#define MADERA_AIF4TX2MIX_INPUT_2_VOLUME		0x7AB
+#define MADERA_AIF4TX2MIX_INPUT_3_SOURCE		0x7AC
+#define MADERA_AIF4TX2MIX_INPUT_3_VOLUME		0x7AD
+#define MADERA_AIF4TX2MIX_INPUT_4_SOURCE		0x7AE
+#define MADERA_AIF4TX2MIX_INPUT_4_VOLUME		0x7AF
+#define MADERA_SLIMTX1MIX_INPUT_1_SOURCE		0x7C0
+#define MADERA_SLIMTX1MIX_INPUT_1_VOLUME		0x7C1
+#define MADERA_SLIMTX1MIX_INPUT_2_SOURCE		0x7C2
+#define MADERA_SLIMTX1MIX_INPUT_2_VOLUME		0x7C3
+#define MADERA_SLIMTX1MIX_INPUT_3_SOURCE		0x7C4
+#define MADERA_SLIMTX1MIX_INPUT_3_VOLUME		0x7C5
+#define MADERA_SLIMTX1MIX_INPUT_4_SOURCE		0x7C6
+#define MADERA_SLIMTX1MIX_INPUT_4_VOLUME		0x7C7
+#define MADERA_SLIMTX2MIX_INPUT_1_SOURCE		0x7C8
+#define MADERA_SLIMTX2MIX_INPUT_1_VOLUME		0x7C9
+#define MADERA_SLIMTX2MIX_INPUT_2_SOURCE		0x7CA
+#define MADERA_SLIMTX2MIX_INPUT_2_VOLUME		0x7CB
+#define MADERA_SLIMTX2MIX_INPUT_3_SOURCE		0x7CC
+#define MADERA_SLIMTX2MIX_INPUT_3_VOLUME		0x7CD
+#define MADERA_SLIMTX2MIX_INPUT_4_SOURCE		0x7CE
+#define MADERA_SLIMTX2MIX_INPUT_4_VOLUME		0x7CF
+#define MADERA_SLIMTX3MIX_INPUT_1_SOURCE		0x7D0
+#define MADERA_SLIMTX3MIX_INPUT_1_VOLUME		0x7D1
+#define MADERA_SLIMTX3MIX_INPUT_2_SOURCE		0x7D2
+#define MADERA_SLIMTX3MIX_INPUT_2_VOLUME		0x7D3
+#define MADERA_SLIMTX3MIX_INPUT_3_SOURCE		0x7D4
+#define MADERA_SLIMTX3MIX_INPUT_3_VOLUME		0x7D5
+#define MADERA_SLIMTX3MIX_INPUT_4_SOURCE		0x7D6
+#define MADERA_SLIMTX3MIX_INPUT_4_VOLUME		0x7D7
+#define MADERA_SLIMTX4MIX_INPUT_1_SOURCE		0x7D8
+#define MADERA_SLIMTX4MIX_INPUT_1_VOLUME		0x7D9
+#define MADERA_SLIMTX4MIX_INPUT_2_SOURCE		0x7DA
+#define MADERA_SLIMTX4MIX_INPUT_2_VOLUME		0x7DB
+#define MADERA_SLIMTX4MIX_INPUT_3_SOURCE		0x7DC
+#define MADERA_SLIMTX4MIX_INPUT_3_VOLUME		0x7DD
+#define MADERA_SLIMTX4MIX_INPUT_4_SOURCE		0x7DE
+#define MADERA_SLIMTX4MIX_INPUT_4_VOLUME		0x7DF
+#define MADERA_SLIMTX5MIX_INPUT_1_SOURCE		0x7E0
+#define MADERA_SLIMTX5MIX_INPUT_1_VOLUME		0x7E1
+#define MADERA_SLIMTX5MIX_INPUT_2_SOURCE		0x7E2
+#define MADERA_SLIMTX5MIX_INPUT_2_VOLUME		0x7E3
+#define MADERA_SLIMTX5MIX_INPUT_3_SOURCE		0x7E4
+#define MADERA_SLIMTX5MIX_INPUT_3_VOLUME		0x7E5
+#define MADERA_SLIMTX5MIX_INPUT_4_SOURCE		0x7E6
+#define MADERA_SLIMTX5MIX_INPUT_4_VOLUME		0x7E7
+#define MADERA_SLIMTX6MIX_INPUT_1_SOURCE		0x7E8
+#define MADERA_SLIMTX6MIX_INPUT_1_VOLUME		0x7E9
+#define MADERA_SLIMTX6MIX_INPUT_2_SOURCE		0x7EA
+#define MADERA_SLIMTX6MIX_INPUT_2_VOLUME		0x7EB
+#define MADERA_SLIMTX6MIX_INPUT_3_SOURCE		0x7EC
+#define MADERA_SLIMTX6MIX_INPUT_3_VOLUME		0x7ED
+#define MADERA_SLIMTX6MIX_INPUT_4_SOURCE		0x7EE
+#define MADERA_SLIMTX6MIX_INPUT_4_VOLUME		0x7EF
+#define MADERA_SLIMTX7MIX_INPUT_1_SOURCE		0x7F0
+#define MADERA_SLIMTX7MIX_INPUT_1_VOLUME		0x7F1
+#define MADERA_SLIMTX7MIX_INPUT_2_SOURCE		0x7F2
+#define MADERA_SLIMTX7MIX_INPUT_2_VOLUME		0x7F3
+#define MADERA_SLIMTX7MIX_INPUT_3_SOURCE		0x7F4
+#define MADERA_SLIMTX7MIX_INPUT_3_VOLUME		0x7F5
+#define MADERA_SLIMTX7MIX_INPUT_4_SOURCE		0x7F6
+#define MADERA_SLIMTX7MIX_INPUT_4_VOLUME		0x7F7
+#define MADERA_SLIMTX8MIX_INPUT_1_SOURCE		0x7F8
+#define MADERA_SLIMTX8MIX_INPUT_1_VOLUME		0x7F9
+#define MADERA_SLIMTX8MIX_INPUT_2_SOURCE		0x7FA
+#define MADERA_SLIMTX8MIX_INPUT_2_VOLUME		0x7FB
+#define MADERA_SLIMTX8MIX_INPUT_3_SOURCE		0x7FC
+#define MADERA_SLIMTX8MIX_INPUT_3_VOLUME		0x7FD
+#define MADERA_SLIMTX8MIX_INPUT_4_SOURCE		0x7FE
+#define MADERA_SLIMTX8MIX_INPUT_4_VOLUME		0x7FF
+#define MADERA_SPDIF1TX1MIX_INPUT_1_SOURCE		0x800
+#define MADERA_SPDIF1TX1MIX_INPUT_1_VOLUME		0x801
+#define MADERA_SPDIF1TX2MIX_INPUT_1_SOURCE		0x808
+#define MADERA_SPDIF1TX2MIX_INPUT_1_VOLUME		0x809
+#define MADERA_EQ1MIX_INPUT_1_SOURCE			0x880
+#define MADERA_EQ1MIX_INPUT_1_VOLUME			0x881
+#define MADERA_EQ1MIX_INPUT_2_SOURCE			0x882
+#define MADERA_EQ1MIX_INPUT_2_VOLUME			0x883
+#define MADERA_EQ1MIX_INPUT_3_SOURCE			0x884
+#define MADERA_EQ1MIX_INPUT_3_VOLUME			0x885
+#define MADERA_EQ1MIX_INPUT_4_SOURCE			0x886
+#define MADERA_EQ1MIX_INPUT_4_VOLUME			0x887
+#define MADERA_EQ2MIX_INPUT_1_SOURCE			0x888
+#define MADERA_EQ2MIX_INPUT_1_VOLUME			0x889
+#define MADERA_EQ2MIX_INPUT_2_SOURCE			0x88A
+#define MADERA_EQ2MIX_INPUT_2_VOLUME			0x88B
+#define MADERA_EQ2MIX_INPUT_3_SOURCE			0x88C
+#define MADERA_EQ2MIX_INPUT_3_VOLUME			0x88D
+#define MADERA_EQ2MIX_INPUT_4_SOURCE			0x88E
+#define MADERA_EQ2MIX_INPUT_4_VOLUME			0x88F
+#define MADERA_EQ3MIX_INPUT_1_SOURCE			0x890
+#define MADERA_EQ3MIX_INPUT_1_VOLUME			0x891
+#define MADERA_EQ3MIX_INPUT_2_SOURCE			0x892
+#define MADERA_EQ3MIX_INPUT_2_VOLUME			0x893
+#define MADERA_EQ3MIX_INPUT_3_SOURCE			0x894
+#define MADERA_EQ3MIX_INPUT_3_VOLUME			0x895
+#define MADERA_EQ3MIX_INPUT_4_SOURCE			0x896
+#define MADERA_EQ3MIX_INPUT_4_VOLUME			0x897
+#define MADERA_EQ4MIX_INPUT_1_SOURCE			0x898
+#define MADERA_EQ4MIX_INPUT_1_VOLUME			0x899
+#define MADERA_EQ4MIX_INPUT_2_SOURCE			0x89A
+#define MADERA_EQ4MIX_INPUT_2_VOLUME			0x89B
+#define MADERA_EQ4MIX_INPUT_3_SOURCE			0x89C
+#define MADERA_EQ4MIX_INPUT_3_VOLUME			0x89D
+#define MADERA_EQ4MIX_INPUT_4_SOURCE			0x89E
+#define MADERA_EQ4MIX_INPUT_4_VOLUME			0x89F
+#define MADERA_DRC1LMIX_INPUT_1_SOURCE			0x8C0
+#define MADERA_DRC1LMIX_INPUT_1_VOLUME			0x8C1
+#define MADERA_DRC1LMIX_INPUT_2_SOURCE			0x8C2
+#define MADERA_DRC1LMIX_INPUT_2_VOLUME			0x8C3
+#define MADERA_DRC1LMIX_INPUT_3_SOURCE			0x8C4
+#define MADERA_DRC1LMIX_INPUT_3_VOLUME			0x8C5
+#define MADERA_DRC1LMIX_INPUT_4_SOURCE			0x8C6
+#define MADERA_DRC1LMIX_INPUT_4_VOLUME			0x8C7
+#define MADERA_DRC1RMIX_INPUT_1_SOURCE			0x8C8
+#define MADERA_DRC1RMIX_INPUT_1_VOLUME			0x8C9
+#define MADERA_DRC1RMIX_INPUT_2_SOURCE			0x8CA
+#define MADERA_DRC1RMIX_INPUT_2_VOLUME			0x8CB
+#define MADERA_DRC1RMIX_INPUT_3_SOURCE			0x8CC
+#define MADERA_DRC1RMIX_INPUT_3_VOLUME			0x8CD
+#define MADERA_DRC1RMIX_INPUT_4_SOURCE			0x8CE
+#define MADERA_DRC1RMIX_INPUT_4_VOLUME			0x8CF
+#define MADERA_DRC2LMIX_INPUT_1_SOURCE			0x8D0
+#define MADERA_DRC2LMIX_INPUT_1_VOLUME			0x8D1
+#define MADERA_DRC2LMIX_INPUT_2_SOURCE			0x8D2
+#define MADERA_DRC2LMIX_INPUT_2_VOLUME			0x8D3
+#define MADERA_DRC2LMIX_INPUT_3_SOURCE			0x8D4
+#define MADERA_DRC2LMIX_INPUT_3_VOLUME			0x8D5
+#define MADERA_DRC2LMIX_INPUT_4_SOURCE			0x8D6
+#define MADERA_DRC2LMIX_INPUT_4_VOLUME			0x8D7
+#define MADERA_DRC2RMIX_INPUT_1_SOURCE			0x8D8
+#define MADERA_DRC2RMIX_INPUT_1_VOLUME			0x8D9
+#define MADERA_DRC2RMIX_INPUT_2_SOURCE			0x8DA
+#define MADERA_DRC2RMIX_INPUT_2_VOLUME			0x8DB
+#define MADERA_DRC2RMIX_INPUT_3_SOURCE			0x8DC
+#define MADERA_DRC2RMIX_INPUT_3_VOLUME			0x8DD
+#define MADERA_DRC2RMIX_INPUT_4_SOURCE			0x8DE
+#define MADERA_DRC2RMIX_INPUT_4_VOLUME			0x8DF
+#define MADERA_HPLP1MIX_INPUT_1_SOURCE			0x900
+#define MADERA_HPLP1MIX_INPUT_1_VOLUME			0x901
+#define MADERA_HPLP1MIX_INPUT_2_SOURCE			0x902
+#define MADERA_HPLP1MIX_INPUT_2_VOLUME			0x903
+#define MADERA_HPLP1MIX_INPUT_3_SOURCE			0x904
+#define MADERA_HPLP1MIX_INPUT_3_VOLUME			0x905
+#define MADERA_HPLP1MIX_INPUT_4_SOURCE			0x906
+#define MADERA_HPLP1MIX_INPUT_4_VOLUME			0x907
+#define MADERA_HPLP2MIX_INPUT_1_SOURCE			0x908
+#define MADERA_HPLP2MIX_INPUT_1_VOLUME			0x909
+#define MADERA_HPLP2MIX_INPUT_2_SOURCE			0x90A
+#define MADERA_HPLP2MIX_INPUT_2_VOLUME			0x90B
+#define MADERA_HPLP2MIX_INPUT_3_SOURCE			0x90C
+#define MADERA_HPLP2MIX_INPUT_3_VOLUME			0x90D
+#define MADERA_HPLP2MIX_INPUT_4_SOURCE			0x90E
+#define MADERA_HPLP2MIX_INPUT_4_VOLUME			0x90F
+#define MADERA_HPLP3MIX_INPUT_1_SOURCE			0x910
+#define MADERA_HPLP3MIX_INPUT_1_VOLUME			0x911
+#define MADERA_HPLP3MIX_INPUT_2_SOURCE			0x912
+#define MADERA_HPLP3MIX_INPUT_2_VOLUME			0x913
+#define MADERA_HPLP3MIX_INPUT_3_SOURCE			0x914
+#define MADERA_HPLP3MIX_INPUT_3_VOLUME			0x915
+#define MADERA_HPLP3MIX_INPUT_4_SOURCE			0x916
+#define MADERA_HPLP3MIX_INPUT_4_VOLUME			0x917
+#define MADERA_HPLP4MIX_INPUT_1_SOURCE			0x918
+#define MADERA_HPLP4MIX_INPUT_1_VOLUME			0x919
+#define MADERA_HPLP4MIX_INPUT_2_SOURCE			0x91A
+#define MADERA_HPLP4MIX_INPUT_2_VOLUME			0x91B
+#define MADERA_HPLP4MIX_INPUT_3_SOURCE			0x91C
+#define MADERA_HPLP4MIX_INPUT_3_VOLUME			0x91D
+#define MADERA_HPLP4MIX_INPUT_4_SOURCE			0x91E
+#define MADERA_HPLP4MIX_INPUT_4_VOLUME			0x91F
+#define MADERA_DSP1LMIX_INPUT_1_SOURCE			0x940
+#define MADERA_DSP1LMIX_INPUT_1_VOLUME			0x941
+#define MADERA_DSP1LMIX_INPUT_2_SOURCE			0x942
+#define MADERA_DSP1LMIX_INPUT_2_VOLUME			0x943
+#define MADERA_DSP1LMIX_INPUT_3_SOURCE			0x944
+#define MADERA_DSP1LMIX_INPUT_3_VOLUME			0x945
+#define MADERA_DSP1LMIX_INPUT_4_SOURCE			0x946
+#define MADERA_DSP1LMIX_INPUT_4_VOLUME			0x947
+#define MADERA_DSP1RMIX_INPUT_1_SOURCE			0x948
+#define MADERA_DSP1RMIX_INPUT_1_VOLUME			0x949
+#define MADERA_DSP1RMIX_INPUT_2_SOURCE			0x94A
+#define MADERA_DSP1RMIX_INPUT_2_VOLUME			0x94B
+#define MADERA_DSP1RMIX_INPUT_3_SOURCE			0x94C
+#define MADERA_DSP1RMIX_INPUT_3_VOLUME			0x94D
+#define MADERA_DSP1RMIX_INPUT_4_SOURCE			0x94E
+#define MADERA_DSP1RMIX_INPUT_4_VOLUME			0x94F
+#define MADERA_DSP1AUX1MIX_INPUT_1_SOURCE		0x950
+#define MADERA_DSP1AUX2MIX_INPUT_1_SOURCE		0x958
+#define MADERA_DSP1AUX3MIX_INPUT_1_SOURCE		0x960
+#define MADERA_DSP1AUX4MIX_INPUT_1_SOURCE		0x968
+#define MADERA_DSP1AUX5MIX_INPUT_1_SOURCE		0x970
+#define MADERA_DSP1AUX6MIX_INPUT_1_SOURCE		0x978
+#define MADERA_DSP2LMIX_INPUT_1_SOURCE			0x980
+#define MADERA_DSP2LMIX_INPUT_1_VOLUME			0x981
+#define MADERA_DSP2LMIX_INPUT_2_SOURCE			0x982
+#define MADERA_DSP2LMIX_INPUT_2_VOLUME			0x983
+#define MADERA_DSP2LMIX_INPUT_3_SOURCE			0x984
+#define MADERA_DSP2LMIX_INPUT_3_VOLUME			0x985
+#define MADERA_DSP2LMIX_INPUT_4_SOURCE			0x986
+#define MADERA_DSP2LMIX_INPUT_4_VOLUME			0x987
+#define MADERA_DSP2RMIX_INPUT_1_SOURCE			0x988
+#define MADERA_DSP2RMIX_INPUT_1_VOLUME			0x989
+#define MADERA_DSP2RMIX_INPUT_2_SOURCE			0x98A
+#define MADERA_DSP2RMIX_INPUT_2_VOLUME			0x98B
+#define MADERA_DSP2RMIX_INPUT_3_SOURCE			0x98C
+#define MADERA_DSP2RMIX_INPUT_3_VOLUME			0x98D
+#define MADERA_DSP2RMIX_INPUT_4_SOURCE			0x98E
+#define MADERA_DSP2RMIX_INPUT_4_VOLUME			0x98F
+#define MADERA_DSP2AUX1MIX_INPUT_1_SOURCE		0x990
+#define MADERA_DSP2AUX2MIX_INPUT_1_SOURCE		0x998
+#define MADERA_DSP2AUX3MIX_INPUT_1_SOURCE		0x9A0
+#define MADERA_DSP2AUX4MIX_INPUT_1_SOURCE		0x9A8
+#define MADERA_DSP2AUX5MIX_INPUT_1_SOURCE		0x9B0
+#define MADERA_DSP2AUX6MIX_INPUT_1_SOURCE		0x9B8
+#define MADERA_DSP3LMIX_INPUT_1_SOURCE			0x9C0
+#define MADERA_DSP3LMIX_INPUT_1_VOLUME			0x9C1
+#define MADERA_DSP3LMIX_INPUT_2_SOURCE			0x9C2
+#define MADERA_DSP3LMIX_INPUT_2_VOLUME			0x9C3
+#define MADERA_DSP3LMIX_INPUT_3_SOURCE			0x9C4
+#define MADERA_DSP3LMIX_INPUT_3_VOLUME			0x9C5
+#define MADERA_DSP3LMIX_INPUT_4_SOURCE			0x9C6
+#define MADERA_DSP3LMIX_INPUT_4_VOLUME			0x9C7
+#define MADERA_DSP3RMIX_INPUT_1_SOURCE			0x9C8
+#define MADERA_DSP3RMIX_INPUT_1_VOLUME			0x9C9
+#define MADERA_DSP3RMIX_INPUT_2_SOURCE			0x9CA
+#define MADERA_DSP3RMIX_INPUT_2_VOLUME			0x9CB
+#define MADERA_DSP3RMIX_INPUT_3_SOURCE			0x9CC
+#define MADERA_DSP3RMIX_INPUT_3_VOLUME			0x9CD
+#define MADERA_DSP3RMIX_INPUT_4_SOURCE			0x9CE
+#define MADERA_DSP3RMIX_INPUT_4_VOLUME			0x9CF
+#define MADERA_DSP3AUX1MIX_INPUT_1_SOURCE		0x9D0
+#define MADERA_DSP3AUX2MIX_INPUT_1_SOURCE		0x9D8
+#define MADERA_DSP3AUX3MIX_INPUT_1_SOURCE		0x9E0
+#define MADERA_DSP3AUX4MIX_INPUT_1_SOURCE		0x9E8
+#define MADERA_DSP3AUX5MIX_INPUT_1_SOURCE		0x9F0
+#define MADERA_DSP3AUX6MIX_INPUT_1_SOURCE		0x9F8
+#define MADERA_DSP4LMIX_INPUT_1_SOURCE			0xA00
+#define MADERA_DSP4LMIX_INPUT_1_VOLUME			0xA01
+#define MADERA_DSP4LMIX_INPUT_2_SOURCE			0xA02
+#define MADERA_DSP4LMIX_INPUT_2_VOLUME			0xA03
+#define MADERA_DSP4LMIX_INPUT_3_SOURCE			0xA04
+#define MADERA_DSP4LMIX_INPUT_3_VOLUME			0xA05
+#define MADERA_DSP4LMIX_INPUT_4_SOURCE			0xA06
+#define MADERA_DSP4LMIX_INPUT_4_VOLUME			0xA07
+#define MADERA_DSP4RMIX_INPUT_1_SOURCE			0xA08
+#define MADERA_DSP4RMIX_INPUT_1_VOLUME			0xA09
+#define MADERA_DSP4RMIX_INPUT_2_SOURCE			0xA0A
+#define MADERA_DSP4RMIX_INPUT_2_VOLUME			0xA0B
+#define MADERA_DSP4RMIX_INPUT_3_SOURCE			0xA0C
+#define MADERA_DSP4RMIX_INPUT_3_VOLUME			0xA0D
+#define MADERA_DSP4RMIX_INPUT_4_SOURCE			0xA0E
+#define MADERA_DSP4RMIX_INPUT_4_VOLUME			0xA0F
+#define MADERA_DSP4AUX1MIX_INPUT_1_SOURCE		0xA10
+#define MADERA_DSP4AUX2MIX_INPUT_1_SOURCE		0xA18
+#define MADERA_DSP4AUX3MIX_INPUT_1_SOURCE		0xA20
+#define MADERA_DSP4AUX4MIX_INPUT_1_SOURCE		0xA28
+#define MADERA_DSP4AUX5MIX_INPUT_1_SOURCE		0xA30
+#define MADERA_DSP4AUX6MIX_INPUT_1_SOURCE		0xA38
+#define MADERA_DSP5LMIX_INPUT_1_SOURCE			0xA40
+#define MADERA_DSP5LMIX_INPUT_1_VOLUME			0xA41
+#define MADERA_DSP5LMIX_INPUT_2_SOURCE			0xA42
+#define MADERA_DSP5LMIX_INPUT_2_VOLUME			0xA43
+#define MADERA_DSP5LMIX_INPUT_3_SOURCE			0xA44
+#define MADERA_DSP5LMIX_INPUT_3_VOLUME			0xA45
+#define MADERA_DSP5LMIX_INPUT_4_SOURCE			0xA46
+#define MADERA_DSP5LMIX_INPUT_4_VOLUME			0xA47
+#define MADERA_DSP5RMIX_INPUT_1_SOURCE			0xA48
+#define MADERA_DSP5RMIX_INPUT_1_VOLUME			0xA49
+#define MADERA_DSP5RMIX_INPUT_2_SOURCE			0xA4A
+#define MADERA_DSP5RMIX_INPUT_2_VOLUME			0xA4B
+#define MADERA_DSP5RMIX_INPUT_3_SOURCE			0xA4C
+#define MADERA_DSP5RMIX_INPUT_3_VOLUME			0xA4D
+#define MADERA_DSP5RMIX_INPUT_4_SOURCE			0xA4E
+#define MADERA_DSP5RMIX_INPUT_4_VOLUME			0xA4F
+#define MADERA_DSP5AUX1MIX_INPUT_1_SOURCE		0xA50
+#define MADERA_DSP5AUX2MIX_INPUT_1_SOURCE		0xA58
+#define MADERA_DSP5AUX3MIX_INPUT_1_SOURCE		0xA60
+#define MADERA_DSP5AUX4MIX_INPUT_1_SOURCE		0xA68
+#define MADERA_DSP5AUX5MIX_INPUT_1_SOURCE		0xA70
+#define MADERA_DSP5AUX6MIX_INPUT_1_SOURCE		0xA78
+#define MADERA_ASRC1_1LMIX_INPUT_1_SOURCE		0xA80
+#define MADERA_ASRC1_1RMIX_INPUT_1_SOURCE		0xA88
+#define MADERA_ASRC1_2LMIX_INPUT_1_SOURCE		0xA90
+#define MADERA_ASRC1_2RMIX_INPUT_1_SOURCE		0xA98
+#define MADERA_ASRC2_1LMIX_INPUT_1_SOURCE		0xAA0
+#define MADERA_ASRC2_1RMIX_INPUT_1_SOURCE		0xAA8
+#define MADERA_ASRC2_2LMIX_INPUT_1_SOURCE		0xAB0
+#define MADERA_ASRC2_2RMIX_INPUT_1_SOURCE		0xAB8
+#define MADERA_ISRC1DEC1MIX_INPUT_1_SOURCE		0xB00
+#define MADERA_ISRC1DEC2MIX_INPUT_1_SOURCE		0xB08
+#define MADERA_ISRC1DEC3MIX_INPUT_1_SOURCE		0xB10
+#define MADERA_ISRC1DEC4MIX_INPUT_1_SOURCE		0xB18
+#define MADERA_ISRC1INT1MIX_INPUT_1_SOURCE		0xB20
+#define MADERA_ISRC1INT2MIX_INPUT_1_SOURCE		0xB28
+#define MADERA_ISRC1INT3MIX_INPUT_1_SOURCE		0xB30
+#define MADERA_ISRC1INT4MIX_INPUT_1_SOURCE		0xB38
+#define MADERA_ISRC2DEC1MIX_INPUT_1_SOURCE		0xB40
+#define MADERA_ISRC2DEC2MIX_INPUT_1_SOURCE		0xB48
+#define MADERA_ISRC2DEC3MIX_INPUT_1_SOURCE		0xB50
+#define MADERA_ISRC2DEC4MIX_INPUT_1_SOURCE		0xB58
+#define MADERA_ISRC2INT1MIX_INPUT_1_SOURCE		0xB60
+#define MADERA_ISRC2INT2MIX_INPUT_1_SOURCE		0xB68
+#define MADERA_ISRC2INT3MIX_INPUT_1_SOURCE		0xB70
+#define MADERA_ISRC2INT4MIX_INPUT_1_SOURCE		0xB78
+#define MADERA_ISRC3DEC1MIX_INPUT_1_SOURCE		0xB80
+#define MADERA_ISRC3DEC2MIX_INPUT_1_SOURCE		0xB88
+#define MADERA_ISRC3DEC3MIX_INPUT_1_SOURCE		0xB90
+#define MADERA_ISRC3DEC4MIX_INPUT_1_SOURCE		0xB98
+#define MADERA_ISRC3INT1MIX_INPUT_1_SOURCE		0xBA0
+#define MADERA_ISRC3INT2MIX_INPUT_1_SOURCE		0xBA8
+#define MADERA_ISRC3INT3MIX_INPUT_1_SOURCE		0xBB0
+#define MADERA_ISRC3INT4MIX_INPUT_1_SOURCE		0xBB8
+#define MADERA_ISRC4DEC1MIX_INPUT_1_SOURCE		0xBC0
+#define MADERA_ISRC4DEC2MIX_INPUT_1_SOURCE		0xBC8
+#define MADERA_ISRC4INT1MIX_INPUT_1_SOURCE		0xBE0
+#define MADERA_ISRC4INT2MIX_INPUT_1_SOURCE		0xBE8
+#define MADERA_DSP6LMIX_INPUT_1_SOURCE			0xC00
+#define MADERA_DSP6LMIX_INPUT_1_VOLUME			0xC01
+#define MADERA_DSP6LMIX_INPUT_2_SOURCE			0xC02
+#define MADERA_DSP6LMIX_INPUT_2_VOLUME			0xC03
+#define MADERA_DSP6LMIX_INPUT_3_SOURCE			0xC04
+#define MADERA_DSP6LMIX_INPUT_3_VOLUME			0xC05
+#define MADERA_DSP6LMIX_INPUT_4_SOURCE			0xC06
+#define MADERA_DSP6LMIX_INPUT_4_VOLUME			0xC07
+#define MADERA_DSP6RMIX_INPUT_1_SOURCE			0xC08
+#define MADERA_DSP6RMIX_INPUT_1_VOLUME			0xC09
+#define MADERA_DSP6RMIX_INPUT_2_SOURCE			0xC0A
+#define MADERA_DSP6RMIX_INPUT_2_VOLUME			0xC0B
+#define MADERA_DSP6RMIX_INPUT_3_SOURCE			0xC0C
+#define MADERA_DSP6RMIX_INPUT_3_VOLUME			0xC0D
+#define MADERA_DSP6RMIX_INPUT_4_SOURCE			0xC0E
+#define MADERA_DSP6RMIX_INPUT_4_VOLUME			0xC0F
+#define MADERA_DSP6AUX1MIX_INPUT_1_SOURCE		0xC10
+#define MADERA_DSP6AUX2MIX_INPUT_1_SOURCE		0xC18
+#define MADERA_DSP6AUX3MIX_INPUT_1_SOURCE		0xC20
+#define MADERA_DSP6AUX4MIX_INPUT_1_SOURCE		0xC28
+#define MADERA_DSP6AUX5MIX_INPUT_1_SOURCE		0xC30
+#define MADERA_DSP6AUX6MIX_INPUT_1_SOURCE		0xC38
+#define MADERA_DSP7LMIX_INPUT_1_SOURCE			0xC40
+#define MADERA_DSP7LMIX_INPUT_1_VOLUME			0xC41
+#define MADERA_DSP7LMIX_INPUT_2_SOURCE			0xC42
+#define MADERA_DSP7LMIX_INPUT_2_VOLUME			0xC43
+#define MADERA_DSP7LMIX_INPUT_3_SOURCE			0xC44
+#define MADERA_DSP7LMIX_INPUT_3_VOLUME			0xC45
+#define MADERA_DSP7LMIX_INPUT_4_SOURCE			0xC46
+#define MADERA_DSP7LMIX_INPUT_4_VOLUME			0xC47
+#define MADERA_DSP7RMIX_INPUT_1_SOURCE			0xC48
+#define MADERA_DSP7RMIX_INPUT_1_VOLUME			0xC49
+#define MADERA_DSP7RMIX_INPUT_2_SOURCE			0xC4A
+#define MADERA_DSP7RMIX_INPUT_2_VOLUME			0xC4B
+#define MADERA_DSP7RMIX_INPUT_3_SOURCE			0xC4C
+#define MADERA_DSP7RMIX_INPUT_3_VOLUME			0xC4D
+#define MADERA_DSP7RMIX_INPUT_4_SOURCE			0xC4E
+#define MADERA_DSP7RMIX_INPUT_4_VOLUME			0xC4F
+#define MADERA_DSP7AUX1MIX_INPUT_1_SOURCE		0xC50
+#define MADERA_DSP7AUX2MIX_INPUT_1_SOURCE		0xC58
+#define MADERA_DSP7AUX3MIX_INPUT_1_SOURCE		0xC60
+#define MADERA_DSP7AUX4MIX_INPUT_1_SOURCE		0xC68
+#define MADERA_DSP7AUX5MIX_INPUT_1_SOURCE		0xC70
+#define MADERA_DSP7AUX6MIX_INPUT_1_SOURCE		0xC78
+#define MADERA_DFC1MIX_INPUT_1_SOURCE			0xDC0
+#define MADERA_DFC2MIX_INPUT_1_SOURCE			0xDC8
+#define MADERA_DFC3MIX_INPUT_1_SOURCE			0xDD0
+#define MADERA_DFC4MIX_INPUT_1_SOURCE			0xDD8
+#define MADERA_DFC5MIX_INPUT_1_SOURCE			0xDE0
+#define MADERA_DFC6MIX_INPUT_1_SOURCE			0xDE8
+#define MADERA_DFC7MIX_INPUT_1_SOURCE			0xDF0
+#define MADERA_DFC8MIX_INPUT_1_SOURCE			0xDF8
+#define MADERA_FX_CTRL1					0xE00
+#define MADERA_FX_CTRL2					0xE01
+#define MADERA_EQ1_1					0xE10
+#define MADERA_EQ1_2					0xE11
+#define MADERA_EQ1_21					0xE24
+#define MADERA_EQ2_1					0xE26
+#define MADERA_EQ2_2					0xE27
+#define MADERA_EQ2_21					0xE3A
+#define MADERA_EQ3_1					0xE3C
+#define MADERA_EQ3_2					0xE3D
+#define MADERA_EQ3_21					0xE50
+#define MADERA_EQ4_1					0xE52
+#define MADERA_EQ4_2					0xE53
+#define MADERA_EQ4_21					0xE66
+#define MADERA_DRC1_CTRL1				0xE80
+#define MADERA_DRC1_CTRL2				0xE81
+#define MADERA_DRC1_CTRL3				0xE82
+#define MADERA_DRC1_CTRL4				0xE83
+#define MADERA_DRC1_CTRL5				0xE84
+#define MADERA_DRC2_CTRL1				0xE88
+#define MADERA_DRC2_CTRL2				0xE89
+#define MADERA_DRC2_CTRL3				0xE8A
+#define MADERA_DRC2_CTRL4				0xE8B
+#define MADERA_DRC2_CTRL5				0xE8C
+#define MADERA_HPLPF1_1					0xEC0
+#define MADERA_HPLPF1_2					0xEC1
+#define MADERA_HPLPF2_1					0xEC4
+#define MADERA_HPLPF2_2					0xEC5
+#define MADERA_HPLPF3_1					0xEC8
+#define MADERA_HPLPF3_2					0xEC9
+#define MADERA_HPLPF4_1					0xECC
+#define MADERA_HPLPF4_2					0xECD
+#define MADERA_ASRC2_ENABLE				0xED0
+#define MADERA_ASRC2_STATUS				0xED1
+#define MADERA_ASRC2_RATE1				0xED2
+#define MADERA_ASRC2_RATE2				0xED3
+#define MADERA_ASRC1_ENABLE				0xEE0
+#define MADERA_ASRC1_STATUS				0xEE1
+#define MADERA_ASRC1_RATE1				0xEE2
+#define MADERA_ASRC1_RATE2				0xEE3
+#define MADERA_ISRC_1_CTRL_1				0xEF0
+#define MADERA_ISRC_1_CTRL_2				0xEF1
+#define MADERA_ISRC_1_CTRL_3				0xEF2
+#define MADERA_ISRC_2_CTRL_1				0xEF3
+#define MADERA_ISRC_2_CTRL_2				0xEF4
+#define MADERA_ISRC_2_CTRL_3				0xEF5
+#define MADERA_ISRC_3_CTRL_1				0xEF6
+#define MADERA_ISRC_3_CTRL_2				0xEF7
+#define MADERA_ISRC_3_CTRL_3				0xEF8
+#define MADERA_ISRC_4_CTRL_1				0xEF9
+#define MADERA_ISRC_4_CTRL_2				0xEFA
+#define MADERA_ISRC_4_CTRL_3				0xEFB
+#define MADERA_CLOCK_CONTROL				0xF00
+#define MADERA_ANC_SRC					0xF01
+#define MADERA_DSP_STATUS				0xF02
+#define MADERA_ANC_COEFF_START				0xF08
+#define MADERA_ANC_COEFF_END				0xF12
+#define MADERA_FCL_FILTER_CONTROL			0xF15
+#define MADERA_FCL_ADC_REFORMATTER_CONTROL		0xF17
+#define MADERA_FCL_COEFF_START				0xF18
+#define MADERA_FCL_COEFF_END				0xF69
+#define MADERA_FCR_FILTER_CONTROL			0xF71
+#define MADERA_FCR_ADC_REFORMATTER_CONTROL		0xF73
+#define MADERA_FCR_COEFF_START				0xF74
+#define MADERA_FCR_COEFF_END				0xFC5
+#define MADERA_DAC_COMP_1				0x1300
+#define MADERA_DAC_COMP_2				0x1302
+#define MADERA_FRF_COEFFICIENT_1L_1			0x1380
+#define MADERA_FRF_COEFFICIENT_1L_2			0x1381
+#define MADERA_FRF_COEFFICIENT_1L_3			0x1382
+#define MADERA_FRF_COEFFICIENT_1L_4			0x1383
+#define MADERA_FRF_COEFFICIENT_1R_1			0x1390
+#define MADERA_FRF_COEFFICIENT_1R_2			0x1391
+#define MADERA_FRF_COEFFICIENT_1R_3			0x1392
+#define MADERA_FRF_COEFFICIENT_1R_4			0x1393
+#define MADERA_FRF_COEFFICIENT_2L_1			0x13A0
+#define MADERA_FRF_COEFFICIENT_2L_2			0x13A1
+#define MADERA_FRF_COEFFICIENT_2L_3			0x13A2
+#define MADERA_FRF_COEFFICIENT_2L_4			0x13A3
+#define MADERA_FRF_COEFFICIENT_2R_1			0x13B0
+#define MADERA_FRF_COEFFICIENT_2R_2			0x13B1
+#define MADERA_FRF_COEFFICIENT_2R_3			0x13B2
+#define MADERA_FRF_COEFFICIENT_2R_4			0x13B3
+#define MADERA_FRF_COEFFICIENT_3L_1			0x13C0
+#define MADERA_FRF_COEFFICIENT_3L_2			0x13C1
+#define MADERA_FRF_COEFFICIENT_3L_3			0x13C2
+#define MADERA_FRF_COEFFICIENT_3L_4			0x13C3
+#define MADERA_FRF_COEFFICIENT_3R_1			0x13D0
+#define MADERA_FRF_COEFFICIENT_3R_2			0x13D1
+#define MADERA_FRF_COEFFICIENT_3R_3			0x13D2
+#define MADERA_FRF_COEFFICIENT_3R_4			0x13D3
+#define MADERA_FRF_COEFFICIENT_4L_1			0x13E0
+#define MADERA_FRF_COEFFICIENT_4L_2			0x13E1
+#define MADERA_FRF_COEFFICIENT_4L_3			0x13E2
+#define MADERA_FRF_COEFFICIENT_4L_4			0x13E3
+#define MADERA_FRF_COEFFICIENT_4R_1			0x13F0
+#define MADERA_FRF_COEFFICIENT_4R_2			0x13F1
+#define MADERA_FRF_COEFFICIENT_4R_3			0x13F2
+#define MADERA_FRF_COEFFICIENT_4R_4			0x13F3
+#define CS47L35_FRF_COEFFICIENT_4L_1			0x13A0
+#define CS47L35_FRF_COEFFICIENT_4L_2			0x13A1
+#define CS47L35_FRF_COEFFICIENT_4L_3			0x13A2
+#define CS47L35_FRF_COEFFICIENT_4L_4			0x13A3
+#define CS47L35_FRF_COEFFICIENT_5L_1			0x13B0
+#define CS47L35_FRF_COEFFICIENT_5L_2			0x13B1
+#define CS47L35_FRF_COEFFICIENT_5L_3			0x13B2
+#define CS47L35_FRF_COEFFICIENT_5L_4			0x13B3
+#define CS47L35_FRF_COEFFICIENT_5R_1			0x13C0
+#define CS47L35_FRF_COEFFICIENT_5R_2			0x13C1
+#define CS47L35_FRF_COEFFICIENT_5R_3			0x13C2
+#define CS47L35_FRF_COEFFICIENT_5R_4			0x13C3
+#define MADERA_FRF_COEFFICIENT_5L_1			0x1400
+#define MADERA_FRF_COEFFICIENT_5L_2			0x1401
+#define MADERA_FRF_COEFFICIENT_5L_3			0x1402
+#define MADERA_FRF_COEFFICIENT_5L_4			0x1403
+#define MADERA_FRF_COEFFICIENT_5R_1			0x1410
+#define MADERA_FRF_COEFFICIENT_5R_2			0x1411
+#define MADERA_FRF_COEFFICIENT_5R_3			0x1412
+#define MADERA_FRF_COEFFICIENT_5R_4			0x1413
+#define MADERA_FRF_COEFFICIENT_6L_1			0x1420
+#define MADERA_FRF_COEFFICIENT_6L_2			0x1421
+#define MADERA_FRF_COEFFICIENT_6L_3			0x1422
+#define MADERA_FRF_COEFFICIENT_6L_4			0x1423
+#define MADERA_FRF_COEFFICIENT_6R_1			0x1430
+#define MADERA_FRF_COEFFICIENT_6R_2			0x1431
+#define MADERA_FRF_COEFFICIENT_6R_3			0x1432
+#define MADERA_FRF_COEFFICIENT_6R_4			0x1433
+#define MADERA_DFC1_CTRL				0x1480
+#define MADERA_DFC1_RX					0x1482
+#define MADERA_DFC1_TX					0x1484
+#define MADERA_DFC2_CTRL				0x1486
+#define MADERA_DFC2_RX					0x1488
+#define MADERA_DFC2_TX					0x148A
+#define MADERA_DFC3_CTRL				0x148C
+#define MADERA_DFC3_RX					0x148E
+#define MADERA_DFC3_TX					0x1490
+#define MADERA_DFC4_CTRL				0x1492
+#define MADERA_DFC4_RX					0x1494
+#define MADERA_DFC4_TX					0x1496
+#define MADERA_DFC5_CTRL				0x1498
+#define MADERA_DFC5_RX					0x149A
+#define MADERA_DFC5_TX					0x149C
+#define MADERA_DFC6_CTRL				0x149E
+#define MADERA_DFC6_RX					0x14A0
+#define MADERA_DFC6_TX					0x14A2
+#define MADERA_DFC7_CTRL				0x14A4
+#define MADERA_DFC7_RX					0x14A6
+#define MADERA_DFC7_TX					0x14A8
+#define MADERA_DFC8_CTRL				0x14AA
+#define MADERA_DFC8_RX					0x14AC
+#define MADERA_DFC8_TX					0x14AE
+#define MADERA_DFC_STATUS				0x14B6
+#define MADERA_ADSP2_IRQ0				0x1600
+#define MADERA_ADSP2_IRQ1				0x1601
+#define MADERA_ADSP2_IRQ2				0x1602
+#define MADERA_ADSP2_IRQ3				0x1603
+#define MADERA_ADSP2_IRQ4				0x1604
+#define MADERA_ADSP2_IRQ5				0x1605
+#define MADERA_ADSP2_IRQ6				0x1606
+#define MADERA_ADSP2_IRQ7				0x1607
+#define MADERA_GPIO1_CTRL_1				0x1700
+#define MADERA_GPIO1_CTRL_2				0x1701
+#define MADERA_GPIO2_CTRL_1				0x1702
+#define MADERA_GPIO2_CTRL_2				0x1703
+#define MADERA_GPIO16_CTRL_1				0x171E
+#define MADERA_GPIO16_CTRL_2				0x171F
+#define MADERA_GPIO38_CTRL_1				0x174A
+#define MADERA_GPIO38_CTRL_2				0x174B
+#define MADERA_GPIO40_CTRL_1				0x174E
+#define MADERA_GPIO40_CTRL_2				0x174F
+#define MADERA_IRQ1_STATUS_1				0x1800
+#define MADERA_IRQ1_STATUS_2				0x1801
+#define MADERA_IRQ1_STATUS_6				0x1805
+#define MADERA_IRQ1_STATUS_7				0x1806
+#define MADERA_IRQ1_STATUS_9				0x1808
+#define MADERA_IRQ1_STATUS_11				0x180A
+#define MADERA_IRQ1_STATUS_12				0x180B
+#define MADERA_IRQ1_STATUS_15				0x180E
+#define MADERA_IRQ1_STATUS_33				0x1820
+#define MADERA_IRQ1_MASK_1				0x1840
+#define MADERA_IRQ1_MASK_2				0x1841
+#define MADERA_IRQ1_MASK_33				0x1860
+#define MADERA_IRQ1_RAW_STATUS_1			0x1880
+#define MADERA_IRQ1_RAW_STATUS_2			0x1881
+#define MADERA_IRQ1_RAW_STATUS_15			0x188E
+#define MADERA_IRQ1_RAW_STATUS_33			0x18A0
+#define MADERA_INTERRUPT_DEBOUNCE_7			0x1A06
+#define MADERA_INTERRUPT_DEBOUNCE_15			0x1A0E
+#define MADERA_IRQ1_CTRL				0x1A80
+#define MADERA_IRQ2_CTRL				0x1A82
+#define MADERA_INTERRUPT_RAW_STATUS_1			0x1AA0
+#define MADERA_WSEQ_SEQUENCE_1				0x3000
+#define MADERA_WSEQ_SEQUENCE_252			0x31F6
+#define CS47L35_OTP_HPDET_CAL_1				0x31F8
+#define CS47L35_OTP_HPDET_CAL_2				0x31FA
+#define MADERA_WSEQ_SEQUENCE_508			0x33F6
+#define CS47L85_OTP_HPDET_CAL_1				0x33F8
+#define CS47L85_OTP_HPDET_CAL_2				0x33FA
+#define MADERA_OTP_HPDET_CAL_1				0x20004
+#define MADERA_OTP_HPDET_CAL_2				0x20006
+#define MADERA_DSP1_CONFIG_1				0x0FFE00
+#define MADERA_DSP1_CONFIG_2				0x0FFE02
+#define MADERA_DSP1_SCRATCH_1				0x0FFE40
+#define MADERA_DSP1_SCRATCH_2				0x0FFE42
+#define MADERA_DSP1_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0xFFE7C
+#define MADERA_DSP2_CONFIG_1				0x17FE00
+#define MADERA_DSP2_CONFIG_2				0x17FE02
+#define MADERA_DSP2_SCRATCH_1				0x17FE40
+#define MADERA_DSP2_SCRATCH_2				0x17FE42
+#define MADERA_DSP2_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x17FE7C
+#define MADERA_DSP3_CONFIG_1				0x1FFE00
+#define MADERA_DSP3_CONFIG_2				0x1FFE02
+#define MADERA_DSP3_SCRATCH_1				0x1FFE40
+#define MADERA_DSP3_SCRATCH_2				0x1FFE42
+#define MADERA_DSP3_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x1FFE7C
+#define MADERA_DSP4_CONFIG_1				0x27FE00
+#define MADERA_DSP4_CONFIG_2				0x27FE02
+#define MADERA_DSP4_SCRATCH_1				0x27FE40
+#define MADERA_DSP4_SCRATCH_2				0x27FE42
+#define MADERA_DSP4_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x27FE7C
+#define MADERA_DSP5_CONFIG_1				0x2FFE00
+#define MADERA_DSP5_CONFIG_2				0x2FFE02
+#define MADERA_DSP5_SCRATCH_1				0x2FFE40
+#define MADERA_DSP5_SCRATCH_2				0x2FFE42
+#define MADERA_DSP5_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x2FFE7C
+#define MADERA_DSP6_CONFIG_1				0x37FE00
+#define MADERA_DSP6_CONFIG_2				0x37FE02
+#define MADERA_DSP6_SCRATCH_1				0x37FE40
+#define MADERA_DSP6_SCRATCH_2				0x37FE42
+#define MADERA_DSP6_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x37FE7C
+#define MADERA_DSP7_CONFIG_1				0x3FFE00
+#define MADERA_DSP7_CONFIG_2				0x3FFE02
+#define MADERA_DSP7_SCRATCH_1				0x3FFE40
+#define MADERA_DSP7_SCRATCH_2				0x3FFE42
+#define MADERA_DSP7_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x3FFE7C
+
+/* (0x0000)  Software_Reset */
+#define MADERA_SW_RST_DEV_ID1_MASK			0xFFFF
+#define MADERA_SW_RST_DEV_ID1_SHIFT			     0
+#define MADERA_SW_RST_DEV_ID1_WIDTH			    16
+
+/* (0x0001)  Hardware_Revision */
+#define MADERA_HW_REVISION_MASK				0x00FF
+#define MADERA_HW_REVISION_SHIFT			     0
+#define MADERA_HW_REVISION_WIDTH			     8
+
+/* (0x0020)  Tone_Generator_1 */
+#define MADERA_TONE2_ENA				0x0002
+#define MADERA_TONE2_ENA_MASK				0x0002
+#define MADERA_TONE2_ENA_SHIFT				     1
+#define MADERA_TONE2_ENA_WIDTH				     1
+#define MADERA_TONE1_ENA				0x0001
+#define MADERA_TONE1_ENA_MASK				0x0001
+#define MADERA_TONE1_ENA_SHIFT				     0
+#define MADERA_TONE1_ENA_WIDTH				     1
+
+/* (0x0021)  Tone_Generator_2 */
+#define MADERA_TONE1_LVL_0_MASK				0xFFFF
+#define MADERA_TONE1_LVL_0_SHIFT			     0
+#define MADERA_TONE1_LVL_0_WIDTH			    16
+
+/* (0x0022)  Tone_Generator_3 */
+#define MADERA_TONE1_LVL_MASK				0x00FF
+#define MADERA_TONE1_LVL_SHIFT				     0
+#define MADERA_TONE1_LVL_WIDTH				     8
+
+/* (0x0023)  Tone_Generator_4 */
+#define MADERA_TONE2_LVL_0_MASK				0xFFFF
+#define MADERA_TONE2_LVL_0_SHIFT			     0
+#define MADERA_TONE2_LVL_0_WIDTH			    16
+
+/* (0x0024)  Tone_Generator_5 */
+#define MADERA_TONE2_LVL_MASK				0x00FF
+#define MADERA_TONE2_LVL_SHIFT				     0
+#define MADERA_TONE2_LVL_WIDTH				     8
+
+/* (0x0030)  PWM_Drive_1 */
+#define MADERA_PWM2_ENA					0x0002
+#define MADERA_PWM2_ENA_MASK				0x0002
+#define MADERA_PWM2_ENA_SHIFT				     1
+#define MADERA_PWM2_ENA_WIDTH				     1
+#define MADERA_PWM1_ENA					0x0001
+#define MADERA_PWM1_ENA_MASK				0x0001
+#define MADERA_PWM1_ENA_SHIFT				     0
+#define MADERA_PWM1_ENA_WIDTH				     1
+
+/* (0x00A0)  Comfort_Noise_Generator */
+#define MADERA_NOISE_GEN_ENA				0x0020
+#define MADERA_NOISE_GEN_ENA_MASK			0x0020
+#define MADERA_NOISE_GEN_ENA_SHIFT			     5
+#define MADERA_NOISE_GEN_ENA_WIDTH			     1
+#define MADERA_NOISE_GEN_GAIN_MASK			0x001F
+#define MADERA_NOISE_GEN_GAIN_SHIFT			     0
+#define MADERA_NOISE_GEN_GAIN_WIDTH			     5
+
+/* (0x0100)  Clock_32k_1 */
+#define MADERA_CLK_32K_ENA				0x0040
+#define MADERA_CLK_32K_ENA_MASK				0x0040
+#define MADERA_CLK_32K_ENA_SHIFT			     6
+#define MADERA_CLK_32K_ENA_WIDTH			     1
+#define MADERA_CLK_32K_SRC_MASK				0x0003
+#define MADERA_CLK_32K_SRC_SHIFT			     0
+#define MADERA_CLK_32K_SRC_WIDTH			     2
+
+/* (0x0101)  System_Clock_1 */
+#define MADERA_SYSCLK_FRAC				0x8000
+#define MADERA_SYSCLK_FRAC_MASK				0x8000
+#define MADERA_SYSCLK_FRAC_SHIFT			    15
+#define MADERA_SYSCLK_FRAC_WIDTH			     1
+#define MADERA_SYSCLK_FREQ_MASK				0x0700
+#define MADERA_SYSCLK_FREQ_SHIFT			     8
+#define MADERA_SYSCLK_FREQ_WIDTH			     3
+#define MADERA_SYSCLK_ENA				0x0040
+#define MADERA_SYSCLK_ENA_MASK				0x0040
+#define MADERA_SYSCLK_ENA_SHIFT				     6
+#define MADERA_SYSCLK_ENA_WIDTH				     1
+#define MADERA_SYSCLK_SRC_MASK				0x000F
+#define MADERA_SYSCLK_SRC_SHIFT				     0
+#define MADERA_SYSCLK_SRC_WIDTH				     4
+
+/* (0x0102)  Sample_rate_1 */
+#define MADERA_SAMPLE_RATE_1_MASK			0x001F
+#define MADERA_SAMPLE_RATE_1_SHIFT			     0
+#define MADERA_SAMPLE_RATE_1_WIDTH			     5
+
+/* (0x0103)  Sample_rate_2 */
+#define MADERA_SAMPLE_RATE_2_MASK			0x001F
+#define MADERA_SAMPLE_RATE_2_SHIFT			     0
+#define MADERA_SAMPLE_RATE_2_WIDTH			     5
+
+/* (0x0104)  Sample_rate_3 */
+#define MADERA_SAMPLE_RATE_3_MASK			0x001F
+#define MADERA_SAMPLE_RATE_3_SHIFT			     0
+#define MADERA_SAMPLE_RATE_3_WIDTH			     5
+
+/* (0x0112)  Async_clock_1 */
+#define MADERA_ASYNC_CLK_FREQ_MASK			0x0700
+#define MADERA_ASYNC_CLK_FREQ_SHIFT			     8
+#define MADERA_ASYNC_CLK_FREQ_WIDTH			     3
+#define MADERA_ASYNC_CLK_ENA				0x0040
+#define MADERA_ASYNC_CLK_ENA_MASK			0x0040
+#define MADERA_ASYNC_CLK_ENA_SHIFT			     6
+#define MADERA_ASYNC_CLK_ENA_WIDTH			     1
+#define MADERA_ASYNC_CLK_SRC_MASK			0x000F
+#define MADERA_ASYNC_CLK_SRC_SHIFT			     0
+#define MADERA_ASYNC_CLK_SRC_WIDTH			     4
+
+/* (0x0113)  Async_sample_rate_1 */
+#define MADERA_ASYNC_SAMPLE_RATE_1_MASK			0x001F
+#define MADERA_ASYNC_SAMPLE_RATE_1_SHIFT		     0
+#define MADERA_ASYNC_SAMPLE_RATE_1_WIDTH		     5
+
+/* (0x0114)  Async_sample_rate_2 */
+#define MADERA_ASYNC_SAMPLE_RATE_2_MASK			0x001F
+#define MADERA_ASYNC_SAMPLE_RATE_2_SHIFT		     0
+#define MADERA_ASYNC_SAMPLE_RATE_2_WIDTH		     5
+
+/* (0x0120)  DSP_Clock_1 */
+#define MADERA_DSP_CLK_FREQ_LEGACY			0x0700
+#define MADERA_DSP_CLK_FREQ_LEGACY_MASK			0x0700
+#define MADERA_DSP_CLK_FREQ_LEGACY_SHIFT		     8
+#define MADERA_DSP_CLK_FREQ_LEGACY_WIDTH		     3
+#define MADERA_DSP_CLK_ENA				0x0040
+#define MADERA_DSP_CLK_ENA_MASK				0x0040
+#define MADERA_DSP_CLK_ENA_SHIFT			     6
+#define MADERA_DSP_CLK_ENA_WIDTH			     1
+#define MADERA_DSP_CLK_SRC				0x000F
+#define MADERA_DSP_CLK_SRC_MASK				0x000F
+#define MADERA_DSP_CLK_SRC_SHIFT			     0
+#define MADERA_DSP_CLK_SRC_WIDTH			     4
+
+/* (0x0122)  DSP_Clock_2 */
+#define MADERA_DSP_CLK_FREQ_MASK			0x03FF
+#define MADERA_DSP_CLK_FREQ_SHIFT			     0
+#define MADERA_DSP_CLK_FREQ_WIDTH			    10
+
+/* (0x0149)  Output_system_clock */
+#define MADERA_OPCLK_ENA				0x8000
+#define MADERA_OPCLK_ENA_MASK				0x8000
+#define MADERA_OPCLK_ENA_SHIFT				    15
+#define MADERA_OPCLK_ENA_WIDTH				     1
+#define MADERA_OPCLK_DIV_MASK				0x00F8
+#define MADERA_OPCLK_DIV_SHIFT				     3
+#define MADERA_OPCLK_DIV_WIDTH				     5
+#define MADERA_OPCLK_SEL_MASK				0x0007
+#define MADERA_OPCLK_SEL_SHIFT				     0
+#define MADERA_OPCLK_SEL_WIDTH				     3
+
+/* (0x014A)  Output_async_clock */
+#define MADERA_OPCLK_ASYNC_ENA				0x8000
+#define MADERA_OPCLK_ASYNC_ENA_MASK			0x8000
+#define MADERA_OPCLK_ASYNC_ENA_SHIFT			    15
+#define MADERA_OPCLK_ASYNC_ENA_WIDTH			     1
+#define MADERA_OPCLK_ASYNC_DIV_MASK			0x00F8
+#define MADERA_OPCLK_ASYNC_DIV_SHIFT			     3
+#define MADERA_OPCLK_ASYNC_DIV_WIDTH			     5
+#define MADERA_OPCLK_ASYNC_SEL_MASK			0x0007
+#define MADERA_OPCLK_ASYNC_SEL_SHIFT			     0
+#define MADERA_OPCLK_ASYNC_SEL_WIDTH			     3
+
+/* (0x0171)  FLL1_Control_1 */
+#define MADERA_FLL1_FREERUN				0x0002
+#define MADERA_FLL1_FREERUN_MASK			0x0002
+#define MADERA_FLL1_FREERUN_SHIFT			     1
+#define MADERA_FLL1_FREERUN_WIDTH			     1
+#define MADERA_FLL1_ENA					0x0001
+#define MADERA_FLL1_ENA_MASK				0x0001
+#define MADERA_FLL1_ENA_SHIFT				     0
+#define MADERA_FLL1_ENA_WIDTH				     1
+
+/* (0x0172)  FLL1_Control_2 */
+#define MADERA_FLL1_CTRL_UPD				0x8000
+#define MADERA_FLL1_CTRL_UPD_MASK			0x8000
+#define MADERA_FLL1_CTRL_UPD_SHIFT			    15
+#define MADERA_FLL1_CTRL_UPD_WIDTH			     1
+#define MADERA_FLL1_N_MASK				0x03FF
+#define MADERA_FLL1_N_SHIFT				     0
+#define MADERA_FLL1_N_WIDTH				    10
+
+/* (0x0173)  FLL1_Control_3 */
+#define MADERA_FLL1_THETA_MASK				0xFFFF
+#define MADERA_FLL1_THETA_SHIFT				     0
+#define MADERA_FLL1_THETA_WIDTH				    16
+
+/* (0x0174)  FLL1_Control_4 */
+#define MADERA_FLL1_LAMBDA_MASK				0xFFFF
+#define MADERA_FLL1_LAMBDA_SHIFT			     0
+#define MADERA_FLL1_LAMBDA_WIDTH			    16
+
+/* (0x0175)  FLL1_Control_5 */
+#define MADERA_FLL1_FRATIO_MASK				0x0F00
+#define MADERA_FLL1_FRATIO_SHIFT			     8
+#define MADERA_FLL1_FRATIO_WIDTH			     4
+
+/* (0x0176)  FLL1_Control_6 */
+#define MADERA_FLL1_REFCLK_DIV_MASK			0x00C0
+#define MADERA_FLL1_REFCLK_DIV_SHIFT			     6
+#define MADERA_FLL1_REFCLK_DIV_WIDTH			     2
+#define MADERA_FLL1_REFCLK_SRC_MASK			0x000F
+#define MADERA_FLL1_REFCLK_SRC_SHIFT			     0
+#define MADERA_FLL1_REFCLK_SRC_WIDTH			     4
+
+/* (0x0177)  FLL1_Loop_Filter_Test_1 */
+#define MADERA_FLL1_FRC_INTEG_UPD			0x8000
+#define MADERA_FLL1_FRC_INTEG_UPD_MASK			0x8000
+#define MADERA_FLL1_FRC_INTEG_UPD_SHIFT			    15
+#define MADERA_FLL1_FRC_INTEG_UPD_WIDTH			     1
+#define MADERA_FLL1_FRC_INTEG_VAL_MASK			0x0FFF
+#define MADERA_FLL1_FRC_INTEG_VAL_SHIFT			     0
+#define MADERA_FLL1_FRC_INTEG_VAL_WIDTH			    12
+
+/* (0x0179)  FLL1_Control_7 */
+#define MADERA_FLL1_GAIN_MASK				0x003c
+#define MADERA_FLL1_GAIN_SHIFT				     2
+#define MADERA_FLL1_GAIN_WIDTH				     4
+
+/* (0x017A)  FLL1_EFS_2 */
+#define MADERA_FLL1_PHASE_GAIN_MASK			0xF000
+#define MADERA_FLL1_PHASE_GAIN_SHIFT			    12
+#define MADERA_FLL1_PHASE_GAIN_WIDTH			     4
+#define MADERA_FLL1_PHASE_ENA_MASK			0x0800
+#define MADERA_FLL1_PHASE_ENA_SHIFT			    11
+#define MADERA_FLL1_PHASE_ENA_WIDTH			     1
+
+/* (0x0181)  FLL1_Synchroniser_1 */
+#define MADERA_FLL1_SYNC_ENA				0x0001
+#define MADERA_FLL1_SYNC_ENA_MASK			0x0001
+#define MADERA_FLL1_SYNC_ENA_SHIFT			     0
+#define MADERA_FLL1_SYNC_ENA_WIDTH			     1
+
+/* (0x0182)  FLL1_Synchroniser_2 */
+#define MADERA_FLL1_SYNC_N_MASK				0x03FF
+#define MADERA_FLL1_SYNC_N_SHIFT			     0
+#define MADERA_FLL1_SYNC_N_WIDTH			    10
+
+/* (0x0183)  FLL1_Synchroniser_3 */
+#define MADERA_FLL1_SYNC_THETA_MASK			0xFFFF
+#define MADERA_FLL1_SYNC_THETA_SHIFT			     0
+#define MADERA_FLL1_SYNC_THETA_WIDTH			    16
+
+/* (0x0184)  FLL1_Synchroniser_4 */
+#define MADERA_FLL1_SYNC_LAMBDA_MASK			0xFFFF
+#define MADERA_FLL1_SYNC_LAMBDA_SHIFT			     0
+#define MADERA_FLL1_SYNC_LAMBDA_WIDTH			    16
+
+/* (0x0185)  FLL1_Synchroniser_5 */
+#define MADERA_FLL1_SYNC_FRATIO_MASK			0x0700
+#define MADERA_FLL1_SYNC_FRATIO_SHIFT			     8
+#define MADERA_FLL1_SYNC_FRATIO_WIDTH			     3
+
+/* (0x0186)  FLL1_Synchroniser_6 */
+#define MADERA_FLL1_SYNCCLK_DIV_MASK			0x00C0
+#define MADERA_FLL1_SYNCCLK_DIV_SHIFT			     6
+#define MADERA_FLL1_SYNCCLK_DIV_WIDTH			     2
+#define MADERA_FLL1_SYNCCLK_SRC_MASK			0x000F
+#define MADERA_FLL1_SYNCCLK_SRC_SHIFT			     0
+#define MADERA_FLL1_SYNCCLK_SRC_WIDTH			     4
+
+/* (0x0187)  FLL1_Synchroniser_7 */
+#define MADERA_FLL1_SYNC_GAIN_MASK			0x003c
+#define MADERA_FLL1_SYNC_GAIN_SHIFT			     2
+#define MADERA_FLL1_SYNC_GAIN_WIDTH			     4
+#define MADERA_FLL1_SYNC_DFSAT				0x0001
+#define MADERA_FLL1_SYNC_DFSAT_MASK			0x0001
+#define MADERA_FLL1_SYNC_DFSAT_SHIFT			     0
+#define MADERA_FLL1_SYNC_DFSAT_WIDTH			     1
+
+/* (0x01D1)  FLL_AO_Control_1 */
+#define MADERA_FLL_AO_HOLD				0x0004
+#define MADERA_FLL_AO_HOLD_MASK				0x0004
+#define MADERA_FLL_AO_HOLD_SHIFT			     2
+#define MADERA_FLL_AO_HOLD_WIDTH			     1
+#define MADERA_FLL_AO_FREERUN				0x0002
+#define MADERA_FLL_AO_FREERUN_MASK			0x0002
+#define MADERA_FLL_AO_FREERUN_SHIFT			     1
+#define MADERA_FLL_AO_FREERUN_WIDTH			     1
+#define MADERA_FLL_AO_ENA				0x0001
+#define MADERA_FLL_AO_ENA_MASK				0x0001
+#define MADERA_FLL_AO_ENA_SHIFT				     0
+#define MADERA_FLL_AO_ENA_WIDTH				     1
+
+/* (0x01D2)  FLL_AO_Control_2 */
+#define MADERA_FLL_AO_CTRL_UPD				0x8000
+#define MADERA_FLL_AO_CTRL_UPD_MASK			0x8000
+#define MADERA_FLL_AO_CTRL_UPD_SHIFT			    15
+#define MADERA_FLL_AO_CTRL_UPD_WIDTH			     1
+
+/* (0x01D6)  FLL_AO_Control_6 */
+#define MADERA_FLL_AO_REFCLK_SRC_MASK			0x000F
+#define MADERA_FLL_AO_REFCLK_SRC_SHIFT			     0
+#define MADERA_FLL_AO_REFCLK_SRC_WIDTH			     4
+
+/* (0x0200)  Mic_Charge_Pump_1 */
+#define MADERA_CPMIC_BYPASS				0x0002
+#define MADERA_CPMIC_BYPASS_MASK			0x0002
+#define MADERA_CPMIC_BYPASS_SHIFT			     1
+#define MADERA_CPMIC_BYPASS_WIDTH			     1
+#define MADERA_CPMIC_ENA				0x0001
+#define MADERA_CPMIC_ENA_MASK				0x0001
+#define MADERA_CPMIC_ENA_SHIFT				     0
+#define MADERA_CPMIC_ENA_WIDTH				     1
+
+/* (0x0210)  LDO1_Control_1 */
+#define MADERA_LDO1_VSEL_MASK				0x07E0
+#define MADERA_LDO1_VSEL_SHIFT				     5
+#define MADERA_LDO1_VSEL_WIDTH				     6
+#define MADERA_LDO1_FAST				0x0010
+#define MADERA_LDO1_FAST_MASK				0x0010
+#define MADERA_LDO1_FAST_SHIFT				     4
+#define MADERA_LDO1_FAST_WIDTH				     1
+#define MADERA_LDO1_DISCH				0x0004
+#define MADERA_LDO1_DISCH_MASK				0x0004
+#define MADERA_LDO1_DISCH_SHIFT				     2
+#define MADERA_LDO1_DISCH_WIDTH				     1
+#define MADERA_LDO1_BYPASS				0x0002
+#define MADERA_LDO1_BYPASS_MASK				0x0002
+#define MADERA_LDO1_BYPASS_SHIFT			     1
+#define MADERA_LDO1_BYPASS_WIDTH			     1
+#define MADERA_LDO1_ENA					0x0001
+#define MADERA_LDO1_ENA_MASK				0x0001
+#define MADERA_LDO1_ENA_SHIFT				     0
+#define MADERA_LDO1_ENA_WIDTH				     1
+
+/* (0x0213)  LDO2_Control_1 */
+#define MADERA_LDO2_VSEL_MASK				0x07E0
+#define MADERA_LDO2_VSEL_SHIFT				     5
+#define MADERA_LDO2_VSEL_WIDTH				     6
+#define MADERA_LDO2_FAST				0x0010
+#define MADERA_LDO2_FAST_MASK				0x0010
+#define MADERA_LDO2_FAST_SHIFT				     4
+#define MADERA_LDO2_FAST_WIDTH				     1
+#define MADERA_LDO2_DISCH				0x0004
+#define MADERA_LDO2_DISCH_MASK				0x0004
+#define MADERA_LDO2_DISCH_SHIFT				     2
+#define MADERA_LDO2_DISCH_WIDTH				     1
+#define MADERA_LDO2_BYPASS				0x0002
+#define MADERA_LDO2_BYPASS_MASK				0x0002
+#define MADERA_LDO2_BYPASS_SHIFT			     1
+#define MADERA_LDO2_BYPASS_WIDTH			     1
+#define MADERA_LDO2_ENA					0x0001
+#define MADERA_LDO2_ENA_MASK				0x0001
+#define MADERA_LDO2_ENA_SHIFT				     0
+#define MADERA_LDO2_ENA_WIDTH				     1
+
+/* (0x0218)  Mic_Bias_Ctrl_1 */
+#define MADERA_MICB1_ENA				0x0001
+#define MADERA_MICB1_ENA_MASK				0x0001
+#define MADERA_MICB1_ENA_SHIFT				     0
+#define MADERA_MICB1_ENA_WIDTH				     1
+
+/* (0x021C)  Mic_Bias_Ctrl_5 */
+#define MADERA_MICB1D_ENA				0x1000
+#define MADERA_MICB1D_ENA_MASK				0x1000
+#define MADERA_MICB1D_ENA_SHIFT				    12
+#define MADERA_MICB1D_ENA_WIDTH				     1
+#define MADERA_MICB1C_ENA				0x0100
+#define MADERA_MICB1C_ENA_MASK				0x0100
+#define MADERA_MICB1C_ENA_SHIFT				     8
+#define MADERA_MICB1C_ENA_WIDTH				     1
+#define MADERA_MICB1B_ENA				0x0010
+#define MADERA_MICB1B_ENA_MASK				0x0010
+#define MADERA_MICB1B_ENA_SHIFT				     4
+#define MADERA_MICB1B_ENA_WIDTH				     1
+#define MADERA_MICB1A_ENA				0x0001
+#define MADERA_MICB1A_ENA_MASK				0x0001
+#define MADERA_MICB1A_ENA_SHIFT				     0
+#define MADERA_MICB1A_ENA_WIDTH				     1
+
+/* (0x021E)  Mic_Bias_Ctrl_6 */
+#define MADERA_MICB2D_ENA				0x1000
+#define MADERA_MICB2D_ENA_MASK				0x1000
+#define MADERA_MICB2D_ENA_SHIFT				    12
+#define MADERA_MICB2D_ENA_WIDTH				     1
+#define MADERA_MICB2C_ENA				0x0100
+#define MADERA_MICB2C_ENA_MASK				0x0100
+#define MADERA_MICB2C_ENA_SHIFT				     8
+#define MADERA_MICB2C_ENA_WIDTH				     1
+#define MADERA_MICB2B_ENA				0x0010
+#define MADERA_MICB2B_ENA_MASK				0x0010
+#define MADERA_MICB2B_ENA_SHIFT				     4
+#define MADERA_MICB2B_ENA_WIDTH				     1
+#define MADERA_MICB2A_ENA				0x0001
+#define MADERA_MICB2A_ENA_MASK				0x0001
+#define MADERA_MICB2A_ENA_SHIFT				     0
+#define MADERA_MICB2A_ENA_WIDTH				     1
+
+/* (0x0293)  Accessory_Detect_Mode_1 */
+#define MADERA_ACCDET_SRC				0x2000
+#define MADERA_ACCDET_SRC_MASK				0x2000
+#define MADERA_ACCDET_SRC_SHIFT				    13
+#define MADERA_ACCDET_SRC_WIDTH				     1
+#define MADERA_ACCDET_POLARITY_INV_ENA			0x0080
+#define MADERA_ACCDET_POLARITY_INV_ENA_MASK		0x0080
+#define MADERA_ACCDET_POLARITY_INV_ENA_SHIFT		     7
+#define MADERA_ACCDET_POLARITY_INV_ENA_WIDTH		     1
+#define MADERA_ACCDET_MODE_MASK				0x0007
+#define MADERA_ACCDET_MODE_SHIFT			     0
+#define MADERA_ACCDET_MODE_WIDTH			     3
+
+/* (0x0299)  Headphone_Detect_0 */
+#define MADERA_HPD_GND_SEL				0x0007
+#define MADERA_HPD_GND_SEL_MASK				0x0007
+#define MADERA_HPD_GND_SEL_SHIFT			     0
+#define MADERA_HPD_GND_SEL_WIDTH			     3
+#define MADERA_HPD_SENSE_SEL				0x00F0
+#define MADERA_HPD_SENSE_SEL_MASK			0x00F0
+#define MADERA_HPD_SENSE_SEL_SHIFT			     4
+#define MADERA_HPD_SENSE_SEL_WIDTH			     4
+#define MADERA_HPD_FRC_SEL				0x0F00
+#define MADERA_HPD_FRC_SEL_MASK				0x0F00
+#define MADERA_HPD_FRC_SEL_SHIFT			     8
+#define MADERA_HPD_FRC_SEL_WIDTH			     4
+#define MADERA_HPD_OUT_SEL				0x7000
+#define MADERA_HPD_OUT_SEL_MASK				0x7000
+#define MADERA_HPD_OUT_SEL_SHIFT			    12
+#define MADERA_HPD_OUT_SEL_WIDTH			     3
+#define MADERA_HPD_OVD_ENA_SEL				0x8000
+#define MADERA_HPD_OVD_ENA_SEL_MASK			0x8000
+#define MADERA_HPD_OVD_ENA_SEL_SHIFT			    15
+#define MADERA_HPD_OVD_ENA_SEL_WIDTH			     1
+
+/* (0x029B)  Headphone_Detect_1 */
+#define MADERA_HP_IMPEDANCE_RANGE_MASK			0x0600
+#define MADERA_HP_IMPEDANCE_RANGE_SHIFT			     9
+#define MADERA_HP_IMPEDANCE_RANGE_WIDTH			     2
+#define MADERA_HP_STEP_SIZE				0x0100
+#define MADERA_HP_STEP_SIZE_MASK			0x0100
+#define MADERA_HP_STEP_SIZE_SHIFT			     8
+#define MADERA_HP_STEP_SIZE_WIDTH			     1
+#define MADERA_HP_CLK_DIV_MASK				0x0018
+#define MADERA_HP_CLK_DIV_SHIFT				     3
+#define MADERA_HP_CLK_DIV_WIDTH				     2
+#define MADERA_HP_RATE_MASK				0x0006
+#define MADERA_HP_RATE_SHIFT				     1
+#define MADERA_HP_RATE_WIDTH				     2
+#define MADERA_HP_POLL					0x0001
+#define MADERA_HP_POLL_MASK				0x0001
+#define MADERA_HP_POLL_SHIFT				     0
+#define MADERA_HP_POLL_WIDTH				     1
+
+/* (0x029C)  Headphone_Detect_2 */
+#define MADERA_HP_DONE_MASK				0x8000
+#define MADERA_HP_DONE_SHIFT				    15
+#define MADERA_HP_DONE_WIDTH				     1
+#define MADERA_HP_LVL_MASK				0x7FFF
+#define MADERA_HP_LVL_SHIFT				     0
+#define MADERA_HP_LVL_WIDTH				    15
+
+/* (0x029D)  Headphone_Detect_3 */
+#define MADERA_HP_DACVAL_MASK				0x03FF
+#define MADERA_HP_DACVAL_SHIFT				     0
+#define MADERA_HP_DACVAL_WIDTH				    10
+
+/* (0x029F) - Headphone Detect 5 */
+#define MADERA_HP_DACVAL_DOWN_MASK			0x03FF
+#define MADERA_HP_DACVAL_DOWN_SHIFT			     0
+#define MADERA_HP_DACVAL_DOWN_WIDTH			    10
+
+/* (0x02A2)  Mic_Detect_1_Control_0 */
+#define MADERA_MICD1_GND_MASK				0x0007
+#define MADERA_MICD1_GND_SHIFT				     0
+#define MADERA_MICD1_GND_WIDTH				     3
+#define MADERA_MICD1_SENSE_MASK				0x00F0
+#define MADERA_MICD1_SENSE_SHIFT			     4
+#define MADERA_MICD1_SENSE_WIDTH			     4
+#define MADERA_MICD1_ADC_MODE_MASK			0x8000
+#define MADERA_MICD1_ADC_MODE_SHIFT			    15
+#define MADERA_MICD1_ADC_MODE_WIDTH			     1
+
+/* (0x02A3)  Mic_Detect_1_Control_1 */
+#define MADERA_MICD_BIAS_STARTTIME_MASK			0xF000
+#define MADERA_MICD_BIAS_STARTTIME_SHIFT		    12
+#define MADERA_MICD_BIAS_STARTTIME_WIDTH		     4
+#define MADERA_MICD_RATE_MASK				0x0F00
+#define MADERA_MICD_RATE_SHIFT				     8
+#define MADERA_MICD_RATE_WIDTH				     4
+#define MADERA_MICD_BIAS_SRC_MASK			0x00F0
+#define MADERA_MICD_BIAS_SRC_SHIFT			     4
+#define MADERA_MICD_BIAS_SRC_WIDTH			     4
+#define MADERA_MICD_DBTIME				0x0002
+#define MADERA_MICD_DBTIME_MASK				0x0002
+#define MADERA_MICD_DBTIME_SHIFT			     1
+#define MADERA_MICD_DBTIME_WIDTH			     1
+#define MADERA_MICD_ENA					0x0001
+#define MADERA_MICD_ENA_MASK				0x0001
+#define MADERA_MICD_ENA_SHIFT				     0
+#define MADERA_MICD_ENA_WIDTH				     1
+
+/* (0x02A4)  Mic_Detect_1_Control_2 */
+#define MADERA_MICD_LVL_SEL_MASK			0x00FF
+#define MADERA_MICD_LVL_SEL_SHIFT			     0
+#define MADERA_MICD_LVL_SEL_WIDTH			     8
+
+/* (0x02A5)  Mic_Detect_1_Control_3 */
+#define MADERA_MICD_LVL_0				0x0004
+#define MADERA_MICD_LVL_1				0x0008
+#define MADERA_MICD_LVL_2				0x0010
+#define MADERA_MICD_LVL_3				0x0020
+#define MADERA_MICD_LVL_4				0x0040
+#define MADERA_MICD_LVL_5				0x0080
+#define MADERA_MICD_LVL_6				0x0100
+#define MADERA_MICD_LVL_7				0x0200
+#define MADERA_MICD_LVL_8				0x0400
+#define MADERA_MICD_LVL_MASK				0x07FC
+#define MADERA_MICD_LVL_SHIFT				     2
+#define MADERA_MICD_LVL_WIDTH				     9
+#define MADERA_MICD_VALID				0x0002
+#define MADERA_MICD_VALID_MASK				0x0002
+#define MADERA_MICD_VALID_SHIFT				     1
+#define MADERA_MICD_VALID_WIDTH				     1
+#define MADERA_MICD_STS					0x0001
+#define MADERA_MICD_STS_MASK				0x0001
+#define MADERA_MICD_STS_SHIFT				     0
+#define MADERA_MICD_STS_WIDTH				     1
+
+/* (0x02AB)  Mic_Detect_1_Control_4 */
+#define MADERA_MICDET_ADCVAL_DIFF_MASK			0xFF00
+#define MADERA_MICDET_ADCVAL_DIFF_SHIFT			     8
+#define MADERA_MICDET_ADCVAL_DIFF_WIDTH			     8
+#define MADERA_MICDET_ADCVAL_MASK			0x007F
+#define MADERA_MICDET_ADCVAL_SHIFT			     0
+#define MADERA_MICDET_ADCVAL_WIDTH			     7
+
+/* (0x02C6)  Micd_Clamp_control */
+#define MADERA_MICD_CLAMP_OVD				0x0010
+#define MADERA_MICD_CLAMP_OVD_MASK			0x0010
+#define MADERA_MICD_CLAMP_OVD_SHIFT			     4
+#define MADERA_MICD_CLAMP_OVD_WIDTH			     1
+#define MADERA_MICD_CLAMP_MODE_MASK			0x000F
+#define MADERA_MICD_CLAMP_MODE_SHIFT			     0
+#define MADERA_MICD_CLAMP_MODE_WIDTH			     4
+
+/* (0x02C8)  GP_Switch_1 */
+#define MADERA_SW2_MODE_MASK				0x000C
+#define MADERA_SW2_MODE_SHIFT				     2
+#define MADERA_SW2_MODE_WIDTH				     2
+#define MADERA_SW1_MODE_MASK				0x0003
+#define MADERA_SW1_MODE_SHIFT				     0
+#define MADERA_SW1_MODE_WIDTH				     2
+
+/* (0x02D3)  Jack_detect_analogue */
+#define MADERA_JD2_ENA					0x0002
+#define MADERA_JD2_ENA_MASK				0x0002
+#define MADERA_JD2_ENA_SHIFT				     1
+#define MADERA_JD2_ENA_WIDTH				     1
+#define MADERA_JD1_ENA					0x0001
+#define MADERA_JD1_ENA_MASK				0x0001
+#define MADERA_JD1_ENA_SHIFT				     0
+#define MADERA_JD1_ENA_WIDTH				     1
+
+/* (0x0300)  Input_Enables */
+#define MADERA_IN6L_ENA					0x0800
+#define MADERA_IN6L_ENA_MASK				0x0800
+#define MADERA_IN6L_ENA_SHIFT				    11
+#define MADERA_IN6L_ENA_WIDTH				     1
+#define MADERA_IN6R_ENA					0x0400
+#define MADERA_IN6R_ENA_MASK				0x0400
+#define MADERA_IN6R_ENA_SHIFT				    10
+#define MADERA_IN6R_ENA_WIDTH				     1
+#define MADERA_IN5L_ENA					0x0200
+#define MADERA_IN5L_ENA_MASK				0x0200
+#define MADERA_IN5L_ENA_SHIFT				     9
+#define MADERA_IN5L_ENA_WIDTH				     1
+#define MADERA_IN5R_ENA					0x0100
+#define MADERA_IN5R_ENA_MASK				0x0100
+#define MADERA_IN5R_ENA_SHIFT				     8
+#define MADERA_IN5R_ENA_WIDTH				     1
+#define MADERA_IN4L_ENA					0x0080
+#define MADERA_IN4L_ENA_MASK				0x0080
+#define MADERA_IN4L_ENA_SHIFT				     7
+#define MADERA_IN4L_ENA_WIDTH				     1
+#define MADERA_IN4R_ENA					0x0040
+#define MADERA_IN4R_ENA_MASK				0x0040
+#define MADERA_IN4R_ENA_SHIFT				     6
+#define MADERA_IN4R_ENA_WIDTH				     1
+#define MADERA_IN3L_ENA					0x0020
+#define MADERA_IN3L_ENA_MASK				0x0020
+#define MADERA_IN3L_ENA_SHIFT				     5
+#define MADERA_IN3L_ENA_WIDTH				     1
+#define MADERA_IN3R_ENA					0x0010
+#define MADERA_IN3R_ENA_MASK				0x0010
+#define MADERA_IN3R_ENA_SHIFT				     4
+#define MADERA_IN3R_ENA_WIDTH				     1
+#define MADERA_IN2L_ENA					0x0008
+#define MADERA_IN2L_ENA_MASK				0x0008
+#define MADERA_IN2L_ENA_SHIFT				     3
+#define MADERA_IN2L_ENA_WIDTH				     1
+#define MADERA_IN2R_ENA					0x0004
+#define MADERA_IN2R_ENA_MASK				0x0004
+#define MADERA_IN2R_ENA_SHIFT				     2
+#define MADERA_IN2R_ENA_WIDTH				     1
+#define MADERA_IN1L_ENA					0x0002
+#define MADERA_IN1L_ENA_MASK				0x0002
+#define MADERA_IN1L_ENA_SHIFT				     1
+#define MADERA_IN1L_ENA_WIDTH				     1
+#define MADERA_IN1R_ENA					0x0001
+#define MADERA_IN1R_ENA_MASK				0x0001
+#define MADERA_IN1R_ENA_SHIFT				     0
+#define MADERA_IN1R_ENA_WIDTH				     1
+
+/* (0x0308)  Input_Rate */
+#define MADERA_IN_RATE_MASK				0xF800
+#define MADERA_IN_RATE_SHIFT				    11
+#define MADERA_IN_RATE_WIDTH				     5
+#define MADERA_IN_MODE_MASK				0x0400
+#define MADERA_IN_MODE_SHIFT				    10
+#define MADERA_IN_MODE_WIDTH				     1
+
+/* (0x0309)  Input_Volume_Ramp */
+#define MADERA_IN_VD_RAMP_MASK				0x0070
+#define MADERA_IN_VD_RAMP_SHIFT				     4
+#define MADERA_IN_VD_RAMP_WIDTH				     3
+#define MADERA_IN_VI_RAMP_MASK				0x0007
+#define MADERA_IN_VI_RAMP_SHIFT				     0
+#define MADERA_IN_VI_RAMP_WIDTH				     3
+
+/* (0x030C)  HPF_Control */
+#define MADERA_IN_HPF_CUT_MASK				0x0007
+#define MADERA_IN_HPF_CUT_SHIFT				     0
+#define MADERA_IN_HPF_CUT_WIDTH				     3
+
+/* (0x0310)  IN1L_Control */
+#define MADERA_IN1L_HPF_MASK				0x8000
+#define MADERA_IN1L_HPF_SHIFT				    15
+#define MADERA_IN1L_HPF_WIDTH				     1
+#define MADERA_IN1_DMIC_SUP_MASK			0x1800
+#define MADERA_IN1_DMIC_SUP_SHIFT			    11
+#define MADERA_IN1_DMIC_SUP_WIDTH			     2
+#define MADERA_IN1_MODE_MASK				0x0400
+#define MADERA_IN1_MODE_SHIFT				    10
+#define MADERA_IN1_MODE_WIDTH				     1
+#define MADERA_IN1L_PGA_VOL_MASK			0x00FE
+#define MADERA_IN1L_PGA_VOL_SHIFT			     1
+#define MADERA_IN1L_PGA_VOL_WIDTH			     7
+
+/* (0x0311)  ADC_Digital_Volume_1L */
+#define MADERA_IN1L_SRC_MASK				0x4000
+#define MADERA_IN1L_SRC_SHIFT				    14
+#define MADERA_IN1L_SRC_WIDTH				     1
+#define MADERA_IN1L_SRC_SE_MASK				0x2000
+#define MADERA_IN1L_SRC_SE_SHIFT			    13
+#define MADERA_IN1L_SRC_SE_WIDTH			     1
+#define MADERA_IN1L_LP_MODE				0x0800
+#define MADERA_IN1L_LP_MODE_MASK			0x0800
+#define MADERA_IN1L_LP_MODE_SHIFT			    11
+#define MADERA_IN1L_LP_MODE_WIDTH			     1
+#define MADERA_IN_VU					0x0200
+#define MADERA_IN_VU_MASK				0x0200
+#define MADERA_IN_VU_SHIFT				     9
+#define MADERA_IN_VU_WIDTH				     1
+#define MADERA_IN1L_MUTE				0x0100
+#define MADERA_IN1L_MUTE_MASK				0x0100
+#define MADERA_IN1L_MUTE_SHIFT				     8
+#define MADERA_IN1L_MUTE_WIDTH				     1
+#define MADERA_IN1L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN1L_DIG_VOL_SHIFT			     0
+#define MADERA_IN1L_DIG_VOL_WIDTH			     8
+
+/* (0x0312)  DMIC1L_Control */
+#define MADERA_IN1_OSR_MASK				0x0700
+#define MADERA_IN1_OSR_SHIFT				     8
+#define MADERA_IN1_OSR_WIDTH				     3
+
+/* (0x0313)  IN1L_Rate_Control */
+#define MADERA_IN1L_RATE_MASK				0xF800
+#define MADERA_IN1L_RATE_SHIFT				    11
+#define MADERA_IN1L_RATE_WIDTH				     5
+
+/* (0x0314)  IN1R_Control */
+#define MADERA_IN1R_HPF_MASK				0x8000
+#define MADERA_IN1R_HPF_SHIFT				    15
+#define MADERA_IN1R_HPF_WIDTH				     1
+#define MADERA_IN1R_PGA_VOL_MASK			0x00FE
+#define MADERA_IN1R_PGA_VOL_SHIFT			     1
+#define MADERA_IN1R_PGA_VOL_WIDTH			     7
+#define MADERA_IN1_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN1_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN1_DMICCLK_SRC_WIDTH			     2
+
+/* (0x0315)  ADC_Digital_Volume_1R */
+#define MADERA_IN1R_SRC_MASK				0x4000
+#define MADERA_IN1R_SRC_SHIFT				    14
+#define MADERA_IN1R_SRC_WIDTH				     1
+#define MADERA_IN1R_SRC_SE_MASK				0x2000
+#define MADERA_IN1R_SRC_SE_SHIFT			    13
+#define MADERA_IN1R_SRC_SE_WIDTH			     1
+#define MADERA_IN1R_LP_MODE				0x0800
+#define MADERA_IN1R_LP_MODE_MASK			0x0800
+#define MADERA_IN1R_LP_MODE_SHIFT			    11
+#define MADERA_IN1R_LP_MODE_WIDTH			     1
+#define MADERA_IN1R_MUTE				0x0100
+#define MADERA_IN1R_MUTE_MASK				0x0100
+#define MADERA_IN1R_MUTE_SHIFT				     8
+#define MADERA_IN1R_MUTE_WIDTH				     1
+#define MADERA_IN1R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN1R_DIG_VOL_SHIFT			     0
+#define MADERA_IN1R_DIG_VOL_WIDTH			     8
+
+/* (0x0317)  IN1R_Rate_Control */
+#define MADERA_IN1R_RATE_MASK				0xF800
+#define MADERA_IN1R_RATE_SHIFT				    11
+#define MADERA_IN1R_RATE_WIDTH				     5
+
+/* (0x0318)  IN2L_Control */
+#define MADERA_IN2L_HPF_MASK				0x8000
+#define MADERA_IN2L_HPF_SHIFT				    15
+#define MADERA_IN2L_HPF_WIDTH				     1
+#define MADERA_IN2_DMIC_SUP_MASK			0x1800
+#define MADERA_IN2_DMIC_SUP_SHIFT			    11
+#define MADERA_IN2_DMIC_SUP_WIDTH			     2
+#define MADERA_IN2_MODE_MASK				0x0400
+#define MADERA_IN2_MODE_SHIFT				    10
+#define MADERA_IN2_MODE_WIDTH				     1
+#define MADERA_IN2L_PGA_VOL_MASK			0x00FE
+#define MADERA_IN2L_PGA_VOL_SHIFT			     1
+#define MADERA_IN2L_PGA_VOL_WIDTH			     7
+
+/* (0x0319)  ADC_Digital_Volume_2L */
+#define MADERA_IN2L_SRC_MASK				0x4000
+#define MADERA_IN2L_SRC_SHIFT				    14
+#define MADERA_IN2L_SRC_WIDTH				     1
+#define MADERA_IN2L_SRC_SE_MASK				0x2000
+#define MADERA_IN2L_SRC_SE_SHIFT			    13
+#define MADERA_IN2L_SRC_SE_WIDTH			     1
+#define MADERA_IN2L_LP_MODE				0x0800
+#define MADERA_IN2L_LP_MODE_MASK			0x0800
+#define MADERA_IN2L_LP_MODE_SHIFT			    11
+#define MADERA_IN2L_LP_MODE_WIDTH			     1
+#define MADERA_IN2L_MUTE				0x0100
+#define MADERA_IN2L_MUTE_MASK				0x0100
+#define MADERA_IN2L_MUTE_SHIFT				     8
+#define MADERA_IN2L_MUTE_WIDTH				     1
+#define MADERA_IN2L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN2L_DIG_VOL_SHIFT			     0
+#define MADERA_IN2L_DIG_VOL_WIDTH			     8
+
+/* (0x031A)  DMIC2L_Control */
+#define MADERA_IN2_OSR_MASK				0x0700
+#define MADERA_IN2_OSR_SHIFT				     8
+#define MADERA_IN2_OSR_WIDTH				     3
+
+/* (0x031C)  IN2R_Control */
+#define MADERA_IN2R_HPF_MASK				0x8000
+#define MADERA_IN2R_HPF_SHIFT				    15
+#define MADERA_IN2R_HPF_WIDTH				     1
+#define MADERA_IN2R_PGA_VOL_MASK			0x00FE
+#define MADERA_IN2R_PGA_VOL_SHIFT			     1
+#define MADERA_IN2R_PGA_VOL_WIDTH			     7
+#define MADERA_IN2_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN2_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN2_DMICCLK_SRC_WIDTH			     2
+
+/* (0x031D)  ADC_Digital_Volume_2R */
+#define MADERA_IN2R_SRC_MASK				0x4000
+#define MADERA_IN2R_SRC_SHIFT				    14
+#define MADERA_IN2R_SRC_WIDTH				     1
+#define MADERA_IN2R_SRC_SE_MASK				0x2000
+#define MADERA_IN2R_SRC_SE_SHIFT			    13
+#define MADERA_IN2R_SRC_SE_WIDTH			     1
+#define MADERA_IN2R_LP_MODE				0x0800
+#define MADERA_IN2R_LP_MODE_MASK			0x0800
+#define MADERA_IN2R_LP_MODE_SHIFT			    11
+#define MADERA_IN2R_LP_MODE_WIDTH			     1
+#define MADERA_IN2R_MUTE				0x0100
+#define MADERA_IN2R_MUTE_MASK				0x0100
+#define MADERA_IN2R_MUTE_SHIFT				     8
+#define MADERA_IN2R_MUTE_WIDTH				     1
+#define MADERA_IN2R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN2R_DIG_VOL_SHIFT			     0
+#define MADERA_IN2R_DIG_VOL_WIDTH			     8
+
+/* (0x0320)  IN3L_Control */
+#define MADERA_IN3L_HPF_MASK				0x8000
+#define MADERA_IN3L_HPF_SHIFT				    15
+#define MADERA_IN3L_HPF_WIDTH				     1
+#define MADERA_IN3_DMIC_SUP_MASK			0x1800
+#define MADERA_IN3_DMIC_SUP_SHIFT			    11
+#define MADERA_IN3_DMIC_SUP_WIDTH			     2
+#define MADERA_IN3_MODE_MASK				0x0400
+#define MADERA_IN3_MODE_SHIFT				    10
+#define MADERA_IN3_MODE_WIDTH				     1
+#define MADERA_IN3L_PGA_VOL_MASK			0x00FE
+#define MADERA_IN3L_PGA_VOL_SHIFT			     1
+#define MADERA_IN3L_PGA_VOL_WIDTH			     7
+
+/* (0x0321)  ADC_Digital_Volume_3L */
+#define MADERA_IN3L_MUTE				0x0100
+#define MADERA_IN3L_MUTE_MASK				0x0100
+#define MADERA_IN3L_MUTE_SHIFT				     8
+#define MADERA_IN3L_MUTE_WIDTH				     1
+#define MADERA_IN3L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN3L_DIG_VOL_SHIFT			     0
+#define MADERA_IN3L_DIG_VOL_WIDTH			     8
+
+/* (0x0322)  DMIC3L_Control */
+#define MADERA_IN3_OSR_MASK				0x0700
+#define MADERA_IN3_OSR_SHIFT				     8
+#define MADERA_IN3_OSR_WIDTH				     3
+
+/* (0x0324)  IN3R_Control */
+#define MADERA_IN3R_HPF_MASK				0x8000
+#define MADERA_IN3R_HPF_SHIFT				    15
+#define MADERA_IN3R_HPF_WIDTH				     1
+#define MADERA_IN3R_PGA_VOL_MASK			0x00FE
+#define MADERA_IN3R_PGA_VOL_SHIFT			     1
+#define MADERA_IN3R_PGA_VOL_WIDTH			     7
+#define MADERA_IN3_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN3_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN3_DMICCLK_SRC_WIDTH			     2
+
+/* (0x0325)  ADC_Digital_Volume_3R */
+#define MADERA_IN3R_MUTE				0x0100
+#define MADERA_IN3R_MUTE_MASK				0x0100
+#define MADERA_IN3R_MUTE_SHIFT				     8
+#define MADERA_IN3R_MUTE_WIDTH				     1
+#define MADERA_IN3R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN3R_DIG_VOL_SHIFT			     0
+#define MADERA_IN3R_DIG_VOL_WIDTH			     8
+
+/* (0x0328)  IN4L_Control */
+#define MADERA_IN4L_HPF_MASK				0x8000
+#define MADERA_IN4L_HPF_SHIFT				    15
+#define MADERA_IN4L_HPF_WIDTH				     1
+#define MADERA_IN4_DMIC_SUP_MASK			0x1800
+#define MADERA_IN4_DMIC_SUP_SHIFT			    11
+#define MADERA_IN4_DMIC_SUP_WIDTH			     2
+
+/* (0x0329)  ADC_Digital_Volume_4L */
+#define MADERA_IN4L_MUTE				0x0100
+#define MADERA_IN4L_MUTE_MASK				0x0100
+#define MADERA_IN4L_MUTE_SHIFT				     8
+#define MADERA_IN4L_MUTE_WIDTH				     1
+#define MADERA_IN4L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN4L_DIG_VOL_SHIFT			     0
+#define MADERA_IN4L_DIG_VOL_WIDTH			     8
+
+/* (0x032A)  DMIC4L_Control */
+#define MADERA_IN4_OSR_MASK				0x0700
+#define MADERA_IN4_OSR_SHIFT				     8
+#define MADERA_IN4_OSR_WIDTH				     3
+
+/* (0x032C)  IN4R_Control */
+#define MADERA_IN4R_HPF_MASK				0x8000
+#define MADERA_IN4R_HPF_SHIFT				    15
+#define MADERA_IN4R_HPF_WIDTH				     1
+#define MADERA_IN4_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN4_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN4_DMICCLK_SRC_WIDTH			     2
+
+/* (0x032D)  ADC_Digital_Volume_4R */
+#define MADERA_IN4R_MUTE				0x0100
+#define MADERA_IN4R_MUTE_MASK				0x0100
+#define MADERA_IN4R_MUTE_SHIFT				     8
+#define MADERA_IN4R_MUTE_WIDTH				     1
+#define MADERA_IN4R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN4R_DIG_VOL_SHIFT			     0
+#define MADERA_IN4R_DIG_VOL_WIDTH			     8
+
+/* (0x0330)  IN5L_Control */
+#define MADERA_IN5L_HPF_MASK				0x8000
+#define MADERA_IN5L_HPF_SHIFT				    15
+#define MADERA_IN5L_HPF_WIDTH				     1
+#define MADERA_IN5_DMIC_SUP_MASK			0x1800
+#define MADERA_IN5_DMIC_SUP_SHIFT			    11
+#define MADERA_IN5_DMIC_SUP_WIDTH			     2
+
+/* (0x0331)  ADC_Digital_Volume_5L */
+#define MADERA_IN5L_MUTE				0x0100
+#define MADERA_IN5L_MUTE_MASK				0x0100
+#define MADERA_IN5L_MUTE_SHIFT				     8
+#define MADERA_IN5L_MUTE_WIDTH				     1
+#define MADERA_IN5L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN5L_DIG_VOL_SHIFT			     0
+#define MADERA_IN5L_DIG_VOL_WIDTH			     8
+
+/* (0x0332)  DMIC5L_Control */
+#define MADERA_IN5_OSR_MASK				0x0700
+#define MADERA_IN5_OSR_SHIFT				     8
+#define MADERA_IN5_OSR_WIDTH				     3
+
+/* (0x0334)  IN5R_Control */
+#define MADERA_IN5R_HPF_MASK				0x8000
+#define MADERA_IN5R_HPF_SHIFT				    15
+#define MADERA_IN5R_HPF_WIDTH				     1
+#define MADERA_IN5_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN5_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN5_DMICCLK_SRC_WIDTH			     2
+
+/* (0x0335)  ADC_Digital_Volume_5R */
+#define MADERA_IN5R_MUTE				0x0100
+#define MADERA_IN5R_MUTE_MASK				0x0100
+#define MADERA_IN5R_MUTE_SHIFT				     8
+#define MADERA_IN5R_MUTE_WIDTH				     1
+#define MADERA_IN5R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN5R_DIG_VOL_SHIFT			     0
+#define MADERA_IN5R_DIG_VOL_WIDTH			     8
+
+/* (0x0338)  IN6L_Control */
+#define MADERA_IN6L_HPF_MASK				0x8000
+#define MADERA_IN6L_HPF_SHIFT				    15
+#define MADERA_IN6L_HPF_WIDTH				     1
+#define MADERA_IN6_DMIC_SUP_MASK			0x1800
+#define MADERA_IN6_DMIC_SUP_SHIFT			    11
+#define MADERA_IN6_DMIC_SUP_WIDTH			     2
+
+/* (0x0339)  ADC_Digital_Volume_6L */
+#define MADERA_IN6L_MUTE				0x0100
+#define MADERA_IN6L_MUTE_MASK				0x0100
+#define MADERA_IN6L_MUTE_SHIFT				     8
+#define MADERA_IN6L_MUTE_WIDTH				     1
+#define MADERA_IN6L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN6L_DIG_VOL_SHIFT			     0
+#define MADERA_IN6L_DIG_VOL_WIDTH			     8
+
+/* (0x033A)  DMIC6L_Control */
+#define MADERA_IN6_OSR_MASK				0x0700
+#define MADERA_IN6_OSR_SHIFT				     8
+#define MADERA_IN6_OSR_WIDTH				     3
+
+/* (0x033C)  IN6R_Control */
+#define MADERA_IN6R_HPF_MASK				0x8000
+#define MADERA_IN6R_HPF_SHIFT				    15
+#define MADERA_IN6R_HPF_WIDTH				     1
+
+/* (0x033D)  ADC_Digital_Volume_6R */
+#define MADERA_IN6R_MUTE				0x0100
+#define MADERA_IN6R_MUTE_MASK				0x0100
+#define MADERA_IN6R_MUTE_SHIFT				     8
+#define MADERA_IN6R_MUTE_WIDTH				     1
+#define MADERA_IN6R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN6R_DIG_VOL_SHIFT			     0
+#define MADERA_IN6R_DIG_VOL_WIDTH			     8
+
+/* (0x033E)  DMIC6R_Control */
+#define MADERA_IN6_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN6_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN6_DMICCLK_SRC_WIDTH			     2
+
+/* (0x0400)  Output_Enables_1 */
+#define MADERA_EP_SEL					0x8000
+#define MADERA_EP_SEL_MASK				0x8000
+#define MADERA_EP_SEL_SHIFT				    15
+#define MADERA_EP_SEL_WIDTH				     1
+#define MADERA_OUT6L_ENA				0x0800
+#define MADERA_OUT6L_ENA_MASK				0x0800
+#define MADERA_OUT6L_ENA_SHIFT				    11
+#define MADERA_OUT6L_ENA_WIDTH				     1
+#define MADERA_OUT6R_ENA				0x0400
+#define MADERA_OUT6R_ENA_MASK				0x0400
+#define MADERA_OUT6R_ENA_SHIFT				    10
+#define MADERA_OUT6R_ENA_WIDTH				     1
+#define MADERA_OUT5L_ENA				0x0200
+#define MADERA_OUT5L_ENA_MASK				0x0200
+#define MADERA_OUT5L_ENA_SHIFT				     9
+#define MADERA_OUT5L_ENA_WIDTH				     1
+#define MADERA_OUT5R_ENA				0x0100
+#define MADERA_OUT5R_ENA_MASK				0x0100
+#define MADERA_OUT5R_ENA_SHIFT				     8
+#define MADERA_OUT5R_ENA_WIDTH				     1
+#define MADERA_OUT4L_ENA				0x0080
+#define MADERA_OUT4L_ENA_MASK				0x0080
+#define MADERA_OUT4L_ENA_SHIFT				     7
+#define MADERA_OUT4L_ENA_WIDTH				     1
+#define MADERA_OUT4R_ENA				0x0040
+#define MADERA_OUT4R_ENA_MASK				0x0040
+#define MADERA_OUT4R_ENA_SHIFT				     6
+#define MADERA_OUT4R_ENA_WIDTH				     1
+#define MADERA_OUT3L_ENA				0x0020
+#define MADERA_OUT3L_ENA_MASK				0x0020
+#define MADERA_OUT3L_ENA_SHIFT				     5
+#define MADERA_OUT3L_ENA_WIDTH				     1
+#define MADERA_OUT3R_ENA				0x0010
+#define MADERA_OUT3R_ENA_MASK				0x0010
+#define MADERA_OUT3R_ENA_SHIFT				     4
+#define MADERA_OUT3R_ENA_WIDTH				     1
+#define MADERA_OUT2L_ENA				0x0008
+#define MADERA_OUT2L_ENA_MASK				0x0008
+#define MADERA_OUT2L_ENA_SHIFT				     3
+#define MADERA_OUT2L_ENA_WIDTH				     1
+#define MADERA_OUT2R_ENA				0x0004
+#define MADERA_OUT2R_ENA_MASK				0x0004
+#define MADERA_OUT2R_ENA_SHIFT				     2
+#define MADERA_OUT2R_ENA_WIDTH				     1
+#define MADERA_OUT1L_ENA				0x0002
+#define MADERA_OUT1L_ENA_MASK				0x0002
+#define MADERA_OUT1L_ENA_SHIFT				     1
+#define MADERA_OUT1L_ENA_WIDTH				     1
+#define MADERA_OUT1R_ENA				0x0001
+#define MADERA_OUT1R_ENA_MASK				0x0001
+#define MADERA_OUT1R_ENA_SHIFT				     0
+#define MADERA_OUT1R_ENA_WIDTH				     1
+
+/* (0x0409)  Output_Volume_Ramp */
+#define MADERA_OUT_VD_RAMP_MASK				0x0070
+#define MADERA_OUT_VD_RAMP_SHIFT			     4
+#define MADERA_OUT_VD_RAMP_WIDTH			     3
+#define MADERA_OUT_VI_RAMP_MASK				0x0007
+#define MADERA_OUT_VI_RAMP_SHIFT			     0
+#define MADERA_OUT_VI_RAMP_WIDTH			     3
+
+/* (0x0410)  Output_Path_Config_1L */
+#define MADERA_OUT1_MONO				0x1000
+#define MADERA_OUT1_MONO_MASK				0x1000
+#define MADERA_OUT1_MONO_SHIFT				    12
+#define MADERA_OUT1_MONO_WIDTH				     1
+#define MADERA_OUT1L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT1L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT1L_ANC_SRC_WIDTH			     2
+
+/* (0x0411)  DAC_Digital_Volume_1L */
+#define MADERA_OUT1L_VU					0x0200
+#define MADERA_OUT1L_VU_MASK				0x0200
+#define MADERA_OUT1L_VU_SHIFT				     9
+#define MADERA_OUT1L_VU_WIDTH				     1
+#define MADERA_OUT1L_MUTE				0x0100
+#define MADERA_OUT1L_MUTE_MASK				0x0100
+#define MADERA_OUT1L_MUTE_SHIFT				     8
+#define MADERA_OUT1L_MUTE_WIDTH				     1
+#define MADERA_OUT1L_VOL_MASK				0x00FF
+#define MADERA_OUT1L_VOL_SHIFT				     0
+#define MADERA_OUT1L_VOL_WIDTH				     8
+
+/* (0x0412)  Output_Path_Config_1 */
+#define MADERA_HP1_GND_SEL_MASK				0x0007
+#define MADERA_HP1_GND_SEL_SHIFT			     0
+#define MADERA_HP1_GND_SEL_WIDTH			     3
+
+/* (0x0414)  Output_Path_Config_1R */
+#define MADERA_OUT1R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT1R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT1R_ANC_SRC_WIDTH			     2
+
+/* (0x0415)  DAC_Digital_Volume_1R */
+#define MADERA_OUT1R_MUTE				0x0100
+#define MADERA_OUT1R_MUTE_MASK				0x0100
+#define MADERA_OUT1R_MUTE_SHIFT				     8
+#define MADERA_OUT1R_MUTE_WIDTH				     1
+#define MADERA_OUT1R_VOL_MASK				0x00FF
+#define MADERA_OUT1R_VOL_SHIFT				     0
+#define MADERA_OUT1R_VOL_WIDTH				     8
+
+/* (0x0418)  Output_Path_Config_2L */
+#define MADERA_OUT2L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT2L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT2L_ANC_SRC_WIDTH			     2
+
+/* (0x0419)  DAC_Digital_Volume_2L */
+#define MADERA_OUT2L_MUTE				0x0100
+#define MADERA_OUT2L_MUTE_MASK				0x0100
+#define MADERA_OUT2L_MUTE_SHIFT				     8
+#define MADERA_OUT2L_MUTE_WIDTH				     1
+#define MADERA_OUT2L_VOL_MASK				0x00FF
+#define MADERA_OUT2L_VOL_SHIFT				     0
+#define MADERA_OUT2L_VOL_WIDTH				     8
+
+/* (0x041A)  Output_Path_Config_2 */
+#define MADERA_HP2_GND_SEL_MASK				0x0007
+#define MADERA_HP2_GND_SEL_SHIFT			     0
+#define MADERA_HP2_GND_SEL_WIDTH			     3
+
+/* (0x041C)  Output_Path_Config_2R */
+#define MADERA_OUT2R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT2R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT2R_ANC_SRC_WIDTH			     2
+
+/* (0x041D)  DAC_Digital_Volume_2R */
+#define MADERA_OUT2R_MUTE				0x0100
+#define MADERA_OUT2R_MUTE_MASK				0x0100
+#define MADERA_OUT2R_MUTE_SHIFT				     8
+#define MADERA_OUT2R_MUTE_WIDTH				     1
+#define MADERA_OUT2R_VOL_MASK				0x00FF
+#define MADERA_OUT2R_VOL_SHIFT				     0
+#define MADERA_OUT2R_VOL_WIDTH				     8
+
+/* (0x0420)  Output_Path_Config_3L */
+#define MADERA_OUT3L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT3L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT3L_ANC_SRC_WIDTH			     2
+
+/* (0x0421)  DAC_Digital_Volume_3L */
+#define MADERA_OUT3L_MUTE				0x0100
+#define MADERA_OUT3L_MUTE_MASK				0x0100
+#define MADERA_OUT3L_MUTE_SHIFT				     8
+#define MADERA_OUT3L_MUTE_WIDTH				     1
+#define MADERA_OUT3L_VOL_MASK				0x00FF
+#define MADERA_OUT3L_VOL_SHIFT				     0
+#define MADERA_OUT3L_VOL_WIDTH				     8
+
+/* (0x0424)  Output_Path_Config_3R */
+#define MADERA_OUT3R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT3R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT3R_ANC_SRC_WIDTH			     2
+
+/* (0x0425)  DAC_Digital_Volume_3R */
+#define MADERA_OUT3R_MUTE				0x0100
+#define MADERA_OUT3R_MUTE_MASK				0x0100
+#define MADERA_OUT3R_MUTE_SHIFT				     8
+#define MADERA_OUT3R_MUTE_WIDTH				     1
+#define MADERA_OUT3R_VOL_MASK				0x00FF
+#define MADERA_OUT3R_VOL_SHIFT				     0
+#define MADERA_OUT3R_VOL_WIDTH				     8
+
+/* (0x0428)  Output_Path_Config_4L */
+#define MADERA_OUT4L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT4L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT4L_ANC_SRC_WIDTH			     2
+
+/* (0x0429)  DAC_Digital_Volume_4L */
+#define MADERA_OUT4L_MUTE				0x0100
+#define MADERA_OUT4L_MUTE_MASK				0x0100
+#define MADERA_OUT4L_MUTE_SHIFT				     8
+#define MADERA_OUT4L_MUTE_WIDTH				     1
+#define MADERA_OUT4L_VOL_MASK				0x00FF
+#define MADERA_OUT4L_VOL_SHIFT				     0
+#define MADERA_OUT4L_VOL_WIDTH				     8
+
+/* (0x042C)  Output_Path_Config_4R */
+#define MADERA_OUT4R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT4R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT4R_ANC_SRC_WIDTH			     2
+
+/* (0x042D)  DAC_Digital_Volume_4R */
+#define MADERA_OUT4R_MUTE				0x0100
+#define MADERA_OUT4R_MUTE_MASK				0x0100
+#define MADERA_OUT4R_MUTE_SHIFT				     8
+#define MADERA_OUT4R_MUTE_WIDTH				     1
+#define MADERA_OUT4R_VOL_MASK				0x00FF
+#define MADERA_OUT4R_VOL_SHIFT				     0
+#define MADERA_OUT4R_VOL_WIDTH				     8
+
+/* (0x0430)  Output_Path_Config_5L */
+#define MADERA_OUT5_OSR					0x2000
+#define MADERA_OUT5_OSR_MASK				0x2000
+#define MADERA_OUT5_OSR_SHIFT				    13
+#define MADERA_OUT5_OSR_WIDTH				     1
+#define MADERA_OUT5L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT5L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT5L_ANC_SRC_WIDTH			     2
+
+/* (0x0431)  DAC_Digital_Volume_5L */
+#define MADERA_OUT5L_MUTE				0x0100
+#define MADERA_OUT5L_MUTE_MASK				0x0100
+#define MADERA_OUT5L_MUTE_SHIFT				     8
+#define MADERA_OUT5L_MUTE_WIDTH				     1
+#define MADERA_OUT5L_VOL_MASK				0x00FF
+#define MADERA_OUT5L_VOL_SHIFT				     0
+#define MADERA_OUT5L_VOL_WIDTH				     8
+
+/* (0x0434)  Output_Path_Config_5R */
+#define MADERA_OUT5R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT5R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT5R_ANC_SRC_WIDTH			     2
+
+/* (0x0435)  DAC_Digital_Volume_5R */
+#define MADERA_OUT5R_MUTE				0x0100
+#define MADERA_OUT5R_MUTE_MASK				0x0100
+#define MADERA_OUT5R_MUTE_SHIFT				     8
+#define MADERA_OUT5R_MUTE_WIDTH				     1
+#define MADERA_OUT5R_VOL_MASK				0x00FF
+#define MADERA_OUT5R_VOL_SHIFT				     0
+#define MADERA_OUT5R_VOL_WIDTH				     8
+
+/* (0x0438)  Output_Path_Config_6L */
+#define MADERA_OUT6_OSR					0x2000
+#define MADERA_OUT6_OSR_MASK				0x2000
+#define MADERA_OUT6_OSR_SHIFT				    13
+#define MADERA_OUT6_OSR_WIDTH				     1
+#define MADERA_OUT6L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT6L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT6L_ANC_SRC_WIDTH			     2
+
+/* (0x0439)  DAC_Digital_Volume_6L */
+#define MADERA_OUT6L_MUTE				0x0100
+#define MADERA_OUT6L_MUTE_MASK				0x0100
+#define MADERA_OUT6L_MUTE_SHIFT				     8
+#define MADERA_OUT6L_MUTE_WIDTH				     1
+#define MADERA_OUT6L_VOL_MASK				0x00FF
+#define MADERA_OUT6L_VOL_SHIFT				     0
+#define MADERA_OUT6L_VOL_WIDTH				     8
+
+/* (0x043C)  Output_Path_Config_6R */
+#define MADERA_OUT6R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT6R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT6R_ANC_SRC_WIDTH			     2
+
+/* (0x043D)  DAC_Digital_Volume_6R */
+#define MADERA_OUT6R_MUTE				0x0100
+#define MADERA_OUT6R_MUTE_MASK				0x0100
+#define MADERA_OUT6R_MUTE_SHIFT				     8
+#define MADERA_OUT6R_MUTE_WIDTH				     1
+#define MADERA_OUT6R_VOL_MASK				0x00FF
+#define MADERA_OUT6R_VOL_SHIFT				     0
+#define MADERA_OUT6R_VOL_WIDTH				     8
+
+/* (0x0450) - DAC AEC Control 1 */
+#define MADERA_AEC1_LOOPBACK_SRC_MASK			0x003C
+#define MADERA_AEC1_LOOPBACK_SRC_SHIFT			     2
+#define MADERA_AEC1_LOOPBACK_SRC_WIDTH			     4
+#define MADERA_AEC1_ENA_STS				0x0002
+#define MADERA_AEC1_ENA_STS_MASK			0x0002
+#define MADERA_AEC1_ENA_STS_SHIFT			     1
+#define MADERA_AEC1_ENA_STS_WIDTH			     1
+#define MADERA_AEC1_LOOPBACK_ENA			0x0001
+#define MADERA_AEC1_LOOPBACK_ENA_MASK			0x0001
+#define MADERA_AEC1_LOOPBACK_ENA_SHIFT			     0
+#define MADERA_AEC1_LOOPBACK_ENA_WIDTH			     1
+
+/* (0x0451)  DAC_AEC_Control_2 */
+#define MADERA_AEC2_LOOPBACK_SRC_MASK			0x003C
+#define MADERA_AEC2_LOOPBACK_SRC_SHIFT			     2
+#define MADERA_AEC2_LOOPBACK_SRC_WIDTH			     4
+#define MADERA_AEC2_ENA_STS				0x0002
+#define MADERA_AEC2_ENA_STS_MASK			0x0002
+#define MADERA_AEC2_ENA_STS_SHIFT			     1
+#define MADERA_AEC2_ENA_STS_WIDTH			     1
+#define MADERA_AEC2_LOOPBACK_ENA			0x0001
+#define MADERA_AEC2_LOOPBACK_ENA_MASK			0x0001
+#define MADERA_AEC2_LOOPBACK_ENA_SHIFT			     0
+#define MADERA_AEC2_LOOPBACK_ENA_WIDTH			     1
+
+/* (0x0458)  Noise_Gate_Control */
+#define MADERA_NGATE_HOLD_MASK				0x0030
+#define MADERA_NGATE_HOLD_SHIFT				     4
+#define MADERA_NGATE_HOLD_WIDTH				     2
+#define MADERA_NGATE_THR_MASK				0x000E
+#define MADERA_NGATE_THR_SHIFT				     1
+#define MADERA_NGATE_THR_WIDTH				     3
+#define MADERA_NGATE_ENA				0x0001
+#define MADERA_NGATE_ENA_MASK				0x0001
+#define MADERA_NGATE_ENA_SHIFT				     0
+#define MADERA_NGATE_ENA_WIDTH				     1
+
+/* (0x0490)  PDM_SPK1_CTRL_1 */
+#define MADERA_SPK1R_MUTE				0x2000
+#define MADERA_SPK1R_MUTE_MASK				0x2000
+#define MADERA_SPK1R_MUTE_SHIFT				    13
+#define MADERA_SPK1R_MUTE_WIDTH				     1
+#define MADERA_SPK1L_MUTE				0x1000
+#define MADERA_SPK1L_MUTE_MASK				0x1000
+#define MADERA_SPK1L_MUTE_SHIFT				    12
+#define MADERA_SPK1L_MUTE_WIDTH				     1
+#define MADERA_SPK1_MUTE_ENDIAN				0x0100
+#define MADERA_SPK1_MUTE_ENDIAN_MASK			0x0100
+#define MADERA_SPK1_MUTE_ENDIAN_SHIFT			     8
+#define MADERA_SPK1_MUTE_ENDIAN_WIDTH			     1
+#define MADERA_SPK1_MUTE_SEQ1_MASK			0x00FF
+#define MADERA_SPK1_MUTE_SEQ1_SHIFT			     0
+#define MADERA_SPK1_MUTE_SEQ1_WIDTH			     8
+
+/* (0x0491)  PDM_SPK1_CTRL_2 */
+#define MADERA_SPK1_FMT					0x0001
+#define MADERA_SPK1_FMT_MASK				0x0001
+#define MADERA_SPK1_FMT_SHIFT				     0
+#define MADERA_SPK1_FMT_WIDTH				     1
+
+/* (0x0492)  PDM_SPK2_CTRL_1 */
+#define MADERA_SPK2R_MUTE				0x2000
+#define MADERA_SPK2R_MUTE_MASK				0x2000
+#define MADERA_SPK2R_MUTE_SHIFT				    13
+#define MADERA_SPK2R_MUTE_WIDTH				     1
+#define MADERA_SPK2L_MUTE				0x1000
+#define MADERA_SPK2L_MUTE_MASK				0x1000
+#define MADERA_SPK2L_MUTE_SHIFT				    12
+#define MADERA_SPK2L_MUTE_WIDTH				     1
+
+/* (0x04A0) - HP1 Short Circuit Ctrl */
+#define MADERA_HP1_SC_ENA				0x1000
+#define MADERA_HP1_SC_ENA_MASK				0x1000
+#define MADERA_HP1_SC_ENA_SHIFT				    12
+#define MADERA_HP1_SC_ENA_WIDTH				     1
+
+/* (0x04A1) - HP2 Short Circuit Ctrl */
+#define MADERA_HP2_SC_ENA				0x1000
+#define MADERA_HP2_SC_ENA_MASK				0x1000
+#define MADERA_HP2_SC_ENA_SHIFT				    12
+#define MADERA_HP2_SC_ENA_WIDTH				     1
+
+/* (0x04A2) - HP3 Short Circuit Ctrl */
+#define MADERA_HP3_SC_ENA				0x1000
+#define MADERA_HP3_SC_ENA_MASK				0x1000
+#define MADERA_HP3_SC_ENA_SHIFT				    12
+#define MADERA_HP3_SC_ENA_WIDTH				     1
+
+/* (0x04A8) - HP_Test_Ctrl_5 */
+#define MADERA_HP1L_ONEFLT				0x0100
+#define MADERA_HP1L_ONEFLT_MASK				0x0100
+#define MADERA_HP1L_ONEFLT_SHIFT			     8
+#define MADERA_HP1L_ONEFLT_WIDTH			     1
+
+/* (0x04A9) - HP_Test_Ctrl_6 */
+#define MADERA_HP1R_ONEFLT				0x0100
+#define MADERA_HP1R_ONEFLT_MASK				0x0100
+#define MADERA_HP1R_ONEFLT_SHIFT			     8
+#define MADERA_HP1R_ONEFLT_WIDTH			     1
+
+/* (0x0500)  AIF1_BCLK_Ctrl */
+#define MADERA_AIF1_BCLK_INV				0x0080
+#define MADERA_AIF1_BCLK_INV_MASK			0x0080
+#define MADERA_AIF1_BCLK_INV_SHIFT			     7
+#define MADERA_AIF1_BCLK_INV_WIDTH			     1
+#define MADERA_AIF1_BCLK_MSTR				0x0020
+#define MADERA_AIF1_BCLK_MSTR_MASK			0x0020
+#define MADERA_AIF1_BCLK_MSTR_SHIFT			     5
+#define MADERA_AIF1_BCLK_MSTR_WIDTH			     1
+#define MADERA_AIF1_BCLK_FREQ_MASK			0x001F
+#define MADERA_AIF1_BCLK_FREQ_SHIFT			     0
+#define MADERA_AIF1_BCLK_FREQ_WIDTH			     5
+
+/* (0x0501)  AIF1_Tx_Pin_Ctrl */
+#define MADERA_AIF1TX_LRCLK_SRC				0x0008
+#define MADERA_AIF1TX_LRCLK_SRC_MASK			0x0008
+#define MADERA_AIF1TX_LRCLK_SRC_SHIFT			     3
+#define MADERA_AIF1TX_LRCLK_SRC_WIDTH			     1
+#define MADERA_AIF1TX_LRCLK_INV				0x0004
+#define MADERA_AIF1TX_LRCLK_INV_MASK			0x0004
+#define MADERA_AIF1TX_LRCLK_INV_SHIFT			     2
+#define MADERA_AIF1TX_LRCLK_INV_WIDTH			     1
+#define MADERA_AIF1TX_LRCLK_MSTR			0x0001
+#define MADERA_AIF1TX_LRCLK_MSTR_MASK			0x0001
+#define MADERA_AIF1TX_LRCLK_MSTR_SHIFT			     0
+#define MADERA_AIF1TX_LRCLK_MSTR_WIDTH			     1
+
+/* (0x0502)  AIF1_Rx_Pin_Ctrl */
+#define MADERA_AIF1RX_LRCLK_INV				0x0004
+#define MADERA_AIF1RX_LRCLK_INV_MASK			0x0004
+#define MADERA_AIF1RX_LRCLK_INV_SHIFT			     2
+#define MADERA_AIF1RX_LRCLK_INV_WIDTH			     1
+#define MADERA_AIF1RX_LRCLK_FRC				0x0002
+#define MADERA_AIF1RX_LRCLK_FRC_MASK			0x0002
+#define MADERA_AIF1RX_LRCLK_FRC_SHIFT			     1
+#define MADERA_AIF1RX_LRCLK_FRC_WIDTH			     1
+#define MADERA_AIF1RX_LRCLK_MSTR			0x0001
+#define MADERA_AIF1RX_LRCLK_MSTR_MASK			0x0001
+#define MADERA_AIF1RX_LRCLK_MSTR_SHIFT			     0
+#define MADERA_AIF1RX_LRCLK_MSTR_WIDTH			     1
+
+/* (0x0503)  AIF1_Rate_Ctrl */
+#define MADERA_AIF1_RATE_MASK				0xF800
+#define MADERA_AIF1_RATE_SHIFT				    11
+#define MADERA_AIF1_RATE_WIDTH				     5
+#define MADERA_AIF1_TRI					0x0040
+#define MADERA_AIF1_TRI_MASK				0x0040
+#define MADERA_AIF1_TRI_SHIFT				     6
+#define MADERA_AIF1_TRI_WIDTH				     1
+
+/* (0x0504)  AIF1_Format */
+#define MADERA_AIF1_FMT_MASK				0x0007
+#define MADERA_AIF1_FMT_SHIFT				     0
+#define MADERA_AIF1_FMT_WIDTH				     3
+
+/* (0x0506)  AIF1_Rx_BCLK_Rate */
+#define MADERA_AIF1RX_BCPF_MASK				0x1FFF
+#define MADERA_AIF1RX_BCPF_SHIFT			     0
+#define MADERA_AIF1RX_BCPF_WIDTH			    13
+
+/* (0x0507)  AIF1_Frame_Ctrl_1 */
+#define MADERA_AIF1TX_WL_MASK				0x3F00
+#define MADERA_AIF1TX_WL_SHIFT				     8
+#define MADERA_AIF1TX_WL_WIDTH				     6
+#define MADERA_AIF1TX_SLOT_LEN_MASK			0x00FF
+#define MADERA_AIF1TX_SLOT_LEN_SHIFT			     0
+#define MADERA_AIF1TX_SLOT_LEN_WIDTH			     8
+
+/* (0x0508)  AIF1_Frame_Ctrl_2 */
+#define MADERA_AIF1RX_WL_MASK				0x3F00
+#define MADERA_AIF1RX_WL_SHIFT				     8
+#define MADERA_AIF1RX_WL_WIDTH				     6
+#define MADERA_AIF1RX_SLOT_LEN_MASK			0x00FF
+#define MADERA_AIF1RX_SLOT_LEN_SHIFT			     0
+#define MADERA_AIF1RX_SLOT_LEN_WIDTH			     8
+
+/* (0x0509)  AIF1_Frame_Ctrl_3 */
+#define MADERA_AIF1TX1_SLOT_MASK			0x003F
+#define MADERA_AIF1TX1_SLOT_SHIFT			     0
+#define MADERA_AIF1TX1_SLOT_WIDTH			     6
+
+/* (0x0519)  AIF1_Tx_Enables */
+#define MADERA_AIF1TX8_ENA				0x0080
+#define MADERA_AIF1TX8_ENA_MASK				0x0080
+#define MADERA_AIF1TX8_ENA_SHIFT			     7
+#define MADERA_AIF1TX8_ENA_WIDTH			     1
+#define MADERA_AIF1TX7_ENA				0x0040
+#define MADERA_AIF1TX7_ENA_MASK				0x0040
+#define MADERA_AIF1TX7_ENA_SHIFT			     6
+#define MADERA_AIF1TX7_ENA_WIDTH			     1
+#define MADERA_AIF1TX6_ENA				0x0020
+#define MADERA_AIF1TX6_ENA_MASK				0x0020
+#define MADERA_AIF1TX6_ENA_SHIFT			     5
+#define MADERA_AIF1TX6_ENA_WIDTH			     1
+#define MADERA_AIF1TX5_ENA				0x0010
+#define MADERA_AIF1TX5_ENA_MASK				0x0010
+#define MADERA_AIF1TX5_ENA_SHIFT			     4
+#define MADERA_AIF1TX5_ENA_WIDTH			     1
+#define MADERA_AIF1TX4_ENA				0x0008
+#define MADERA_AIF1TX4_ENA_MASK				0x0008
+#define MADERA_AIF1TX4_ENA_SHIFT			     3
+#define MADERA_AIF1TX4_ENA_WIDTH			     1
+#define MADERA_AIF1TX3_ENA				0x0004
+#define MADERA_AIF1TX3_ENA_MASK				0x0004
+#define MADERA_AIF1TX3_ENA_SHIFT			     2
+#define MADERA_AIF1TX3_ENA_WIDTH			     1
+#define MADERA_AIF1TX2_ENA				0x0002
+#define MADERA_AIF1TX2_ENA_MASK				0x0002
+#define MADERA_AIF1TX2_ENA_SHIFT			     1
+#define MADERA_AIF1TX2_ENA_WIDTH			     1
+#define MADERA_AIF1TX1_ENA				0x0001
+#define MADERA_AIF1TX1_ENA_MASK				0x0001
+#define MADERA_AIF1TX1_ENA_SHIFT			     0
+#define MADERA_AIF1TX1_ENA_WIDTH			     1
+
+/* (0x051A)  AIF1_Rx_Enables */
+#define MADERA_AIF1RX8_ENA				0x0080
+#define MADERA_AIF1RX8_ENA_MASK				0x0080
+#define MADERA_AIF1RX8_ENA_SHIFT			     7
+#define MADERA_AIF1RX8_ENA_WIDTH			     1
+#define MADERA_AIF1RX7_ENA				0x0040
+#define MADERA_AIF1RX7_ENA_MASK				0x0040
+#define MADERA_AIF1RX7_ENA_SHIFT			     6
+#define MADERA_AIF1RX7_ENA_WIDTH			     1
+#define MADERA_AIF1RX6_ENA				0x0020
+#define MADERA_AIF1RX6_ENA_MASK				0x0020
+#define MADERA_AIF1RX6_ENA_SHIFT			     5
+#define MADERA_AIF1RX6_ENA_WIDTH			     1
+#define MADERA_AIF1RX5_ENA				0x0010
+#define MADERA_AIF1RX5_ENA_MASK				0x0010
+#define MADERA_AIF1RX5_ENA_SHIFT			     4
+#define MADERA_AIF1RX5_ENA_WIDTH			     1
+#define MADERA_AIF1RX4_ENA				0x0008
+#define MADERA_AIF1RX4_ENA_MASK				0x0008
+#define MADERA_AIF1RX4_ENA_SHIFT			     3
+#define MADERA_AIF1RX4_ENA_WIDTH			     1
+#define MADERA_AIF1RX3_ENA				0x0004
+#define MADERA_AIF1RX3_ENA_MASK				0x0004
+#define MADERA_AIF1RX3_ENA_SHIFT			     2
+#define MADERA_AIF1RX3_ENA_WIDTH			     1
+#define MADERA_AIF1RX2_ENA				0x0002
+#define MADERA_AIF1RX2_ENA_MASK				0x0002
+#define MADERA_AIF1RX2_ENA_SHIFT			     1
+#define MADERA_AIF1RX2_ENA_WIDTH			     1
+#define MADERA_AIF1RX1_ENA				0x0001
+#define MADERA_AIF1RX1_ENA_MASK				0x0001
+#define MADERA_AIF1RX1_ENA_SHIFT			     0
+#define MADERA_AIF1RX1_ENA_WIDTH			     1
+
+/* (0x0559)  AIF2_Tx_Enables */
+#define MADERA_AIF2TX8_ENA				0x0080
+#define MADERA_AIF2TX8_ENA_MASK				0x0080
+#define MADERA_AIF2TX8_ENA_SHIFT			     7
+#define MADERA_AIF2TX8_ENA_WIDTH			     1
+#define MADERA_AIF2TX7_ENA				0x0040
+#define MADERA_AIF2TX7_ENA_MASK				0x0040
+#define MADERA_AIF2TX7_ENA_SHIFT			     6
+#define MADERA_AIF2TX7_ENA_WIDTH			     1
+#define MADERA_AIF2TX6_ENA				0x0020
+#define MADERA_AIF2TX6_ENA_MASK				0x0020
+#define MADERA_AIF2TX6_ENA_SHIFT			     5
+#define MADERA_AIF2TX6_ENA_WIDTH			     1
+#define MADERA_AIF2TX5_ENA				0x0010
+#define MADERA_AIF2TX5_ENA_MASK				0x0010
+#define MADERA_AIF2TX5_ENA_SHIFT			     4
+#define MADERA_AIF2TX5_ENA_WIDTH			     1
+#define MADERA_AIF2TX4_ENA				0x0008
+#define MADERA_AIF2TX4_ENA_MASK				0x0008
+#define MADERA_AIF2TX4_ENA_SHIFT			     3
+#define MADERA_AIF2TX4_ENA_WIDTH			     1
+#define MADERA_AIF2TX3_ENA				0x0004
+#define MADERA_AIF2TX3_ENA_MASK				0x0004
+#define MADERA_AIF2TX3_ENA_SHIFT			     2
+#define MADERA_AIF2TX3_ENA_WIDTH			     1
+#define MADERA_AIF2TX2_ENA				0x0002
+#define MADERA_AIF2TX2_ENA_MASK				0x0002
+#define MADERA_AIF2TX2_ENA_SHIFT			     1
+#define MADERA_AIF2TX2_ENA_WIDTH			     1
+#define MADERA_AIF2TX1_ENA				0x0001
+#define MADERA_AIF2TX1_ENA_MASK				0x0001
+#define MADERA_AIF2TX1_ENA_SHIFT			     0
+#define MADERA_AIF2TX1_ENA_WIDTH			     1
+
+/* (0x055A)  AIF2_Rx_Enables */
+#define MADERA_AIF2RX8_ENA				0x0080
+#define MADERA_AIF2RX8_ENA_MASK				0x0080
+#define MADERA_AIF2RX8_ENA_SHIFT			     7
+#define MADERA_AIF2RX8_ENA_WIDTH			     1
+#define MADERA_AIF2RX7_ENA				0x0040
+#define MADERA_AIF2RX7_ENA_MASK				0x0040
+#define MADERA_AIF2RX7_ENA_SHIFT			     6
+#define MADERA_AIF2RX7_ENA_WIDTH			     1
+#define MADERA_AIF2RX6_ENA				0x0020
+#define MADERA_AIF2RX6_ENA_MASK				0x0020
+#define MADERA_AIF2RX6_ENA_SHIFT			     5
+#define MADERA_AIF2RX6_ENA_WIDTH			     1
+#define MADERA_AIF2RX5_ENA				0x0010
+#define MADERA_AIF2RX5_ENA_MASK				0x0010
+#define MADERA_AIF2RX5_ENA_SHIFT			     4
+#define MADERA_AIF2RX5_ENA_WIDTH			     1
+#define MADERA_AIF2RX4_ENA				0x0008
+#define MADERA_AIF2RX4_ENA_MASK				0x0008
+#define MADERA_AIF2RX4_ENA_SHIFT			     3
+#define MADERA_AIF2RX4_ENA_WIDTH			     1
+#define MADERA_AIF2RX3_ENA				0x0004
+#define MADERA_AIF2RX3_ENA_MASK				0x0004
+#define MADERA_AIF2RX3_ENA_SHIFT			     2
+#define MADERA_AIF2RX3_ENA_WIDTH			     1
+#define MADERA_AIF2RX2_ENA				0x0002
+#define MADERA_AIF2RX2_ENA_MASK				0x0002
+#define MADERA_AIF2RX2_ENA_SHIFT			     1
+#define MADERA_AIF2RX2_ENA_WIDTH			     1
+#define MADERA_AIF2RX1_ENA				0x0001
+#define MADERA_AIF2RX1_ENA_MASK				0x0001
+#define MADERA_AIF2RX1_ENA_SHIFT			     0
+#define MADERA_AIF2RX1_ENA_WIDTH			     1
+
+/* (0x0599)  AIF3_Tx_Enables */
+#define MADERA_AIF3TX2_ENA				0x0002
+#define MADERA_AIF3TX2_ENA_MASK				0x0002
+#define MADERA_AIF3TX2_ENA_SHIFT			     1
+#define MADERA_AIF3TX2_ENA_WIDTH			     1
+#define MADERA_AIF3TX1_ENA				0x0001
+#define MADERA_AIF3TX1_ENA_MASK				0x0001
+#define MADERA_AIF3TX1_ENA_SHIFT			     0
+#define MADERA_AIF3TX1_ENA_WIDTH			     1
+
+/* (0x059A)  AIF3_Rx_Enables */
+#define MADERA_AIF3RX2_ENA				0x0002
+#define MADERA_AIF3RX2_ENA_MASK				0x0002
+#define MADERA_AIF3RX2_ENA_SHIFT			     1
+#define MADERA_AIF3RX2_ENA_WIDTH			     1
+#define MADERA_AIF3RX1_ENA				0x0001
+#define MADERA_AIF3RX1_ENA_MASK				0x0001
+#define MADERA_AIF3RX1_ENA_SHIFT			     0
+#define MADERA_AIF3RX1_ENA_WIDTH			     1
+
+/* (0x05B9)  AIF4_Tx_Enables */
+#define MADERA_AIF4TX2_ENA				0x0002
+#define MADERA_AIF4TX2_ENA_MASK				0x0002
+#define MADERA_AIF4TX2_ENA_SHIFT			     1
+#define MADERA_AIF4TX2_ENA_WIDTH			     1
+#define MADERA_AIF4TX1_ENA				0x0001
+#define MADERA_AIF4TX1_ENA_MASK				0x0001
+#define MADERA_AIF4TX1_ENA_SHIFT			     0
+#define MADERA_AIF4TX1_ENA_WIDTH			     1
+
+/* (0x05BA)  AIF4_Rx_Enables */
+#define MADERA_AIF4RX2_ENA				0x0002
+#define MADERA_AIF4RX2_ENA_MASK				0x0002
+#define MADERA_AIF4RX2_ENA_SHIFT			     1
+#define MADERA_AIF4RX2_ENA_WIDTH			     1
+#define MADERA_AIF4RX1_ENA				0x0001
+#define MADERA_AIF4RX1_ENA_MASK				0x0001
+#define MADERA_AIF4RX1_ENA_SHIFT			     0
+#define MADERA_AIF4RX1_ENA_WIDTH			     1
+
+/* (0x05C2)  SPD1_TX_Control */
+#define MADERA_SPD1_VAL2				0x2000
+#define MADERA_SPD1_VAL2_MASK				0x2000
+#define MADERA_SPD1_VAL2_SHIFT				    13
+#define MADERA_SPD1_VAL2_WIDTH				     1
+#define MADERA_SPD1_VAL1				0x1000
+#define MADERA_SPD1_VAL1_MASK				0x1000
+#define MADERA_SPD1_VAL1_SHIFT				    12
+#define MADERA_SPD1_VAL1_WIDTH				     1
+#define MADERA_SPD1_RATE_MASK				0x00F0
+#define MADERA_SPD1_RATE_SHIFT				     4
+#define MADERA_SPD1_RATE_WIDTH				     4
+#define MADERA_SPD1_ENA					0x0001
+#define MADERA_SPD1_ENA_MASK				0x0001
+#define MADERA_SPD1_ENA_SHIFT				     0
+#define MADERA_SPD1_ENA_WIDTH				     1
+
+/* (0x05F5)  SLIMbus_RX_Channel_Enable */
+#define MADERA_SLIMRX8_ENA				0x0080
+#define MADERA_SLIMRX8_ENA_MASK				0x0080
+#define MADERA_SLIMRX8_ENA_SHIFT			     7
+#define MADERA_SLIMRX8_ENA_WIDTH			     1
+#define MADERA_SLIMRX7_ENA				0x0040
+#define MADERA_SLIMRX7_ENA_MASK				0x0040
+#define MADERA_SLIMRX7_ENA_SHIFT			     6
+#define MADERA_SLIMRX7_ENA_WIDTH			     1
+#define MADERA_SLIMRX6_ENA				0x0020
+#define MADERA_SLIMRX6_ENA_MASK				0x0020
+#define MADERA_SLIMRX6_ENA_SHIFT			     5
+#define MADERA_SLIMRX6_ENA_WIDTH			     1
+#define MADERA_SLIMRX5_ENA				0x0010
+#define MADERA_SLIMRX5_ENA_MASK				0x0010
+#define MADERA_SLIMRX5_ENA_SHIFT			     4
+#define MADERA_SLIMRX5_ENA_WIDTH			     1
+#define MADERA_SLIMRX4_ENA				0x0008
+#define MADERA_SLIMRX4_ENA_MASK				0x0008
+#define MADERA_SLIMRX4_ENA_SHIFT			     3
+#define MADERA_SLIMRX4_ENA_WIDTH			     1
+#define MADERA_SLIMRX3_ENA				0x0004
+#define MADERA_SLIMRX3_ENA_MASK				0x0004
+#define MADERA_SLIMRX3_ENA_SHIFT			     2
+#define MADERA_SLIMRX3_ENA_WIDTH			     1
+#define MADERA_SLIMRX2_ENA				0x0002
+#define MADERA_SLIMRX2_ENA_MASK				0x0002
+#define MADERA_SLIMRX2_ENA_SHIFT			     1
+#define MADERA_SLIMRX2_ENA_WIDTH			     1
+#define MADERA_SLIMRX1_ENA				0x0001
+#define MADERA_SLIMRX1_ENA_MASK				0x0001
+#define MADERA_SLIMRX1_ENA_SHIFT			     0
+#define MADERA_SLIMRX1_ENA_WIDTH			     1
+
+/* (0x05F6)  SLIMbus_TX_Channel_Enable */
+#define MADERA_SLIMTX8_ENA				0x0080
+#define MADERA_SLIMTX8_ENA_MASK				0x0080
+#define MADERA_SLIMTX8_ENA_SHIFT			     7
+#define MADERA_SLIMTX8_ENA_WIDTH			     1
+#define MADERA_SLIMTX7_ENA				0x0040
+#define MADERA_SLIMTX7_ENA_MASK				0x0040
+#define MADERA_SLIMTX7_ENA_SHIFT			     6
+#define MADERA_SLIMTX7_ENA_WIDTH			     1
+#define MADERA_SLIMTX6_ENA				0x0020
+#define MADERA_SLIMTX6_ENA_MASK				0x0020
+#define MADERA_SLIMTX6_ENA_SHIFT			     5
+#define MADERA_SLIMTX6_ENA_WIDTH			     1
+#define MADERA_SLIMTX5_ENA				0x0010
+#define MADERA_SLIMTX5_ENA_MASK				0x0010
+#define MADERA_SLIMTX5_ENA_SHIFT			     4
+#define MADERA_SLIMTX5_ENA_WIDTH			     1
+#define MADERA_SLIMTX4_ENA				0x0008
+#define MADERA_SLIMTX4_ENA_MASK				0x0008
+#define MADERA_SLIMTX4_ENA_SHIFT			     3
+#define MADERA_SLIMTX4_ENA_WIDTH			     1
+#define MADERA_SLIMTX3_ENA				0x0004
+#define MADERA_SLIMTX3_ENA_MASK				0x0004
+#define MADERA_SLIMTX3_ENA_SHIFT			     2
+#define MADERA_SLIMTX3_ENA_WIDTH			     1
+#define MADERA_SLIMTX2_ENA				0x0002
+#define MADERA_SLIMTX2_ENA_MASK				0x0002
+#define MADERA_SLIMTX2_ENA_SHIFT			     1
+#define MADERA_SLIMTX2_ENA_WIDTH			     1
+#define MADERA_SLIMTX1_ENA				0x0001
+#define MADERA_SLIMTX1_ENA_MASK				0x0001
+#define MADERA_SLIMTX1_ENA_SHIFT			     0
+#define MADERA_SLIMTX1_ENA_WIDTH			     1
+
+/* (0x0E10)  EQ1_1 */
+#define MADERA_EQ1_B1_GAIN_MASK				0xF800
+#define MADERA_EQ1_B1_GAIN_SHIFT			    11
+#define MADERA_EQ1_B1_GAIN_WIDTH			     5
+#define MADERA_EQ1_B2_GAIN_MASK				0x07C0
+#define MADERA_EQ1_B2_GAIN_SHIFT			     6
+#define MADERA_EQ1_B2_GAIN_WIDTH			     5
+#define MADERA_EQ1_B3_GAIN_MASK				0x003E
+#define MADERA_EQ1_B3_GAIN_SHIFT			     1
+#define MADERA_EQ1_B3_GAIN_WIDTH			     5
+#define MADERA_EQ1_ENA					0x0001
+#define MADERA_EQ1_ENA_MASK				0x0001
+#define MADERA_EQ1_ENA_SHIFT				     0
+#define MADERA_EQ1_ENA_WIDTH				     1
+
+/* (0x0E11)  EQ1_2 */
+#define MADERA_EQ1_B4_GAIN_MASK				0xF800
+#define MADERA_EQ1_B4_GAIN_SHIFT			    11
+#define MADERA_EQ1_B4_GAIN_WIDTH			     5
+#define MADERA_EQ1_B5_GAIN_MASK				0x07C0
+#define MADERA_EQ1_B5_GAIN_SHIFT			     6
+#define MADERA_EQ1_B5_GAIN_WIDTH			     5
+#define MADERA_EQ1_B1_MODE				0x0001
+#define MADERA_EQ1_B1_MODE_MASK				0x0001
+#define MADERA_EQ1_B1_MODE_SHIFT			     0
+#define MADERA_EQ1_B1_MODE_WIDTH			     1
+
+/* (0x0E26)  EQ2_1 */
+#define MADERA_EQ2_B1_GAIN_MASK				0xF800
+#define MADERA_EQ2_B1_GAIN_SHIFT			    11
+#define MADERA_EQ2_B1_GAIN_WIDTH			     5
+#define MADERA_EQ2_B2_GAIN_MASK				0x07C0
+#define MADERA_EQ2_B2_GAIN_SHIFT			     6
+#define MADERA_EQ2_B2_GAIN_WIDTH			     5
+#define MADERA_EQ2_B3_GAIN_MASK				0x003E
+#define MADERA_EQ2_B3_GAIN_SHIFT			     1
+#define MADERA_EQ2_B3_GAIN_WIDTH			     5
+#define MADERA_EQ2_ENA					0x0001
+#define MADERA_EQ2_ENA_MASK				0x0001
+#define MADERA_EQ2_ENA_SHIFT				     0
+#define MADERA_EQ2_ENA_WIDTH				     1
+
+/* (0x0E27)  EQ2_2 */
+#define MADERA_EQ2_B4_GAIN_MASK				0xF800
+#define MADERA_EQ2_B4_GAIN_SHIFT			    11
+#define MADERA_EQ2_B4_GAIN_WIDTH			     5
+#define MADERA_EQ2_B5_GAIN_MASK				0x07C0
+#define MADERA_EQ2_B5_GAIN_SHIFT			     6
+#define MADERA_EQ2_B5_GAIN_WIDTH			     5
+#define MADERA_EQ2_B1_MODE				0x0001
+#define MADERA_EQ2_B1_MODE_MASK				0x0001
+#define MADERA_EQ2_B1_MODE_SHIFT			     0
+#define MADERA_EQ2_B1_MODE_WIDTH			     1
+
+/* (0x0E3C)  EQ3_1 */
+#define MADERA_EQ3_B1_GAIN_MASK				0xF800
+#define MADERA_EQ3_B1_GAIN_SHIFT			    11
+#define MADERA_EQ3_B1_GAIN_WIDTH			     5
+#define MADERA_EQ3_B2_GAIN_MASK				0x07C0
+#define MADERA_EQ3_B2_GAIN_SHIFT			     6
+#define MADERA_EQ3_B2_GAIN_WIDTH			     5
+#define MADERA_EQ3_B3_GAIN_MASK				0x003E
+#define MADERA_EQ3_B3_GAIN_SHIFT			     1
+#define MADERA_EQ3_B3_GAIN_WIDTH			     5
+#define MADERA_EQ3_ENA					0x0001
+#define MADERA_EQ3_ENA_MASK				0x0001
+#define MADERA_EQ3_ENA_SHIFT				     0
+#define MADERA_EQ3_ENA_WIDTH				     1
+
+/* (0x0E3D)  EQ3_2 */
+#define MADERA_EQ3_B4_GAIN_MASK				0xF800
+#define MADERA_EQ3_B4_GAIN_SHIFT			    11
+#define MADERA_EQ3_B4_GAIN_WIDTH			     5
+#define MADERA_EQ3_B5_GAIN_MASK				0x07C0
+#define MADERA_EQ3_B5_GAIN_SHIFT			     6
+#define MADERA_EQ3_B5_GAIN_WIDTH			     5
+#define MADERA_EQ3_B1_MODE				0x0001
+#define MADERA_EQ3_B1_MODE_MASK				0x0001
+#define MADERA_EQ3_B1_MODE_SHIFT			     0
+#define MADERA_EQ3_B1_MODE_WIDTH			     1
+
+/* (0x0E52)  EQ4_1 */
+#define MADERA_EQ4_B1_GAIN_MASK				0xF800
+#define MADERA_EQ4_B1_GAIN_SHIFT			    11
+#define MADERA_EQ4_B1_GAIN_WIDTH			     5
+#define MADERA_EQ4_B2_GAIN_MASK				0x07C0
+#define MADERA_EQ4_B2_GAIN_SHIFT			     6
+#define MADERA_EQ4_B2_GAIN_WIDTH			     5
+#define MADERA_EQ4_B3_GAIN_MASK				0x003E
+#define MADERA_EQ4_B3_GAIN_SHIFT			     1
+#define MADERA_EQ4_B3_GAIN_WIDTH			     5
+#define MADERA_EQ4_ENA					0x0001
+#define MADERA_EQ4_ENA_MASK				0x0001
+#define MADERA_EQ4_ENA_SHIFT				     0
+#define MADERA_EQ4_ENA_WIDTH				     1
+
+/* (0x0E53)  EQ4_2 */
+#define MADERA_EQ4_B4_GAIN_MASK				0xF800
+#define MADERA_EQ4_B4_GAIN_SHIFT			    11
+#define MADERA_EQ4_B4_GAIN_WIDTH			     5
+#define MADERA_EQ4_B5_GAIN_MASK				0x07C0
+#define MADERA_EQ4_B5_GAIN_SHIFT			     6
+#define MADERA_EQ4_B5_GAIN_WIDTH			     5
+#define MADERA_EQ4_B1_MODE				0x0001
+#define MADERA_EQ4_B1_MODE_MASK				0x0001
+#define MADERA_EQ4_B1_MODE_SHIFT			     0
+#define MADERA_EQ4_B1_MODE_WIDTH			     1
+
+/* (0x0E80)  DRC1_ctrl1 */
+#define MADERA_DRC1L_ENA				0x0002
+#define MADERA_DRC1L_ENA_MASK				0x0002
+#define MADERA_DRC1L_ENA_SHIFT				     1
+#define MADERA_DRC1L_ENA_WIDTH				     1
+#define MADERA_DRC1R_ENA				0x0001
+#define MADERA_DRC1R_ENA_MASK				0x0001
+#define MADERA_DRC1R_ENA_SHIFT				     0
+#define MADERA_DRC1R_ENA_WIDTH				     1
+
+/* (0x0E88)  DRC2_ctrl1 */
+#define MADERA_DRC2L_ENA				0x0002
+#define MADERA_DRC2L_ENA_MASK				0x0002
+#define MADERA_DRC2L_ENA_SHIFT				     1
+#define MADERA_DRC2L_ENA_WIDTH				     1
+#define MADERA_DRC2R_ENA				0x0001
+#define MADERA_DRC2R_ENA_MASK				0x0001
+#define MADERA_DRC2R_ENA_SHIFT				     0
+#define MADERA_DRC2R_ENA_WIDTH				     1
+
+/* (0x0EC0)  HPLPF1_1 */
+#define MADERA_LHPF1_MODE				0x0002
+#define MADERA_LHPF1_MODE_MASK				0x0002
+#define MADERA_LHPF1_MODE_SHIFT				     1
+#define MADERA_LHPF1_MODE_WIDTH				     1
+#define MADERA_LHPF1_ENA				0x0001
+#define MADERA_LHPF1_ENA_MASK				0x0001
+#define MADERA_LHPF1_ENA_SHIFT				     0
+#define MADERA_LHPF1_ENA_WIDTH				     1
+
+/* (0x0EC1)  HPLPF1_2 */
+#define MADERA_LHPF1_COEFF_MASK				0xFFFF
+#define MADERA_LHPF1_COEFF_SHIFT			     0
+#define MADERA_LHPF1_COEFF_WIDTH			    16
+
+/* (0x0EC4)  HPLPF2_1 */
+#define MADERA_LHPF2_MODE				0x0002
+#define MADERA_LHPF2_MODE_MASK				0x0002
+#define MADERA_LHPF2_MODE_SHIFT				     1
+#define MADERA_LHPF2_MODE_WIDTH				     1
+#define MADERA_LHPF2_ENA				0x0001
+#define MADERA_LHPF2_ENA_MASK				0x0001
+#define MADERA_LHPF2_ENA_SHIFT				     0
+#define MADERA_LHPF2_ENA_WIDTH				     1
+
+/* (0x0EC5)  HPLPF2_2 */
+#define MADERA_LHPF2_COEFF_MASK				0xFFFF
+#define MADERA_LHPF2_COEFF_SHIFT			     0
+#define MADERA_LHPF2_COEFF_WIDTH			    16
+
+/* (0x0EC8)  HPLPF3_1 */
+#define MADERA_LHPF3_MODE				0x0002
+#define MADERA_LHPF3_MODE_MASK				0x0002
+#define MADERA_LHPF3_MODE_SHIFT				     1
+#define MADERA_LHPF3_MODE_WIDTH				     1
+#define MADERA_LHPF3_ENA				0x0001
+#define MADERA_LHPF3_ENA_MASK				0x0001
+#define MADERA_LHPF3_ENA_SHIFT				     0
+#define MADERA_LHPF3_ENA_WIDTH				     1
+
+/* (0x0EC9)  HPLPF3_2 */
+#define MADERA_LHPF3_COEFF_MASK				0xFFFF
+#define MADERA_LHPF3_COEFF_SHIFT			     0
+#define MADERA_LHPF3_COEFF_WIDTH			    16
+
+/* (0x0ECC)  HPLPF4_1 */
+#define MADERA_LHPF4_MODE				0x0002
+#define MADERA_LHPF4_MODE_MASK				0x0002
+#define MADERA_LHPF4_MODE_SHIFT				     1
+#define MADERA_LHPF4_MODE_WIDTH				     1
+#define MADERA_LHPF4_ENA				0x0001
+#define MADERA_LHPF4_ENA_MASK				0x0001
+#define MADERA_LHPF4_ENA_SHIFT				     0
+#define MADERA_LHPF4_ENA_WIDTH				     1
+
+/* (0x0ECD)  HPLPF4_2 */
+#define MADERA_LHPF4_COEFF_MASK				0xFFFF
+#define MADERA_LHPF4_COEFF_SHIFT			     0
+#define MADERA_LHPF4_COEFF_WIDTH			    16
+
+/* (0x0ED0)  ASRC2_ENABLE */
+#define MADERA_ASRC2_IN2L_ENA				0x0008
+#define MADERA_ASRC2_IN2L_ENA_MASK			0x0008
+#define MADERA_ASRC2_IN2L_ENA_SHIFT			     3
+#define MADERA_ASRC2_IN2L_ENA_WIDTH			     1
+#define MADERA_ASRC2_IN2R_ENA				0x0004
+#define MADERA_ASRC2_IN2R_ENA_MASK			0x0004
+#define MADERA_ASRC2_IN2R_ENA_SHIFT			     2
+#define MADERA_ASRC2_IN2R_ENA_WIDTH			     1
+#define MADERA_ASRC2_IN1L_ENA				0x0002
+#define MADERA_ASRC2_IN1L_ENA_MASK			0x0002
+#define MADERA_ASRC2_IN1L_ENA_SHIFT			     1
+#define MADERA_ASRC2_IN1L_ENA_WIDTH			     1
+#define MADERA_ASRC2_IN1R_ENA				0x0001
+#define MADERA_ASRC2_IN1R_ENA_MASK			0x0001
+#define MADERA_ASRC2_IN1R_ENA_SHIFT			     0
+#define MADERA_ASRC2_IN1R_ENA_WIDTH			     1
+
+/* (0x0ED2)  ASRC2_RATE1 */
+#define MADERA_ASRC2_RATE1_MASK				0xF800
+#define MADERA_ASRC2_RATE1_SHIFT			    11
+#define MADERA_ASRC2_RATE1_WIDTH			     5
+
+/* (0x0ED3)  ASRC2_RATE2 */
+#define MADERA_ASRC2_RATE2_MASK				0xF800
+#define MADERA_ASRC2_RATE2_SHIFT			    11
+#define MADERA_ASRC2_RATE2_WIDTH			     5
+
+/* (0x0EE0)  ASRC1_ENABLE */
+#define MADERA_ASRC1_IN2L_ENA				0x0008
+#define MADERA_ASRC1_IN2L_ENA_MASK			0x0008
+#define MADERA_ASRC1_IN2L_ENA_SHIFT			     3
+#define MADERA_ASRC1_IN2L_ENA_WIDTH			     1
+#define MADERA_ASRC1_IN2R_ENA				0x0004
+#define MADERA_ASRC1_IN2R_ENA_MASK			0x0004
+#define MADERA_ASRC1_IN2R_ENA_SHIFT			     2
+#define MADERA_ASRC1_IN2R_ENA_WIDTH			     1
+#define MADERA_ASRC1_IN1L_ENA				0x0002
+#define MADERA_ASRC1_IN1L_ENA_MASK			0x0002
+#define MADERA_ASRC1_IN1L_ENA_SHIFT			     1
+#define MADERA_ASRC1_IN1L_ENA_WIDTH			     1
+#define MADERA_ASRC1_IN1R_ENA				0x0001
+#define MADERA_ASRC1_IN1R_ENA_MASK			0x0001
+#define MADERA_ASRC1_IN1R_ENA_SHIFT			     0
+#define MADERA_ASRC1_IN1R_ENA_WIDTH			     1
+
+/* (0x0EE2)  ASRC1_RATE1 */
+#define MADERA_ASRC1_RATE1_MASK				0xF800
+#define MADERA_ASRC1_RATE1_SHIFT			    11
+#define MADERA_ASRC1_RATE1_WIDTH			     5
+
+/* (0x0EE3)  ASRC1_RATE2 */
+#define MADERA_ASRC1_RATE2_MASK				0xF800
+#define MADERA_ASRC1_RATE2_SHIFT			    11
+#define MADERA_ASRC1_RATE2_WIDTH			     5
+
+/* (0x0EF0) - ISRC1 CTRL 1 */
+#define MADERA_ISRC1_FSH_MASK				0xF800
+#define MADERA_ISRC1_FSH_SHIFT				    11
+#define MADERA_ISRC1_FSH_WIDTH				     5
+#define MADERA_ISRC1_CLK_SEL_MASK			0x0700
+#define MADERA_ISRC1_CLK_SEL_SHIFT			     8
+#define MADERA_ISRC1_CLK_SEL_WIDTH			     3
+
+/* (0x0EF1)  ISRC1_CTRL_2 */
+#define MADERA_ISRC1_FSL_MASK				0xF800
+#define MADERA_ISRC1_FSL_SHIFT				    11
+#define MADERA_ISRC1_FSL_WIDTH				     5
+
+/* (0x0EF2)  ISRC1_CTRL_3 */
+#define MADERA_ISRC1_INT1_ENA				0x8000
+#define MADERA_ISRC1_INT1_ENA_MASK			0x8000
+#define MADERA_ISRC1_INT1_ENA_SHIFT			    15
+#define MADERA_ISRC1_INT1_ENA_WIDTH			     1
+#define MADERA_ISRC1_INT2_ENA				0x4000
+#define MADERA_ISRC1_INT2_ENA_MASK			0x4000
+#define MADERA_ISRC1_INT2_ENA_SHIFT			    14
+#define MADERA_ISRC1_INT2_ENA_WIDTH			     1
+#define MADERA_ISRC1_INT3_ENA				0x2000
+#define MADERA_ISRC1_INT3_ENA_MASK			0x2000
+#define MADERA_ISRC1_INT3_ENA_SHIFT			    13
+#define MADERA_ISRC1_INT3_ENA_WIDTH			     1
+#define MADERA_ISRC1_INT4_ENA				0x1000
+#define MADERA_ISRC1_INT4_ENA_MASK			0x1000
+#define MADERA_ISRC1_INT4_ENA_SHIFT			    12
+#define MADERA_ISRC1_INT4_ENA_WIDTH			     1
+#define MADERA_ISRC1_DEC1_ENA				0x0200
+#define MADERA_ISRC1_DEC1_ENA_MASK			0x0200
+#define MADERA_ISRC1_DEC1_ENA_SHIFT			     9
+#define MADERA_ISRC1_DEC1_ENA_WIDTH			     1
+#define MADERA_ISRC1_DEC2_ENA				0x0100
+#define MADERA_ISRC1_DEC2_ENA_MASK			0x0100
+#define MADERA_ISRC1_DEC2_ENA_SHIFT			     8
+#define MADERA_ISRC1_DEC2_ENA_WIDTH			     1
+#define MADERA_ISRC1_DEC3_ENA				0x0080
+#define MADERA_ISRC1_DEC3_ENA_MASK			0x0080
+#define MADERA_ISRC1_DEC3_ENA_SHIFT			     7
+#define MADERA_ISRC1_DEC3_ENA_WIDTH			     1
+#define MADERA_ISRC1_DEC4_ENA				0x0040
+#define MADERA_ISRC1_DEC4_ENA_MASK			0x0040
+#define MADERA_ISRC1_DEC4_ENA_SHIFT			     6
+#define MADERA_ISRC1_DEC4_ENA_WIDTH			     1
+#define MADERA_ISRC1_NOTCH_ENA				0x0001
+#define MADERA_ISRC1_NOTCH_ENA_MASK			0x0001
+#define MADERA_ISRC1_NOTCH_ENA_SHIFT			     0
+#define MADERA_ISRC1_NOTCH_ENA_WIDTH			     1
+
+/* (0x0EF3)  ISRC2_CTRL_1 */
+#define MADERA_ISRC2_FSH_MASK				0xF800
+#define MADERA_ISRC2_FSH_SHIFT				    11
+#define MADERA_ISRC2_FSH_WIDTH				     5
+#define MADERA_ISRC2_CLK_SEL_MASK			0x0700
+#define MADERA_ISRC2_CLK_SEL_SHIFT			     8
+#define MADERA_ISRC2_CLK_SEL_WIDTH			     3
+
+/* (0x0EF4)  ISRC2_CTRL_2 */
+#define MADERA_ISRC2_FSL_MASK				0xF800
+#define MADERA_ISRC2_FSL_SHIFT				    11
+#define MADERA_ISRC2_FSL_WIDTH				     5
+
+/* (0x0EF5)  ISRC2_CTRL_3 */
+#define MADERA_ISRC2_INT1_ENA				0x8000
+#define MADERA_ISRC2_INT1_ENA_MASK			0x8000
+#define MADERA_ISRC2_INT1_ENA_SHIFT			    15
+#define MADERA_ISRC2_INT1_ENA_WIDTH			     1
+#define MADERA_ISRC2_INT2_ENA				0x4000
+#define MADERA_ISRC2_INT2_ENA_MASK			0x4000
+#define MADERA_ISRC2_INT2_ENA_SHIFT			    14
+#define MADERA_ISRC2_INT2_ENA_WIDTH			     1
+#define MADERA_ISRC2_INT3_ENA				0x2000
+#define MADERA_ISRC2_INT3_ENA_MASK			0x2000
+#define MADERA_ISRC2_INT3_ENA_SHIFT			    13
+#define MADERA_ISRC2_INT3_ENA_WIDTH			     1
+#define MADERA_ISRC2_INT4_ENA				0x1000
+#define MADERA_ISRC2_INT4_ENA_MASK			0x1000
+#define MADERA_ISRC2_INT4_ENA_SHIFT			    12
+#define MADERA_ISRC2_INT4_ENA_WIDTH			     1
+#define MADERA_ISRC2_DEC1_ENA				0x0200
+#define MADERA_ISRC2_DEC1_ENA_MASK			0x0200
+#define MADERA_ISRC2_DEC1_ENA_SHIFT			     9
+#define MADERA_ISRC2_DEC1_ENA_WIDTH			     1
+#define MADERA_ISRC2_DEC2_ENA				0x0100
+#define MADERA_ISRC2_DEC2_ENA_MASK			0x0100
+#define MADERA_ISRC2_DEC2_ENA_SHIFT			     8
+#define MADERA_ISRC2_DEC2_ENA_WIDTH			     1
+#define MADERA_ISRC2_DEC3_ENA				0x0080
+#define MADERA_ISRC2_DEC3_ENA_MASK			0x0080
+#define MADERA_ISRC2_DEC3_ENA_SHIFT			     7
+#define MADERA_ISRC2_DEC3_ENA_WIDTH			     1
+#define MADERA_ISRC2_DEC4_ENA				0x0040
+#define MADERA_ISRC2_DEC4_ENA_MASK			0x0040
+#define MADERA_ISRC2_DEC4_ENA_SHIFT			     6
+#define MADERA_ISRC2_DEC4_ENA_WIDTH			     1
+#define MADERA_ISRC2_NOTCH_ENA				0x0001
+#define MADERA_ISRC2_NOTCH_ENA_MASK			0x0001
+#define MADERA_ISRC2_NOTCH_ENA_SHIFT			     0
+#define MADERA_ISRC2_NOTCH_ENA_WIDTH			     1
+
+/* (0x0EF6)  ISRC3_CTRL_1 */
+#define MADERA_ISRC3_FSH_MASK				0xF800
+#define MADERA_ISRC3_FSH_SHIFT				    11
+#define MADERA_ISRC3_FSH_WIDTH				     5
+#define MADERA_ISRC3_CLK_SEL_MASK			0x0700
+#define MADERA_ISRC3_CLK_SEL_SHIFT			     8
+#define MADERA_ISRC3_CLK_SEL_WIDTH			     3
+
+/* (0x0EF7)  ISRC3_CTRL_2 */
+#define MADERA_ISRC3_FSL_MASK				0xF800
+#define MADERA_ISRC3_FSL_SHIFT				    11
+#define MADERA_ISRC3_FSL_WIDTH				     5
+
+/* (0x0EF8)  ISRC3_CTRL_3 */
+#define MADERA_ISRC3_INT1_ENA				0x8000
+#define MADERA_ISRC3_INT1_ENA_MASK			0x8000
+#define MADERA_ISRC3_INT1_ENA_SHIFT			    15
+#define MADERA_ISRC3_INT1_ENA_WIDTH			     1
+#define MADERA_ISRC3_INT2_ENA				0x4000
+#define MADERA_ISRC3_INT2_ENA_MASK			0x4000
+#define MADERA_ISRC3_INT2_ENA_SHIFT			    14
+#define MADERA_ISRC3_INT2_ENA_WIDTH			     1
+#define MADERA_ISRC3_INT3_ENA				0x2000
+#define MADERA_ISRC3_INT3_ENA_MASK			0x2000
+#define MADERA_ISRC3_INT3_ENA_SHIFT			    13
+#define MADERA_ISRC3_INT3_ENA_WIDTH			     1
+#define MADERA_ISRC3_INT4_ENA				0x1000
+#define MADERA_ISRC3_INT4_ENA_MASK			0x1000
+#define MADERA_ISRC3_INT4_ENA_SHIFT			    12
+#define MADERA_ISRC3_INT4_ENA_WIDTH			     1
+#define MADERA_ISRC3_DEC1_ENA				0x0200
+#define MADERA_ISRC3_DEC1_ENA_MASK			0x0200
+#define MADERA_ISRC3_DEC1_ENA_SHIFT			     9
+#define MADERA_ISRC3_DEC1_ENA_WIDTH			     1
+#define MADERA_ISRC3_DEC2_ENA				0x0100
+#define MADERA_ISRC3_DEC2_ENA_MASK			0x0100
+#define MADERA_ISRC3_DEC2_ENA_SHIFT			     8
+#define MADERA_ISRC3_DEC2_ENA_WIDTH			     1
+#define MADERA_ISRC3_DEC3_ENA				0x0080
+#define MADERA_ISRC3_DEC3_ENA_MASK			0x0080
+#define MADERA_ISRC3_DEC3_ENA_SHIFT			     7
+#define MADERA_ISRC3_DEC3_ENA_WIDTH			     1
+#define MADERA_ISRC3_DEC4_ENA				0x0040
+#define MADERA_ISRC3_DEC4_ENA_MASK			0x0040
+#define MADERA_ISRC3_DEC4_ENA_SHIFT			     6
+#define MADERA_ISRC3_DEC4_ENA_WIDTH			     1
+#define MADERA_ISRC3_NOTCH_ENA				0x0001
+#define MADERA_ISRC3_NOTCH_ENA_MASK			0x0001
+#define MADERA_ISRC3_NOTCH_ENA_SHIFT			     0
+#define MADERA_ISRC3_NOTCH_ENA_WIDTH			     1
+
+/* (0x0EF9)  ISRC4_CTRL_1 */
+#define MADERA_ISRC4_FSH_MASK				0xF800
+#define MADERA_ISRC4_FSH_SHIFT				    11
+#define MADERA_ISRC4_FSH_WIDTH				     5
+#define MADERA_ISRC4_CLK_SEL_MASK			0x0700
+#define MADERA_ISRC4_CLK_SEL_SHIFT			     8
+#define MADERA_ISRC4_CLK_SEL_WIDTH			     3
+
+/* (0x0EFA)  ISRC4_CTRL_2 */
+#define MADERA_ISRC4_FSL_MASK				0xF800
+#define MADERA_ISRC4_FSL_SHIFT				    11
+#define MADERA_ISRC4_FSL_WIDTH				     5
+
+/* (0x0EFB)  ISRC4_CTRL_3 */
+#define MADERA_ISRC4_INT1_ENA				0x8000
+#define MADERA_ISRC4_INT1_ENA_MASK			0x8000
+#define MADERA_ISRC4_INT1_ENA_SHIFT			    15
+#define MADERA_ISRC4_INT1_ENA_WIDTH			     1
+#define MADERA_ISRC4_INT2_ENA				0x4000
+#define MADERA_ISRC4_INT2_ENA_MASK			0x4000
+#define MADERA_ISRC4_INT2_ENA_SHIFT			    14
+#define MADERA_ISRC4_INT2_ENA_WIDTH			     1
+#define MADERA_ISRC4_INT3_ENA				0x2000
+#define MADERA_ISRC4_INT3_ENA_MASK			0x2000
+#define MADERA_ISRC4_INT3_ENA_SHIFT			    13
+#define MADERA_ISRC4_INT3_ENA_WIDTH			     1
+#define MADERA_ISRC4_INT4_ENA				0x1000
+#define MADERA_ISRC4_INT4_ENA_MASK			0x1000
+#define MADERA_ISRC4_INT4_ENA_SHIFT			    12
+#define MADERA_ISRC4_INT4_ENA_WIDTH			     1
+#define MADERA_ISRC4_DEC1_ENA				0x0200
+#define MADERA_ISRC4_DEC1_ENA_MASK			0x0200
+#define MADERA_ISRC4_DEC1_ENA_SHIFT			     9
+#define MADERA_ISRC4_DEC1_ENA_WIDTH			     1
+#define MADERA_ISRC4_DEC2_ENA				0x0100
+#define MADERA_ISRC4_DEC2_ENA_MASK			0x0100
+#define MADERA_ISRC4_DEC2_ENA_SHIFT			     8
+#define MADERA_ISRC4_DEC2_ENA_WIDTH			     1
+#define MADERA_ISRC4_DEC3_ENA				0x0080
+#define MADERA_ISRC4_DEC3_ENA_MASK			0x0080
+#define MADERA_ISRC4_DEC3_ENA_SHIFT			     7
+#define MADERA_ISRC4_DEC3_ENA_WIDTH			     1
+#define MADERA_ISRC4_DEC4_ENA				0x0040
+#define MADERA_ISRC4_DEC4_ENA_MASK			0x0040
+#define MADERA_ISRC4_DEC4_ENA_SHIFT			     6
+#define MADERA_ISRC4_DEC4_ENA_WIDTH			     1
+#define MADERA_ISRC4_NOTCH_ENA				0x0001
+#define MADERA_ISRC4_NOTCH_ENA_MASK			0x0001
+#define MADERA_ISRC4_NOTCH_ENA_SHIFT			     0
+#define MADERA_ISRC4_NOTCH_ENA_WIDTH			     1
+
+/* (0x0F00)  Clock_Control */
+#define MADERA_EXT_NG_SEL_CLR				0x0080
+#define MADERA_EXT_NG_SEL_CLR_MASK			0x0080
+#define MADERA_EXT_NG_SEL_CLR_SHIFT			     7
+#define MADERA_EXT_NG_SEL_CLR_WIDTH			     1
+#define MADERA_EXT_NG_SEL_SET				0x0040
+#define MADERA_EXT_NG_SEL_SET_MASK			0x0040
+#define MADERA_EXT_NG_SEL_SET_SHIFT			     6
+#define MADERA_EXT_NG_SEL_SET_WIDTH			     1
+#define MADERA_CLK_R_ENA_CLR				0x0020
+#define MADERA_CLK_R_ENA_CLR_MASK			0x0020
+#define MADERA_CLK_R_ENA_CLR_SHIFT			     5
+#define MADERA_CLK_R_ENA_CLR_WIDTH			     1
+#define MADERA_CLK_R_ENA_SET				0x0010
+#define MADERA_CLK_R_ENA_SET_MASK			0x0010
+#define MADERA_CLK_R_ENA_SET_SHIFT			     4
+#define MADERA_CLK_R_ENA_SET_WIDTH			     1
+#define MADERA_CLK_NG_ENA_CLR				0x0008
+#define MADERA_CLK_NG_ENA_CLR_MASK			0x0008
+#define MADERA_CLK_NG_ENA_CLR_SHIFT			     3
+#define MADERA_CLK_NG_ENA_CLR_WIDTH			     1
+#define MADERA_CLK_NG_ENA_SET				0x0004
+#define MADERA_CLK_NG_ENA_SET_MASK			0x0004
+#define MADERA_CLK_NG_ENA_SET_SHIFT			     2
+#define MADERA_CLK_NG_ENA_SET_WIDTH			     1
+#define MADERA_CLK_L_ENA_CLR				0x0002
+#define MADERA_CLK_L_ENA_CLR_MASK			0x0002
+#define MADERA_CLK_L_ENA_CLR_SHIFT			     1
+#define MADERA_CLK_L_ENA_CLR_WIDTH			     1
+#define MADERA_CLK_L_ENA_SET				0x0001
+#define MADERA_CLK_L_ENA_SET_MASK			0x0001
+#define MADERA_CLK_L_ENA_SET_SHIFT			     0
+#define MADERA_CLK_L_ENA_SET_WIDTH			     1
+
+/* (0x0F01)  ANC_SRC */
+#define MADERA_IN_RXANCR_SEL_MASK			0x0070
+#define MADERA_IN_RXANCR_SEL_SHIFT			     4
+#define MADERA_IN_RXANCR_SEL_WIDTH			     3
+#define MADERA_IN_RXANCL_SEL_MASK			0x0007
+#define MADERA_IN_RXANCL_SEL_SHIFT			     0
+#define MADERA_IN_RXANCL_SEL_WIDTH			     3
+
+/* (0x0F17)  FCL_ADC_reformatter_control */
+#define MADERA_FCL_MIC_MODE_SEL				0x000C
+#define MADERA_FCL_MIC_MODE_SEL_SHIFT			     2
+#define MADERA_FCL_MIC_MODE_SEL_WIDTH			     2
+
+/* (0x0F73)  FCR_ADC_reformatter_control */
+#define MADERA_FCR_MIC_MODE_SEL				0x000C
+#define MADERA_FCR_MIC_MODE_SEL_SHIFT			     2
+#define MADERA_FCR_MIC_MODE_SEL_WIDTH			     2
+
+/* (0x1480)  DFC1_CTRL_W0 */
+#define MADERA_DFC1_RATE_MASK				0x007C
+#define MADERA_DFC1_RATE_SHIFT				     2
+#define MADERA_DFC1_RATE_WIDTH				     5
+#define MADERA_DFC1_DITH_ENA				0x0002
+#define MADERA_DFC1_DITH_ENA_MASK			0x0002
+#define MADERA_DFC1_DITH_ENA_SHIFT			     1
+#define MADERA_DFC1_DITH_ENA_WIDTH			     1
+#define MADERA_DFC1_ENA					0x0001
+#define MADERA_DFC1_ENA_MASK				0x0001
+#define MADERA_DFC1_ENA_SHIFT				     0
+#define MADERA_DFC1_ENA_WIDTH				     1
+
+/* (0x1482)  DFC1_RX_W0 */
+#define MADERA_DFC1_RX_DATA_WIDTH_MASK			0x1F00
+#define MADERA_DFC1_RX_DATA_WIDTH_SHIFT			     8
+#define MADERA_DFC1_RX_DATA_WIDTH_WIDTH			     5
+
+#define MADERA_DFC1_RX_DATA_TYPE_MASK			0x0007
+#define MADERA_DFC1_RX_DATA_TYPE_SHIFT			     0
+#define MADERA_DFC1_RX_DATA_TYPE_WIDTH			     3
+
+/* (0x1484)  DFC1_TX_W0 */
+#define MADERA_DFC1_TX_DATA_WIDTH_MASK			0x1F00
+#define MADERA_DFC1_TX_DATA_WIDTH_SHIFT			     8
+#define MADERA_DFC1_TX_DATA_WIDTH_WIDTH			     5
+
+#define MADERA_DFC1_TX_DATA_TYPE_MASK			0x0007
+#define MADERA_DFC1_TX_DATA_TYPE_SHIFT			     0
+#define MADERA_DFC1_TX_DATA_TYPE_WIDTH			     3
+
+/* (0x1600)  ADSP2_IRQ0 */
+#define MADERA_DSP_IRQ2					0x0002
+#define MADERA_DSP_IRQ1					0x0001
+
+/* (0x1601)  ADSP2_IRQ1 */
+#define MADERA_DSP_IRQ4					0x0002
+#define MADERA_DSP_IRQ3					0x0001
+
+/* (0x1602)  ADSP2_IRQ2 */
+#define MADERA_DSP_IRQ6					0x0002
+#define MADERA_DSP_IRQ5					0x0001
+
+/* (0x1603)  ADSP2_IRQ3 */
+#define MADERA_DSP_IRQ8					0x0002
+#define MADERA_DSP_IRQ7					0x0001
+
+/* (0x1604)  ADSP2_IRQ4 */
+#define MADERA_DSP_IRQ10				0x0002
+#define MADERA_DSP_IRQ9					0x0001
+
+/* (0x1605)  ADSP2_IRQ5 */
+#define MADERA_DSP_IRQ12				0x0002
+#define MADERA_DSP_IRQ11				0x0001
+
+/* (0x1606)  ADSP2_IRQ6 */
+#define MADERA_DSP_IRQ14				0x0002
+#define MADERA_DSP_IRQ13				0x0001
+
+/* (0x1607)  ADSP2_IRQ7 */
+#define MADERA_DSP_IRQ16				0x0002
+#define MADERA_DSP_IRQ15				0x0001
+
+/* (0x1700)  GPIO1_CTRL_1 */
+#define MADERA_GP1_LVL					0x8000
+#define MADERA_GP1_LVL_MASK				0x8000
+#define MADERA_GP1_LVL_SHIFT				    15
+#define MADERA_GP1_LVL_WIDTH				     1
+#define MADERA_GP1_OP_CFG				0x4000
+#define MADERA_GP1_OP_CFG_MASK				0x4000
+#define MADERA_GP1_OP_CFG_SHIFT				    14
+#define MADERA_GP1_OP_CFG_WIDTH				     1
+#define MADERA_GP1_DB					0x2000
+#define MADERA_GP1_DB_MASK				0x2000
+#define MADERA_GP1_DB_SHIFT				    13
+#define MADERA_GP1_DB_WIDTH				     1
+#define MADERA_GP1_POL					0x1000
+#define MADERA_GP1_POL_MASK				0x1000
+#define MADERA_GP1_POL_SHIFT				    12
+#define MADERA_GP1_POL_WIDTH				     1
+#define MADERA_GP1_IP_CFG				0x0800
+#define MADERA_GP1_IP_CFG_MASK				0x0800
+#define MADERA_GP1_IP_CFG_SHIFT				    11
+#define MADERA_GP1_IP_CFG_WIDTH				     1
+#define MADERA_GP1_FN_MASK				0x03FF
+#define MADERA_GP1_FN_SHIFT				     0
+#define MADERA_GP1_FN_WIDTH				    10
+
+/* (0x1701)  GPIO1_CTRL_2 */
+#define MADERA_GP1_DIR					0x8000
+#define MADERA_GP1_DIR_MASK				0x8000
+#define MADERA_GP1_DIR_SHIFT				    15
+#define MADERA_GP1_DIR_WIDTH				     1
+#define MADERA_GP1_PU					0x4000
+#define MADERA_GP1_PU_MASK				0x4000
+#define MADERA_GP1_PU_SHIFT				    14
+#define MADERA_GP1_PU_WIDTH				     1
+#define MADERA_GP1_PD					0x2000
+#define MADERA_GP1_PD_MASK				0x2000
+#define MADERA_GP1_PD_SHIFT				    13
+#define MADERA_GP1_PD_WIDTH				     1
+#define MADERA_GP1_DRV_STR_MASK				0x1800
+#define MADERA_GP1_DRV_STR_SHIFT			    11
+#define MADERA_GP1_DRV_STR_WIDTH			     2
+
+/* (0x1800)  IRQ1_Status_1 */
+#define MADERA_CTRLIF_ERR_EINT1				0x1000
+#define MADERA_CTRLIF_ERR_EINT1_MASK			0x1000
+#define MADERA_CTRLIF_ERR_EINT1_SHIFT			    12
+#define MADERA_CTRLIF_ERR_EINT1_WIDTH			     1
+#define MADERA_SYSCLK_FAIL_EINT1			0x0200
+#define MADERA_SYSCLK_FAIL_EINT1_MASK			0x0200
+#define MADERA_SYSCLK_FAIL_EINT1_SHIFT			     9
+#define MADERA_SYSCLK_FAIL_EINT1_WIDTH			     1
+#define MADERA_CLOCK_DETECT_EINT1			0x0100
+#define MADERA_CLOCK_DETECT_EINT1_MASK			0x0100
+#define MADERA_CLOCK_DETECT_EINT1_SHIFT			     8
+#define MADERA_CLOCK_DETECT_EINT1_WIDTH			     1
+#define MADERA_BOOT_DONE_EINT1				0x0080
+#define MADERA_BOOT_DONE_EINT1_MASK			0x0080
+#define MADERA_BOOT_DONE_EINT1_SHIFT			     7
+#define MADERA_BOOT_DONE_EINT1_WIDTH			     1
+
+/* (0x1801)  IRQ1_Status_2 */
+#define MADERA_FLLAO_LOCK_EINT1				0x0800
+#define MADERA_FLLAO_LOCK_EINT1_MASK			0x0800
+#define MADERA_FLLAO_LOCK_EINT1_SHIFT			    11
+#define MADERA_FLLAO_LOCK_EINT1_WIDTH			     1
+#define MADERA_FLL3_LOCK_EINT1				0x0400
+#define MADERA_FLL3_LOCK_EINT1_MASK			0x0400
+#define MADERA_FLL3_LOCK_EINT1_SHIFT			    10
+#define MADERA_FLL3_LOCK_EINT1_WIDTH			     1
+#define MADERA_FLL2_LOCK_EINT1				0x0200
+#define MADERA_FLL2_LOCK_EINT1_MASK			0x0200
+#define MADERA_FLL2_LOCK_EINT1_SHIFT			     9
+#define MADERA_FLL2_LOCK_EINT1_WIDTH			     1
+#define MADERA_FLL1_LOCK_EINT1				0x0100
+#define MADERA_FLL1_LOCK_EINT1_MASK			0x0100
+#define MADERA_FLL1_LOCK_EINT1_SHIFT			     8
+#define MADERA_FLL1_LOCK_EINT1_WIDTH			     1
+
+/* (0x1805)  IRQ1_Status_6 */
+#define MADERA_MICDET2_EINT1				0x0200
+#define MADERA_MICDET2_EINT1_MASK			0x0200
+#define MADERA_MICDET2_EINT1_SHIFT			     9
+#define MADERA_MICDET2_EINT1_WIDTH			     1
+#define MADERA_MICDET1_EINT1				0x0100
+#define MADERA_MICDET1_EINT1_MASK			0x0100
+#define MADERA_MICDET1_EINT1_SHIFT			     8
+#define MADERA_MICDET1_EINT1_WIDTH			     1
+#define MADERA_HPDET_EINT1				0x0001
+#define MADERA_HPDET_EINT1_MASK				0x0001
+#define MADERA_HPDET_EINT1_SHIFT			     0
+#define MADERA_HPDET_EINT1_WIDTH			     1
+
+/* (0x1806)  IRQ1_Status_7 */
+#define MADERA_MICD_CLAMP_FALL_EINT1			0x0020
+#define MADERA_MICD_CLAMP_FALL_EINT1_MASK		0x0020
+#define MADERA_MICD_CLAMP_FALL_EINT1_SHIFT		     5
+#define MADERA_MICD_CLAMP_FALL_EINT1_WIDTH		     1
+#define MADERA_MICD_CLAMP_RISE_EINT1			0x0010
+#define MADERA_MICD_CLAMP_RISE_EINT1_MASK		0x0010
+#define MADERA_MICD_CLAMP_RISE_EINT1_SHIFT		     4
+#define MADERA_MICD_CLAMP_RISE_EINT1_WIDTH		     1
+#define MADERA_JD2_FALL_EINT1				0x0008
+#define MADERA_JD2_FALL_EINT1_MASK			0x0008
+#define MADERA_JD2_FALL_EINT1_SHIFT			     3
+#define MADERA_JD2_FALL_EINT1_WIDTH			     1
+#define MADERA_JD2_RISE_EINT1				0x0004
+#define MADERA_JD2_RISE_EINT1_MASK			0x0004
+#define MADERA_JD2_RISE_EINT1_SHIFT			     2
+#define MADERA_JD2_RISE_EINT1_WIDTH			     1
+#define MADERA_JD1_FALL_EINT1				0x0002
+#define MADERA_JD1_FALL_EINT1_MASK			0x0002
+#define MADERA_JD1_FALL_EINT1_SHIFT			     1
+#define MADERA_JD1_FALL_EINT1_WIDTH			     1
+#define MADERA_JD1_RISE_EINT1				0x0001
+#define MADERA_JD1_RISE_EINT1_MASK			0x0001
+#define MADERA_JD1_RISE_EINT1_SHIFT			     0
+#define MADERA_JD1_RISE_EINT1_WIDTH			     1
+
+/* (0x1808)  IRQ1_Status_9 */
+#define MADERA_ASRC2_IN2_LOCK_EINT1			0x0800
+#define MADERA_ASRC2_IN2_LOCK_EINT1_MASK		0x0800
+#define MADERA_ASRC2_IN2_LOCK_EINT1_SHIFT		    11
+#define MADERA_ASRC2_IN2_LOCK_EINT1_WIDTH		     1
+#define MADERA_ASRC2_IN1_LOCK_EINT1			0x0400
+#define MADERA_ASRC2_IN1_LOCK_EINT1_MASK		0x0400
+#define MADERA_ASRC2_IN1_LOCK_EINT1_SHIFT		    10
+#define MADERA_ASRC2_IN1_LOCK_EINT1_WIDTH		     1
+#define MADERA_ASRC1_IN2_LOCK_EINT1			0x0200
+#define MADERA_ASRC1_IN2_LOCK_EINT1_MASK		0x0200
+#define MADERA_ASRC1_IN2_LOCK_EINT1_SHIFT		     9
+#define MADERA_ASRC1_IN2_LOCK_EINT1_WIDTH		     1
+#define MADERA_ASRC1_IN1_LOCK_EINT1			0x0100
+#define MADERA_ASRC1_IN1_LOCK_EINT1_MASK		0x0100
+#define MADERA_ASRC1_IN1_LOCK_EINT1_SHIFT		     8
+#define MADERA_ASRC1_IN1_LOCK_EINT1_WIDTH		     1
+#define MADERA_DRC2_SIG_DET_EINT1			0x0002
+#define MADERA_DRC2_SIG_DET_EINT1_MASK			0x0002
+#define MADERA_DRC2_SIG_DET_EINT1_SHIFT			     1
+#define MADERA_DRC2_SIG_DET_EINT1_WIDTH			     1
+#define MADERA_DRC1_SIG_DET_EINT1			0x0001
+#define MADERA_DRC1_SIG_DET_EINT1_MASK			0x0001
+#define MADERA_DRC1_SIG_DET_EINT1_SHIFT			     0
+#define MADERA_DRC1_SIG_DET_EINT1_WIDTH			     1
+
+/* (0x180A)  IRQ1_Status_11 */
+#define MADERA_DSP_IRQ16_EINT1				0x8000
+#define MADERA_DSP_IRQ16_EINT1_MASK			0x8000
+#define MADERA_DSP_IRQ16_EINT1_SHIFT			    15
+#define MADERA_DSP_IRQ16_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ15_EINT1				0x4000
+#define MADERA_DSP_IRQ15_EINT1_MASK			0x4000
+#define MADERA_DSP_IRQ15_EINT1_SHIFT			    14
+#define MADERA_DSP_IRQ15_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ14_EINT1				0x2000
+#define MADERA_DSP_IRQ14_EINT1_MASK			0x2000
+#define MADERA_DSP_IRQ14_EINT1_SHIFT			    13
+#define MADERA_DSP_IRQ14_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ13_EINT1				0x1000
+#define MADERA_DSP_IRQ13_EINT1_MASK			0x1000
+#define MADERA_DSP_IRQ13_EINT1_SHIFT			    12
+#define MADERA_DSP_IRQ13_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ12_EINT1				0x0800
+#define MADERA_DSP_IRQ12_EINT1_MASK			0x0800
+#define MADERA_DSP_IRQ12_EINT1_SHIFT			    11
+#define MADERA_DSP_IRQ12_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ11_EINT1				0x0400
+#define MADERA_DSP_IRQ11_EINT1_MASK			0x0400
+#define MADERA_DSP_IRQ11_EINT1_SHIFT			    10
+#define MADERA_DSP_IRQ11_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ10_EINT1				0x0200
+#define MADERA_DSP_IRQ10_EINT1_MASK			0x0200
+#define MADERA_DSP_IRQ10_EINT1_SHIFT			     9
+#define MADERA_DSP_IRQ10_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ9_EINT1				0x0100
+#define MADERA_DSP_IRQ9_EINT1_MASK			0x0100
+#define MADERA_DSP_IRQ9_EINT1_SHIFT			     8
+#define MADERA_DSP_IRQ9_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ8_EINT1				0x0080
+#define MADERA_DSP_IRQ8_EINT1_MASK			0x0080
+#define MADERA_DSP_IRQ8_EINT1_SHIFT			     7
+#define MADERA_DSP_IRQ8_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ7_EINT1				0x0040
+#define MADERA_DSP_IRQ7_EINT1_MASK			0x0040
+#define MADERA_DSP_IRQ7_EINT1_SHIFT			     6
+#define MADERA_DSP_IRQ7_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ6_EINT1				0x0020
+#define MADERA_DSP_IRQ6_EINT1_MASK			0x0020
+#define MADERA_DSP_IRQ6_EINT1_SHIFT			     5
+#define MADERA_DSP_IRQ6_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ5_EINT1				0x0010
+#define MADERA_DSP_IRQ5_EINT1_MASK			0x0010
+#define MADERA_DSP_IRQ5_EINT1_SHIFT			     4
+#define MADERA_DSP_IRQ5_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ4_EINT1				0x0008
+#define MADERA_DSP_IRQ4_EINT1_MASK			0x0008
+#define MADERA_DSP_IRQ4_EINT1_SHIFT			     3
+#define MADERA_DSP_IRQ4_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ3_EINT1				0x0004
+#define MADERA_DSP_IRQ3_EINT1_MASK			0x0004
+#define MADERA_DSP_IRQ3_EINT1_SHIFT			     2
+#define MADERA_DSP_IRQ3_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ2_EINT1				0x0002
+#define MADERA_DSP_IRQ2_EINT1_MASK			0x0002
+#define MADERA_DSP_IRQ2_EINT1_SHIFT			     1
+#define MADERA_DSP_IRQ2_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ1_EINT1				0x0001
+#define MADERA_DSP_IRQ1_EINT1_MASK			0x0001
+#define MADERA_DSP_IRQ1_EINT1_SHIFT			     0
+#define MADERA_DSP_IRQ1_EINT1_WIDTH			     1
+
+/* (0x180B)  IRQ1_Status_12 */
+#define MADERA_SPKOUTR_SC_EINT1				0x0080
+#define MADERA_SPKOUTR_SC_EINT1_MASK			0x0080
+#define MADERA_SPKOUTR_SC_EINT1_SHIFT			     7
+#define MADERA_SPKOUTR_SC_EINT1_WIDTH			     1
+#define MADERA_SPKOUTL_SC_EINT1				0x0040
+#define MADERA_SPKOUTL_SC_EINT1_MASK			0x0040
+#define MADERA_SPKOUTL_SC_EINT1_SHIFT			     6
+#define MADERA_SPKOUTL_SC_EINT1_WIDTH			     1
+#define MADERA_HP3R_SC_EINT1				0x0020
+#define MADERA_HP3R_SC_EINT1_MASK			0x0020
+#define MADERA_HP3R_SC_EINT1_SHIFT			     5
+#define MADERA_HP3R_SC_EINT1_WIDTH			     1
+#define MADERA_HP3L_SC_EINT1				0x0010
+#define MADERA_HP3L_SC_EINT1_MASK			0x0010
+#define MADERA_HP3L_SC_EINT1_SHIFT			     4
+#define MADERA_HP3L_SC_EINT1_WIDTH			     1
+#define MADERA_HP2R_SC_EINT1				0x0008
+#define MADERA_HP2R_SC_EINT1_MASK			0x0008
+#define MADERA_HP2R_SC_EINT1_SHIFT			     3
+#define MADERA_HP2R_SC_EINT1_WIDTH			     1
+#define MADERA_HP2L_SC_EINT1				0x0004
+#define MADERA_HP2L_SC_EINT1_MASK			0x0004
+#define MADERA_HP2L_SC_EINT1_SHIFT			     2
+#define MADERA_HP2L_SC_EINT1_WIDTH			     1
+#define MADERA_HP1R_SC_EINT1				0x0002
+#define MADERA_HP1R_SC_EINT1_MASK			0x0002
+#define MADERA_HP1R_SC_EINT1_SHIFT			     1
+#define MADERA_HP1R_SC_EINT1_WIDTH			     1
+#define MADERA_HP1L_SC_EINT1				0x0001
+#define MADERA_HP1L_SC_EINT1_MASK			0x0001
+#define MADERA_HP1L_SC_EINT1_SHIFT			     0
+#define MADERA_HP1L_SC_EINT1_WIDTH			     1
+
+/* (0x180E)  IRQ1_Status_15 */
+#define MADERA_SPK_OVERHEAT_WARN_EINT1			0x0004
+#define MADERA_SPK_OVERHEAT_WARN_EINT1_MASK		0x0004
+#define MADERA_SPK_OVERHEAT_WARN_EINT1_SHIFT		     2
+#define MADERA_SPK_OVERHEAT_WARN_EINT1_WIDTH		     1
+#define MADERA_SPK_OVERHEAT_EINT1			0x0002
+#define MADERA_SPK_OVERHEAT_EINT1_MASK			0x0002
+#define MADERA_SPK_OVERHEAT_EINT1_SHIFT			     1
+#define MADERA_SPK_OVERHEAT_EINT1_WIDTH			     1
+#define MADERA_SPK_SHUTDOWN_EINT1			0x0001
+#define MADERA_SPK_SHUTDOWN_EINT1_MASK			0x0001
+#define MADERA_SPK_SHUTDOWN_EINT1_SHIFT			     0
+#define MADERA_SPK_SHUTDOWN_EINT1_WIDTH			     1
+
+/* (0x1820) - IRQ1 Status 33 */
+#define MADERA_DSP7_BUS_ERR_EINT1			0x0040
+#define MADERA_DSP7_BUS_ERR_EINT1_MASK			0x0040
+#define MADERA_DSP7_BUS_ERR_EINT1_SHIFT			     6
+#define MADERA_DSP7_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP6_BUS_ERR_EINT1			0x0020
+#define MADERA_DSP6_BUS_ERR_EINT1_MASK			0x0020
+#define MADERA_DSP6_BUS_ERR_EINT1_SHIFT			     5
+#define MADERA_DSP6_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP5_BUS_ERR_EINT1			0x0010
+#define MADERA_DSP5_BUS_ERR_EINT1_MASK			0x0010
+#define MADERA_DSP5_BUS_ERR_EINT1_SHIFT			     4
+#define MADERA_DSP5_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP4_BUS_ERR_EINT1			0x0008
+#define MADERA_DSP4_BUS_ERR_EINT1_MASK			0x0008
+#define MADERA_DSP4_BUS_ERR_EINT1_SHIFT			     3
+#define MADERA_DSP4_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP3_BUS_ERR_EINT1			0x0004
+#define MADERA_DSP3_BUS_ERR_EINT1_MASK			0x0004
+#define MADERA_DSP3_BUS_ERR_EINT1_SHIFT			     2
+#define MADERA_DSP3_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP2_BUS_ERR_EINT1			0x0002
+#define MADERA_DSP2_BUS_ERR_EINT1_MASK			0x0002
+#define MADERA_DSP2_BUS_ERR_EINT1_SHIFT			     1
+#define MADERA_DSP2_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP1_BUS_ERR_EINT1			0x0001
+#define MADERA_DSP1_BUS_ERR_EINT1_MASK			0x0001
+#define MADERA_DSP1_BUS_ERR_EINT1_SHIFT			     0
+#define MADERA_DSP1_BUS_ERR_EINT1_WIDTH			     1
+
+/* (0x184E)  IRQ1_Mask_15 */
+#define MADERA_IM_SPK_OVERHEAT_WARN_EINT1		0x0004
+#define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_MASK		0x0004
+#define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_SHIFT		     2
+#define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_WIDTH		     1
+#define MADERA_IM_SPK_OVERHEAT_EINT1			0x0002
+#define MADERA_IM_SPK_OVERHEAT_EINT1_MASK		0x0002
+#define MADERA_IM_SPK_OVERHEAT_EINT1_SHIFT		     1
+#define MADERA_IM_SPK_OVERHEAT_EINT1_WIDTH		     1
+#define MADERA_IM_SPK_SHUTDOWN_EINT1			0x0001
+#define MADERA_IM_SPK_SHUTDOWN_EINT1_MASK		0x0001
+#define MADERA_IM_SPK_SHUTDOWN_EINT1_SHIFT		     0
+#define MADERA_IM_SPK_SHUTDOWN_EINT1_WIDTH		     1
+
+/* (0x1880) - IRQ1 Raw Status 1 */
+#define MADERA_CTRLIF_ERR_STS1				0x1000
+#define MADERA_CTRLIF_ERR_STS1_MASK			0x1000
+#define MADERA_CTRLIF_ERR_STS1_SHIFT			    12
+#define MADERA_CTRLIF_ERR_STS1_WIDTH			     1
+#define MADERA_SYSCLK_FAIL_STS1				0x0200
+#define MADERA_SYSCLK_FAIL_STS1_MASK			0x0200
+#define MADERA_SYSCLK_FAIL_STS1_SHIFT			     9
+#define MADERA_SYSCLK_FAIL_STS1_WIDTH			     1
+#define MADERA_CLOCK_DETECT_STS1			0x0100
+#define MADERA_CLOCK_DETECT_STS1_MASK			0x0100
+#define MADERA_CLOCK_DETECT_STS1_SHIFT			     8
+#define MADERA_CLOCK_DETECT_STS1_WIDTH			     1
+#define MADERA_BOOT_DONE_STS1				0x0080
+#define MADERA_BOOT_DONE_STS1_MASK			0x0080
+#define MADERA_BOOT_DONE_STS1_SHIFT			     7
+#define MADERA_BOOT_DONE_STS1_WIDTH			     1
+
+/* (0x1881) - IRQ1 Raw Status 2 */
+#define MADERA_FLL3_LOCK_STS1				0x0400
+#define MADERA_FLL3_LOCK_STS1_MASK			0x0400
+#define MADERA_FLL3_LOCK_STS1_SHIFT			    10
+#define MADERA_FLL3_LOCK_STS1_WIDTH			     1
+#define MADERA_FLL2_LOCK_STS1				0x0200
+#define MADERA_FLL2_LOCK_STS1_MASK			0x0200
+#define MADERA_FLL2_LOCK_STS1_SHIFT			     9
+#define MADERA_FLL2_LOCK_STS1_WIDTH			     1
+#define MADERA_FLL1_LOCK_STS1				0x0100
+#define MADERA_FLL1_LOCK_STS1_MASK			0x0100
+#define MADERA_FLL1_LOCK_STS1_SHIFT			     8
+#define MADERA_FLL1_LOCK_STS1_WIDTH			     1
+
+/* (0x1886) - IRQ1 Raw Status 7 */
+#define MADERA_MICD_CLAMP_FALL_STS1			0x0020
+#define MADERA_MICD_CLAMP_FALL_STS1_MASK		0x0020
+#define MADERA_MICD_CLAMP_FALL_STS1_SHIFT		     5
+#define MADERA_MICD_CLAMP_FALL_STS1_WIDTH		     1
+#define MADERA_MICD_CLAMP_RISE_STS1			0x0010
+#define MADERA_MICD_CLAMP_RISE_STS1_MASK		0x0010
+#define MADERA_MICD_CLAMP_RISE_STS1_SHIFT		     4
+#define MADERA_MICD_CLAMP_RISE_STS1_WIDTH		     1
+#define MADERA_JD2_FALL_STS1				0x0008
+#define MADERA_JD2_FALL_STS1_MASK			0x0008
+#define MADERA_JD2_FALL_STS1_SHIFT			     3
+#define MADERA_JD2_FALL_STS1_WIDTH			     1
+#define MADERA_JD2_RISE_STS1				0x0004
+#define MADERA_JD2_RISE_STS1_MASK			0x0004
+#define MADERA_JD2_RISE_STS1_SHIFT			     2
+#define MADERA_JD2_RISE_STS1_WIDTH			     1
+#define MADERA_JD1_FALL_STS1				0x0002
+#define MADERA_JD1_FALL_STS1_MASK			0x0002
+#define MADERA_JD1_FALL_STS1_SHIFT			     1
+#define MADERA_JD1_FALL_STS1_WIDTH			     1
+#define MADERA_JD1_RISE_STS1				0x0001
+#define MADERA_JD1_RISE_STS1_MASK			0x0001
+#define MADERA_JD1_RISE_STS1_SHIFT			     0
+#define MADERA_JD1_RISE_STS1_WIDTH			     1
+
+/* (0x188E) - IRQ1 Raw Status 15 */
+#define MADERA_SPK_OVERHEAT_WARN_STS1			0x0004
+#define MADERA_SPK_OVERHEAT_WARN_STS1_MASK		0x0004
+#define MADERA_SPK_OVERHEAT_WARN_STS1_SHIFT		     2
+#define MADERA_SPK_OVERHEAT_WARN_STS1_WIDTH		     1
+#define MADERA_SPK_OVERHEAT_STS1			0x0002
+#define MADERA_SPK_OVERHEAT_STS1_MASK			0x0002
+#define MADERA_SPK_OVERHEAT_STS1_SHIFT			     1
+#define MADERA_SPK_OVERHEAT_STS1_WIDTH			     1
+#define MADERA_SPK_SHUTDOWN_STS1			0x0001
+#define MADERA_SPK_SHUTDOWN_STS1_MASK			0x0001
+#define MADERA_SPK_SHUTDOWN_STS1_SHIFT			     0
+#define MADERA_SPK_SHUTDOWN_STS1_WIDTH			     1
+
+/* (0x1A06)  Interrupt_Debounce_7 */
+#define MADERA_MICD_CLAMP_DB				0x0010
+#define MADERA_MICD_CLAMP_DB_MASK			0x0010
+#define MADERA_MICD_CLAMP_DB_SHIFT			     4
+#define MADERA_MICD_CLAMP_DB_WIDTH			     1
+#define MADERA_JD2_DB					0x0004
+#define MADERA_JD2_DB_MASK				0x0004
+#define MADERA_JD2_DB_SHIFT				     2
+#define MADERA_JD2_DB_WIDTH				     1
+#define MADERA_JD1_DB					0x0001
+#define MADERA_JD1_DB_MASK				0x0001
+#define MADERA_JD1_DB_SHIFT				     0
+#define MADERA_JD1_DB_WIDTH				     1
+
+/* (0x1A0E)  Interrupt_Debounce_15 */
+#define MADERA_SPK_OVERHEAT_WARN_DB			0x0004
+#define MADERA_SPK_OVERHEAT_WARN_DB_MASK		0x0004
+#define MADERA_SPK_OVERHEAT_WARN_DB_SHIFT		     2
+#define MADERA_SPK_OVERHEAT_WARN_DB_WIDTH		     1
+#define MADERA_SPK_OVERHEAT_DB				0x0002
+#define MADERA_SPK_OVERHEAT_DB_MASK			0x0002
+#define MADERA_SPK_OVERHEAT_DB_SHIFT			     1
+#define MADERA_SPK_OVERHEAT_DB_WIDTH			     1
+
+/* (0x1A80)  IRQ1_CTRL */
+#define MADERA_IM_IRQ1					0x0800
+#define MADERA_IM_IRQ1_MASK				0x0800
+#define MADERA_IM_IRQ1_SHIFT				    11
+#define MADERA_IM_IRQ1_WIDTH				     1
+#define MADERA_IRQ_POL					0x0400
+#define MADERA_IRQ_POL_MASK				0x0400
+#define MADERA_IRQ_POL_SHIFT				    10
+#define MADERA_IRQ_POL_WIDTH				     1
+
+/* (0x20004)  OTP_HPDET_Cal_1 */
+#define MADERA_OTP_HPDET_CALIB_OFFSET_11	    0xFF000000
+#define MADERA_OTP_HPDET_CALIB_OFFSET_11_MASK	    0xFF000000
+#define MADERA_OTP_HPDET_CALIB_OFFSET_11_SHIFT		    24
+#define MADERA_OTP_HPDET_CALIB_OFFSET_11_WIDTH		     8
+#define MADERA_OTP_HPDET_CALIB_OFFSET_10	    0x00FF0000
+#define MADERA_OTP_HPDET_CALIB_OFFSET_10_MASK	    0x00FF0000
+#define MADERA_OTP_HPDET_CALIB_OFFSET_10_SHIFT		    16
+#define MADERA_OTP_HPDET_CALIB_OFFSET_10_WIDTH		     8
+#define MADERA_OTP_HPDET_CALIB_OFFSET_01	    0x0000FF00
+#define MADERA_OTP_HPDET_CALIB_OFFSET_01_MASK	    0x0000FF00
+#define MADERA_OTP_HPDET_CALIB_OFFSET_01_SHIFT		     8
+#define MADERA_OTP_HPDET_CALIB_OFFSET_01_WIDTH		     8
+#define MADERA_OTP_HPDET_CALIB_OFFSET_00	    0x000000FF
+#define MADERA_OTP_HPDET_CALIB_OFFSET_00_MASK	    0x000000FF
+#define MADERA_OTP_HPDET_CALIB_OFFSET_00_SHIFT		     0
+#define MADERA_OTP_HPDET_CALIB_OFFSET_00_WIDTH		     8
+
+/* (0x20006)  OTP_HPDET_Cal_2 */
+#define MADERA_OTP_HPDET_GRADIENT_1X		    0x0000FF00
+#define MADERA_OTP_HPDET_GRADIENT_1X_MASK	    0x0000FF00
+#define MADERA_OTP_HPDET_GRADIENT_1X_SHIFT		     8
+#define MADERA_OTP_HPDET_GRADIENT_1X_WIDTH		     8
+#define MADERA_OTP_HPDET_GRADIENT_0X		    0x000000FF
+#define MADERA_OTP_HPDET_GRADIENT_0X_MASK	    0x000000FF
+#define MADERA_OTP_HPDET_GRADIENT_0X_SHIFT		     0
+#define MADERA_OTP_HPDET_GRADIENT_0X_WIDTH		     8
+
+#endif
-- 
cgit v1.2.3


From 16b27467f46c1e0dbf093f53971aeb5decbaff4e Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Mon, 21 May 2018 10:59:56 +0100
Subject: mfd: madera: Add common support for Cirrus Logic Madera codecs

This adds the generic core support for Cirrus Logic "Madera" class codecs.
These are complex audio codec SoCs with a variety of digital and analogue
I/O, onboard audio processing and DSPs, and other features.

These codecs are all based off a common set of hardware IP so can be
supported by a core of common code (with a few minor device-to-device
variations).

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Nikesh Oswal <Nikesh.Oswal@cirrus.com>
Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 MAINTAINERS                      |   3 +
 drivers/mfd/Kconfig              |  29 ++
 drivers/mfd/Makefile             |   5 +
 drivers/mfd/madera-core.c        | 609 +++++++++++++++++++++++++++++++++++++++
 drivers/mfd/madera-i2c.c         | 140 +++++++++
 drivers/mfd/madera-spi.c         | 139 +++++++++
 drivers/mfd/madera.h             |  44 +++
 include/linux/mfd/madera/core.h  | 187 ++++++++++++
 include/linux/mfd/madera/pdata.h |  59 ++++
 9 files changed, 1215 insertions(+)
 create mode 100644 drivers/mfd/madera-core.c
 create mode 100644 drivers/mfd/madera-i2c.c
 create mode 100644 drivers/mfd/madera-spi.c
 create mode 100644 drivers/mfd/madera.h
 create mode 100644 include/linux/mfd/madera/core.h
 create mode 100644 include/linux/mfd/madera/pdata.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 6e5bb62acea2..7b8e857d6b33 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3487,7 +3487,10 @@ L:	patches@opensource.cirrus.com
 T:	git https://github.com/CirrusLogic/linux-drivers.git
 W:	https://github.com/CirrusLogic/linux-drivers/wiki
 S:	Supported
+F:	Documentation/devicetree/bindings/mfd/madera.txt
 F:	include/linux/mfd/madera/*
+F:	drivers/mfd/madera*
+F:	drivers/mfd/cs47l*
 
 CLEANCACHE API
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index b860eb5aa194..f5ca392f8bc2 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -232,6 +232,35 @@ config MFD_CROS_EC_CHARDEV
           If you have a supported Chromebook, choose Y or M here.
           The module will be called cros_ec_dev.
 
+config MFD_MADERA
+	tristate "Cirrus Logic Madera codecs"
+	select MFD_CORE
+	select REGMAP
+	select REGMAP_IRQ
+	select MADERA_IRQ
+	select PINCTRL
+	select PINCTRL_MADERA
+	help
+	  Support for the Cirrus Logic Madera platform audio codecs
+
+config MFD_MADERA_I2C
+	tristate "Cirrus Logic Madera codecs with I2C"
+	depends on MFD_MADERA
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  Support for the Cirrus Logic Madera platform audio SoC
+	  core functionality controlled via I2C.
+
+config MFD_MADERA_SPI
+	tristate "Cirrus Logic Madera codecs with SPI"
+	depends on MFD_MADERA
+	depends on SPI_MASTER
+	select REGMAP_SPI
+	help
+	  Support for the Cirrus Logic Madera platform audio SoC
+	  core functionality controlled via SPI.
+
 config MFD_ASIC3
 	bool "Compaq ASIC3"
 	depends on GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index d9d2cf0d32ef..0a89a6a6d793 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -73,6 +73,11 @@ wm8994-objs			:= wm8994-core.o wm8994-irq.o wm8994-regmap.o
 obj-$(CONFIG_MFD_WM8994)	+= wm8994.o
 obj-$(CONFIG_MFD_WM97xx)	+= wm97xx-core.o
 
+madera-objs			:= madera-core.o
+obj-$(CONFIG_MFD_MADERA)	+= madera.o
+obj-$(CONFIG_MFD_MADERA_I2C)	+= madera-i2c.o
+obj-$(CONFIG_MFD_MADERA_SPI)	+= madera-spi.o
+
 obj-$(CONFIG_TPS6105X)		+= tps6105x.o
 obj-$(CONFIG_TPS65010)		+= tps65010.o
 obj-$(CONFIG_TPS6507X)		+= tps6507x.o
diff --git a/drivers/mfd/madera-core.c b/drivers/mfd/madera-core.c
new file mode 100644
index 000000000000..8cfea969b060
--- /dev/null
+++ b/drivers/mfd/madera-core.c
@@ -0,0 +1,609 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Core MFD support for Cirrus Logic Madera codecs
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+
+#include <linux/mfd/madera/core.h>
+#include <linux/mfd/madera/registers.h>
+
+#include "madera.h"
+
+#define CS47L35_SILICON_ID	0x6360
+#define CS47L85_SILICON_ID	0x6338
+#define CS47L90_SILICON_ID	0x6364
+
+#define MADERA_32KZ_MCLK2	1
+
+static const char * const madera_core_supplies[] = {
+	"AVDD",
+	"DBVDD1",
+};
+
+static const struct mfd_cell madera_ldo1_devs[] = {
+	{ .name = "madera-ldo1" },
+};
+
+static const char * const cs47l35_supplies[] = {
+	"MICVDD",
+	"DBVDD2",
+	"CPVDD1",
+	"CPVDD2",
+	"SPKVDD",
+};
+
+static const struct mfd_cell cs47l35_devs[] = {
+	{ .name = "madera-pinctrl", },
+	{ .name = "madera-irq", },
+	{ .name = "madera-micsupp", },
+	{ .name = "madera-gpio", },
+	{ .name = "madera-extcon", },
+	{
+		.name = "cs47l35-codec",
+		.parent_supplies = cs47l35_supplies,
+		.num_parent_supplies = ARRAY_SIZE(cs47l35_supplies),
+	},
+};
+
+static const char * const cs47l85_supplies[] = {
+	"MICVDD",
+	"DBVDD2",
+	"DBVDD3",
+	"DBVDD4",
+	"CPVDD1",
+	"CPVDD2",
+	"SPKVDDL",
+	"SPKVDDR",
+};
+
+static const struct mfd_cell cs47l85_devs[] = {
+	{ .name = "madera-pinctrl", },
+	{ .name = "madera-irq", },
+	{ .name = "madera-micsupp" },
+	{ .name = "madera-gpio", },
+	{ .name = "madera-extcon", },
+	{
+		.name = "cs47l85-codec",
+		.parent_supplies = cs47l85_supplies,
+		.num_parent_supplies = ARRAY_SIZE(cs47l85_supplies),
+	},
+};
+
+static const char * const cs47l90_supplies[] = {
+	"MICVDD",
+	"DBVDD2",
+	"DBVDD3",
+	"DBVDD4",
+	"CPVDD1",
+	"CPVDD2",
+};
+
+static const struct mfd_cell cs47l90_devs[] = {
+	{ .name = "madera-pinctrl", },
+	{ .name = "madera-irq", },
+	{ .name = "madera-micsupp", },
+	{ .name = "madera-gpio", },
+	{ .name = "madera-extcon", },
+	{
+		.name = "cs47l90-codec",
+		.parent_supplies = cs47l90_supplies,
+		.num_parent_supplies = ARRAY_SIZE(cs47l90_supplies),
+	},
+};
+
+/* Used by madera-i2c and madera-spi drivers */
+const char *madera_name_from_type(enum madera_type type)
+{
+	switch (type) {
+	case CS47L35:
+		return "CS47L35";
+	case CS47L85:
+		return "CS47L85";
+	case CS47L90:
+		return "CS47L90";
+	case CS47L91:
+		return "CS47L91";
+	case WM1840:
+		return "WM1840";
+	default:
+		return "Unknown";
+	}
+}
+EXPORT_SYMBOL_GPL(madera_name_from_type);
+
+#define MADERA_BOOT_POLL_MAX_INTERVAL_US  5000
+#define MADERA_BOOT_POLL_TIMEOUT_US	 25000
+
+static int madera_wait_for_boot(struct madera *madera)
+{
+	unsigned int val;
+	int ret;
+
+	/*
+	 * We can't use an interrupt as we need to runtime resume to do so,
+	 * so we poll the status bit. This won't race with the interrupt
+	 * handler because it will be blocked on runtime resume.
+	 */
+	ret = regmap_read_poll_timeout(madera->regmap,
+				       MADERA_IRQ1_RAW_STATUS_1,
+				       val,
+				       (val & MADERA_BOOT_DONE_STS1),
+				       MADERA_BOOT_POLL_MAX_INTERVAL_US,
+				       MADERA_BOOT_POLL_TIMEOUT_US);
+
+	if (ret)
+		dev_err(madera->dev, "Polling BOOT_DONE_STS failed: %d\n", ret);
+
+	/*
+	 * BOOT_DONE defaults to unmasked on boot so we must ack it.
+	 * Do this unconditionally to avoid interrupt storms.
+	 */
+	regmap_write(madera->regmap, MADERA_IRQ1_STATUS_1,
+		     MADERA_BOOT_DONE_EINT1);
+
+	pm_runtime_mark_last_busy(madera->dev);
+
+	return ret;
+}
+
+static int madera_soft_reset(struct madera *madera)
+{
+	int ret;
+
+	ret = regmap_write(madera->regmap, MADERA_SOFTWARE_RESET, 0);
+	if (ret != 0) {
+		dev_err(madera->dev, "Failed to soft reset device: %d\n", ret);
+		return ret;
+	}
+
+	/* Allow time for internal clocks to startup after reset */
+	usleep_range(1000, 2000);
+
+	return 0;
+}
+
+static void madera_enable_hard_reset(struct madera *madera)
+{
+	if (!madera->pdata.reset)
+		return;
+
+	/*
+	 * There are many existing out-of-tree users of these codecs that we
+	 * can't break so preserve the expected behaviour of setting the line
+	 * low to assert reset.
+	 */
+	gpiod_set_raw_value_cansleep(madera->pdata.reset, 0);
+}
+
+static void madera_disable_hard_reset(struct madera *madera)
+{
+	if (!madera->pdata.reset)
+		return;
+
+	gpiod_set_raw_value_cansleep(madera->pdata.reset, 1);
+	usleep_range(1000, 2000);
+}
+
+static int __maybe_unused madera_runtime_resume(struct device *dev)
+{
+	struct madera *madera = dev_get_drvdata(dev);
+	int ret;
+
+	dev_dbg(dev, "Leaving sleep mode\n");
+
+	ret = regulator_enable(madera->dcvdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable DCVDD: %d\n", ret);
+		return ret;
+	}
+
+	regcache_cache_only(madera->regmap, false);
+	regcache_cache_only(madera->regmap_32bit, false);
+
+	ret = madera_wait_for_boot(madera);
+	if (ret)
+		goto err;
+
+	ret = regcache_sync(madera->regmap);
+	if (ret) {
+		dev_err(dev, "Failed to restore 16-bit register cache\n");
+		goto err;
+	}
+
+	ret = regcache_sync(madera->regmap_32bit);
+	if (ret) {
+		dev_err(dev, "Failed to restore 32-bit register cache\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	regcache_cache_only(madera->regmap_32bit, true);
+	regcache_cache_only(madera->regmap, true);
+	regulator_disable(madera->dcvdd);
+
+	return ret;
+}
+
+static int __maybe_unused madera_runtime_suspend(struct device *dev)
+{
+	struct madera *madera = dev_get_drvdata(dev);
+
+	dev_dbg(madera->dev, "Entering sleep mode\n");
+
+	regcache_cache_only(madera->regmap, true);
+	regcache_mark_dirty(madera->regmap);
+	regcache_cache_only(madera->regmap_32bit, true);
+	regcache_mark_dirty(madera->regmap_32bit);
+
+	regulator_disable(madera->dcvdd);
+
+	return 0;
+}
+
+const struct dev_pm_ops madera_pm_ops = {
+	SET_RUNTIME_PM_OPS(madera_runtime_suspend,
+			   madera_runtime_resume,
+			   NULL)
+};
+EXPORT_SYMBOL_GPL(madera_pm_ops);
+
+const struct of_device_id madera_of_match[] = {
+	{ .compatible = "cirrus,cs47l35", .data = (void *)CS47L35 },
+	{ .compatible = "cirrus,cs47l85", .data = (void *)CS47L85 },
+	{ .compatible = "cirrus,cs47l90", .data = (void *)CS47L90 },
+	{ .compatible = "cirrus,cs47l91", .data = (void *)CS47L91 },
+	{ .compatible = "cirrus,wm1840", .data = (void *)WM1840 },
+	{}
+};
+EXPORT_SYMBOL_GPL(madera_of_match);
+
+static int madera_get_reset_gpio(struct madera *madera)
+{
+	struct gpio_desc *reset;
+	int ret;
+
+	if (madera->pdata.reset)
+		return 0;
+
+	reset = devm_gpiod_get_optional(madera->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(reset)) {
+		ret = PTR_ERR(reset);
+		if (ret != -EPROBE_DEFER)
+			dev_err(madera->dev, "Failed to request /RESET: %d\n",
+				ret);
+		return ret;
+	}
+
+	/*
+	 * A hard reset is needed for full reset of the chip. We allow running
+	 * without hard reset only because it can be useful for early
+	 * prototyping and some debugging, but we need to warn it's not ideal.
+	 */
+	if (!reset)
+		dev_warn(madera->dev,
+			 "Running without reset GPIO is not recommended\n");
+
+	madera->pdata.reset = reset;
+
+	return 0;
+}
+
+static void madera_set_micbias_info(struct madera *madera)
+{
+	/*
+	 * num_childbias is an array because future codecs can have different
+	 * childbiases for each micbias. Unspecified values default to 0.
+	 */
+	switch (madera->type) {
+	case CS47L35:
+		madera->num_micbias = 2;
+		madera->num_childbias[0] = 2;
+		madera->num_childbias[1] = 2;
+		return;
+	case CS47L85:
+	case WM1840:
+		madera->num_micbias = 4;
+		/* no child biases */
+		return;
+	case CS47L90:
+	case CS47L91:
+		madera->num_micbias = 2;
+		madera->num_childbias[0] = 4;
+		madera->num_childbias[1] = 4;
+		return;
+	default:
+		return;
+	}
+}
+
+int madera_dev_init(struct madera *madera)
+{
+	struct device *dev = madera->dev;
+	unsigned int hwid;
+	int (*patch_fn)(struct madera *) = NULL;
+	const struct mfd_cell *mfd_devs;
+	int n_devs = 0;
+	int i, ret;
+
+	dev_set_drvdata(madera->dev, madera);
+	BLOCKING_INIT_NOTIFIER_HEAD(&madera->notifier);
+	madera_set_micbias_info(madera);
+
+	/*
+	 * We need writable hw config info that all children can share.
+	 * Simplest to take one shared copy of pdata struct.
+	 */
+	if (dev_get_platdata(madera->dev)) {
+		memcpy(&madera->pdata, dev_get_platdata(madera->dev),
+		       sizeof(madera->pdata));
+	}
+
+	ret = madera_get_reset_gpio(madera);
+	if (ret)
+		return ret;
+
+	regcache_cache_only(madera->regmap, true);
+	regcache_cache_only(madera->regmap_32bit, true);
+
+	for (i = 0; i < ARRAY_SIZE(madera_core_supplies); i++)
+		madera->core_supplies[i].supply = madera_core_supplies[i];
+
+	madera->num_core_supplies = ARRAY_SIZE(madera_core_supplies);
+
+	/*
+	 * On some codecs DCVDD could be supplied by the internal LDO1.
+	 * For those we must add the LDO1 driver before requesting DCVDD
+	 * No devm_ because we need to control shutdown order of children.
+	 */
+	switch (madera->type) {
+	case CS47L35:
+	case CS47L90:
+	case CS47L91:
+		break;
+	case CS47L85:
+	case WM1840:
+		ret = mfd_add_devices(madera->dev, PLATFORM_DEVID_NONE,
+				      madera_ldo1_devs,
+				      ARRAY_SIZE(madera_ldo1_devs),
+				      NULL, 0, NULL);
+		if (ret) {
+			dev_err(dev, "Failed to add LDO1 child: %d\n", ret);
+			return ret;
+		}
+		break;
+	default:
+		/* No point continuing if the type is unknown */
+		dev_err(madera->dev, "Unknown device type %d\n", madera->type);
+		return -ENODEV;
+	}
+
+	ret = devm_regulator_bulk_get(dev, madera->num_core_supplies,
+				      madera->core_supplies);
+	if (ret) {
+		dev_err(dev, "Failed to request core supplies: %d\n", ret);
+		goto err_devs;
+	}
+
+	/*
+	 * Don't use devres here. If the regulator is one of our children it
+	 * will already have been removed before devres cleanup on this mfd
+	 * driver tries to call put() on it. We need control of shutdown order.
+	 */
+	madera->dcvdd = regulator_get(madera->dev, "DCVDD");
+	if (IS_ERR(madera->dcvdd)) {
+		ret = PTR_ERR(madera->dcvdd);
+		dev_err(dev, "Failed to request DCVDD: %d\n", ret);
+		goto err_devs;
+	}
+
+	ret = regulator_bulk_enable(madera->num_core_supplies,
+				    madera->core_supplies);
+	if (ret) {
+		dev_err(dev, "Failed to enable core supplies: %d\n", ret);
+		goto err_dcvdd;
+	}
+
+	ret = regulator_enable(madera->dcvdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable DCVDD: %d\n", ret);
+		goto err_enable;
+	}
+
+	madera_disable_hard_reset(madera);
+
+	regcache_cache_only(madera->regmap, false);
+	regcache_cache_only(madera->regmap_32bit, false);
+
+	/*
+	 * Now we can power up and verify that this is a chip we know about
+	 * before we start doing any writes to its registers.
+	 */
+	ret = regmap_read(madera->regmap, MADERA_SOFTWARE_RESET, &hwid);
+	if (ret) {
+		dev_err(dev, "Failed to read ID register: %d\n", ret);
+		goto err_reset;
+	}
+
+	switch (hwid) {
+	case CS47L35_SILICON_ID:
+		if (IS_ENABLED(CONFIG_MFD_CS47L35)) {
+			switch (madera->type) {
+			case CS47L35:
+				patch_fn = cs47l35_patch;
+				mfd_devs = cs47l35_devs;
+				n_devs = ARRAY_SIZE(cs47l35_devs);
+				break;
+			default:
+				break;
+			}
+		}
+		break;
+	case CS47L85_SILICON_ID:
+		if (IS_ENABLED(CONFIG_MFD_CS47L85)) {
+			switch (madera->type) {
+			case CS47L85:
+			case WM1840:
+				patch_fn = cs47l85_patch;
+				mfd_devs = cs47l85_devs;
+				n_devs = ARRAY_SIZE(cs47l85_devs);
+				break;
+			default:
+				break;
+			}
+		}
+		break;
+	case CS47L90_SILICON_ID:
+		if (IS_ENABLED(CONFIG_MFD_CS47L90)) {
+			switch (madera->type) {
+			case CS47L90:
+			case CS47L91:
+				patch_fn = cs47l90_patch;
+				mfd_devs = cs47l90_devs;
+				n_devs = ARRAY_SIZE(cs47l90_devs);
+				break;
+			default:
+				break;
+			}
+		}
+		break;
+	default:
+		dev_err(madera->dev, "Unknown device ID: %x\n", hwid);
+		ret = -EINVAL;
+		goto err_reset;
+	}
+
+	if (!n_devs) {
+		dev_err(madera->dev, "Device ID 0x%x not a %s\n", hwid,
+			madera->type_name);
+		ret = -ENODEV;
+		goto err_reset;
+	}
+
+	/*
+	 * It looks like a device we support. If we don't have a hard reset
+	 * we can now attempt a soft reset.
+	 */
+	if (!madera->pdata.reset) {
+		ret = madera_soft_reset(madera);
+		if (ret)
+			goto err_reset;
+	}
+
+	ret = madera_wait_for_boot(madera);
+	if (ret) {
+		dev_err(madera->dev, "Device failed initial boot: %d\n", ret);
+		goto err_reset;
+	}
+
+	ret = regmap_read(madera->regmap, MADERA_HARDWARE_REVISION,
+			  &madera->rev);
+	if (ret) {
+		dev_err(dev, "Failed to read revision register: %d\n", ret);
+		goto err_reset;
+	}
+	madera->rev &= MADERA_HW_REVISION_MASK;
+
+	dev_info(dev, "%s silicon revision %d\n", madera->type_name,
+		 madera->rev);
+
+	/* Apply hardware patch */
+	if (patch_fn) {
+		ret = patch_fn(madera);
+		if (ret) {
+			dev_err(madera->dev, "Failed to apply patch %d\n", ret);
+			goto err_reset;
+		}
+	}
+
+	/* Init 32k clock sourced from MCLK2 */
+	ret = regmap_update_bits(madera->regmap,
+			MADERA_CLOCK_32K_1,
+			MADERA_CLK_32K_ENA_MASK | MADERA_CLK_32K_SRC_MASK,
+			MADERA_CLK_32K_ENA | MADERA_32KZ_MCLK2);
+	if (ret) {
+		dev_err(madera->dev, "Failed to init 32k clock: %d\n", ret);
+		goto err_reset;
+	}
+
+	pm_runtime_set_active(madera->dev);
+	pm_runtime_enable(madera->dev);
+	pm_runtime_set_autosuspend_delay(madera->dev, 100);
+	pm_runtime_use_autosuspend(madera->dev);
+
+	/* No devm_ because we need to control shutdown order of children */
+	ret = mfd_add_devices(madera->dev, PLATFORM_DEVID_NONE,
+			      mfd_devs, n_devs,
+			      NULL, 0, NULL);
+	if (ret) {
+		dev_err(madera->dev, "Failed to add subdevices: %d\n", ret);
+		goto err_pm_runtime;
+	}
+
+	return 0;
+
+err_pm_runtime:
+	pm_runtime_disable(madera->dev);
+err_reset:
+	madera_enable_hard_reset(madera);
+	regulator_disable(madera->dcvdd);
+err_enable:
+	regulator_bulk_disable(madera->num_core_supplies,
+			       madera->core_supplies);
+err_dcvdd:
+	regulator_put(madera->dcvdd);
+err_devs:
+	mfd_remove_devices(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(madera_dev_init);
+
+int madera_dev_exit(struct madera *madera)
+{
+	/* Prevent any IRQs being serviced while we clean up */
+	disable_irq(madera->irq);
+
+	/*
+	 * DCVDD could be supplied by a child node, we must disable it before
+	 * removing the children, and prevent PM runtime from turning it back on
+	 */
+	pm_runtime_disable(madera->dev);
+
+	regulator_disable(madera->dcvdd);
+	regulator_put(madera->dcvdd);
+
+	mfd_remove_devices(madera->dev);
+	madera_enable_hard_reset(madera);
+
+	regulator_bulk_disable(madera->num_core_supplies,
+			       madera->core_supplies);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(madera_dev_exit);
+
+MODULE_DESCRIPTION("Madera core MFD driver");
+MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/madera-i2c.c b/drivers/mfd/madera-i2c.c
new file mode 100644
index 000000000000..05ae94be01d8
--- /dev/null
+++ b/drivers/mfd/madera-i2c.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I2C bus interface to Cirrus Logic Madera codecs
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+
+#include <linux/mfd/madera/core.h>
+
+#include "madera.h"
+
+static int madera_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct madera *madera;
+	const struct regmap_config *regmap_16bit_config = NULL;
+	const struct regmap_config *regmap_32bit_config = NULL;
+	const void *of_data;
+	unsigned long type;
+	const char *name;
+	int ret;
+
+	of_data = of_device_get_match_data(&i2c->dev);
+	if (of_data)
+		type = (unsigned long)of_data;
+	else
+		type = id->driver_data;
+
+	switch (type) {
+	case CS47L35:
+		if (IS_ENABLED(CONFIG_MFD_CS47L35)) {
+			regmap_16bit_config = &cs47l35_16bit_i2c_regmap;
+			regmap_32bit_config = &cs47l35_32bit_i2c_regmap;
+		}
+		break;
+	case CS47L85:
+	case WM1840:
+		if (IS_ENABLED(CONFIG_MFD_CS47L85)) {
+			regmap_16bit_config = &cs47l85_16bit_i2c_regmap;
+			regmap_32bit_config = &cs47l85_32bit_i2c_regmap;
+		}
+		break;
+	case CS47L90:
+	case CS47L91:
+		if (IS_ENABLED(CONFIG_MFD_CS47L90)) {
+			regmap_16bit_config = &cs47l90_16bit_i2c_regmap;
+			regmap_32bit_config = &cs47l90_32bit_i2c_regmap;
+		}
+		break;
+	default:
+		dev_err(&i2c->dev,
+			"Unknown Madera I2C device type %ld\n", type);
+		return -EINVAL;
+	}
+
+	name = madera_name_from_type(type);
+
+	if (!regmap_16bit_config) {
+		/* it's polite to say which codec isn't built into the kernel */
+		dev_err(&i2c->dev,
+			"Kernel does not include support for %s\n", name);
+		return -EINVAL;
+	}
+
+	madera = devm_kzalloc(&i2c->dev, sizeof(*madera), GFP_KERNEL);
+	if (!madera)
+		return -ENOMEM;
+
+
+	madera->regmap = devm_regmap_init_i2c(i2c, regmap_16bit_config);
+	if (IS_ERR(madera->regmap)) {
+		ret = PTR_ERR(madera->regmap);
+		dev_err(&i2c->dev,
+			"Failed to allocate 16-bit register map: %d\n",	ret);
+		return ret;
+	}
+
+	madera->regmap_32bit = devm_regmap_init_i2c(i2c, regmap_32bit_config);
+	if (IS_ERR(madera->regmap_32bit)) {
+		ret = PTR_ERR(madera->regmap_32bit);
+		dev_err(&i2c->dev,
+			"Failed to allocate 32-bit register map: %d\n", ret);
+		return ret;
+	}
+
+	madera->type = type;
+	madera->type_name = name;
+	madera->dev = &i2c->dev;
+	madera->irq = i2c->irq;
+
+	return madera_dev_init(madera);
+}
+
+static int madera_i2c_remove(struct i2c_client *i2c)
+{
+	struct madera *madera = dev_get_drvdata(&i2c->dev);
+
+	madera_dev_exit(madera);
+
+	return 0;
+}
+
+static const struct i2c_device_id madera_i2c_id[] = {
+	{ "cs47l35", CS47L35 },
+	{ "cs47l85", CS47L85 },
+	{ "cs47l90", CS47L90 },
+	{ "cs47l91", CS47L91 },
+	{ "wm1840", WM1840 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, madera_i2c_id);
+
+static struct i2c_driver madera_i2c_driver = {
+	.driver = {
+		.name	= "madera",
+		.pm	= &madera_pm_ops,
+		.of_match_table	= of_match_ptr(madera_of_match),
+	},
+	.probe		= madera_i2c_probe,
+	.remove		= madera_i2c_remove,
+	.id_table	= madera_i2c_id,
+};
+
+module_i2c_driver(madera_i2c_driver);
+
+MODULE_DESCRIPTION("Madera I2C bus interface");
+MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/madera-spi.c b/drivers/mfd/madera-spi.c
new file mode 100644
index 000000000000..4c398b278bba
--- /dev/null
+++ b/drivers/mfd/madera-spi.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SPI bus interface to Cirrus Logic Madera codecs
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/spi/spi.h>
+
+#include <linux/mfd/madera/core.h>
+
+#include "madera.h"
+
+static int madera_spi_probe(struct spi_device *spi)
+{
+	const struct spi_device_id *id = spi_get_device_id(spi);
+	struct madera *madera;
+	const struct regmap_config *regmap_16bit_config = NULL;
+	const struct regmap_config *regmap_32bit_config = NULL;
+	const void *of_data;
+	unsigned long type;
+	const char *name;
+	int ret;
+
+	of_data = of_device_get_match_data(&spi->dev);
+	if (of_data)
+		type = (unsigned long)of_data;
+	else
+		type = id->driver_data;
+
+	switch (type) {
+	case CS47L35:
+		if (IS_ENABLED(CONFIG_MFD_CS47L35)) {
+			regmap_16bit_config = &cs47l35_16bit_spi_regmap;
+			regmap_32bit_config = &cs47l35_32bit_spi_regmap;
+		}
+		break;
+	case CS47L85:
+	case WM1840:
+		if (IS_ENABLED(CONFIG_MFD_CS47L85)) {
+			regmap_16bit_config = &cs47l85_16bit_spi_regmap;
+			regmap_32bit_config = &cs47l85_32bit_spi_regmap;
+		}
+		break;
+	case CS47L90:
+	case CS47L91:
+		if (IS_ENABLED(CONFIG_MFD_CS47L90)) {
+			regmap_16bit_config = &cs47l90_16bit_spi_regmap;
+			regmap_32bit_config = &cs47l90_32bit_spi_regmap;
+		}
+		break;
+	default:
+		dev_err(&spi->dev,
+			"Unknown Madera SPI device type %ld\n", type);
+		return -EINVAL;
+	}
+
+	name = madera_name_from_type(type);
+
+	if (!regmap_16bit_config) {
+		/* it's polite to say which codec isn't built into the kernel */
+		dev_err(&spi->dev,
+			"Kernel does not include support for %s\n", name);
+		return -EINVAL;
+	}
+
+	madera = devm_kzalloc(&spi->dev, sizeof(*madera), GFP_KERNEL);
+	if (!madera)
+		return -ENOMEM;
+
+	madera->regmap = devm_regmap_init_spi(spi, regmap_16bit_config);
+	if (IS_ERR(madera->regmap)) {
+		ret = PTR_ERR(madera->regmap);
+		dev_err(&spi->dev,
+			"Failed to allocate 16-bit register map: %d\n",	ret);
+		return ret;
+	}
+
+	madera->regmap_32bit = devm_regmap_init_spi(spi, regmap_32bit_config);
+	if (IS_ERR(madera->regmap_32bit)) {
+		ret = PTR_ERR(madera->regmap_32bit);
+		dev_err(&spi->dev,
+			"Failed to allocate 32-bit register map: %d\n",	ret);
+		return ret;
+	}
+
+	madera->type = type;
+	madera->type_name = name;
+	madera->dev = &spi->dev;
+	madera->irq = spi->irq;
+
+	return madera_dev_init(madera);
+}
+
+static int madera_spi_remove(struct spi_device *spi)
+{
+	struct madera *madera = spi_get_drvdata(spi);
+
+	madera_dev_exit(madera);
+
+	return 0;
+}
+
+static const struct spi_device_id madera_spi_ids[] = {
+	{ "cs47l35", CS47L35 },
+	{ "cs47l85", CS47L85 },
+	{ "cs47l90", CS47L90 },
+	{ "cs47l91", CS47L91 },
+	{ "wm1840", WM1840 },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, madera_spi_ids);
+
+static struct spi_driver madera_spi_driver = {
+	.driver = {
+		.name	= "madera",
+		.pm	= &madera_pm_ops,
+		.of_match_table	= of_match_ptr(madera_of_match),
+	},
+	.probe		= madera_spi_probe,
+	.remove		= madera_spi_remove,
+	.id_table	= madera_spi_ids,
+};
+
+module_spi_driver(madera_spi_driver);
+
+MODULE_DESCRIPTION("Madera SPI bus interface");
+MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/madera.h b/drivers/mfd/madera.h
new file mode 100644
index 000000000000..891b84efb9a7
--- /dev/null
+++ b/drivers/mfd/madera.h
@@ -0,0 +1,44 @@
+/*
+ * MFD internals for Cirrus Logic Madera codecs
+ *
+ * Copyright 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MADERA_MFD_H
+#define MADERA_MFD_H
+
+#include <linux/of.h>
+#include <linux/pm.h>
+
+struct madera;
+
+extern const struct dev_pm_ops madera_pm_ops;
+extern const struct of_device_id madera_of_match[];
+
+int madera_dev_init(struct madera *madera);
+int madera_dev_exit(struct madera *madera);
+
+const char *madera_name_from_type(enum madera_type type);
+
+extern const struct regmap_config cs47l35_16bit_spi_regmap;
+extern const struct regmap_config cs47l35_32bit_spi_regmap;
+extern const struct regmap_config cs47l35_16bit_i2c_regmap;
+extern const struct regmap_config cs47l35_32bit_i2c_regmap;
+int cs47l35_patch(struct madera *madera);
+
+extern const struct regmap_config cs47l85_16bit_spi_regmap;
+extern const struct regmap_config cs47l85_32bit_spi_regmap;
+extern const struct regmap_config cs47l85_16bit_i2c_regmap;
+extern const struct regmap_config cs47l85_32bit_i2c_regmap;
+int cs47l85_patch(struct madera *madera);
+
+extern const struct regmap_config cs47l90_16bit_spi_regmap;
+extern const struct regmap_config cs47l90_32bit_spi_regmap;
+extern const struct regmap_config cs47l90_16bit_i2c_regmap;
+extern const struct regmap_config cs47l90_32bit_i2c_regmap;
+int cs47l90_patch(struct madera *madera);
+#endif
diff --git a/include/linux/mfd/madera/core.h b/include/linux/mfd/madera/core.h
new file mode 100644
index 000000000000..c332681848ef
--- /dev/null
+++ b/include/linux/mfd/madera/core.h
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MFD internals for Cirrus Logic Madera codecs
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#ifndef MADERA_CORE_H
+#define MADERA_CORE_H
+
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/madera/pdata.h>
+#include <linux/notifier.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+
+enum madera_type {
+	/* 0 is reserved for indicating failure to identify */
+	CS47L35 = 1,
+	CS47L85 = 2,
+	CS47L90 = 3,
+	CS47L91 = 4,
+	WM1840 = 7,
+};
+
+#define MADERA_MAX_CORE_SUPPLIES	2
+#define MADERA_MAX_GPIOS		40
+
+#define CS47L35_NUM_GPIOS		16
+#define CS47L85_NUM_GPIOS		40
+#define CS47L90_NUM_GPIOS		38
+
+#define MADERA_MAX_MICBIAS		4
+
+/* Notifier events */
+#define MADERA_NOTIFY_VOICE_TRIGGER	0x1
+#define MADERA_NOTIFY_HPDET		0x2
+#define MADERA_NOTIFY_MICDET		0x4
+
+/* GPIO Function Definitions */
+#define MADERA_GP_FN_ALTERNATE		0x00
+#define MADERA_GP_FN_GPIO		0x01
+#define MADERA_GP_FN_DSP_GPIO		0x02
+#define MADERA_GP_FN_IRQ1		0x03
+#define MADERA_GP_FN_IRQ2		0x04
+#define MADERA_GP_FN_FLL1_CLOCK		0x10
+#define MADERA_GP_FN_FLL2_CLOCK		0x11
+#define MADERA_GP_FN_FLL3_CLOCK		0x12
+#define MADERA_GP_FN_FLLAO_CLOCK	0x13
+#define MADERA_GP_FN_FLL1_LOCK		0x18
+#define MADERA_GP_FN_FLL2_LOCK		0x19
+#define MADERA_GP_FN_FLL3_LOCK		0x1A
+#define MADERA_GP_FN_FLLAO_LOCK		0x1B
+#define MADERA_GP_FN_OPCLK_OUT		0x40
+#define MADERA_GP_FN_OPCLK_ASYNC_OUT	0x41
+#define MADERA_GP_FN_PWM1		0x48
+#define MADERA_GP_FN_PWM2		0x49
+#define MADERA_GP_FN_SPDIF_OUT		0x4C
+#define MADERA_GP_FN_HEADPHONE_DET	0x50
+#define MADERA_GP_FN_MIC_DET		0x58
+#define MADERA_GP_FN_DRC1_SIGNAL_DETECT	0x80
+#define MADERA_GP_FN_DRC2_SIGNAL_DETECT	0x81
+#define MADERA_GP_FN_ASRC1_IN1_LOCK	0x88
+#define MADERA_GP_FN_ASRC1_IN2_LOCK	0x89
+#define MADERA_GP_FN_ASRC2_IN1_LOCK	0x8A
+#define MADERA_GP_FN_ASRC2_IN2_LOCK	0x8B
+#define MADERA_GP_FN_DSP_IRQ1		0xA0
+#define MADERA_GP_FN_DSP_IRQ2		0xA1
+#define MADERA_GP_FN_DSP_IRQ3		0xA2
+#define MADERA_GP_FN_DSP_IRQ4		0xA3
+#define MADERA_GP_FN_DSP_IRQ5		0xA4
+#define MADERA_GP_FN_DSP_IRQ6		0xA5
+#define MADERA_GP_FN_DSP_IRQ7		0xA6
+#define MADERA_GP_FN_DSP_IRQ8		0xA7
+#define MADERA_GP_FN_DSP_IRQ9		0xA8
+#define MADERA_GP_FN_DSP_IRQ10		0xA9
+#define MADERA_GP_FN_DSP_IRQ11		0xAA
+#define MADERA_GP_FN_DSP_IRQ12		0xAB
+#define MADERA_GP_FN_DSP_IRQ13		0xAC
+#define MADERA_GP_FN_DSP_IRQ14		0xAD
+#define MADERA_GP_FN_DSP_IRQ15		0xAE
+#define MADERA_GP_FN_DSP_IRQ16		0xAF
+#define MADERA_GP_FN_HPOUT1L_SC		0xB0
+#define MADERA_GP_FN_HPOUT1R_SC		0xB1
+#define MADERA_GP_FN_HPOUT2L_SC		0xB2
+#define MADERA_GP_FN_HPOUT2R_SC		0xB3
+#define MADERA_GP_FN_HPOUT3L_SC		0xB4
+#define MADERA_GP_FN_HPOUT4R_SC		0xB5
+#define MADERA_GP_FN_SPKOUTL_SC		0xB6
+#define MADERA_GP_FN_SPKOUTR_SC		0xB7
+#define MADERA_GP_FN_HPOUT1L_ENA	0xC0
+#define MADERA_GP_FN_HPOUT1R_ENA	0xC1
+#define MADERA_GP_FN_HPOUT2L_ENA	0xC2
+#define MADERA_GP_FN_HPOUT2R_ENA	0xC3
+#define MADERA_GP_FN_HPOUT3L_ENA	0xC4
+#define MADERA_GP_FN_HPOUT4R_ENA	0xC5
+#define MADERA_GP_FN_SPKOUTL_ENA	0xC6
+#define MADERA_GP_FN_SPKOUTR_ENA	0xC7
+#define MADERA_GP_FN_HPOUT1L_DIS	0xD0
+#define MADERA_GP_FN_HPOUT1R_DIS	0xD1
+#define MADERA_GP_FN_HPOUT2L_DIS	0xD2
+#define MADERA_GP_FN_HPOUT2R_DIS	0xD3
+#define MADERA_GP_FN_HPOUT3L_DIS	0xD4
+#define MADERA_GP_FN_HPOUT4R_DIS	0xD5
+#define MADERA_GP_FN_SPKOUTL_DIS	0xD6
+#define MADERA_GP_FN_SPKOUTR_DIS	0xD7
+#define MADERA_GP_FN_SPK_SHUTDOWN	0xE0
+#define MADERA_GP_FN_SPK_OVH_SHUTDOWN	0xE1
+#define MADERA_GP_FN_SPK_OVH_WARN	0xE2
+#define MADERA_GP_FN_TIMER1_STATUS	0x140
+#define MADERA_GP_FN_TIMER2_STATUS	0x141
+#define MADERA_GP_FN_TIMER3_STATUS	0x142
+#define MADERA_GP_FN_TIMER4_STATUS	0x143
+#define MADERA_GP_FN_TIMER5_STATUS	0x144
+#define MADERA_GP_FN_TIMER6_STATUS	0x145
+#define MADERA_GP_FN_TIMER7_STATUS	0x146
+#define MADERA_GP_FN_TIMER8_STATUS	0x147
+#define MADERA_GP_FN_EVENTLOG1_FIFO_STS	0x150
+#define MADERA_GP_FN_EVENTLOG2_FIFO_STS	0x151
+#define MADERA_GP_FN_EVENTLOG3_FIFO_STS	0x152
+#define MADERA_GP_FN_EVENTLOG4_FIFO_STS	0x153
+#define MADERA_GP_FN_EVENTLOG5_FIFO_STS	0x154
+#define MADERA_GP_FN_EVENTLOG6_FIFO_STS	0x155
+#define MADERA_GP_FN_EVENTLOG7_FIFO_STS	0x156
+#define MADERA_GP_FN_EVENTLOG8_FIFO_STS	0x157
+
+struct snd_soc_dapm_context;
+
+/*
+ * struct madera - internal data shared by the set of Madera drivers
+ *
+ * This should not be used by anything except child drivers of the Madera MFD
+ *
+ * @regmap:		pointer to the regmap instance for 16-bit registers
+ * @regmap_32bit:	pointer to the regmap instance for 32-bit registers
+ * @dev:		pointer to the MFD device
+ * @type:		type of codec
+ * @rev:		silicon revision
+ * @type_name:		display name of this codec
+ * @num_core_supplies:	number of core supply regulators
+ * @core_supplies:	list of core supplies that are always required
+ * @dcvdd:		pointer to DCVDD regulator
+ * @internal_dcvdd:	true if DCVDD is supplied from the internal LDO1
+ * @pdata:		our pdata
+ * @irq_dev:		the irqchip child driver device
+ * @irq:		host irq number from SPI or I2C configuration
+ * @out_clamp:		indicates output clamp state for each analogue output
+ * @out_shorted:	indicates short circuit state for each analogue output
+ * @hp_ena:		bitflags of enable state for the headphone outputs
+ * @num_micbias:	number of MICBIAS outputs
+ * @num_childbias:	number of child biases for each MICBIAS
+ * @dapm:		pointer to codec driver DAPM context
+ * @notifier:		notifier for signalling events to ASoC machine driver
+ */
+struct madera {
+	struct regmap *regmap;
+	struct regmap *regmap_32bit;
+
+	struct device *dev;
+
+	enum madera_type type;
+	unsigned int rev;
+	const char *type_name;
+
+	int num_core_supplies;
+	struct regulator_bulk_data core_supplies[MADERA_MAX_CORE_SUPPLIES];
+	struct regulator *dcvdd;
+	bool internal_dcvdd;
+
+	struct madera_pdata pdata;
+
+	struct device *irq_dev;
+	int irq;
+
+	unsigned int num_micbias;
+	unsigned int num_childbias[MADERA_MAX_MICBIAS];
+
+	struct snd_soc_dapm_context *dapm;
+
+	struct blocking_notifier_head notifier;
+};
+#endif
diff --git a/include/linux/mfd/madera/pdata.h b/include/linux/mfd/madera/pdata.h
new file mode 100644
index 000000000000..0b311f39c8f4
--- /dev/null
+++ b/include/linux/mfd/madera/pdata.h
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Platform data for Cirrus Logic Madera codecs
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#ifndef MADERA_PDATA_H
+#define MADERA_PDATA_H
+
+#include <linux/kernel.h>
+#include <linux/regulator/arizona-ldo1.h>
+#include <linux/regulator/arizona-micsupp.h>
+#include <linux/regulator/machine.h>
+
+#define MADERA_MAX_MICBIAS		4
+#define MADERA_MAX_CHILD_MICBIAS	4
+
+#define MADERA_MAX_GPSW			2
+
+struct gpio_desc;
+struct pinctrl_map;
+struct madera_irqchip_pdata;
+struct madera_codec_pdata;
+
+/**
+ * struct madera_pdata - Configuration data for Madera devices
+ *
+ * @reset:	    GPIO controlling /RESET (NULL = none)
+ * @ldo1:	    Substruct of pdata for the LDO1 regulator
+ * @micvdd:	    Substruct of pdata for the MICVDD regulator
+ * @irq_flags:	    Mode for primary IRQ (defaults to active low)
+ * @gpio_base:	    Base GPIO number
+ * @gpio_configs:   Array of GPIO configurations (See Documentation/pinctrl.txt)
+ * @n_gpio_configs: Number of entries in gpio_configs
+ * @gpsw:	    General purpose switch mode setting. Depends on the external
+ *		    hardware connected to the switch. (See the SW1_MODE field
+ *		    in the datasheet for the available values for your codec)
+ */
+struct madera_pdata {
+	struct gpio_desc *reset;
+
+	struct arizona_ldo1_pdata ldo1;
+	struct arizona_micsupp_pdata micvdd;
+
+	unsigned int irq_flags;
+	int gpio_base;
+
+	const struct pinctrl_map *gpio_configs;
+	int n_gpio_configs;
+
+	u32 gpsw[MADERA_MAX_GPSW];
+};
+
+#endif
-- 
cgit v1.2.3


From 9f4a8a2d7f9d71093f41c4bb0ef8707e8145bad3 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 17 May 2018 15:58:00 +1000
Subject: fsi/sbefifo: Add driver for the SBE FIFO

This driver provides an in-kernel and a user API for accessing
the command FIFO of the SBE (Self Boot Engine) of the POWER9
processor, via the FSI bus.

It provides an in-kernel interface to submit command and receive
responses, along with a helper to locate and analyse the response
status block. It's a simple synchronous submit() type API.

The user interface uses the write/read interface that an earlier
version of this driver already provided, however it has some
specific limitations in order to keep the driver simple and
avoid using up a lot of kernel memory:

 - The user should perform a single write() with the command and
   a single read() to get the response (with a buffer big enough
   to hold the entire response).

 - On a write() the command is simply "stored" into a kernel buffer,
   it is submitted as one operation on the subsequent read(). This
   allows to have the code write directly from the FIFO into the user
   buffer and avoid hogging the SBE between the write() and read()
   syscall as it's critical that the SBE be freed asap to respond
   to the host. An extra write() will simply replace the previously
   written command.

 - A write of a single 4 bytes containing the value 0x52534554
   in big endian will trigger a reset request. No read is necessary,
   the write() call will return when the reset has been acknowledged
   or times out.

 - The command is limited to 4K bytes.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Joel Stanley <joel@jms.id.au>
---
---
 drivers/fsi/Kconfig         |    7 +
 drivers/fsi/Makefile        |    1 +
 drivers/fsi/fsi-sbefifo.c   | 1005 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/fsi-sbefifo.h |   33 ++
 4 files changed, 1046 insertions(+)
 create mode 100644 drivers/fsi/fsi-sbefifo.c
 create mode 100644 include/linux/fsi-sbefifo.h

(limited to 'include/linux')

diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
index a326ed663d3c..24f84a96b8b9 100644
--- a/drivers/fsi/Kconfig
+++ b/drivers/fsi/Kconfig
@@ -32,4 +32,11 @@ config FSI_SCOM
 	---help---
 	This option enables an FSI based SCOM device driver.
 
+config FSI_SBEFIFO
+	tristate "SBEFIFO FSI client device driver"
+	---help---
+	This option enables an FSI based SBEFIFO device driver. The SBEFIFO is
+	a pipe-like FSI device for communicating with the self boot engine
+	(SBE) on POWER processors.
+
 endif
diff --git a/drivers/fsi/Makefile b/drivers/fsi/Makefile
index 65eb99dfafdb..851182e1cd9e 100644
--- a/drivers/fsi/Makefile
+++ b/drivers/fsi/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_FSI) += fsi-core.o
 obj-$(CONFIG_FSI_MASTER_HUB) += fsi-master-hub.o
 obj-$(CONFIG_FSI_MASTER_GPIO) += fsi-master-gpio.o
 obj-$(CONFIG_FSI_SCOM) += fsi-scom.o
+obj-$(CONFIG_FSI_SBEFIFO) += fsi-sbefifo.o
diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c
new file mode 100644
index 000000000000..9b8b6b346af6
--- /dev/null
+++ b/drivers/fsi/fsi-sbefifo.c
@@ -0,0 +1,1005 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) IBM Corporation 2017
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERGCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fsi.h>
+#include <linux/fsi-sbefifo.h>
+#include <linux/idr.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/uio.h>
+#include <linux/vmalloc.h>
+
+/*
+ * The SBEFIFO is a pipe-like FSI device for communicating with
+ * the self boot engine on POWER processors.
+ */
+
+#define DEVICE_NAME		"sbefifo"
+#define FSI_ENGID_SBE		0x22
+
+/*
+ * Register layout
+ */
+
+/* Register banks */
+#define SBEFIFO_UP		0x00		/* FSI -> Host */
+#define SBEFIFO_DOWN		0x40		/* Host -> FSI */
+
+/* Per-bank registers */
+#define SBEFIFO_FIFO		0x00		/* The FIFO itself */
+#define SBEFIFO_STS		0x04		/* Status register */
+#define   SBEFIFO_STS_PARITY_ERR	0x20000000
+#define   SBEFIFO_STS_RESET_REQ		0x02000000
+#define   SBEFIFO_STS_GOT_EOT		0x00800000
+#define   SBEFIFO_STS_MAX_XFER_LIMIT	0x00400000
+#define   SBEFIFO_STS_FULL		0x00200000
+#define   SBEFIFO_STS_EMPTY		0x00100000
+#define   SBEFIFO_STS_ECNT_MASK		0x000f0000
+#define   SBEFIFO_STS_ECNT_SHIFT	16
+#define   SBEFIFO_STS_VALID_MASK	0x0000ff00
+#define   SBEFIFO_STS_VALID_SHIFT	8
+#define   SBEFIFO_STS_EOT_MASK		0x000000ff
+#define   SBEFIFO_STS_EOT_SHIFT		0
+#define SBEFIFO_EOT_RAISE	0x08		/* (Up only) Set End Of Transfer */
+#define SBEFIFO_REQ_RESET	0x0C		/* (Up only) Reset Request */
+#define SBEFIFO_PERFORM_RESET	0x10		/* (Down only) Perform Reset */
+#define SBEFIFO_EOT_ACK		0x14		/* (Down only) Acknowledge EOT */
+#define SBEFIFO_DOWN_MAX	0x18		/* (Down only) Max transfer */
+
+/* CFAM GP Mailbox SelfBoot Message register */
+#define CFAM_GP_MBOX_SBM_ADDR	0x2824	/* Converted 0x2809 */
+
+#define CFAM_SBM_SBE_BOOTED		0x80000000
+#define CFAM_SBM_SBE_ASYNC_FFDC		0x40000000
+#define CFAM_SBM_SBE_STATE_MASK		0x00f00000
+#define CFAM_SBM_SBE_STATE_SHIFT	20
+
+enum sbe_state
+{
+	SBE_STATE_UNKNOWN = 0x0, // Unkown, initial state
+	SBE_STATE_IPLING  = 0x1, // IPL'ing - autonomous mode (transient)
+	SBE_STATE_ISTEP   = 0x2, // ISTEP - Running IPL by steps (transient)
+	SBE_STATE_MPIPL   = 0x3, // MPIPL
+	SBE_STATE_RUNTIME = 0x4, // SBE Runtime
+	SBE_STATE_DMT     = 0x5, // Dead Man Timer State (transient)
+	SBE_STATE_DUMP    = 0x6, // Dumping
+	SBE_STATE_FAILURE = 0x7, // Internal SBE failure
+	SBE_STATE_QUIESCE = 0x8, // Final state - needs SBE reset to get out
+};
+
+/* FIFO depth */
+#define SBEFIFO_FIFO_DEPTH		8
+
+/* Helpers */
+#define sbefifo_empty(sts)	((sts) & SBEFIFO_STS_EMPTY)
+#define sbefifo_full(sts)	((sts) & SBEFIFO_STS_FULL)
+#define sbefifo_parity_err(sts)	((sts) & SBEFIFO_STS_PARITY_ERR)
+#define sbefifo_populated(sts)	(((sts) & SBEFIFO_STS_ECNT_MASK) >> SBEFIFO_STS_ECNT_SHIFT)
+#define sbefifo_vacant(sts)	(SBEFIFO_FIFO_DEPTH - sbefifo_populated(sts))
+#define sbefifo_eot_set(sts)	(((sts) & SBEFIFO_STS_EOT_MASK) >> SBEFIFO_STS_EOT_SHIFT)
+
+/* Reset request timeout in ms */
+#define SBEFIFO_RESET_TIMEOUT		10000
+
+/* Timeouts for commands in ms */
+#define SBEFIFO_TIMEOUT_START_CMD	10000
+#define SBEFIFO_TIMEOUT_IN_CMD		1000
+#define SBEFIFO_TIMEOUT_START_RSP	10000
+#define SBEFIFO_TIMEOUT_IN_RSP		1000
+
+/* Other constants */
+#define SBEFIFO_MAX_CMD_LEN		PAGE_SIZE
+#define SBEFIFO_RESET_MAGIC		0x52534554 /* "RSET" */
+
+struct sbefifo {
+	uint32_t		magic;
+#define SBEFIFO_MAGIC		0x53424546 /* "SBEF" */
+	struct fsi_device	*fsi_dev;
+	struct miscdevice	mdev;
+	struct mutex		lock;
+	char			name[32];
+	int			idx;
+	bool			broken;
+	bool			async_ffdc;
+};
+
+struct sbefifo_user {
+	struct sbefifo		*sbefifo;
+	struct mutex		file_lock;
+	void			*pending_cmd;
+	size_t			pending_len;
+};
+
+static DEFINE_IDA(sbefifo_ida);
+static DEFINE_MUTEX(sbefifo_ffdc_mutex);
+
+
+static void sbefifo_dump_ffdc(struct device *dev, const __be32 *ffdc,
+			      size_t ffdc_sz, bool internal)
+{
+	int pack = 0;
+#define FFDC_LSIZE	60
+	static char ffdc_line[FFDC_LSIZE];
+	char *p = ffdc_line;
+
+	mutex_lock(&sbefifo_ffdc_mutex);
+	while (ffdc_sz) {
+		u32 w0, w1, w2, i;
+		if (ffdc_sz < 3) {
+			dev_err(dev, "SBE invalid FFDC package size %zd\n", ffdc_sz);
+			return;
+		}
+		w0 = be32_to_cpu(*(ffdc++));
+		w1 = be32_to_cpu(*(ffdc++));
+		w2 = be32_to_cpu(*(ffdc++));
+		ffdc_sz -= 3;
+		if ((w0 >> 16) != 0xFFDC) {
+			dev_err(dev, "SBE invalid FFDC package signature %08x %08x %08x\n",
+				w0, w1, w2);
+			break;
+		}
+		w0 &= 0xffff;
+		if (w0 > ffdc_sz) {
+			dev_err(dev, "SBE FFDC package len %d words but only %zd remaining\n",
+				w0, ffdc_sz);
+			w0 = ffdc_sz;
+			break;
+		}
+		if (internal) {
+			dev_warn(dev, "+---- SBE FFDC package %d for async err -----+\n",
+				 pack++);
+		} else {
+			dev_warn(dev, "+---- SBE FFDC package %d for cmd %02x:%02x -----+\n",
+				 pack++, (w1 >> 8) & 0xff, w1 & 0xff);
+		}
+		dev_warn(dev, "| Response code: %08x                   |\n", w2);
+		dev_warn(dev, "|-------------------------------------------|\n");
+		for (i = 0; i < w0; i++) {
+			if ((i & 3) == 0) {
+				p = ffdc_line;
+				p += sprintf(p, "| %04x:", i << 4);
+			}
+			p += sprintf(p, " %08x", be32_to_cpu(*(ffdc++)));
+			ffdc_sz--;
+			if ((i & 3) == 3 || i == (w0 - 1)) {
+				while ((i & 3) < 3) {
+					p += sprintf(p, "         ");
+					i++;
+				}
+				dev_warn(dev, "%s |\n", ffdc_line);
+			}
+		}
+		dev_warn(dev, "+-------------------------------------------+\n");
+	}
+}
+
+int sbefifo_parse_status(struct device *dev, u16 cmd, __be32 *response,
+			 size_t resp_len, size_t *data_len)
+{
+	u32 dh, s0, s1;
+	size_t ffdc_sz;
+
+	if (resp_len < 3) {
+		pr_debug("sbefifo: cmd %04x, response too small: %zd\n",
+			 cmd, resp_len);
+		return -ENXIO;
+	}
+	dh = be32_to_cpu(response[resp_len - 1]);
+	if (dh > resp_len || dh < 3) {
+		dev_err(dev, "SBE cmd %02x:%02x status offset out of range: %d/%zd\n",
+			cmd >> 8, cmd & 0xff, dh, resp_len);
+		return -ENXIO;
+	}
+	s0 = be32_to_cpu(response[resp_len - dh]);
+	s1 = be32_to_cpu(response[resp_len - dh + 1]);
+	if (((s0 >> 16) != 0xC0DE) || ((s0 & 0xffff) != cmd)) {
+		dev_err(dev, "SBE cmd %02x:%02x, status signature invalid: 0x%08x 0x%08x\n",
+			cmd >> 8, cmd & 0xff, s0, s1);
+		return -ENXIO;
+	}
+	if (s1 != 0) {
+		ffdc_sz = dh - 3;
+		dev_warn(dev, "SBE error cmd %02x:%02x status=%04x:%04x\n",
+			 cmd >> 8, cmd & 0xff, s1 >> 16, s1 & 0xffff);
+		if (ffdc_sz)
+			sbefifo_dump_ffdc(dev, &response[resp_len - dh + 2],
+					  ffdc_sz, false);
+	}
+	if (data_len)
+		*data_len = resp_len - dh;
+
+	/*
+	 * Primary status don't have the top bit set, so can't be confused with
+	 * Linux negative error codes, so return the status word whole.
+	 */
+	return s1;
+}
+EXPORT_SYMBOL_GPL(sbefifo_parse_status);
+
+static int sbefifo_regr(struct sbefifo *sbefifo, int reg, u32 *word)
+{
+	__be32 raw_word;
+	int rc;
+
+	rc = fsi_device_read(sbefifo->fsi_dev, reg, &raw_word,
+			     sizeof(raw_word));
+	if (rc)
+		return rc;
+
+	*word = be32_to_cpu(raw_word);
+
+	return 0;
+}
+
+static int sbefifo_regw(struct sbefifo *sbefifo, int reg, u32 word)
+{
+	__be32 raw_word = cpu_to_be32(word);
+
+	return fsi_device_write(sbefifo->fsi_dev, reg, &raw_word,
+				sizeof(raw_word));
+}
+
+static int sbefifo_check_sbe_state(struct sbefifo *sbefifo)
+{
+	__be32 raw_word;
+	u32 sbm;
+	int rc;
+
+	rc = fsi_slave_read(sbefifo->fsi_dev->slave, CFAM_GP_MBOX_SBM_ADDR,
+			    &raw_word, sizeof(raw_word));
+	if (rc)
+		return rc;
+	sbm = be32_to_cpu(raw_word);
+
+	/* SBE booted at all ? */
+	if (!(sbm & CFAM_SBM_SBE_BOOTED))
+		return -ESHUTDOWN;
+
+	/* Check its state */
+	switch ((sbm & CFAM_SBM_SBE_STATE_MASK) >> CFAM_SBM_SBE_STATE_SHIFT) {
+	case SBE_STATE_UNKNOWN:
+		return -ESHUTDOWN;
+	case SBE_STATE_IPLING:
+	case SBE_STATE_ISTEP:
+	case SBE_STATE_MPIPL:
+	case SBE_STATE_DMT:
+		return -EBUSY;
+	case SBE_STATE_RUNTIME:
+	case SBE_STATE_DUMP: /* Not sure about that one */
+		break;
+	case SBE_STATE_FAILURE:
+	case SBE_STATE_QUIESCE:
+		return -ESHUTDOWN;
+	}
+
+	/* Is there async FFDC available ? Remember it */
+	if (sbm & CFAM_SBM_SBE_ASYNC_FFDC)
+		sbefifo->async_ffdc = true;
+
+	return 0;
+}
+
+/* Don't flip endianness of data to/from FIFO, just pass through. */
+static int sbefifo_down_read(struct sbefifo *sbefifo, __be32 *word)
+{
+	return fsi_device_read(sbefifo->fsi_dev, SBEFIFO_DOWN, word,
+			       sizeof(*word));
+}
+
+static int sbefifo_up_write(struct sbefifo *sbefifo, __be32 word)
+{
+	return fsi_device_write(sbefifo->fsi_dev, SBEFIFO_UP, &word,
+				sizeof(word));
+}
+
+static int sbefifo_request_reset(struct sbefifo *sbefifo)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	u32 status, timeout;
+	int rc;
+
+	dev_dbg(dev, "Requesting FIFO reset\n");
+
+	/* Mark broken first, will be cleared if reset succeeds */
+	sbefifo->broken = true;
+
+	/* Send reset request */
+	rc = sbefifo_regw(sbefifo, SBEFIFO_UP | SBEFIFO_REQ_RESET, 1);
+	if (rc) {
+		dev_err(dev, "Sending reset request failed, rc=%d\n", rc);
+		return rc;
+	}
+
+	/* Wait for it to complete */
+	for (timeout = 0; timeout < SBEFIFO_RESET_TIMEOUT; timeout++) {
+		rc = sbefifo_regr(sbefifo, SBEFIFO_UP | SBEFIFO_STS, &status);
+		if (rc) {
+			dev_err(dev, "Failed to read UP fifo status during reset"
+				" , rc=%d\n", rc);
+			return rc;
+		}
+
+		if (!(status & SBEFIFO_STS_RESET_REQ)) {
+			dev_dbg(dev, "FIFO reset done\n");
+			sbefifo->broken = false;
+			return 0;
+		}
+
+		msleep(1);
+	}
+	dev_err(dev, "FIFO reset timed out\n");
+
+	return -ETIMEDOUT;
+}
+
+static int sbefifo_cleanup_hw(struct sbefifo *sbefifo)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	u32 up_status, down_status;
+	bool need_reset = false;
+	int rc;
+
+	rc = sbefifo_check_sbe_state(sbefifo);
+	if (rc) {
+		dev_dbg(dev, "SBE state=%d\n", rc);
+		return rc;
+	}
+
+	/* If broken, we don't need to look at status, go straight to reset */
+	if (sbefifo->broken)
+		goto do_reset;
+
+	rc = sbefifo_regr(sbefifo, SBEFIFO_UP | SBEFIFO_STS, &up_status);
+	if (rc) {
+		dev_err(dev, "Cleanup: Reading UP status failed, rc=%d\n", rc);
+
+		/* Will try reset again on next attempt at using it */
+		sbefifo->broken = true;
+		return rc;
+	}
+
+	rc = sbefifo_regr(sbefifo, SBEFIFO_DOWN | SBEFIFO_STS, &down_status);
+	if (rc) {
+		dev_err(dev, "Cleanup: Reading DOWN status failed, rc=%d\n", rc);
+
+		/* Will try reset again on next attempt at using it */
+		sbefifo->broken = true;
+		return rc;
+	}
+
+	/* The FIFO already contains a reset request from the SBE ? */
+	if (down_status & SBEFIFO_STS_RESET_REQ) {
+		dev_info(dev, "Cleanup: FIFO reset request set, resetting\n");
+		rc = sbefifo_regw(sbefifo, SBEFIFO_UP, SBEFIFO_PERFORM_RESET);
+		if (rc) {
+			sbefifo->broken = true;
+			dev_err(dev, "Cleanup: Reset reg write failed, rc=%d\n", rc);
+			return rc;
+		}
+		sbefifo->broken = false;
+		return 0;
+	}
+
+	/* Parity error on either FIFO ? */
+	if ((up_status | down_status) & SBEFIFO_STS_PARITY_ERR)
+		need_reset = true;
+
+	/* Either FIFO not empty ? */
+	if (!((up_status & down_status) & SBEFIFO_STS_EMPTY))
+		need_reset = true;
+
+	if (!need_reset)
+		return 0;
+
+	dev_info(dev, "Cleanup: FIFO not clean (up=0x%08x down=0x%08x)\n",
+		 up_status, down_status);
+
+ do_reset:
+
+	/* Mark broken, will be cleared if/when reset succeeds */
+	return sbefifo_request_reset(sbefifo);
+}
+
+static int sbefifo_wait(struct sbefifo *sbefifo, bool up,
+			u32 *status, unsigned long timeout)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	unsigned long end_time;
+	bool ready = false;
+	u32 addr, sts = 0;
+	int rc;
+
+	dev_vdbg(dev, "Wait on %s fifo...\n", up ? "up" : "down");
+
+	addr = (up ? SBEFIFO_UP : SBEFIFO_DOWN) | SBEFIFO_STS;
+
+	end_time = jiffies + timeout;
+	while (!time_after(jiffies, end_time)) {
+		cond_resched();
+		rc = sbefifo_regr(sbefifo, addr, &sts);
+		if (rc < 0) {
+			dev_err(dev, "FSI error %d reading status register\n", rc);
+			return rc;
+		}
+		if (!up && sbefifo_parity_err(sts)) {
+			dev_err(dev, "Parity error in DOWN FIFO\n");
+			return -ENXIO;
+		}
+		ready = !(up ? sbefifo_full(sts) : sbefifo_empty(sts));
+		if (ready)
+			break;
+	}
+	if (!ready) {
+		dev_err(dev, "%s FIFO Timeout ! status=%08x\n", up ? "UP" : "DOWN", sts);
+		return -ETIMEDOUT;
+	}
+	dev_vdbg(dev, "End of wait status: %08x\n", sts);
+
+	*status = sts;
+
+	return 0;
+}
+
+static int sbefifo_send_command(struct sbefifo *sbefifo,
+				const __be32 *command, size_t cmd_len)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	size_t len, chunk, vacant = 0, remaining = cmd_len;
+	unsigned long timeout;
+	u32 status;
+	int rc;
+
+	dev_vdbg(dev, "sending command (%zd words, cmd=%04x)\n",
+		 cmd_len, be32_to_cpu(command[1]));
+
+	/* As long as there's something to send */
+	timeout = msecs_to_jiffies(SBEFIFO_TIMEOUT_START_CMD);
+	while (remaining) {
+		/* Wait for room in the FIFO */
+		rc = sbefifo_wait(sbefifo, true, &status, timeout);
+		if (rc < 0)
+			return rc;
+		timeout = msecs_to_jiffies(SBEFIFO_TIMEOUT_IN_CMD);
+
+		vacant = sbefifo_vacant(status);
+		len = chunk = min(vacant, remaining);
+
+		dev_vdbg(dev, "  status=%08x vacant=%zd chunk=%zd\n",
+			 status, vacant, chunk);
+
+		/* Write as much as we can */
+		while (len--) {
+			rc = sbefifo_up_write(sbefifo, *(command++));
+			if (rc) {
+				dev_err(dev, "FSI error %d writing UP FIFO\n", rc);
+				return rc;
+			}
+		}
+		remaining -= chunk;
+		vacant -= chunk;
+	}
+
+	/* If there's no room left, wait for some to write EOT */
+	if (!vacant) {
+		rc = sbefifo_wait(sbefifo, true, &status, timeout);
+		if (rc)
+			return rc;
+	}
+
+	/* Send an EOT */
+	rc = sbefifo_regw(sbefifo, SBEFIFO_UP | SBEFIFO_EOT_RAISE, 0);
+	if (rc)
+		dev_err(dev, "FSI error %d writing EOT\n", rc);
+	return rc;
+}
+
+static int sbefifo_read_response(struct sbefifo *sbefifo, struct iov_iter *response)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	u32 data, status, eot_set;
+	unsigned long timeout;
+	bool overflow = false;
+	size_t len;
+	int rc;
+
+	dev_vdbg(dev, "reading response, buflen = %zd\n", iov_iter_count(response));
+
+	timeout = msecs_to_jiffies(SBEFIFO_TIMEOUT_START_RSP);
+	for (;;) {
+		/* Grab FIFO status (this will handle parity errors) */
+		rc = sbefifo_wait(sbefifo, false, &status, timeout);
+		if (rc < 0)
+			return rc;
+		timeout = msecs_to_jiffies(SBEFIFO_TIMEOUT_IN_RSP);
+
+		/* Decode status */
+		len = sbefifo_populated(status);
+		eot_set = sbefifo_eot_set(status);
+
+		dev_vdbg(dev, "  chunk size %zd eot_set=0x%x\n", len, eot_set);
+
+		/* Go through the chunk */
+		while(len--) {
+			/* Read the data */
+			rc = sbefifo_down_read(sbefifo, &data);
+			if (rc < 0)
+				return rc;
+
+			/* Was it an EOT ? */
+			if (eot_set & 0x80) {
+				/*
+				 * There should be nothing else in the FIFO,
+				 * if there is, mark broken, this will force
+				 * a reset on next use, but don't fail the
+				 * command.
+				 */
+				if (len) {
+					dev_warn(dev, "FIFO read hit"
+						 " EOT with still %zd data\n",
+						 len);
+					sbefifo->broken = true;
+				}
+
+				/* We are done */
+				rc = sbefifo_regw(sbefifo,
+						  SBEFIFO_DOWN | SBEFIFO_EOT_ACK, 0);
+
+				/*
+				 * If that write fail, still complete the request but mark
+				 * the fifo as broken for subsequent reset (not much else
+				 * we can do here).
+				 */
+				if (rc) {
+					dev_err(dev, "FSI error %d ack'ing EOT\n", rc);
+					sbefifo->broken = true;
+				}
+
+				/* Tell whether we overflowed */
+				return overflow ? -EOVERFLOW : 0;
+			}
+
+			/* Store it if there is room */
+			if (iov_iter_count(response) >= sizeof(__be32)) {
+				if (copy_to_iter(&data, sizeof(__be32), response) < sizeof(__be32))
+					return -EFAULT;
+			} else {
+				dev_vdbg(dev, "Response overflowed !\n");
+
+				overflow = true;
+			}
+
+			/* Next EOT bit */
+			eot_set <<= 1;
+		}
+	}
+	/* Shouldn't happen */
+	return -EIO;
+}
+
+static int sbefifo_do_command(struct sbefifo *sbefifo,
+			      const __be32 *command, size_t cmd_len,
+			      struct iov_iter *response)
+{
+	/* Try sending the command */
+	int rc = sbefifo_send_command(sbefifo, command, cmd_len);
+	if (rc)
+		return rc;
+
+	/* Now, get the response */
+	return sbefifo_read_response(sbefifo, response);
+}
+
+static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+        struct iov_iter ffdc_iter;
+        struct kvec ffdc_iov;
+	__be32 *ffdc;
+	size_t ffdc_sz;
+	u32 cmd[2];
+	int rc;
+
+	sbefifo->async_ffdc = false;
+	ffdc = vmalloc(SBEFIFO_MAX_FFDC_SIZE);
+	if (!ffdc) {
+		dev_err(dev, "Failed to allocate SBE FFDC buffer\n");
+		return;
+	}
+        ffdc_iov.iov_base = ffdc;
+	ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE;;
+        iov_iter_kvec(&ffdc_iter, WRITE | ITER_KVEC, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
+	cmd[0] = cpu_to_be32(2);
+	cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC);
+	rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter);
+	if (rc != 0) {
+		dev_err(dev, "Error %d retrieving SBE FFDC\n", rc);
+		goto bail;
+	}
+	ffdc_sz = SBEFIFO_MAX_FFDC_SIZE - iov_iter_count(&ffdc_iter);
+	ffdc_sz /= sizeof(__be32);
+	rc = sbefifo_parse_status(dev, SBEFIFO_CMD_GET_SBE_FFDC, ffdc,
+				  ffdc_sz, &ffdc_sz);
+	if (rc != 0) {
+		dev_err(dev, "Error %d decoding SBE FFDC\n", rc);
+		goto bail;
+	}
+	if (ffdc_sz > 0)
+		sbefifo_dump_ffdc(dev, ffdc, ffdc_sz, true);
+ bail:
+	vfree(ffdc);
+
+}
+
+static int __sbefifo_submit(struct sbefifo *sbefifo,
+			    const __be32 *command, size_t cmd_len,
+			    struct iov_iter *response)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	int rc;
+
+	if (cmd_len < 2 || be32_to_cpu(command[0]) != cmd_len) {
+		dev_vdbg(dev, "Invalid command len %zd (header: %d)\n",
+			 cmd_len, be32_to_cpu(command[0]));
+		return -EINVAL;
+	}
+
+	/* First ensure the HW is in a clean state */
+	rc = sbefifo_cleanup_hw(sbefifo);
+	if (rc)
+		return rc;
+
+	/* Look for async FFDC first if any */
+	if (sbefifo->async_ffdc)
+		sbefifo_collect_async_ffdc(sbefifo);
+
+	rc = sbefifo_do_command(sbefifo, command, cmd_len, response);
+	if (rc != 0 && rc != -EOVERFLOW)
+		goto fail;
+	return rc;
+ fail:
+	/*
+	 * On failure, attempt a reset. Ignore the result, it will mark
+	 * the fifo broken if the reset fails
+	 */
+        sbefifo_request_reset(sbefifo);
+
+	/* Return original error */
+	return rc;
+}
+
+/**
+ * sbefifo_submit() - Submit and SBE fifo command and receive response
+ * @dev: The sbefifo device
+ * @command: The raw command data
+ * @cmd_len: The command size (in 32-bit words)
+ * @response: The output response buffer
+ * @resp_len: In: Response buffer size, Out: Response size
+ *
+ * This will perform the entire operation. If the reponse buffer
+ * overflows, returns -EOVERFLOW
+ */
+int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
+		   __be32 *response, size_t *resp_len)
+{
+	struct sbefifo *sbefifo = dev_get_drvdata(dev);
+        struct iov_iter resp_iter;
+        struct kvec resp_iov;
+	size_t rbytes;
+	int rc;
+
+	if (!dev || !sbefifo)
+		return -ENODEV;
+	if (WARN_ON_ONCE(sbefifo->magic != SBEFIFO_MAGIC))
+		return -ENODEV;
+	if (!resp_len || !command || !response || cmd_len > SBEFIFO_MAX_CMD_LEN)
+		return -EINVAL;
+
+	/* Prepare iov iterator */
+	rbytes = (*resp_len) * sizeof(__be32);
+	resp_iov.iov_base = response;
+	resp_iov.iov_len = rbytes;
+        iov_iter_kvec(&resp_iter, WRITE | ITER_KVEC, &resp_iov, 1, rbytes);
+
+	/* Perform the command */
+	mutex_lock(&sbefifo->lock);
+	rc = __sbefifo_submit(sbefifo, command, cmd_len, &resp_iter);
+	mutex_unlock(&sbefifo->lock);
+
+	/* Extract the response length */
+	rbytes -= iov_iter_count(&resp_iter);
+	*resp_len = rbytes / sizeof(__be32);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(sbefifo_submit);
+
+/*
+ * Char device interface
+ */
+static int sbefifo_user_open(struct inode *inode, struct file *file)
+{
+	struct sbefifo *sbefifo = container_of(file->private_data,
+					       struct sbefifo, mdev);
+	struct sbefifo_user *user;
+
+	user = kzalloc(sizeof(struct sbefifo_user), GFP_KERNEL);
+	if (!user)
+		return -ENOMEM;
+
+	file->private_data = user;
+	user->sbefifo = sbefifo;
+	user->pending_cmd = (void *)__get_free_page(GFP_KERNEL);
+	if (!user->pending_cmd) {
+		kfree(user);
+		return -ENOMEM;
+	}
+	mutex_init(&user->file_lock);
+
+	return 0;
+}
+
+static ssize_t sbefifo_user_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *offset)
+{
+	struct sbefifo_user *user = file->private_data;
+	struct sbefifo *sbefifo;
+	struct iov_iter resp_iter;
+        struct iovec resp_iov;
+	size_t cmd_len;
+	int rc;
+
+	if (!user)
+		return -EINVAL;
+	sbefifo = user->sbefifo;
+	if (len & 3)
+		return -EINVAL;
+
+	mutex_lock(&user->file_lock);
+
+	/* Cronus relies on -EAGAIN after a short read */
+	if (user->pending_len == 0) {
+		rc = -EAGAIN;
+		goto bail;
+	}
+	if (user->pending_len < 8) {
+		rc = -EINVAL;
+		goto bail;
+	}
+	cmd_len = user->pending_len >> 2;
+
+	/* Prepare iov iterator */
+	resp_iov.iov_base = buf;
+	resp_iov.iov_len = len;
+	iov_iter_init(&resp_iter, WRITE, &resp_iov, 1, len);
+
+	/* Perform the command */
+	mutex_lock(&sbefifo->lock);
+	rc = __sbefifo_submit(sbefifo, user->pending_cmd, cmd_len, &resp_iter);
+	mutex_unlock(&sbefifo->lock);
+	if (rc < 0)
+		goto bail;
+
+	/* Extract the response length */
+	rc = len - iov_iter_count(&resp_iter);
+ bail:
+	user->pending_len = 0;
+	mutex_unlock(&user->file_lock);
+	return rc;
+}
+
+static ssize_t sbefifo_user_write(struct file *file, const char __user *buf,
+				  size_t len, loff_t *offset)
+{
+	struct sbefifo_user *user = file->private_data;
+	struct sbefifo *sbefifo;
+	int rc = len;
+
+	if (!user)
+		return -EINVAL;
+	sbefifo = user->sbefifo;
+	if (len > SBEFIFO_MAX_CMD_LEN)
+		return -EINVAL;
+	if (len & 3)
+		return -EINVAL;
+
+	mutex_lock(&user->file_lock);
+
+	/* Copy the command into the staging buffer */
+	if (copy_from_user(user->pending_cmd, buf, len)) {
+		rc = -EFAULT;
+		goto bail;
+	}
+
+	/* Check for the magic reset command */
+	if (len == 4 && be32_to_cpu(*(__be32 *)user->pending_cmd) ==
+	    SBEFIFO_RESET_MAGIC)  {
+
+		/* Clear out any pending command */
+		user->pending_len = 0;
+
+		/* Trigger reset request */
+		mutex_lock(&sbefifo->lock);
+		rc = sbefifo_request_reset(user->sbefifo);
+		mutex_unlock(&sbefifo->lock);
+		if (rc == 0)
+			rc = 4;
+		goto bail;
+	}
+
+	/* Update the staging buffer size */
+	user->pending_len = len;
+ bail:
+	mutex_unlock(&user->file_lock);
+
+	/* And that's it, we'll issue the command on a read */
+	return rc;
+}
+
+static int sbefifo_user_release(struct inode *inode, struct file *file)
+{
+	struct sbefifo_user *user = file->private_data;
+
+	if (!user)
+		return -EINVAL;
+
+	free_page((unsigned long)user->pending_cmd);
+	kfree(user);
+
+	return 0;
+}
+
+static const struct file_operations sbefifo_fops = {
+	.owner		= THIS_MODULE,
+	.open		= sbefifo_user_open,
+	.read		= sbefifo_user_read,
+	.write		= sbefifo_user_write,
+	.release	= sbefifo_user_release,
+};
+
+/*
+ * Probe/remove
+ */
+
+static int sbefifo_probe(struct device *dev)
+{
+	struct fsi_device *fsi_dev = to_fsi_dev(dev);
+	struct sbefifo *sbefifo;
+	struct device_node *np;
+	struct platform_device *child;
+	char child_name[32];
+	int rc, child_idx = 0;
+
+	dev_dbg(dev, "Found sbefifo device\n");
+
+	sbefifo = devm_kzalloc(dev, sizeof(*sbefifo), GFP_KERNEL);
+	if (!sbefifo)
+		return -ENOMEM;
+	sbefifo->magic = SBEFIFO_MAGIC;
+	sbefifo->fsi_dev = fsi_dev;
+	mutex_init(&sbefifo->lock);
+
+	/*
+	 * Try cleaning up the FIFO. If this fails, we still register the
+	 * driver and will try cleaning things up again on the next access.
+	 */
+	rc = sbefifo_cleanup_hw(sbefifo);
+	if (rc && rc != -ESHUTDOWN)
+		dev_err(dev, "Initial HW cleanup failed, will retry later\n");
+
+	sbefifo->idx = ida_simple_get(&sbefifo_ida, 1, INT_MAX, GFP_KERNEL);
+	snprintf(sbefifo->name, sizeof(sbefifo->name), "sbefifo%d",
+		 sbefifo->idx);
+
+	dev_set_drvdata(dev, sbefifo);
+
+	/* Create misc chardev for userspace access */
+	sbefifo->mdev.minor = MISC_DYNAMIC_MINOR;
+	sbefifo->mdev.fops = &sbefifo_fops;
+	sbefifo->mdev.name = sbefifo->name;
+	sbefifo->mdev.parent = dev;
+	rc = misc_register(&sbefifo->mdev);
+	if (rc) {
+		dev_err(dev, "Failed to register miscdevice: %d\n", rc);
+		ida_simple_remove(&sbefifo_ida, sbefifo->idx);
+		return rc;
+	}
+
+	/* Create platform devs for dts child nodes (occ, etc) */
+	for_each_available_child_of_node(dev->of_node, np) {
+		snprintf(child_name, sizeof(child_name), "%s-dev%d",
+			 sbefifo->name, child_idx++);
+		child = of_platform_device_create(np, child_name, dev);
+		if (!child)
+			dev_warn(dev, "failed to create child %s dev\n",
+				 child_name);
+	}
+
+	return 0;
+}
+
+static int sbefifo_unregister_child(struct device *dev, void *data)
+{
+	struct platform_device *child = to_platform_device(dev);
+
+	of_device_unregister(child);
+	if (dev->of_node)
+		of_node_clear_flag(dev->of_node, OF_POPULATED);
+
+	return 0;
+}
+
+static int sbefifo_remove(struct device *dev)
+{
+	struct sbefifo *sbefifo = dev_get_drvdata(dev);
+
+	dev_dbg(dev, "Removing sbefifo device...\n");
+
+	misc_deregister(&sbefifo->mdev);
+	device_for_each_child(dev, NULL, sbefifo_unregister_child);
+
+	ida_simple_remove(&sbefifo_ida, sbefifo->idx);
+
+	return 0;
+}
+
+static struct fsi_device_id sbefifo_ids[] = {
+	{
+		.engine_type = FSI_ENGID_SBE,
+		.version = FSI_VERSION_ANY,
+	},
+	{ 0 }
+};
+
+static struct fsi_driver sbefifo_drv = {
+	.id_table = sbefifo_ids,
+	.drv = {
+		.name = DEVICE_NAME,
+		.bus = &fsi_bus_type,
+		.probe = sbefifo_probe,
+		.remove = sbefifo_remove,
+	}
+};
+
+static int sbefifo_init(void)
+{
+	return fsi_driver_register(&sbefifo_drv);
+}
+
+static void sbefifo_exit(void)
+{
+	fsi_driver_unregister(&sbefifo_drv);
+
+	ida_destroy(&sbefifo_ida);
+}
+
+module_init(sbefifo_init);
+module_exit(sbefifo_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Brad Bishop <bradleyb@fuzziesquirrel.com>");
+MODULE_AUTHOR("Eddie James <eajames@linux.vnet.ibm.com>");
+MODULE_AUTHOR("Andrew Jeffery <andrew@aj.id.au>");
+MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
+MODULE_DESCRIPTION("Linux device interface to the POWER Self Boot Engine");
diff --git a/include/linux/fsi-sbefifo.h b/include/linux/fsi-sbefifo.h
new file mode 100644
index 000000000000..13f9ebeaa25e
--- /dev/null
+++ b/include/linux/fsi-sbefifo.h
@@ -0,0 +1,33 @@
+/*
+ * SBEFIFO FSI Client device driver
+ *
+ * Copyright (C) IBM Corporation 2017
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERGCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef LINUX_FSI_SBEFIFO_H
+#define LINUX_FSI_SBEFIFO_H
+
+#define SBEFIFO_CMD_PUT_OCC_SRAM	0xa404
+#define SBEFIFO_CMD_GET_OCC_SRAM	0xa403
+#define SBEFIFO_CMD_GET_SBE_FFDC	0xa801
+
+#define SBEFIFO_MAX_FFDC_SIZE		0x2000
+
+struct device;
+
+int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
+		   __be32 *response, size_t *resp_len);
+
+int sbefifo_parse_status(struct device *dev, u16 cmd, __be32 *response,
+			 size_t resp_len, size_t *data_len);
+
+#endif /* LINUX_FSI_SBEFIFO_H */
-- 
cgit v1.2.3


From 2d4f545cb14fa2d9865124183f3100d46dc9b3b5 Mon Sep 17 00:00:00 2001
From: Peter Meerwald <pmeerw@pmeerw.net>
Date: Mon, 4 Jun 2018 23:34:06 +0200
Subject: rfkill: Fixes and cleanup of kernel-doc in the header file

Fixes kerneldoc parameter names to match implementation,
rfkill_set_hw_state(), rfkill_set_sw_state().
Fix description of rfkill_resume_polling().
Fix typos in documentation of rfkill_find_type().
Consistently start kerneldoc description with uppercase
letter.

Signed-off-by: Peter Meerwald-Stadler <pmeerw@pmeerw.net>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/linux/rfkill.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index e6a0031d1b1f..8ad2487a86d5 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -66,7 +66,7 @@ struct rfkill_ops {
 
 #if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
 /**
- * rfkill_alloc - allocate rfkill structure
+ * rfkill_alloc - Allocate rfkill structure
  * @name: name of the struct -- the string is not copied internally
  * @parent: device that has rf switch on it
  * @type: type of the switch (RFKILL_TYPE_*)
@@ -112,7 +112,7 @@ void rfkill_pause_polling(struct rfkill *rfkill);
 /**
  * rfkill_resume_polling(struct rfkill *rfkill)
  *
- * Pause polling -- say transmitter is off for other reasons.
+ * Resume polling
  * NOTE: not necessary for suspend/resume -- in that case the
  * core stops polling anyway
  */
@@ -130,7 +130,7 @@ void rfkill_resume_polling(struct rfkill *rfkill);
 void rfkill_unregister(struct rfkill *rfkill);
 
 /**
- * rfkill_destroy - free rfkill structure
+ * rfkill_destroy - Free rfkill structure
  * @rfkill: rfkill structure to be destroyed
  *
  * Destroys the rfkill structure.
@@ -140,7 +140,7 @@ void rfkill_destroy(struct rfkill *rfkill);
 /**
  * rfkill_set_hw_state - Set the internal rfkill hardware block state
  * @rfkill: pointer to the rfkill class to modify.
- * @state: the current hardware block state to set
+ * @blocked: the current hardware block state to set
  *
  * rfkill drivers that get events when the hard-blocked state changes
  * use this function to notify the rfkill core (and through that also
@@ -161,7 +161,7 @@ bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked);
 /**
  * rfkill_set_sw_state - Set the internal rfkill software block state
  * @rfkill: pointer to the rfkill class to modify.
- * @state: the current software block state to set
+ * @blocked: the current software block state to set
  *
  * rfkill drivers that get events when the soft-blocked state changes
  * (yes, some platforms directly act on input but allow changing again)
@@ -183,7 +183,7 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked);
 /**
  * rfkill_init_sw_state - Initialize persistent software block state
  * @rfkill: pointer to the rfkill class to modify.
- * @state: the current software block state to set
+ * @blocked: the current software block state to set
  *
  * rfkill drivers that preserve their software block state over power off
  * use this function to notify the rfkill core (and through that also
@@ -208,17 +208,17 @@ void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked);
 void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw);
 
 /**
- * rfkill_blocked - query rfkill block
+ * rfkill_blocked - Query rfkill block state
  *
  * @rfkill: rfkill struct to query
  */
 bool rfkill_blocked(struct rfkill *rfkill);
 
 /**
- * rfkill_find_type - Helpper for finding rfkill type by name
+ * rfkill_find_type - Helper for finding rfkill type by name
  * @name: the name of the type
  *
- * Returns enum rfkill_type that conrresponds the name.
+ * Returns enum rfkill_type that corresponds to the name.
  */
 enum rfkill_type rfkill_find_type(const char *name);
 
@@ -296,7 +296,7 @@ static inline enum rfkill_type rfkill_find_type(const char *name)
 const char *rfkill_get_led_trigger_name(struct rfkill *rfkill);
 
 /**
- * rfkill_set_led_trigger_name -- set the LED trigger name
+ * rfkill_set_led_trigger_name - Set the LED trigger name
  * @rfkill: rfkill struct
  * @name: LED trigger name
  *
-- 
cgit v1.2.3


From c4cbaf7973a794839af080f13748335976cf3f3f Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 9 Jun 2018 09:14:42 +0300
Subject: cfg80211: Add support for HE

Add support for the HE in cfg80211 and also add userspace API to
nl80211 to send rate information out, conforming with P802.11ax_D2.0.

Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Ido Yariv <idox.yariv@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/linux/ieee80211.h    | 427 +++++++++++++++++++++++++++++++++++++++++++
 include/net/cfg80211.h       | 106 ++++++++++-
 include/uapi/linux/nl80211.h |  87 ++++++++-
 net/wireless/core.c          |  21 ++-
 net/wireless/nl80211.c       |  99 +++++++++-
 net/wireless/util.c          |  82 +++++++++
 6 files changed, 817 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 8fe7e4306816..e6a6503bfa33 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1539,6 +1539,106 @@ struct ieee80211_vht_operation {
 	__le16 basic_mcs_set;
 } __packed;
 
+/**
+ * struct ieee80211_he_cap_elem - HE capabilities element
+ *
+ * This structure is the "HE capabilities element" fixed fields as
+ * described in P802.11ax_D2.0 section 9.4.2.237.2 and 9.4.2.237.3
+ */
+struct ieee80211_he_cap_elem {
+	u8 mac_cap_info[5];
+	u8 phy_cap_info[9];
+} __packed;
+
+#define IEEE80211_TX_RX_MCS_NSS_DESC_MAX_LEN	5
+
+/**
+ * enum ieee80211_he_mcs_support - HE MCS support definitions
+ * @IEEE80211_HE_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the
+ *	number of streams
+ * @IEEE80211_HE_MCS_SUPPORT_0_9: MCSes 0-9 are supported
+ * @IEEE80211_HE_MCS_SUPPORT_0_11: MCSes 0-11 are supported
+ * @IEEE80211_HE_MCS_NOT_SUPPORTED: This number of streams isn't supported
+ *
+ * These definitions are used in each 2-bit subfield of the rx_mcs_*
+ * and tx_mcs_* fields of &struct ieee80211_he_mcs_nss_supp, which are
+ * both split into 8 subfields by number of streams. These values indicate
+ * which MCSes are supported for the number of streams the value appears
+ * for.
+ */
+enum ieee80211_he_mcs_support {
+	IEEE80211_HE_MCS_SUPPORT_0_7	= 0,
+	IEEE80211_HE_MCS_SUPPORT_0_9	= 1,
+	IEEE80211_HE_MCS_SUPPORT_0_11	= 2,
+	IEEE80211_HE_MCS_NOT_SUPPORTED	= 3,
+};
+
+/**
+ * struct ieee80211_he_mcs_nss_supp - HE Tx/Rx HE MCS NSS Support Field
+ *
+ * This structure holds the data required for the Tx/Rx HE MCS NSS Support Field
+ * described in P802.11ax_D2.0 section 9.4.2.237.4
+ *
+ * @rx_mcs_80: Rx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     widths less than 80MHz.
+ * @tx_mcs_80: Tx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     widths less than 80MHz.
+ * @rx_mcs_160: Rx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     width 160MHz.
+ * @tx_mcs_160: Tx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     width 160MHz.
+ * @rx_mcs_80p80: Rx MCS map 2 bits for each stream, total 8 streams, for
+ *     channel width 80p80MHz.
+ * @tx_mcs_80p80: Tx MCS map 2 bits for each stream, total 8 streams, for
+ *     channel width 80p80MHz.
+ */
+struct ieee80211_he_mcs_nss_supp {
+	__le16 rx_mcs_80;
+	__le16 tx_mcs_80;
+	__le16 rx_mcs_160;
+	__le16 tx_mcs_160;
+	__le16 rx_mcs_80p80;
+	__le16 tx_mcs_80p80;
+} __packed;
+
+/**
+ * struct ieee80211_he_operation - HE capabilities element
+ *
+ * This structure is the "HE operation element" fields as
+ * described in P802.11ax_D2.0 section 9.4.2.238
+ */
+struct ieee80211_he_operation {
+	__le32 he_oper_params;
+	__le16 he_mcs_nss_set;
+	/* Optional 0,1,3 or 4 bytes: depends on @he_oper_params */
+	u8 optional[0];
+} __packed;
+
+/**
+ * struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field
+ *
+ * This structure is the "MU AC Parameter Record" fields as
+ * described in P802.11ax_D2.0 section 9.4.2.240
+ */
+struct ieee80211_he_mu_edca_param_ac_rec {
+	u8 aifsn;
+	u8 ecw_min_max;
+	u8 mu_edca_timer;
+} __packed;
+
+/**
+ * struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element
+ *
+ * This structure is the "MU EDCA Parameter Set element" fields as
+ * described in P802.11ax_D2.0 section 9.4.2.240
+ */
+struct ieee80211_mu_edca_param_set {
+	u8 mu_qos_info;
+	struct ieee80211_he_mu_edca_param_ac_rec ac_be;
+	struct ieee80211_he_mu_edca_param_ac_rec ac_bk;
+	struct ieee80211_he_mu_edca_param_ac_rec ac_vi;
+	struct ieee80211_he_mu_edca_param_ac_rec ac_vo;
+} __packed;
 
 /* 802.11ac VHT Capabilities */
 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895			0x00000000
@@ -1577,6 +1677,328 @@ struct ieee80211_vht_operation {
 #define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN			0x10000000
 #define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN			0x20000000
 
+/* 802.11ax HE MAC capabilities */
+#define IEEE80211_HE_MAC_CAP0_HTC_HE				0x01
+#define IEEE80211_HE_MAC_CAP0_TWT_REQ				0x02
+#define IEEE80211_HE_MAC_CAP0_TWT_RES				0x04
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_NOT_SUPP		0x00
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_1		0x08
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_2		0x10
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_3		0x18
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK			0x18
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_1		0x00
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_2		0x20
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_4		0x40
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_8		0x60
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_16		0x80
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_32		0xa0
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_64		0xc0
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_UNLIMITED	0xe0
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_MASK		0xe0
+
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_UNLIMITED		0x00
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_128			0x01
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_256			0x02
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_512			0x03
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_MASK		0x03
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_0US		0x00
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_8US		0x04
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US		0x08
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK		0x0c
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_1		0x00
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_2		0x10
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_3		0x20
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_4		0x30
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_5		0x40
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_6		0x50
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_7		0x60
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8		0x70
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_MASK		0x70
+
+/* Link adaptation is split between byte HE_MAC_CAP1 and
+ * HE_MAC_CAP2. It should be set only if IEEE80211_HE_MAC_CAP0_HTC_HE
+ * in which case the following values apply:
+ * 0 = No feedback.
+ * 1 = reserved.
+ * 2 = Unsolicited feedback.
+ * 3 = both
+ */
+#define IEEE80211_HE_MAC_CAP1_LINK_ADAPTATION			0x80
+
+#define IEEE80211_HE_MAC_CAP2_LINK_ADAPTATION			0x01
+#define IEEE80211_HE_MAC_CAP2_ALL_ACK				0x02
+#define IEEE80211_HE_MAC_CAP2_UL_MU_RESP_SCHED			0x04
+#define IEEE80211_HE_MAC_CAP2_BSR				0x08
+#define IEEE80211_HE_MAC_CAP2_BCAST_TWT				0x10
+#define IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP			0x20
+#define IEEE80211_HE_MAC_CAP2_MU_CASCADING			0x40
+#define IEEE80211_HE_MAC_CAP2_ACK_EN				0x80
+
+#define IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU	0x01
+#define IEEE80211_HE_MAC_CAP3_OMI_CONTROL			0x02
+#define IEEE80211_HE_MAC_CAP3_OFDMA_RA				0x04
+
+/* The maximum length of an A-MDPU is defined by the combination of the Maximum
+ * A-MDPU Length Exponent field in the HT capabilities, VHT capabilities and the
+ * same field in the HE capabilities.
+ */
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_USE_VHT	0x00
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_1		0x08
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2		0x10
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_RESERVED	0x18
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_MASK		0x18
+#define IEEE80211_HE_MAC_CAP3_A_AMSDU_FRAG			0x20
+#define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED			0x40
+#define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS		0x80
+
+#define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG		0x01
+#define IEEE80211_HE_MAC_CAP4_QTP				0x02
+#define IEEE80211_HE_MAC_CAP4_BQR				0x04
+#define IEEE80211_HE_MAC_CAP4_SR_RESP				0x08
+#define IEEE80211_HE_MAC_CAP4_NDP_FB_REP			0x10
+#define IEEE80211_HE_MAC_CAP4_OPS				0x20
+#define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU			0x40
+
+/* 802.11ax HE PHY capabilities */
+#define IEEE80211_HE_PHY_CAP0_DUAL_BAND					0x01
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G		0x02
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G	0x04
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G		0x08
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G	0x10
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G	0x20
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G	0x40
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK			0xfe
+
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_20MHZ	0x01
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_40MHZ	0x02
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_20MHZ	0x04
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_40MHZ	0x08
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK			0x0f
+#define IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A				0x10
+#define IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD			0x20
+#define IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US		0x40
+/* Midamble RX Max NSTS is split between byte #2 and byte #3 */
+#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS			0x80
+
+#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_MAX_NSTS			0x01
+#define IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US			0x02
+#define IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ			0x04
+#define IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ			0x08
+#define IEEE80211_HE_PHY_CAP2_DOPPLER_TX				0x10
+#define IEEE80211_HE_PHY_CAP2_DOPPLER_RX				0x20
+
+/* Note that the meaning of UL MU below is different between an AP and a non-AP
+ * sta, where in the AP case it indicates support for Rx and in the non-AP sta
+ * case it indicates support for Tx.
+ */
+#define IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO			0x40
+#define IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO			0x80
+
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM			0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK			0x01
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_QPSK			0x02
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_16_QAM			0x03
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK			0x03
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1				0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_2				0x04
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM			0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_BPSK			0x08
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_QPSK			0x10
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_16_QAM			0x18
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK			0x18
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1				0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_2				0x20
+#define IEEE80211_HE_PHY_CAP3_RX_HE_MU_PPDU_FROM_NON_AP_STA		0x40
+#define IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER				0x80
+
+#define IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE				0x01
+#define IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER				0x02
+
+/* Minimal allowed value of Max STS under 80MHz is 3 */
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4		0x0c
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_5		0x10
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_6		0x14
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_7		0x18
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8		0x1c
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_MASK	0x1c
+
+/* Minimal allowed value of Max STS above 80MHz is 3 */
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4		0x60
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_5		0x80
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_6		0xa0
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_7		0xc0
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_8		0xe0
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_MASK	0xe0
+
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_1	0x00
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2	0x01
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_3	0x02
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_4	0x03
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_5	0x04
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_6	0x05
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_7	0x06
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_8	0x07
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK	0x07
+
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_1	0x00
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2	0x08
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_3	0x10
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_4	0x18
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_5	0x20
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_6	0x28
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_7	0x30
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_8	0x38
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK	0x38
+
+#define IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK				0x40
+#define IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK				0x80
+
+#define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU			0x01
+#define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU			0x02
+#define IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB			0x04
+#define IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB			0x08
+#define IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB				0x10
+#define IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE			0x20
+#define IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO		0x40
+#define IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT			0x80
+
+#define IEEE80211_HE_PHY_CAP7_SRP_BASED_SR				0x01
+#define IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR			0x02
+#define IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI		0x04
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_1					0x08
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_2					0x10
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_3					0x18
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_4					0x20
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_5					0x28
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_6					0x30
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_7					0x38
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_MASK				0x38
+#define IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ			0x40
+#define IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ			0x80
+
+#define IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI		0x01
+#define IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G		0x02
+#define IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU			0x04
+#define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU			0x08
+#define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI		0x10
+#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_2X_AND_1XLTF			0x20
+
+/* 802.11ax HE TX/RX MCS NSS Support  */
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS			(3)
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_POS			(6)
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_POS			(11)
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_MASK			0x07c0
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_MASK			0xf800
+
+/* TX/RX HE MCS Support field Highest MCS subfield encoding */
+enum ieee80211_he_highest_mcs_supported_subfield_enc {
+	HIGHEST_MCS_SUPPORTED_MCS7 = 0,
+	HIGHEST_MCS_SUPPORTED_MCS8,
+	HIGHEST_MCS_SUPPORTED_MCS9,
+	HIGHEST_MCS_SUPPORTED_MCS10,
+	HIGHEST_MCS_SUPPORTED_MCS11,
+};
+
+/* Calculate 802.11ax HE capabilities IE Tx/Rx HE MCS NSS Support Field size */
+static inline u8
+ieee80211_he_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap)
+{
+	u8 count = 4;
+
+	if (he_cap->phy_cap_info[0] &
+	    IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G)
+		count += 4;
+
+	if (he_cap->phy_cap_info[0] &
+	    IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)
+		count += 4;
+
+	return count;
+}
+
+/* 802.11ax HE PPE Thresholds */
+#define IEEE80211_PPE_THRES_NSS_SUPPORT_2NSS			(1)
+#define IEEE80211_PPE_THRES_NSS_POS				(0)
+#define IEEE80211_PPE_THRES_NSS_MASK				(7)
+#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_2x966_AND_966_RU	\
+	(BIT(5) | BIT(6))
+#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK		0x78
+#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_POS		(3)
+#define IEEE80211_PPE_THRES_INFO_PPET_SIZE			(3)
+
+/*
+ * Calculate 802.11ax HE capabilities IE PPE field size
+ * Input: Header byte of ppe_thres (first byte), and HE capa IE's PHY cap u8*
+ */
+static inline u8
+ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
+{
+	u8 n;
+
+	if ((phy_cap_info[6] &
+	     IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) == 0)
+		return 0;
+
+	n = hweight8(ppe_thres_hdr &
+		     IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK);
+	n *= (1 + ((ppe_thres_hdr & IEEE80211_PPE_THRES_NSS_MASK) >>
+		   IEEE80211_PPE_THRES_NSS_POS));
+
+	/*
+	 * Each pair is 6 bits, and we need to add the 7 "header" bits to the
+	 * total size.
+	 */
+	n = (n * IEEE80211_PPE_THRES_INFO_PPET_SIZE * 2) + 7;
+	n = DIV_ROUND_UP(n, 8);
+
+	return n;
+}
+
+/* HE Operation defines */
+#define IEEE80211_HE_OPERATION_BSS_COLOR_MASK			0x0000003f
+#define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK		0x000001c0
+#define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_OFFSET		6
+#define IEEE80211_HE_OPERATION_TWT_REQUIRED			0x00000200
+#define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK		0x000ffc00
+#define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET		10
+#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR		0x000100000
+#define IEEE80211_HE_OPERATION_VHT_OPER_INFO			0x000200000
+#define IEEE80211_HE_OPERATION_MULTI_BSSID_AP			0x10000000
+#define IEEE80211_HE_OPERATION_TX_BSSID_INDICATOR		0x20000000
+#define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED		0x40000000
+
+/*
+ * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size
+ * @he_oper_ie: byte data of the He Operations IE, stating from the the byte
+ *	after the ext ID byte. It is assumed that he_oper_ie has at least
+ *	sizeof(struct ieee80211_he_operation) bytes, checked already in
+ *	ieee802_11_parse_elems_crc()
+ * @return the actual size of the IE data (not including header), or 0 on error
+ */
+static inline u8
+ieee80211_he_oper_size(const u8 *he_oper_ie)
+{
+	struct ieee80211_he_operation *he_oper = (void *)he_oper_ie;
+	u8 oper_len = sizeof(struct ieee80211_he_operation);
+	u32 he_oper_params;
+
+	/* Make sure the input is not NULL */
+	if (!he_oper_ie)
+		return 0;
+
+	/* Calc required length */
+	he_oper_params = le32_to_cpu(he_oper->he_oper_params);
+	if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO)
+		oper_len += 3;
+	if (he_oper_params & IEEE80211_HE_OPERATION_MULTI_BSSID_AP)
+		oper_len++;
+
+	/* Add the first byte (extension ID) to the total length */
+	oper_len++;
+
+	return oper_len;
+}
+
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
 #define WLAN_AUTH_SHARED_KEY 1
@@ -1992,6 +2414,11 @@ enum ieee80211_eid_ext {
 	WLAN_EID_EXT_FILS_WRAPPED_DATA = 8,
 	WLAN_EID_EXT_FILS_PUBLIC_KEY = 12,
 	WLAN_EID_EXT_FILS_NONCE = 13,
+	WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE = 14,
+	WLAN_EID_EXT_HE_CAPABILITY = 35,
+	WLAN_EID_EXT_HE_OPERATION = 36,
+	WLAN_EID_EXT_UORA = 37,
+	WLAN_EID_EXT_HE_MU_EDCA = 38,
 };
 
 /* Action category code */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5fbfe61f41c6..9ba1f289c439 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -285,6 +285,41 @@ struct ieee80211_sta_vht_cap {
 	struct ieee80211_vht_mcs_info vht_mcs;
 };
 
+#define IEEE80211_HE_PPE_THRES_MAX_LEN		25
+
+/**
+ * struct ieee80211_sta_he_cap - STA's HE capabilities
+ *
+ * This structure describes most essential parameters needed
+ * to describe 802.11ax HE capabilities for a STA.
+ *
+ * @has_he: true iff HE data is valid.
+ * @he_cap_elem: Fixed portion of the HE capabilities element.
+ * @he_mcs_nss_supp: The supported NSS/MCS combinations.
+ * @ppe_thres: Holds the PPE Thresholds data.
+ */
+struct ieee80211_sta_he_cap {
+	bool has_he;
+	struct ieee80211_he_cap_elem he_cap_elem;
+	struct ieee80211_he_mcs_nss_supp he_mcs_nss_supp;
+	u8 ppe_thres[IEEE80211_HE_PPE_THRES_MAX_LEN];
+};
+
+/**
+ * struct ieee80211_sband_iftype_data
+ *
+ * This structure encapsulates sband data that is relevant for the
+ * interface types defined in @types_mask.  Each type in the
+ * @types_mask must be unique across all instances of iftype_data.
+ *
+ * @types_mask: interface types mask
+ * @he_cap: holds the HE capabilities
+ */
+struct ieee80211_sband_iftype_data {
+	u16 types_mask;
+	struct ieee80211_sta_he_cap he_cap;
+};
+
 /**
  * struct ieee80211_supported_band - frequency band definition
  *
@@ -301,6 +336,11 @@ struct ieee80211_sta_vht_cap {
  * @n_bitrates: Number of bitrates in @bitrates
  * @ht_cap: HT capabilities in this band
  * @vht_cap: VHT capabilities in this band
+ * @n_iftype_data: number of iftype data entries
+ * @iftype_data: interface type data entries.  Note that the bits in
+ *	@types_mask inside this structure cannot overlap (i.e. only
+ *	one occurrence of each type is allowed across all instances of
+ *	iftype_data).
  */
 struct ieee80211_supported_band {
 	struct ieee80211_channel *channels;
@@ -310,8 +350,55 @@ struct ieee80211_supported_band {
 	int n_bitrates;
 	struct ieee80211_sta_ht_cap ht_cap;
 	struct ieee80211_sta_vht_cap vht_cap;
+	u16 n_iftype_data;
+	const struct ieee80211_sband_iftype_data *iftype_data;
 };
 
+/**
+ * ieee80211_get_sband_iftype_data - return sband data for a given iftype
+ * @sband: the sband to search for the STA on
+ * @iftype: enum nl80211_iftype
+ *
+ * Return: pointer to struct ieee80211_sband_iftype_data, or NULL is none found
+ */
+static inline const struct ieee80211_sband_iftype_data *
+ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
+				u8 iftype)
+{
+	int i;
+
+	if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
+		return NULL;
+
+	for (i = 0; i < sband->n_iftype_data; i++)  {
+		const struct ieee80211_sband_iftype_data *data =
+			&sband->iftype_data[i];
+
+		if (data->types_mask & BIT(iftype))
+			return data;
+	}
+
+	return NULL;
+}
+
+/**
+ * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA
+ * @sband: the sband to search for the STA on
+ *
+ * Return: pointer to the struct ieee80211_sta_he_cap, or NULL is none found
+ */
+static inline const struct ieee80211_sta_he_cap *
+ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband)
+{
+	const struct ieee80211_sband_iftype_data *data =
+		ieee80211_get_sband_iftype_data(sband, NL80211_IFTYPE_STATION);
+
+	if (data && data->he_cap.has_he)
+		return &data->he_cap;
+
+	return NULL;
+}
+
 /**
  * wiphy_read_of_freq_limits - read frequency limits from device tree
  *
@@ -899,6 +986,8 @@ enum station_parameters_apply_mask {
  * @opmode_notif: operating mode field from Operating Mode Notification
  * @opmode_notif_used: information if operating mode field is used
  * @support_p2p_ps: information if station supports P2P PS mechanism
+ * @he_capa: HE capabilities of station
+ * @he_capa_len: the length of the HE capabilities
  */
 struct station_parameters {
 	const u8 *supported_rates;
@@ -926,6 +1015,8 @@ struct station_parameters {
 	u8 opmode_notif;
 	bool opmode_notif_used;
 	int support_p2p_ps;
+	const struct ieee80211_he_cap_elem *he_capa;
+	u8 he_capa_len;
 };
 
 /**
@@ -1000,12 +1091,14 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
  * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS
  * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval
  * @RATE_INFO_FLAGS_60G: 60GHz MCS
+ * @RATE_INFO_FLAGS_HE_MCS: HE MCS information
  */
 enum rate_info_flags {
 	RATE_INFO_FLAGS_MCS			= BIT(0),
 	RATE_INFO_FLAGS_VHT_MCS			= BIT(1),
 	RATE_INFO_FLAGS_SHORT_GI		= BIT(2),
 	RATE_INFO_FLAGS_60G			= BIT(3),
+	RATE_INFO_FLAGS_HE_MCS			= BIT(4),
 };
 
 /**
@@ -1019,6 +1112,7 @@ enum rate_info_flags {
  * @RATE_INFO_BW_40: 40 MHz bandwidth
  * @RATE_INFO_BW_80: 80 MHz bandwidth
  * @RATE_INFO_BW_160: 160 MHz bandwidth
+ * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation
  */
 enum rate_info_bw {
 	RATE_INFO_BW_20 = 0,
@@ -1027,6 +1121,7 @@ enum rate_info_bw {
 	RATE_INFO_BW_40,
 	RATE_INFO_BW_80,
 	RATE_INFO_BW_160,
+	RATE_INFO_BW_HE_RU,
 };
 
 /**
@@ -1035,10 +1130,14 @@ enum rate_info_bw {
  * Information about a receiving or transmitting bitrate
  *
  * @flags: bitflag of flags from &enum rate_info_flags
- * @mcs: mcs index if struct describes a 802.11n bitrate
+ * @mcs: mcs index if struct describes an HT/VHT/HE rate
  * @legacy: bitrate in 100kbit/s for 802.11abg
- * @nss: number of streams (VHT only)
+ * @nss: number of streams (VHT & HE only)
  * @bw: bandwidth (from &enum rate_info_bw)
+ * @he_gi: HE guard interval (from &enum nl80211_he_gi)
+ * @he_dcm: HE DCM value
+ * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc,
+ *	only valid if bw is %RATE_INFO_BW_HE_RU)
  */
 struct rate_info {
 	u8 flags;
@@ -1046,6 +1145,9 @@ struct rate_info {
 	u16 legacy;
 	u8 nss;
 	u8 bw;
+	u8 he_gi;
+	u8 he_dcm;
+	u8 he_ru_alloc;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 49f718e821a3..f82ce3c89ab7 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2237,6 +2237,9 @@ enum nl80211_commands {
  *      enforced.
  * @NL80211_ATTR_TXQ_QUANTUM: TXQ scheduler quantum (bytes). Number of bytes
  *      a flow is assigned on each round of the DRR scheduler.
+ * @NL80211_ATTR_HE_CAPABILITY: HE Capability information element (from
+ *	association request when used with NL80211_CMD_NEW_STATION). Can be set
+ *	only if %NL80211_STA_FLAG_WME is set.
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2677,6 +2680,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_TXQ_MEMORY_LIMIT,
 	NL80211_ATTR_TXQ_QUANTUM,
 
+	NL80211_ATTR_HE_CAPABILITY,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -2726,7 +2731,8 @@ enum nl80211_attrs {
 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY	24
 #define NL80211_HT_CAPABILITY_LEN		26
 #define NL80211_VHT_CAPABILITY_LEN		12
-
+#define NL80211_HE_MIN_CAPABILITY_LEN           16
+#define NL80211_HE_MAX_CAPABILITY_LEN           51
 #define NL80211_MAX_NR_CIPHER_SUITES		5
 #define NL80211_MAX_NR_AKM_SUITES		2
 
@@ -2853,6 +2859,38 @@ struct nl80211_sta_flag_update {
 	__u32 set;
 } __attribute__((packed));
 
+/**
+ * enum nl80211_he_gi - HE guard interval
+ * @NL80211_RATE_INFO_HE_GI_0_8: 0.8 usec
+ * @NL80211_RATE_INFO_HE_GI_1_6: 1.6 usec
+ * @NL80211_RATE_INFO_HE_GI_3_2: 3.2 usec
+ */
+enum nl80211_he_gi {
+	NL80211_RATE_INFO_HE_GI_0_8,
+	NL80211_RATE_INFO_HE_GI_1_6,
+	NL80211_RATE_INFO_HE_GI_3_2,
+};
+
+/**
+ * enum nl80211_he_ru_alloc - HE RU allocation values
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_52: 52-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_106: 106-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_242: 242-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_484: 484-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_996: 996-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_2x996: 2x996-tone RU allocation
+ */
+enum nl80211_he_ru_alloc {
+	NL80211_RATE_INFO_HE_RU_ALLOC_26,
+	NL80211_RATE_INFO_HE_RU_ALLOC_52,
+	NL80211_RATE_INFO_HE_RU_ALLOC_106,
+	NL80211_RATE_INFO_HE_RU_ALLOC_242,
+	NL80211_RATE_INFO_HE_RU_ALLOC_484,
+	NL80211_RATE_INFO_HE_RU_ALLOC_996,
+	NL80211_RATE_INFO_HE_RU_ALLOC_2x996,
+};
+
 /**
  * enum nl80211_rate_info - bitrate information
  *
@@ -2885,6 +2923,13 @@ struct nl80211_sta_flag_update {
  * @NL80211_RATE_INFO_5_MHZ_WIDTH: 5 MHz width - note that this is
  *	a legacy rate and will be reported as the actual bitrate, i.e.
  *	a quarter of the base (20 MHz) rate
+ * @NL80211_RATE_INFO_HE_MCS: HE MCS index (u8, 0-11)
+ * @NL80211_RATE_INFO_HE_NSS: HE NSS value (u8, 1-8)
+ * @NL80211_RATE_INFO_HE_GI: HE guard interval identifier
+ *	(u8, see &enum nl80211_he_gi)
+ * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1)
+ * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then
+ *	non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc)
  * @__NL80211_RATE_INFO_AFTER_LAST: internal use
  */
 enum nl80211_rate_info {
@@ -2901,6 +2946,11 @@ enum nl80211_rate_info {
 	NL80211_RATE_INFO_160_MHZ_WIDTH,
 	NL80211_RATE_INFO_10_MHZ_WIDTH,
 	NL80211_RATE_INFO_5_MHZ_WIDTH,
+	NL80211_RATE_INFO_HE_MCS,
+	NL80211_RATE_INFO_HE_NSS,
+	NL80211_RATE_INFO_HE_GI,
+	NL80211_RATE_INFO_HE_DCM,
+	NL80211_RATE_INFO_HE_RU_ALLOC,
 
 	/* keep last */
 	__NL80211_RATE_INFO_AFTER_LAST,
@@ -3166,6 +3216,38 @@ enum nl80211_mpath_info {
 	NL80211_MPATH_INFO_MAX = __NL80211_MPATH_INFO_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_band_iftype_attr - Interface type data attributes
+ *
+ * @__NL80211_BAND_IFTYPE_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_BAND_IFTYPE_ATTR_IFTYPES: nested attribute containing a flag attribute
+ *     for each interface type that supports the band data
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC: HE MAC capabilities as in HE
+ *     capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY: HE PHY capabilities as in HE
+ *     capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET: HE supported NSS/MCS as in HE
+ *     capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as
+ *     defined in HE capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently
+ *     defined
+ * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_band_iftype_attr {
+	__NL80211_BAND_IFTYPE_ATTR_INVALID,
+
+	NL80211_BAND_IFTYPE_ATTR_IFTYPES,
+	NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC,
+	NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY,
+	NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
+	NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
+
+	/* keep last */
+	__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST,
+	NL80211_BAND_IFTYPE_ATTR_MAX = __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST - 1
+};
+
 /**
  * enum nl80211_band_attr - band attributes
  * @__NL80211_BAND_ATTR_INVALID: attribute number 0 is reserved
@@ -3181,6 +3263,8 @@ enum nl80211_mpath_info {
  * @NL80211_BAND_ATTR_VHT_MCS_SET: 32-byte attribute containing the MCS set as
  *	defined in 802.11ac
  * @NL80211_BAND_ATTR_VHT_CAPA: VHT capabilities, as in the HT information IE
+ * @NL80211_BAND_ATTR_IFTYPE_DATA: nested array attribute, with each entry using
+ *	attributes from &enum nl80211_band_iftype_attr
  * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined
  * @__NL80211_BAND_ATTR_AFTER_LAST: internal use
  */
@@ -3196,6 +3280,7 @@ enum nl80211_band_attr {
 
 	NL80211_BAND_ATTR_VHT_MCS_SET,
 	NL80211_BAND_ATTR_VHT_CAPA,
+	NL80211_BAND_ATTR_IFTYPE_DATA,
 
 	/* keep last */
 	__NL80211_BAND_ATTR_AFTER_LAST,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5fe35aafdd9c..d23abc619e77 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -3,7 +3,7 @@
  *
  * Copyright 2006-2010		Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright 2015	Intel Deutschland GmbH
+ * Copyright 2015-2017	Intel Deutschland GmbH
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -744,6 +744,8 @@ int wiphy_register(struct wiphy *wiphy)
 
 	/* sanity check supported bands/channels */
 	for (band = 0; band < NUM_NL80211_BANDS; band++) {
+		u16 types = 0;
+
 		sband = wiphy->bands[band];
 		if (!sband)
 			continue;
@@ -788,6 +790,23 @@ int wiphy_register(struct wiphy *wiphy)
 			sband->channels[i].band = band;
 		}
 
+		for (i = 0; i < sband->n_iftype_data; i++) {
+			const struct ieee80211_sband_iftype_data *iftd;
+
+			iftd = &sband->iftype_data[i];
+
+			if (WARN_ON(!iftd->types_mask))
+				return -EINVAL;
+			if (WARN_ON(types & iftd->types_mask))
+				return -EINVAL;
+
+			/* at least one piece of information must be present */
+			if (WARN_ON(!iftd->he_cap.has_he))
+				return -EINVAL;
+
+			types |= iftd->types_mask;
+		}
+
 		have_band = true;
 	}
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7b21914ae18b..0ccce338a66e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -428,6 +428,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
+	[NL80211_ATTR_HE_CAPABILITY] = { .type = NLA_BINARY,
+					 .len = NL80211_HE_MAX_CAPABILITY_LEN },
 };
 
 /* policy for the key attributes */
@@ -1324,6 +1326,34 @@ static int nl80211_send_coalesce(struct sk_buff *msg,
 	return 0;
 }
 
+static int
+nl80211_send_iftype_data(struct sk_buff *msg,
+			 const struct ieee80211_sband_iftype_data *iftdata)
+{
+	const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap;
+
+	if (nl80211_put_iftypes(msg, NL80211_BAND_IFTYPE_ATTR_IFTYPES,
+				iftdata->types_mask))
+		return -ENOBUFS;
+
+	if (he_cap->has_he) {
+		if (nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC,
+			    sizeof(he_cap->he_cap_elem.mac_cap_info),
+			    he_cap->he_cap_elem.mac_cap_info) ||
+		    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY,
+			    sizeof(he_cap->he_cap_elem.phy_cap_info),
+			    he_cap->he_cap_elem.phy_cap_info) ||
+		    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
+			    sizeof(he_cap->he_mcs_nss_supp),
+			    &he_cap->he_mcs_nss_supp) ||
+		    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
+			    sizeof(he_cap->ppe_thres), he_cap->ppe_thres))
+			return -ENOBUFS;
+	}
+
+	return 0;
+}
+
 static int nl80211_send_band_rateinfo(struct sk_buff *msg,
 				      struct ieee80211_supported_band *sband)
 {
@@ -1353,6 +1383,32 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg,
 			 sband->vht_cap.cap)))
 		return -ENOBUFS;
 
+	if (sband->n_iftype_data) {
+		struct nlattr *nl_iftype_data =
+			nla_nest_start(msg, NL80211_BAND_ATTR_IFTYPE_DATA);
+		int err;
+
+		if (!nl_iftype_data)
+			return -ENOBUFS;
+
+		for (i = 0; i < sband->n_iftype_data; i++) {
+			struct nlattr *iftdata;
+
+			iftdata = nla_nest_start(msg, i + 1);
+			if (!iftdata)
+				return -ENOBUFS;
+
+			err = nl80211_send_iftype_data(msg,
+						       &sband->iftype_data[i]);
+			if (err)
+				return err;
+
+			nla_nest_end(msg, iftdata);
+		}
+
+		nla_nest_end(msg, nl_iftype_data);
+	}
+
 	/* add bitrates */
 	nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
 	if (!nl_rates)
@@ -4472,6 +4528,9 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
 	case RATE_INFO_BW_160:
 		rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH;
 		break;
+	case RATE_INFO_BW_HE_RU:
+		rate_flg = 0;
+		WARN_ON(!(info->flags & RATE_INFO_FLAGS_HE_MCS));
 	}
 
 	if (rate_flg && nla_put_flag(msg, rate_flg))
@@ -4491,6 +4550,19 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
 		if (info->flags & RATE_INFO_FLAGS_SHORT_GI &&
 		    nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))
 			return false;
+	} else if (info->flags & RATE_INFO_FLAGS_HE_MCS) {
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_MCS, info->mcs))
+			return false;
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_NSS, info->nss))
+			return false;
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_GI, info->he_gi))
+			return false;
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_DCM, info->he_dcm))
+			return false;
+		if (info->bw == RATE_INFO_BW_HE_RU &&
+		    nla_put_u8(msg, NL80211_RATE_INFO_HE_RU_ALLOC,
+			       info->he_ru_alloc))
+			return false;
 	}
 
 	nla_nest_end(msg, rate);
@@ -4887,7 +4959,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
 			return -EINVAL;
 		if (params->supported_rates)
 			return -EINVAL;
-		if (params->ext_capab || params->ht_capa || params->vht_capa)
+		if (params->ext_capab || params->ht_capa || params->vht_capa ||
+		    params->he_capa)
 			return -EINVAL;
 	}
 
@@ -5093,6 +5166,15 @@ static int nl80211_set_station_tdls(struct genl_info *info,
 	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
 		params->vht_capa =
 			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
+	if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
+		params->he_capa =
+			nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+		params->he_capa_len =
+			nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+		if (params->he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN)
+			return -EINVAL;
+	}
 
 	err = nl80211_parse_sta_channel_info(info, params);
 	if (err)
@@ -5320,6 +5402,17 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		params.vht_capa =
 			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
 
+	if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
+		params.he_capa =
+			nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+		params.he_capa_len =
+			nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+		/* max len is validated in nla policy */
+		if (params.he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN)
+			return -EINVAL;
+	}
+
 	if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
 		params.opmode_notif_used = true;
 		params.opmode_notif =
@@ -5352,6 +5445,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 	if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) {
 		params.ht_capa = NULL;
 		params.vht_capa = NULL;
+
+		/* HE requires WME */
+		if (params.he_capa_len)
+			return -EINVAL;
 	}
 
 	/* When you run into this, adjust the code below for the new flag */
diff --git a/net/wireless/util.c b/net/wireless/util.c
index b91597a8baa2..4ed06b271f32 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -4,6 +4,7 @@
  *
  * Copyright 2007-2009	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright 2017	Intel Deutschland GmbH
  */
 #include <linux/export.h>
 #include <linux/bitops.h>
@@ -1142,6 +1143,85 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
 	return 0;
 }
 
+static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
+{
+#define SCALE 2048
+	u16 mcs_divisors[12] = {
+		34133, /* 16.666666... */
+		17067, /*  8.333333... */
+		11378, /*  5.555555... */
+		 8533, /*  4.166666... */
+		 5689, /*  2.777777... */
+		 4267, /*  2.083333... */
+		 3923, /*  1.851851... */
+		 3413, /*  1.666666... */
+		 2844, /*  1.388888... */
+		 2560, /*  1.250000... */
+		 2276, /*  1.111111... */
+		 2048, /*  1.000000... */
+	};
+	u32 rates_160M[3] = { 960777777, 907400000, 816666666 };
+	u32 rates_969[3] =  { 480388888, 453700000, 408333333 };
+	u32 rates_484[3] =  { 229411111, 216666666, 195000000 };
+	u32 rates_242[3] =  { 114711111, 108333333,  97500000 };
+	u32 rates_106[3] =  {  40000000,  37777777,  34000000 };
+	u32 rates_52[3]  =  {  18820000,  17777777,  16000000 };
+	u32 rates_26[3]  =  {   9411111,   8888888,   8000000 };
+	u64 tmp;
+	u32 result;
+
+	if (WARN_ON_ONCE(rate->mcs > 11))
+		return 0;
+
+	if (WARN_ON_ONCE(rate->he_gi > NL80211_RATE_INFO_HE_GI_3_2))
+		return 0;
+	if (WARN_ON_ONCE(rate->he_ru_alloc >
+			 NL80211_RATE_INFO_HE_RU_ALLOC_2x996))
+		return 0;
+	if (WARN_ON_ONCE(rate->nss < 1 || rate->nss > 8))
+		return 0;
+
+	if (rate->bw == RATE_INFO_BW_160)
+		result = rates_160M[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_80 ||
+		 (rate->bw == RATE_INFO_BW_HE_RU &&
+		  rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_996))
+		result = rates_969[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_40 ||
+		 (rate->bw == RATE_INFO_BW_HE_RU &&
+		  rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_484))
+		result = rates_484[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_20 ||
+		 (rate->bw == RATE_INFO_BW_HE_RU &&
+		  rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_242))
+		result = rates_242[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_HE_RU &&
+		 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_106)
+		result = rates_106[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_HE_RU &&
+		 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_52)
+		result = rates_52[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_HE_RU &&
+		 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26)
+		result = rates_26[rate->he_gi];
+	else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n",
+		      rate->bw, rate->he_ru_alloc))
+		return 0;
+
+	/* now scale to the appropriate MCS */
+	tmp = result;
+	tmp *= SCALE;
+	do_div(tmp, mcs_divisors[rate->mcs]);
+	result = tmp;
+
+	/* and take NSS, DCM into account */
+	result = (result * rate->nss) / 8;
+	if (rate->he_dcm)
+		result /= 2;
+
+	return result;
+}
+
 u32 cfg80211_calculate_bitrate(struct rate_info *rate)
 {
 	if (rate->flags & RATE_INFO_FLAGS_MCS)
@@ -1150,6 +1230,8 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate)
 		return cfg80211_calculate_bitrate_60g(rate);
 	if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
 		return cfg80211_calculate_bitrate_vht(rate);
+	if (rate->flags & RATE_INFO_FLAGS_HE_MCS)
+		return cfg80211_calculate_bitrate_he(rate);
 
 	return rate->legacy;
 }
-- 
cgit v1.2.3


From a85857633b04d57f4524cca0a2bfaf87b2543f9f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 25 Apr 2018 13:26:23 -0400
Subject: nfsd4: support change_attr_type attribute

The change attribute is what is used by clients to revalidate their
caches.  Our server may use i_version or ctime for that purpose.  Those
choices behave slightly differently, and it may be useful to the client
to know which we're using.  This attribute tells the client that.  The
Linux client doesn't yet use this attribute yet, though.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c    | 10 ++++++++++
 fs/nfsd/nfsd.h       |  1 +
 include/linux/nfs4.h |  8 ++++++++
 3 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 4161031ae14e..fb4991889f89 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2891,6 +2891,16 @@ out_acl:
 			goto out;
 	}
 
+	if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
+			goto out_resource;
+		if (IS_I_VERSION(d_inode(dentry)))
+			*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR);
+		else
+			*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
+	}
+
 	if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
 		status = nfsd4_encode_security_label(xdr, rqstp, context,
 								contextlen);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 3fce905d0365..066899929863 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -360,6 +360,7 @@ void		nfsd_lockd_shutdown(void);
 
 #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
 	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
+	FATTR4_WORD2_CHANGE_ATTR_TYPE | \
 	FATTR4_WORD2_MODE_UMASK | \
 	NFSD4_2_SECURITY_ATTRS)
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 57ffaa20d564..0877ed333733 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -374,6 +374,13 @@ enum lock_type4 {
 	NFS4_WRITEW_LT = 4
 };
 
+enum change_attr_type4 {
+	NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
+	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
+	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
+	NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
+	NFS4_CHANGE_TYPE_IS_UNDEFINED = 4
+};
 
 /* Mandatory Attributes */
 #define FATTR4_WORD0_SUPPORTED_ATTRS    (1UL << 0)
@@ -441,6 +448,7 @@ enum lock_type4 {
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_CLONE_BLKSIZE	(1UL << 13)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE	(1UL << 15)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
 #define FATTR4_WORD2_MODE_UMASK		(1UL << 17)
 
-- 
cgit v1.2.3


From eb3744a2dd01cb07ce9f556d56d6fe451f0c313a Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Wed, 13 Jun 2018 09:10:37 +0530
Subject: gpio: davinci: Do not assume continuous IRQ numbering

Currently the driver assumes that the interrupts are continuous
and does platform_get_irq only once and assumes the rest are continuous,
instead call platform_get_irq for all the interrupts and store them
in an array for later use.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-davinci.c                | 63 ++++++++++++++++++++----------
 include/linux/platform_data/gpio-davinci.h |  3 +-
 2 files changed, 44 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 423d37c95e6b..a5ece8ea79bc 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -55,7 +55,7 @@ static inline struct davinci_gpio_regs __iomem *irq2regs(struct irq_data *d)
 	return g;
 }
 
-static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq);
+static int davinci_gpio_irq_setup(struct platform_device *pdev);
 
 /*--------------------------------------------------------------------------*/
 
@@ -167,8 +167,8 @@ of_err:
 static int davinci_gpio_probe(struct platform_device *pdev)
 {
 	static int ctrl_num, bank_base;
-	int gpio, bank, bank_irq, ret = 0;
-	unsigned ngpio, nbank;
+	int gpio, bank, i, ret = 0;
+	unsigned int ngpio, nbank, nirq;
 	struct davinci_gpio_controller *chips;
 	struct davinci_gpio_platform_data *pdata;
 	struct device *dev = &pdev->dev;
@@ -197,6 +197,16 @@ static int davinci_gpio_probe(struct platform_device *pdev)
 	if (WARN_ON(ARCH_NR_GPIOS < ngpio))
 		ngpio = ARCH_NR_GPIOS;
 
+	/*
+	 * If there are unbanked interrupts then the number of
+	 * interrupts is equal to number of gpios else all are banked so
+	 * number of interrupts is equal to number of banks(each with 16 gpios)
+	 */
+	if (pdata->gpio_unbanked)
+		nirq = pdata->gpio_unbanked;
+	else
+		nirq = DIV_ROUND_UP(ngpio, 16);
+
 	nbank = DIV_ROUND_UP(ngpio, 32);
 	chips = devm_kcalloc(dev,
 			     nbank, sizeof(struct davinci_gpio_controller),
@@ -209,10 +219,13 @@ static int davinci_gpio_probe(struct platform_device *pdev)
 	if (IS_ERR(gpio_base))
 		return PTR_ERR(gpio_base);
 
-	bank_irq = platform_get_irq(pdev, 0);
-	if (bank_irq < 0) {
-		dev_dbg(dev, "IRQ not populated\n");
-		return bank_irq;
+	for (i = 0; i < nirq; i++) {
+		chips->irqs[i] = platform_get_irq(pdev, i);
+		if (chips->irqs[i] < 0) {
+			dev_info(dev, "IRQ not populated, err = %d\n",
+				 chips->irqs[i]);
+			return chips->irqs[i];
+		}
 	}
 
 	snprintf(label, MAX_LABEL_SIZE, "davinci_gpio.%d", ctrl_num++);
@@ -249,7 +262,7 @@ static int davinci_gpio_probe(struct platform_device *pdev)
 		goto err;
 
 	platform_set_drvdata(pdev, chips);
-	ret = davinci_gpio_irq_setup(pdev, bank_irq);
+	ret = davinci_gpio_irq_setup(pdev);
 	if (ret)
 		goto err;
 
@@ -383,7 +396,7 @@ static int gpio_to_irq_unbanked(struct gpio_chip *chip, unsigned offset)
 	 * can provide direct-mapped IRQs to AINTC (up to 32 GPIOs).
 	 */
 	if (offset < d->gpio_unbanked)
-		return d->base_irq + offset;
+		return d->irqs[offset];
 	else
 		return -ENODEV;
 }
@@ -392,11 +405,18 @@ static int gpio_irq_type_unbanked(struct irq_data *data, unsigned trigger)
 {
 	struct davinci_gpio_controller *d;
 	struct davinci_gpio_regs __iomem *g;
-	u32 mask;
+	u32 mask, i;
 
 	d = (struct davinci_gpio_controller *)irq_data_get_irq_handler_data(data);
 	g = (struct davinci_gpio_regs __iomem *)d->regs[0];
-	mask = __gpio_mask(data->irq - d->base_irq);
+	for (i = 0; i < MAX_INT_PER_BANK; i++)
+		if (data->irq == d->irqs[i])
+			break;
+
+	if (i == MAX_INT_PER_BANK)
+		return -EINVAL;
+
+	mask = __gpio_mask(i);
 
 	if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
 		return -EINVAL;
@@ -458,7 +478,7 @@ static const struct of_device_id davinci_gpio_ids[];
  * (dm6446) can be set appropriately for GPIOV33 pins.
  */
 
-static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
+static int davinci_gpio_irq_setup(struct platform_device *pdev)
 {
 	unsigned	gpio, bank;
 	int		irq;
@@ -492,6 +512,7 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
 		dev_err(dev, "Error %ld getting gpio clock\n", PTR_ERR(clk));
 		return PTR_ERR(clk);
 	}
+
 	ret = clk_prepare_enable(clk);
 	if (ret)
 		return ret;
@@ -531,12 +552,11 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
 	if (pdata->gpio_unbanked) {
 		/* pass "bank 0" GPIO IRQs to AINTC */
 		chips->chip.to_irq = gpio_to_irq_unbanked;
-		chips->base_irq = bank_irq;
 		chips->gpio_unbanked = pdata->gpio_unbanked;
 		binten = GENMASK(pdata->gpio_unbanked / 16, 0);
 
 		/* AINTC handles mask/unmask; GPIO handles triggering */
-		irq = bank_irq;
+		irq = chips->irqs[0];
 		irq_chip = gpio_get_irq_chip(irq);
 		irq_chip->name = "GPIO-AINTC";
 		irq_chip->irq_set_type = gpio_irq_type_unbanked;
@@ -547,10 +567,11 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
 		writel_relaxed(~0, &g->set_rising);
 
 		/* set the direct IRQs up to use that irqchip */
-		for (gpio = 0; gpio < pdata->gpio_unbanked; gpio++, irq++) {
-			irq_set_chip(irq, irq_chip);
-			irq_set_handler_data(irq, chips);
-			irq_set_status_flags(irq, IRQ_TYPE_EDGE_BOTH);
+		for (gpio = 0; gpio < pdata->gpio_unbanked; gpio++) {
+			irq_set_chip(chips->irqs[gpio], irq_chip);
+			irq_set_handler_data(chips->irqs[gpio], chips);
+			irq_set_status_flags(chips->irqs[gpio],
+					     IRQ_TYPE_EDGE_BOTH);
 		}
 
 		goto done;
@@ -560,7 +581,7 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
 	 * Or, AINTC can handle IRQs for banks of 16 GPIO IRQs, which we
 	 * then chain through our own handler.
 	 */
-	for (gpio = 0, bank = 0; gpio < ngpio; bank++, bank_irq++, gpio += 16) {
+	for (gpio = 0, bank = 0; gpio < ngpio; bank++, gpio += 16) {
 		/* disabled by default, enabled only as needed
 		 * There are register sets for 32 GPIOs. 2 banks of 16
 		 * GPIOs are covered by each set of registers hence divide by 2
@@ -587,8 +608,8 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
 		irqdata->bank_num = bank;
 		irqdata->chip = chips;
 
-		irq_set_chained_handler_and_data(bank_irq, gpio_irq_handler,
-						 irqdata);
+		irq_set_chained_handler_and_data(chips->irqs[bank],
+						 gpio_irq_handler, irqdata);
 
 		binten |= BIT(bank);
 	}
diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index 90ae19ca828f..57a5a35e0073 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -22,6 +22,7 @@
 #include <asm-generic/gpio.h>
 
 #define MAX_REGS_BANKS		5
+#define MAX_INT_PER_BANK 32
 
 struct davinci_gpio_platform_data {
 	u32	ngpio;
@@ -41,7 +42,7 @@ struct davinci_gpio_controller {
 	spinlock_t		lock;
 	void __iomem		*regs[MAX_REGS_BANKS];
 	int			gpio_unbanked;
-	unsigned int		base_irq;
+	int			irqs[MAX_INT_PER_BANK];
 	unsigned int		base;
 };
 
-- 
cgit v1.2.3


From 90b39402e9f31c4aab48dc1a43d85a724065793f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 1 Jun 2018 13:21:27 +0200
Subject: gpio: Add API to explicitly name a consumer

The GPIO (descriptor) API registers a "label" naming what is
currently using the GPIO line. Typically this is taken from
things like the device tree node, so "reset-gpios" will result
in he line being labeled "reset".

The technical effect is pretty much zero: the use is for
debug and introspection, such as "lsgpio" and debugfs files.

However sometimes the user want this cuddly feeling of
listing all GPIO lines and seeing exactly what they are for
and it gives a very fulfilling sense of control. Especially
in the cases when the device tree node doesn't provide a
good name, or anonymous GPIO lines assigned just to
"gpios" in the device tree because the usage is implicit.

For these cases it may be nice to be able to label the
line directly and explicitly.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c        | 13 +++++++++++++
 include/linux/gpio/consumer.h |  7 +++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e11a3bb03820..c6f77e806cb8 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3193,6 +3193,19 @@ int gpiod_cansleep(const struct gpio_desc *desc)
 }
 EXPORT_SYMBOL_GPL(gpiod_cansleep);
 
+/**
+ * gpiod_set_consumer_name() - set the consumer name for the descriptor
+ * @desc: gpio to set the consumer name on
+ * @name: the new consumer name
+ */
+void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name)
+{
+	VALIDATE_DESC_VOID(desc);
+	/* Just overwrite whatever the previous name was */
+	desc->label = name;
+}
+EXPORT_SYMBOL_GPL(gpiod_set_consumer_name);
+
 /**
  * gpiod_to_irq() - return the IRQ corresponding to a GPIO
  * @desc: gpio whose IRQ will be returned (already requested)
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 243112c7fa7d..e8aaf34dd65d 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -145,6 +145,7 @@ int gpiod_is_active_low(const struct gpio_desc *desc);
 int gpiod_cansleep(const struct gpio_desc *desc);
 
 int gpiod_to_irq(const struct gpio_desc *desc);
+void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name);
 
 /* Convert between the old gpio_ and new gpiod_ interfaces */
 struct gpio_desc *gpio_to_desc(unsigned gpio);
@@ -467,6 +468,12 @@ static inline int gpiod_to_irq(const struct gpio_desc *desc)
 	return -EINVAL;
 }
 
+static inline void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
 static inline struct gpio_desc *gpio_to_desc(unsigned gpio)
 {
 	return ERR_PTR(-EINVAL);
-- 
cgit v1.2.3


From f2b1d2f94af887c91bb8a0cfb495e546331bc5ed Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 30 May 2018 17:25:13 +0200
Subject: soc: renesas: rcar-sysc: Provide helpers to power up/down CPUs

Provide helpers to control CPU power areas from platform code, taking
just a CPU index.  This will avoid having to pass full CPU power area
parameter blocks, and thus duplicating information already provided by
SoC-specific SYSC drivers.

This will be used on R-Car H1 only.
Later R-Car generations rely on APMU/RST for CPU power area control.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/soc/renesas/rcar-sysc.c       | 40 +++++++++++++++++++++++++++++++++++
 include/linux/soc/renesas/rcar-sysc.h |  2 ++
 2 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/soc/renesas/rcar-sysc.c b/drivers/soc/renesas/rcar-sysc.c
index 95120acc4d80..4ad6dcd19420 100644
--- a/drivers/soc/renesas/rcar-sysc.c
+++ b/drivers/soc/renesas/rcar-sysc.c
@@ -310,6 +310,8 @@ struct rcar_pm_domains {
 	struct generic_pm_domain *domains[RCAR_PD_ALWAYS_ON + 1];
 };
 
+static struct genpd_onecell_data *rcar_sysc_onecell_data;
+
 static int __init rcar_sysc_pd_init(void)
 {
 	const struct rcar_sysc_info *info;
@@ -356,6 +358,7 @@ static int __init rcar_sysc_pd_init(void)
 
 	domains->onecell_data.domains = domains->domains;
 	domains->onecell_data.num_domains = ARRAY_SIZE(domains->domains);
+	rcar_sysc_onecell_data = &domains->onecell_data;
 
 	for (i = 0, syscier = 0; i < info->num_areas; i++)
 		syscier |= BIT(info->areas[i].isr_bit);
@@ -449,3 +452,40 @@ void __init rcar_sysc_init(phys_addr_t base, u32 syscier)
 	pr_debug("%s: syscier = 0x%08x\n", __func__, syscier);
 	iowrite32(syscier, rcar_sysc_base + SYSCIER);
 }
+
+#ifdef CONFIG_ARCH_R8A7779
+static int rcar_sysc_power_cpu(unsigned int idx, bool on)
+{
+	struct generic_pm_domain *genpd;
+	struct rcar_sysc_pd *pd;
+	unsigned int i;
+
+	if (!rcar_sysc_onecell_data)
+		return -ENODEV;
+
+	for (i = 0; i < rcar_sysc_onecell_data->num_domains; i++) {
+		genpd = rcar_sysc_onecell_data->domains[i];
+		if (!genpd)
+			continue;
+
+		pd = to_rcar_pd(genpd);
+		if (!(pd->flags & PD_CPU) || pd->ch.chan_bit != idx)
+			continue;
+
+		return on ? rcar_sysc_power_up(&pd->ch)
+			  : rcar_sysc_power_down(&pd->ch);
+	}
+
+	return -ENOENT;
+}
+
+int rcar_sysc_power_down_cpu(unsigned int cpu)
+{
+	return rcar_sysc_power_cpu(cpu, false);
+}
+
+int rcar_sysc_power_up_cpu(unsigned int cpu)
+{
+	return rcar_sysc_power_cpu(cpu, true);
+}
+#endif /* CONFIG_ARCH_R8A7779 */
diff --git a/include/linux/soc/renesas/rcar-sysc.h b/include/linux/soc/renesas/rcar-sysc.h
index 8a6086d2e9c3..9020da2111fd 100644
--- a/include/linux/soc/renesas/rcar-sysc.h
+++ b/include/linux/soc/renesas/rcar-sysc.h
@@ -13,5 +13,7 @@ struct rcar_sysc_ch {
 int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch);
 int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch);
 void rcar_sysc_init(phys_addr_t base, u32 syscier);
+int rcar_sysc_power_down_cpu(unsigned int cpu);
+int rcar_sysc_power_up_cpu(unsigned int cpu);
 
 #endif /* __LINUX_SOC_RENESAS_RCAR_SYSC_H__ */
-- 
cgit v1.2.3


From 7e8a50df26f4e7003d09f7e8d1e57fbbb7ebb750 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 30 May 2018 17:25:16 +0200
Subject: soc: renesas: rcar-sysc: Drop legacy handling

Now the R-Car platform code no longer supports DTBs lacking a SYSC
device node in DT, all legacy handling can be dropped from the R-Car
SYSC driver:
  - Make rcar_sysc_ch private to the driver,
  - Make rcar_sysc_power_{down,up}() static (they have been replaced by
    rcar_sysc_power_{down,up}_cpu()),
  - Remove the legacy wrapper rcar_sysc_init(), and the check for double
    initialization (only the early_initcall is left).

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/soc/renesas/rcar-sysc.c       | 38 ++++++++---------------------------
 include/linux/soc/renesas/rcar-sysc.h | 11 ----------
 2 files changed, 8 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/renesas/rcar-sysc.c b/drivers/soc/renesas/rcar-sysc.c
index 4ad6dcd19420..41af9c7b912f 100644
--- a/drivers/soc/renesas/rcar-sysc.c
+++ b/drivers/soc/renesas/rcar-sysc.c
@@ -58,6 +58,12 @@
 
 #define RCAR_PD_ALWAYS_ON	32	/* Always-on power area */
 
+struct rcar_sysc_ch {
+	u16 chan_offs;
+	u8 chan_bit;
+	u8 isr_bit;
+};
+
 static void __iomem *rcar_sysc_base;
 static DEFINE_SPINLOCK(rcar_sysc_lock); /* SMP CPUs + I/O devices */
 
@@ -143,12 +149,12 @@ static int rcar_sysc_power(const struct rcar_sysc_ch *sysc_ch, bool on)
 	return ret;
 }
 
-int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch)
+static int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch)
 {
 	return rcar_sysc_power(sysc_ch, false);
 }
 
-int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch)
+static int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch)
 {
 	return rcar_sysc_power(sysc_ch, true);
 }
@@ -323,9 +329,6 @@ static int __init rcar_sysc_pd_init(void)
 	unsigned int i;
 	int error;
 
-	if (rcar_sysc_base)
-		return 0;
-
 	np = of_find_matching_node_and_match(NULL, rcar_sysc_matches, &match);
 	if (!np)
 		return -ENODEV;
@@ -428,31 +431,6 @@ void __init rcar_sysc_nullify(struct rcar_sysc_area *areas,
 		}
 }
 
-void __init rcar_sysc_init(phys_addr_t base, u32 syscier)
-{
-	u32 syscimr;
-
-	if (!rcar_sysc_pd_init())
-		return;
-
-	rcar_sysc_base = ioremap_nocache(base, PAGE_SIZE);
-
-	/*
-	 * Mask all interrupt sources to prevent the CPU from receiving them.
-	 * Make sure not to clear reserved bits that were set before.
-	 */
-	syscimr = ioread32(rcar_sysc_base + SYSCIMR);
-	syscimr |= syscier;
-	pr_debug("%s: syscimr = 0x%08x\n", __func__, syscimr);
-	iowrite32(syscimr, rcar_sysc_base + SYSCIMR);
-
-	/*
-	 * SYSC needs all interrupt sources enabled to control power.
-	 */
-	pr_debug("%s: syscier = 0x%08x\n", __func__, syscier);
-	iowrite32(syscier, rcar_sysc_base + SYSCIER);
-}
-
 #ifdef CONFIG_ARCH_R8A7779
 static int rcar_sysc_power_cpu(unsigned int idx, bool on)
 {
diff --git a/include/linux/soc/renesas/rcar-sysc.h b/include/linux/soc/renesas/rcar-sysc.h
index 9020da2111fd..00fae6fd234d 100644
--- a/include/linux/soc/renesas/rcar-sysc.h
+++ b/include/linux/soc/renesas/rcar-sysc.h
@@ -2,17 +2,6 @@
 #ifndef __LINUX_SOC_RENESAS_RCAR_SYSC_H__
 #define __LINUX_SOC_RENESAS_RCAR_SYSC_H__
 
-#include <linux/types.h>
-
-struct rcar_sysc_ch {
-	u16 chan_offs;
-	u8 chan_bit;
-	u8 isr_bit;
-};
-
-int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch);
-int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch);
-void rcar_sysc_init(phys_addr_t base, u32 syscier);
 int rcar_sysc_power_down_cpu(unsigned int cpu);
 int rcar_sysc_power_up_cpu(unsigned int cpu);
 
-- 
cgit v1.2.3


From 297101ab85841319aac2c7843ca755d650c1964f Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 15 Jun 2018 13:44:53 +0200
Subject: regulator: pfuze100: add pfuze3001 support

This extends the pfuze100 driver with pfuze3001 support.

Latest datasheet:
https://www.nxp.com/docs/en/data-sheet/PF3001.pdf

Signed-off-by: Robin Gong <yibin.gong@nxp.com>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig              |  4 +-
 drivers/regulator/pfuze100-regulator.c | 78 +++++++++++++++++++++++++++++++---
 include/linux/regulator/pfuze100.h     | 11 +++++
 3 files changed, 84 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 5dbccf5f3037..2964eaea94c0 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -633,12 +633,12 @@ config REGULATOR_PCF50633
 	 on PCF50633
 
 config REGULATOR_PFUZE100
-	tristate "Freescale PFUZE100/200/3000 regulator driver"
+	tristate "Freescale PFUZE100/200/3000/3001 regulator driver"
 	depends on I2C
 	select REGMAP_I2C
 	help
 	  Say y here to support the regulators found on the Freescale
-	  PFUZE100/200/3000 PMIC.
+	  PFUZE100/200/3000/3001 PMIC.
 
 config REGULATOR_PV88060
 	tristate "Powerventure Semiconductor PV88060 regulator"
diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c
index 8d9dbcc775ea..32f9af7f87c4 100644
--- a/drivers/regulator/pfuze100-regulator.c
+++ b/drivers/regulator/pfuze100-regulator.c
@@ -44,7 +44,7 @@
 #define PFUZE100_VGEN5VOL	0x70
 #define PFUZE100_VGEN6VOL	0x71
 
-enum chips { PFUZE100, PFUZE200, PFUZE3000 = 3 };
+enum chips { PFUZE100, PFUZE200, PFUZE3000 = 3, PFUZE3001 = 0x31, };
 
 struct pfuze_regulator {
 	struct regulator_desc desc;
@@ -92,6 +92,7 @@ static const struct i2c_device_id pfuze_device_id[] = {
 	{.name = "pfuze100", .driver_data = PFUZE100},
 	{.name = "pfuze200", .driver_data = PFUZE200},
 	{.name = "pfuze3000", .driver_data = PFUZE3000},
+	{.name = "pfuze3001", .driver_data = PFUZE3001},
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, pfuze_device_id);
@@ -100,6 +101,7 @@ static const struct of_device_id pfuze_dt_ids[] = {
 	{ .compatible = "fsl,pfuze100", .data = (void *)PFUZE100},
 	{ .compatible = "fsl,pfuze200", .data = (void *)PFUZE200},
 	{ .compatible = "fsl,pfuze3000", .data = (void *)PFUZE3000},
+	{ .compatible = "fsl,pfuze3001", .data = (void *)PFUZE3001},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, pfuze_dt_ids);
@@ -108,10 +110,28 @@ static int pfuze100_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
 {
 	struct pfuze_chip *pfuze100 = rdev_get_drvdata(rdev);
 	int id = rdev_get_id(rdev);
+	bool reg_has_ramp_delay;
 	unsigned int ramp_bits;
 	int ret;
 
-	if (id < PFUZE100_SWBST) {
+	switch (pfuze100->chip_id) {
+	case PFUZE3001:
+		/* no dynamic voltage scaling for PF3001 */
+		reg_has_ramp_delay = false;
+		break;
+	case PFUZE3000:
+		reg_has_ramp_delay = (id < PFUZE3000_SWBST);
+		break;
+	case PFUZE200:
+		reg_has_ramp_delay = (id < PFUZE200_SWBST);
+		break;
+	case PFUZE100:
+	default:
+		reg_has_ramp_delay = (id < PFUZE100_SWBST);
+		break;
+	}
+
+	if (reg_has_ramp_delay) {
 		ramp_delay = 12500 / ramp_delay;
 		ramp_bits = (ramp_delay >> 1) - (ramp_delay >> 3);
 		ret = regmap_update_bits(pfuze100->regmap,
@@ -119,8 +139,9 @@ static int pfuze100_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
 					 0xc0, ramp_bits << 6);
 		if (ret < 0)
 			dev_err(pfuze100->dev, "ramp failed, err %d\n", ret);
-	} else
+	} else {
 		ret = -EACCES;
+	}
 
 	return ret;
 }
@@ -361,6 +382,19 @@ static struct pfuze_regulator pfuze3000_regulators[] = {
 	PFUZE100_VGEN_REG(PFUZE3000, VLDO4, PFUZE100_VGEN6VOL, 1800000, 3300000, 100000),
 };
 
+static struct pfuze_regulator pfuze3001_regulators[] = {
+	PFUZE100_SWB_REG(PFUZE3001, SW1, PFUZE100_SW1ABVOL, 0x1f, pfuze3000_sw1a),
+	PFUZE100_SWB_REG(PFUZE3001, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo),
+	PFUZE3000_SW3_REG(PFUZE3001, SW3, PFUZE100_SW3AVOL, 900000, 1650000, 50000),
+	PFUZE100_SWB_REG(PFUZE3001, VSNVS, PFUZE100_VSNVSVOL, 0x7, pfuze100_vsnvs),
+	PFUZE100_VGEN_REG(PFUZE3001, VLDO1, PFUZE100_VGEN1VOL, 1800000, 3300000, 100000),
+	PFUZE100_VGEN_REG(PFUZE3001, VLDO2, PFUZE100_VGEN2VOL, 800000, 1550000, 50000),
+	PFUZE3000_VCC_REG(PFUZE3001, VCCSD, PFUZE100_VGEN3VOL, 2850000, 3300000, 150000),
+	PFUZE3000_VCC_REG(PFUZE3001, V33, PFUZE100_VGEN4VOL, 2850000, 3300000, 150000),
+	PFUZE100_VGEN_REG(PFUZE3001, VLDO3, PFUZE100_VGEN5VOL, 1800000, 3300000, 100000),
+	PFUZE100_VGEN_REG(PFUZE3001, VLDO4, PFUZE100_VGEN6VOL, 1800000, 3300000, 100000),
+};
+
 #ifdef CONFIG_OF
 /* PFUZE100 */
 static struct of_regulator_match pfuze100_matches[] = {
@@ -418,6 +452,21 @@ static struct of_regulator_match pfuze3000_matches[] = {
 	{ .name = "vldo4",	},
 };
 
+/* PFUZE3001 */
+static struct of_regulator_match pfuze3001_matches[] = {
+
+	{ .name = "sw1",	},
+	{ .name = "sw2",	},
+	{ .name = "sw3",	},
+	{ .name = "vsnvs",	},
+	{ .name = "vldo1",	},
+	{ .name = "vldo2",	},
+	{ .name = "vccsd",	},
+	{ .name = "v33",	},
+	{ .name = "vldo3",	},
+	{ .name = "vldo4",	},
+};
+
 static struct of_regulator_match *pfuze_matches;
 
 static int pfuze_parse_regulators_dt(struct pfuze_chip *chip)
@@ -437,6 +486,11 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip)
 	}
 
 	switch (chip->chip_id) {
+	case PFUZE3001:
+		pfuze_matches = pfuze3001_matches;
+		ret = of_regulator_match(dev, parent, pfuze3001_matches,
+					 ARRAY_SIZE(pfuze3001_matches));
+		break;
 	case PFUZE3000:
 		pfuze_matches = pfuze3000_matches;
 		ret = of_regulator_match(dev, parent, pfuze3000_matches,
@@ -508,7 +562,8 @@ static int pfuze_identify(struct pfuze_chip *pfuze_chip)
 		 */
 		dev_info(pfuze_chip->dev, "Assuming misprogrammed ID=0x8");
 	} else if ((value & 0x0f) != pfuze_chip->chip_id &&
-		   (value & 0xf0) >> 4 != pfuze_chip->chip_id) {
+		   (value & 0xf0) >> 4 != pfuze_chip->chip_id &&
+		   (value != pfuze_chip->chip_id)) {
 		/* device id NOT match with your setting */
 		dev_warn(pfuze_chip->dev, "Illegal ID: %x\n", value);
 		return -ENODEV;
@@ -588,6 +643,13 @@ static int pfuze100_regulator_probe(struct i2c_client *client,
 
 	/* use the right regulators after identify the right device */
 	switch (pfuze_chip->chip_id) {
+	case PFUZE3001:
+		pfuze_chip->pfuze_regulators = pfuze3001_regulators;
+		regulator_num = ARRAY_SIZE(pfuze3001_regulators);
+		sw_check_start = PFUZE3001_SW2;
+		sw_check_end = PFUZE3001_SW2;
+		sw_hi = 1 << 3;
+		break;
 	case PFUZE3000:
 		pfuze_chip->pfuze_regulators = pfuze3000_regulators;
 		regulator_num = ARRAY_SIZE(pfuze3000_regulators);
@@ -611,7 +673,8 @@ static int pfuze100_regulator_probe(struct i2c_client *client,
 	}
 	dev_info(&client->dev, "pfuze%s found.\n",
 		(pfuze_chip->chip_id == PFUZE100) ? "100" :
-		((pfuze_chip->chip_id == PFUZE200) ? "200" : "3000"));
+		(((pfuze_chip->chip_id == PFUZE200) ? "200" :
+		((pfuze_chip->chip_id == PFUZE3000) ? "3000" : "3001"))));
 
 	memcpy(pfuze_chip->regulator_descs, pfuze_chip->pfuze_regulators,
 		sizeof(pfuze_chip->regulator_descs));
@@ -636,7 +699,8 @@ static int pfuze100_regulator_probe(struct i2c_client *client,
 		if (i >= sw_check_start && i <= sw_check_end) {
 			regmap_read(pfuze_chip->regmap, desc->vsel_reg, &val);
 			if (val & sw_hi) {
-				if (pfuze_chip->chip_id == PFUZE3000) {
+				if (pfuze_chip->chip_id == PFUZE3000 ||
+					pfuze_chip->chip_id == PFUZE3001) {
 					desc->volt_table = pfuze3000_sw2hi;
 					desc->n_voltages = ARRAY_SIZE(pfuze3000_sw2hi);
 				} else {
@@ -675,5 +739,5 @@ static struct i2c_driver pfuze_driver = {
 module_i2c_driver(pfuze_driver);
 
 MODULE_AUTHOR("Robin Gong <b38343@freescale.com>");
-MODULE_DESCRIPTION("Regulator Driver for Freescale PFUZE100/200/3000 PMIC");
+MODULE_DESCRIPTION("Regulator Driver for Freescale PFUZE100/200/3000/3001 PMIC");
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regulator/pfuze100.h b/include/linux/regulator/pfuze100.h
index e0ccf46f66cf..cb5aecd40f07 100644
--- a/include/linux/regulator/pfuze100.h
+++ b/include/linux/regulator/pfuze100.h
@@ -64,6 +64,17 @@
 #define PFUZE3000_VLDO3		11
 #define PFUZE3000_VLDO4		12
 
+#define PFUZE3001_SW1		0
+#define PFUZE3001_SW2		1
+#define PFUZE3001_SW3		2
+#define PFUZE3001_VSNVS		3
+#define PFUZE3001_VLDO1		4
+#define PFUZE3001_VLDO2		5
+#define PFUZE3001_VCCSD		6
+#define PFUZE3001_V33		7
+#define PFUZE3001_VLDO3		8
+#define PFUZE3001_VLDO4		9
+
 struct regulator_init_data;
 
 struct pfuze_regulator_platform_data {
-- 
cgit v1.2.3


From 7bd0c7ba62e8a9840f15fc4ff0122b29fe1b6413 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Thu, 7 Jun 2018 16:51:38 -0700
Subject: regulator: Fix typo in comment of struct regulator_linear_range

regulator_map_linar_range() => regulator_map_linear_range()

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index fc2dc8df476f..dea96ee39fdc 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -46,7 +46,7 @@ enum regulator_status {
 /**
  * struct regulator_linear_range - specify linear voltage ranges
  *
- * Specify a range of voltages for regulator_map_linar_range() and
+ * Specify a range of voltages for regulator_map_linear_range() and
  * regulator_list_linear_range().
  *
  * @min_uV:  Lowest voltage in range
-- 
cgit v1.2.3


From 3e247b0fb540a9a503a71ef6b918a3443e49655f Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Thu, 7 Jun 2018 19:40:32 +0000
Subject: spi: remove unused adi_spi3.h header

include/linux/spi/adi_spi3.h is unused since commit 47838669de9d ("spi: remove blackfin related host drivers")
Finish the cleaning by removing it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/adi_spi3.h | 254 -------------------------------------------
 1 file changed, 254 deletions(-)
 delete mode 100644 include/linux/spi/adi_spi3.h

(limited to 'include/linux')

diff --git a/include/linux/spi/adi_spi3.h b/include/linux/spi/adi_spi3.h
deleted file mode 100644
index c84123aa1d06..000000000000
--- a/include/linux/spi/adi_spi3.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Analog Devices SPI3 controller driver
- *
- * Copyright (c) 2014 Analog Devices Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef _ADI_SPI3_H_
-#define _ADI_SPI3_H_
-
-#include <linux/types.h>
-
-/* SPI_CONTROL */
-#define SPI_CTL_EN                  0x00000001    /* Enable */
-#define SPI_CTL_MSTR                0x00000002    /* Master/Slave */
-#define SPI_CTL_PSSE                0x00000004    /* controls modf error in master mode */
-#define SPI_CTL_ODM                 0x00000008    /* Open Drain Mode */
-#define SPI_CTL_CPHA                0x00000010    /* Clock Phase */
-#define SPI_CTL_CPOL                0x00000020    /* Clock Polarity */
-#define SPI_CTL_ASSEL               0x00000040    /* Slave Select Pin Control */
-#define SPI_CTL_SELST               0x00000080    /* Slave Select Polarity in-between transfers */
-#define SPI_CTL_EMISO               0x00000100    /* Enable MISO */
-#define SPI_CTL_SIZE                0x00000600    /* Word Transfer Size */
-#define SPI_CTL_SIZE08              0x00000000    /* SIZE: 8 bits */
-#define SPI_CTL_SIZE16              0x00000200    /* SIZE: 16 bits */
-#define SPI_CTL_SIZE32              0x00000400    /* SIZE: 32 bits */
-#define SPI_CTL_LSBF                0x00001000    /* LSB First */
-#define SPI_CTL_FCEN                0x00002000    /* Flow-Control Enable */
-#define SPI_CTL_FCCH                0x00004000    /* Flow-Control Channel Selection */
-#define SPI_CTL_FCPL                0x00008000    /* Flow-Control Polarity */
-#define SPI_CTL_FCWM                0x00030000    /* Flow-Control Water-Mark */
-#define SPI_CTL_FIFO0               0x00000000    /* FCWM: TFIFO empty or RFIFO Full */
-#define SPI_CTL_FIFO1               0x00010000    /* FCWM: TFIFO 75% or more empty or RFIFO 75% or more full */
-#define SPI_CTL_FIFO2               0x00020000    /* FCWM: TFIFO 50% or more empty or RFIFO 50% or more full */
-#define SPI_CTL_FMODE               0x00040000    /* Fast-mode Enable */
-#define SPI_CTL_MIOM                0x00300000    /* Multiple I/O Mode */
-#define SPI_CTL_MIO_DIS             0x00000000    /* MIOM: Disable */
-#define SPI_CTL_MIO_DUAL            0x00100000    /* MIOM: Enable DIOM (Dual I/O Mode) */
-#define SPI_CTL_MIO_QUAD            0x00200000    /* MIOM: Enable QUAD (Quad SPI Mode) */
-#define SPI_CTL_SOSI                0x00400000    /* Start on MOSI */
-/* SPI_RX_CONTROL */
-#define SPI_RXCTL_REN               0x00000001    /* Receive Channel Enable */
-#define SPI_RXCTL_RTI               0x00000004    /* Receive Transfer Initiate */
-#define SPI_RXCTL_RWCEN             0x00000008    /* Receive Word Counter Enable */
-#define SPI_RXCTL_RDR               0x00000070    /* Receive Data Request */
-#define SPI_RXCTL_RDR_DIS           0x00000000    /* RDR: Disabled */
-#define SPI_RXCTL_RDR_NE            0x00000010    /* RDR: RFIFO not empty */
-#define SPI_RXCTL_RDR_25            0x00000020    /* RDR: RFIFO 25% full */
-#define SPI_RXCTL_RDR_50            0x00000030    /* RDR: RFIFO 50% full */
-#define SPI_RXCTL_RDR_75            0x00000040    /* RDR: RFIFO 75% full */
-#define SPI_RXCTL_RDR_FULL          0x00000050    /* RDR: RFIFO full */
-#define SPI_RXCTL_RDO               0x00000100    /* Receive Data Over-Run */
-#define SPI_RXCTL_RRWM              0x00003000    /* FIFO Regular Water-Mark */
-#define SPI_RXCTL_RWM_0             0x00000000    /* RRWM: RFIFO Empty */
-#define SPI_RXCTL_RWM_25            0x00001000    /* RRWM: RFIFO 25% full */
-#define SPI_RXCTL_RWM_50            0x00002000    /* RRWM: RFIFO 50% full */
-#define SPI_RXCTL_RWM_75            0x00003000    /* RRWM: RFIFO 75% full */
-#define SPI_RXCTL_RUWM              0x00070000    /* FIFO Urgent Water-Mark */
-#define SPI_RXCTL_UWM_DIS           0x00000000    /* RUWM: Disabled */
-#define SPI_RXCTL_UWM_25            0x00010000    /* RUWM: RFIFO 25% full */
-#define SPI_RXCTL_UWM_50            0x00020000    /* RUWM: RFIFO 50% full */
-#define SPI_RXCTL_UWM_75            0x00030000    /* RUWM: RFIFO 75% full */
-#define SPI_RXCTL_UWM_FULL          0x00040000    /* RUWM: RFIFO full */
-/* SPI_TX_CONTROL */
-#define SPI_TXCTL_TEN               0x00000001    /* Transmit Channel Enable */
-#define SPI_TXCTL_TTI               0x00000004    /* Transmit Transfer Initiate */
-#define SPI_TXCTL_TWCEN             0x00000008    /* Transmit Word Counter Enable */
-#define SPI_TXCTL_TDR               0x00000070    /* Transmit Data Request */
-#define SPI_TXCTL_TDR_DIS           0x00000000    /* TDR: Disabled */
-#define SPI_TXCTL_TDR_NF            0x00000010    /* TDR: TFIFO not full */
-#define SPI_TXCTL_TDR_25            0x00000020    /* TDR: TFIFO 25% empty */
-#define SPI_TXCTL_TDR_50            0x00000030    /* TDR: TFIFO 50% empty */
-#define SPI_TXCTL_TDR_75            0x00000040    /* TDR: TFIFO 75% empty */
-#define SPI_TXCTL_TDR_EMPTY         0x00000050    /* TDR: TFIFO empty */
-#define SPI_TXCTL_TDU               0x00000100    /* Transmit Data Under-Run */
-#define SPI_TXCTL_TRWM              0x00003000    /* FIFO Regular Water-Mark */
-#define SPI_TXCTL_RWM_FULL          0x00000000    /* TRWM: TFIFO full */
-#define SPI_TXCTL_RWM_25            0x00001000    /* TRWM: TFIFO 25% empty */
-#define SPI_TXCTL_RWM_50            0x00002000    /* TRWM: TFIFO 50% empty */
-#define SPI_TXCTL_RWM_75            0x00003000    /* TRWM: TFIFO 75% empty */
-#define SPI_TXCTL_TUWM              0x00070000    /* FIFO Urgent Water-Mark */
-#define SPI_TXCTL_UWM_DIS           0x00000000    /* TUWM: Disabled */
-#define SPI_TXCTL_UWM_25            0x00010000    /* TUWM: TFIFO 25% empty */
-#define SPI_TXCTL_UWM_50            0x00020000    /* TUWM: TFIFO 50% empty */
-#define SPI_TXCTL_UWM_75            0x00030000    /* TUWM: TFIFO 75% empty */
-#define SPI_TXCTL_UWM_EMPTY         0x00040000    /* TUWM: TFIFO empty */
-/* SPI_CLOCK */
-#define SPI_CLK_BAUD                0x0000FFFF    /* Baud Rate */
-/* SPI_DELAY */
-#define SPI_DLY_STOP                0x000000FF    /* Transfer delay time in multiples of SCK period */
-#define SPI_DLY_LEADX               0x00000100    /* Extended (1 SCK) LEAD Control */
-#define SPI_DLY_LAGX                0x00000200    /* Extended (1 SCK) LAG control */
-/* SPI_SSEL */
-#define SPI_SLVSEL_SSE1             0x00000002    /* SPISSEL1 Enable */
-#define SPI_SLVSEL_SSE2             0x00000004    /* SPISSEL2 Enable */
-#define SPI_SLVSEL_SSE3             0x00000008    /* SPISSEL3 Enable */
-#define SPI_SLVSEL_SSE4             0x00000010    /* SPISSEL4 Enable */
-#define SPI_SLVSEL_SSE5             0x00000020    /* SPISSEL5 Enable */
-#define SPI_SLVSEL_SSE6             0x00000040    /* SPISSEL6 Enable */
-#define SPI_SLVSEL_SSE7             0x00000080    /* SPISSEL7 Enable */
-#define SPI_SLVSEL_SSEL1            0x00000200    /* SPISSEL1 Value */
-#define SPI_SLVSEL_SSEL2            0x00000400    /* SPISSEL2 Value */
-#define SPI_SLVSEL_SSEL3            0x00000800    /* SPISSEL3 Value */
-#define SPI_SLVSEL_SSEL4            0x00001000    /* SPISSEL4 Value */
-#define SPI_SLVSEL_SSEL5            0x00002000    /* SPISSEL5 Value */
-#define SPI_SLVSEL_SSEL6            0x00004000    /* SPISSEL6 Value */
-#define SPI_SLVSEL_SSEL7            0x00008000    /* SPISSEL7 Value */
-/* SPI_RWC */
-#define SPI_RWC_VALUE               0x0000FFFF    /* Received Word-Count */
-/* SPI_RWCR */
-#define SPI_RWCR_VALUE              0x0000FFFF    /* Received Word-Count Reload */
-/* SPI_TWC */
-#define SPI_TWC_VALUE               0x0000FFFF    /* Transmitted Word-Count */
-/* SPI_TWCR */
-#define SPI_TWCR_VALUE              0x0000FFFF    /* Transmitted Word-Count Reload */
-/* SPI_IMASK */
-#define SPI_IMSK_RUWM               0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_TUWM               0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_ROM                0x00000010    /* Receive Over-Run Error Interrupt Mask */
-#define SPI_IMSK_TUM                0x00000020    /* Transmit Under-Run Error Interrupt Mask */
-#define SPI_IMSK_TCM                0x00000040    /* Transmit Collision Error Interrupt Mask */
-#define SPI_IMSK_MFM                0x00000080    /* Mode Fault Error Interrupt Mask */
-#define SPI_IMSK_RSM                0x00000100    /* Receive Start Interrupt Mask */
-#define SPI_IMSK_TSM                0x00000200    /* Transmit Start Interrupt Mask */
-#define SPI_IMSK_RFM                0x00000400    /* Receive Finish Interrupt Mask */
-#define SPI_IMSK_TFM                0x00000800    /* Transmit Finish Interrupt Mask */
-/* SPI_IMASKCL */
-#define SPI_IMSK_CLR_RUW            0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_CLR_TUWM           0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_CLR_ROM            0x00000010    /* Receive Over-Run Error Interrupt Mask */
-#define SPI_IMSK_CLR_TUM            0x00000020    /* Transmit Under-Run Error Interrupt Mask */
-#define SPI_IMSK_CLR_TCM            0x00000040    /* Transmit Collision Error Interrupt Mask */
-#define SPI_IMSK_CLR_MFM            0x00000080    /* Mode Fault Error Interrupt Mask */
-#define SPI_IMSK_CLR_RSM            0x00000100    /* Receive Start Interrupt Mask */
-#define SPI_IMSK_CLR_TSM            0x00000200    /* Transmit Start Interrupt Mask */
-#define SPI_IMSK_CLR_RFM            0x00000400    /* Receive Finish Interrupt Mask */
-#define SPI_IMSK_CLR_TFM            0x00000800    /* Transmit Finish Interrupt Mask */
-/* SPI_IMASKST */
-#define SPI_IMSK_SET_RUWM           0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_SET_TUWM           0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_SET_ROM            0x00000010    /* Receive Over-Run Error Interrupt Mask */
-#define SPI_IMSK_SET_TUM            0x00000020    /* Transmit Under-Run Error Interrupt Mask */
-#define SPI_IMSK_SET_TCM            0x00000040    /* Transmit Collision Error Interrupt Mask */
-#define SPI_IMSK_SET_MFM            0x00000080    /* Mode Fault Error Interrupt Mask */
-#define SPI_IMSK_SET_RSM            0x00000100    /* Receive Start Interrupt Mask */
-#define SPI_IMSK_SET_TSM            0x00000200    /* Transmit Start Interrupt Mask */
-#define SPI_IMSK_SET_RFM            0x00000400    /* Receive Finish Interrupt Mask */
-#define SPI_IMSK_SET_TFM            0x00000800    /* Transmit Finish Interrupt Mask */
-/* SPI_STATUS */
-#define SPI_STAT_SPIF               0x00000001    /* SPI Finished */
-#define SPI_STAT_RUWM               0x00000002    /* Receive Urgent Water-Mark Breached */
-#define SPI_STAT_TUWM               0x00000004    /* Transmit Urgent Water-Mark Breached */
-#define SPI_STAT_ROE                0x00000010    /* Receive Over-Run Error Indication */
-#define SPI_STAT_TUE                0x00000020    /* Transmit Under-Run Error Indication */
-#define SPI_STAT_TCE                0x00000040    /* Transmit Collision Error Indication */
-#define SPI_STAT_MODF               0x00000080    /* Mode Fault Error Indication */
-#define SPI_STAT_RS                 0x00000100    /* Receive Start Indication */
-#define SPI_STAT_TS                 0x00000200    /* Transmit Start Indication */
-#define SPI_STAT_RF                 0x00000400    /* Receive Finish Indication */
-#define SPI_STAT_TF                 0x00000800    /* Transmit Finish Indication */
-#define SPI_STAT_RFS                0x00007000    /* SPI_RFIFO status */
-#define SPI_STAT_RFIFO_EMPTY        0x00000000    /* RFS: RFIFO Empty */
-#define SPI_STAT_RFIFO_25           0x00001000    /* RFS: RFIFO 25% Full */
-#define SPI_STAT_RFIFO_50           0x00002000    /* RFS: RFIFO 50% Full */
-#define SPI_STAT_RFIFO_75           0x00003000    /* RFS: RFIFO 75% Full */
-#define SPI_STAT_RFIFO_FULL         0x00004000    /* RFS: RFIFO Full */
-#define SPI_STAT_TFS                0x00070000    /* SPI_TFIFO status */
-#define SPI_STAT_TFIFO_FULL         0x00000000    /* TFS: TFIFO full */
-#define SPI_STAT_TFIFO_25           0x00010000    /* TFS: TFIFO 25% empty */
-#define SPI_STAT_TFIFO_50           0x00020000    /* TFS: TFIFO 50% empty */
-#define SPI_STAT_TFIFO_75           0x00030000    /* TFS: TFIFO 75% empty */
-#define SPI_STAT_TFIFO_EMPTY        0x00040000    /* TFS: TFIFO empty */
-#define SPI_STAT_FCS                0x00100000    /* Flow-Control Stall Indication */
-#define SPI_STAT_RFE                0x00400000    /* SPI_RFIFO Empty */
-#define SPI_STAT_TFF                0x00800000    /* SPI_TFIFO Full */
-/* SPI_ILAT */
-#define SPI_ILAT_RUWMI              0x00000002    /* Receive Urgent Water Mark Interrupt */
-#define SPI_ILAT_TUWMI              0x00000004    /* Transmit Urgent Water Mark Interrupt */
-#define SPI_ILAT_ROI                0x00000010    /* Receive Over-Run Error Indication */
-#define SPI_ILAT_TUI                0x00000020    /* Transmit Under-Run Error Indication */
-#define SPI_ILAT_TCI                0x00000040    /* Transmit Collision Error Indication */
-#define SPI_ILAT_MFI                0x00000080    /* Mode Fault Error Indication */
-#define SPI_ILAT_RSI                0x00000100    /* Receive Start Indication */
-#define SPI_ILAT_TSI                0x00000200    /* Transmit Start Indication */
-#define SPI_ILAT_RFI                0x00000400    /* Receive Finish Indication */
-#define SPI_ILAT_TFI                0x00000800    /* Transmit Finish Indication */
-/* SPI_ILATCL */
-#define SPI_ILAT_CLR_RUWMI          0x00000002    /* Receive Urgent Water Mark Interrupt */
-#define SPI_ILAT_CLR_TUWMI          0x00000004    /* Transmit Urgent Water Mark Interrupt */
-#define SPI_ILAT_CLR_ROI            0x00000010    /* Receive Over-Run Error Indication */
-#define SPI_ILAT_CLR_TUI            0x00000020    /* Transmit Under-Run Error Indication */
-#define SPI_ILAT_CLR_TCI            0x00000040    /* Transmit Collision Error Indication */
-#define SPI_ILAT_CLR_MFI            0x00000080    /* Mode Fault Error Indication */
-#define SPI_ILAT_CLR_RSI            0x00000100    /* Receive Start Indication */
-#define SPI_ILAT_CLR_TSI            0x00000200    /* Transmit Start Indication */
-#define SPI_ILAT_CLR_RFI            0x00000400    /* Receive Finish Indication */
-#define SPI_ILAT_CLR_TFI            0x00000800    /* Transmit Finish Indication */
-
-/*
- * adi spi3 registers layout
- */
-struct adi_spi_regs {
-	u32 revid;
-	u32 control;
-	u32 rx_control;
-	u32 tx_control;
-	u32 clock;
-	u32 delay;
-	u32 ssel;
-	u32 rwc;
-	u32 rwcr;
-	u32 twc;
-	u32 twcr;
-	u32 reserved0;
-	u32 emask;
-	u32 emaskcl;
-	u32 emaskst;
-	u32 reserved1;
-	u32 status;
-	u32 elat;
-	u32 elatcl;
-	u32 reserved2;
-	u32 rfifo;
-	u32 reserved3;
-	u32 tfifo;
-};
-
-#define MAX_CTRL_CS          8  /* cs in spi controller */
-
-/* device.platform_data for SSP controller devices */
-struct adi_spi3_master {
-	u16 num_chipselect;
-	u16 pin_req[7];
-};
-
-/* spi_board_info.controller_data for SPI slave devices,
- * copied to spi_device.platform_data ... mostly for dma tuning
- */
-struct adi_spi3_chip {
-	u32 control;
-	u16 cs_chg_udelay; /* Some devices require 16-bit delays */
-	u32 tx_dummy_val; /* tx value for rx only transfer */
-	bool enable_dma;
-};
-
-#endif /* _ADI_SPI3_H_ */
-- 
cgit v1.2.3


From cf85ec88701610f0de2e959ac447d9613462fbc7 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 9 Jun 2018 16:18:16 +0300
Subject: removed extra extern file_fdatawait_range

Jeff added this extern twice in commit a823e4589e68

Fixes: a823e4589e68 ("mm: add file_fdatawait_range and file_write_and_wait")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..f140c11d35dd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2623,8 +2623,6 @@ static inline int filemap_fdatawait(struct address_space *mapping)
 
 extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
 				  loff_t lend);
-extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
-						loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
 				        loff_t lstart, loff_t lend);
-- 
cgit v1.2.3


From 3c60e868c31e4ff144776bf53ff0dfe9e9e4ec15 Mon Sep 17 00:00:00 2001
From: "willy@infradead.org" <willy@infradead.org>
Date: Wed, 13 Jun 2018 11:45:55 -0700
Subject: IDR: Expose the XArray lock

Allow users of the IDR to use the XArray lock for their own
synchronisation purposes.  The IDR continues to rely on the caller to
handle locking, but this lets the caller use the lock embedded in the
IDR data structure instead of allocating their own lock.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/idr.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index e856f4e0ab35..3e8215b2c371 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -98,6 +98,17 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
  * period).
  */
 
+#define idr_lock(idr)		xa_lock(&(idr)->idr_rt)
+#define idr_unlock(idr)		xa_unlock(&(idr)->idr_rt)
+#define idr_lock_bh(idr)	xa_lock_bh(&(idr)->idr_rt)
+#define idr_unlock_bh(idr)	xa_unlock_bh(&(idr)->idr_rt)
+#define idr_lock_irq(idr)	xa_lock_irq(&(idr)->idr_rt)
+#define idr_unlock_irq(idr)	xa_unlock_irq(&(idr)->idr_rt)
+#define idr_lock_irqsave(idr, flags) \
+				xa_lock_irqsave(&(idr)->idr_rt, flags)
+#define idr_unlock_irqrestore(idr, flags) \
+				xa_unlock_irqrestore(&(idr)->idr_rt, flags)
+
 void idr_preload(gfp_t gfp_mask);
 
 int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t);
-- 
cgit v1.2.3


From aece27a2f01be4bb7683790f69cd1bed3a0929a2 Mon Sep 17 00:00:00 2001
From: Samuel Morris <samorris@lexmark.com>
Date: Tue, 29 May 2018 10:06:12 +0000
Subject: ata: ahci_platform: allow disabling of hotplug to save power

A number of resources remain powered to support hotplug. On platforms
I've worked with, allowing the ahci_platform to suspend saves about
150mW. This patch enables rpm and allows the device to be auto-suspended
through sysfs.

Signed-off-by: Samuel Morris <samorris@lexmark.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_platform.c    | 11 ++++--
 drivers/ata/libahci_platform.c | 82 +++++++++++++++++++++++++++++++++---------
 include/linux/ahci_platform.h  |  2 ++
 3 files changed, 76 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 99f9a895a459..757729376eda 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -68,8 +68,13 @@ disable_resources:
 	return rc;
 }
 
-static SIMPLE_DEV_PM_OPS(ahci_pm_ops, ahci_platform_suspend,
-			 ahci_platform_resume);
+#ifdef CONFIG_PM_SLEEP
+static const struct dev_pm_ops ahci_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(ahci_platform_suspend, ahci_platform_resume)
+	SET_RUNTIME_PM_OPS(ahci_platform_runtime_suspend,
+			ahci_platform_runtime_resume, NULL)
+};
+#endif
 
 static const struct of_device_id ahci_of_match[] = {
 	{ .compatible = "generic-ahci", },
@@ -98,7 +103,9 @@ static struct platform_driver ahci_driver = {
 		.name = DRV_NAME,
 		.of_match_table = ahci_of_match,
 		.acpi_match_table = ahci_acpi_match,
+#ifdef CONFIG_PM_SLEEP
 		.pm = &ahci_pm_ops,
+#endif
 	},
 };
 module_platform_driver(ahci_driver);
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 30cc8f1a31e1..feee2e11fb33 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -257,7 +257,7 @@ static void ahci_platform_put_resources(struct device *dev, void *res)
 	int c;
 
 	if (hpriv->got_runtime_pm) {
-		pm_runtime_put_sync(dev);
+		pm_runtime_allow(dev);
 		pm_runtime_disable(dev);
 	}
 
@@ -475,8 +475,10 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev)
 		if (rc == -EPROBE_DEFER)
 			goto err_out;
 	}
+
+	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
-	pm_runtime_get_sync(dev);
+	pm_runtime_forbid(dev);
 	hpriv->got_runtime_pm = true;
 
 	devres_remove_group(dev, NULL);
@@ -705,6 +707,21 @@ int ahci_platform_resume_host(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(ahci_platform_resume_host);
 
+static int _ahci_platform_suspend(struct device *dev)
+{
+	struct ata_host *host = dev_get_drvdata(dev);
+	struct ahci_host_priv *hpriv = host->private_data;
+	int rc;
+
+	rc = ahci_platform_suspend_host(dev);
+	if (rc)
+		return rc;
+
+	ahci_platform_disable_resources(hpriv);
+
+	return 0;
+}
+
 /**
  * ahci_platform_suspend - Suspend an ahci-platform device
  * @dev: the platform device to suspend
@@ -716,20 +733,45 @@ EXPORT_SYMBOL_GPL(ahci_platform_resume_host);
  * 0 on success otherwise a negative error code
  */
 int ahci_platform_suspend(struct device *dev)
+{
+	return _ahci_platform_suspend(dev);
+}
+EXPORT_SYMBOL_GPL(ahci_platform_suspend);
+
+/**
+ * ahci_platform_runtime_suspend - Runtime suspend an ahci-platform device
+ * @dev: the platform device to suspend
+ *
+ * This function suspends the host associated with the device, followed by
+ * disabling all the resources of the device.
+ *
+ * RETURNS:
+ * 0 on success otherwise a negative error code
+ */
+int ahci_platform_runtime_suspend(struct device *dev)
+{
+	return _ahci_platform_suspend(dev);
+}
+EXPORT_SYMBOL_GPL(ahci_platform_runtime_suspend);
+
+static int _ahci_platform_resume(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
 
-	rc = ahci_platform_suspend_host(dev);
+	rc = ahci_platform_enable_resources(hpriv);
 	if (rc)
 		return rc;
 
-	ahci_platform_disable_resources(hpriv);
+	rc = ahci_platform_resume_host(dev);
+	if (rc) {
+		ahci_platform_disable_resources(hpriv);
+		return rc;
+	}
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(ahci_platform_suspend);
 
 /**
  * ahci_platform_resume - Resume an ahci-platform device
@@ -743,31 +785,37 @@ EXPORT_SYMBOL_GPL(ahci_platform_suspend);
  */
 int ahci_platform_resume(struct device *dev)
 {
-	struct ata_host *host = dev_get_drvdata(dev);
-	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
 
-	rc = ahci_platform_enable_resources(hpriv);
+	rc = _ahci_platform_resume(dev);
 	if (rc)
 		return rc;
 
-	rc = ahci_platform_resume_host(dev);
-	if (rc)
-		goto disable_resources;
-
 	/* We resumed so update PM runtime state */
 	pm_runtime_disable(dev);
 	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
 
 	return 0;
-
-disable_resources:
-	ahci_platform_disable_resources(hpriv);
-
-	return rc;
 }
 EXPORT_SYMBOL_GPL(ahci_platform_resume);
+
+/**
+ * ahci_platform_runtime_resume - Runtime resume an ahci-platform device
+ * @dev: the platform device to resume
+ *
+ * This function enables all the resources of the device followed by
+ * resuming the host associated with the device.
+ *
+ * RETURNS:
+ * 0 on success otherwise a negative error code
+ */
+int ahci_platform_runtime_resume(struct device *dev)
+{
+	return _ahci_platform_resume(dev);
+}
+EXPORT_SYMBOL_GPL(ahci_platform_runtime_resume);
+
 #endif
 
 MODULE_DESCRIPTION("AHCI SATA platform library");
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 1b0a17b22cd3..6396e6982103 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -42,5 +42,7 @@ int ahci_platform_suspend_host(struct device *dev);
 int ahci_platform_resume_host(struct device *dev);
 int ahci_platform_suspend(struct device *dev);
 int ahci_platform_resume(struct device *dev);
+int ahci_platform_runtime_suspend(struct device *dev);
+int ahci_platform_runtime_resume(struct device *dev);
 
 #endif /* _AHCI_PLATFORM_H */
-- 
cgit v1.2.3


From 420c0117db25db38b72b6230223f7a976d3070ea Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sun, 17 Jun 2018 19:02:04 +0200
Subject: dmaengine: pxa: use a dma slave map

In order to remove the specific knowledge of the dma mapping from PXA
drivers, add a default slave map for pxa architectures.

This won't impact MMP architecture, but is aimed only at all PXA boards.

This is the first step, and once all drivers are converted,
pxad_filter_fn() will be made static, and the DMA resources removed from
device.c.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/pxa_dma.c                 | 10 +++++++++-
 include/linux/platform_data/mmp_dma.h |  4 ++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index b53fb618bbf6..9505334f9c6e 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -179,6 +179,8 @@ static unsigned int pxad_drcmr(unsigned int line)
 	return 0x1000 + line * 4;
 }
 
+bool pxad_filter_fn(struct dma_chan *chan, void *param);
+
 /*
  * Debug fs
  */
@@ -1396,9 +1398,10 @@ static int pxad_probe(struct platform_device *op)
 {
 	struct pxad_device *pdev;
 	const struct of_device_id *of_id;
+	const struct dma_slave_map *slave_map = NULL;
 	struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
 	struct resource *iores;
-	int ret, dma_channels = 0, nb_requestors = 0;
+	int ret, dma_channels = 0, nb_requestors = 0, slave_map_cnt = 0;
 	const enum dma_slave_buswidth widths =
 		DMA_SLAVE_BUSWIDTH_1_BYTE   | DMA_SLAVE_BUSWIDTH_2_BYTES |
 		DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -1429,6 +1432,8 @@ static int pxad_probe(struct platform_device *op)
 	} else if (pdata && pdata->dma_channels) {
 		dma_channels = pdata->dma_channels;
 		nb_requestors = pdata->nb_requestors;
+		slave_map = pdata->slave_map;
+		slave_map_cnt = pdata->slave_map_cnt;
 	} else {
 		dma_channels = 32;	/* default 32 channel */
 	}
@@ -1440,6 +1445,9 @@ static int pxad_probe(struct platform_device *op)
 	pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy;
 	pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg;
 	pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic;
+	pdev->slave.filter.map = slave_map;
+	pdev->slave.filter.mapcnt = slave_map_cnt;
+	pdev->slave.filter.fn = pxad_filter_fn;
 
 	pdev->slave.copy_align = PDMA_ALIGNMENT;
 	pdev->slave.src_addr_widths = widths;
diff --git a/include/linux/platform_data/mmp_dma.h b/include/linux/platform_data/mmp_dma.h
index d1397c8ed94e..6397b9c8149a 100644
--- a/include/linux/platform_data/mmp_dma.h
+++ b/include/linux/platform_data/mmp_dma.h
@@ -12,9 +12,13 @@
 #ifndef MMP_DMA_H
 #define MMP_DMA_H
 
+struct dma_slave_map;
+
 struct mmp_dma_platdata {
 	int dma_channels;
 	int nb_requestors;
+	int slave_map_cnt;
+	const struct dma_slave_map *slave_map;
 };
 
 #endif /* MMP_DMA_H */
-- 
cgit v1.2.3


From b6d1a17f4729e4fda5740a855da91d202db2c118 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sun, 17 Jun 2018 19:02:14 +0200
Subject: dmaengine: pxa: document pxad_param

Add some documentation for the pxad_param structure, and describe the
contract behind the minimal required priority of a DMA channel.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dma/pxa-dma.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h
index e56ec7af4fd7..9fc594f69eff 100644
--- a/include/linux/dma/pxa-dma.h
+++ b/include/linux/dma/pxa-dma.h
@@ -9,6 +9,15 @@ enum pxad_chan_prio {
 	PXAD_PRIO_LOWEST,
 };
 
+/**
+ * struct pxad_param - dma channel request parameters
+ * @drcmr: requestor line number
+ * @prio: minimal mandatory priority of the channel
+ *
+ * If a requested channel is granted, its priority will be at least @prio,
+ * ie. if PXAD_PRIO_LOW is required, the requested channel will be either
+ * PXAD_PRIO_LOW, PXAD_PRIO_NORMAL or PXAD_PRIO_HIGHEST.
+ */
 struct pxad_param {
 	unsigned int drcmr;
 	enum pxad_chan_prio prio;
-- 
cgit v1.2.3


From cd31b80736852d34bc1072f3e579a6fd73a244e7 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sun, 17 Jun 2018 19:02:17 +0200
Subject: ARM: pxa: change SSP DMA channels allocation

Now the dma_slave_map is available for PXA architecture, switch the SSP
device to it.

This specifically means that :
- for platform data based machines, the DMA requestor channels are
  extracted from the slave map, where pxa-ssp-dai.<N> is a 1-1 match to
  ssp.<N>, and the channels are either "rx" or "tx".

- for device tree platforms, the dma node should be hooked into the
  pxa2xx-ac97 or pxa-ssp-dai node.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Daniel Mack <daniel@zonque.org>
---
 arch/arm/plat-pxa/ssp.c    | 47 ----------------------------------------------
 include/linux/pxa2xx_ssp.h |  2 --
 sound/soc/pxa/pxa-ssp.c    |  5 ++---
 3 files changed, 2 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c
index ba13f793fbce..ed36dcab80f1 100644
--- a/arch/arm/plat-pxa/ssp.c
+++ b/arch/arm/plat-pxa/ssp.c
@@ -127,53 +127,6 @@ static int pxa_ssp_probe(struct platform_device *pdev)
 	if (IS_ERR(ssp->clk))
 		return PTR_ERR(ssp->clk);
 
-	if (dev->of_node) {
-		struct of_phandle_args dma_spec;
-		struct device_node *np = dev->of_node;
-		int ret;
-
-		/*
-		 * FIXME: we should allocate the DMA channel from this
-		 * context and pass the channel down to the ssp users.
-		 * For now, we lookup the rx and tx indices manually
-		 */
-
-		/* rx */
-		ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells",
-						 0, &dma_spec);
-
-		if (ret) {
-			dev_err(dev, "Can't parse dmas property\n");
-			return -ENODEV;
-		}
-		ssp->drcmr_rx = dma_spec.args[0];
-		of_node_put(dma_spec.np);
-
-		/* tx */
-		ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells",
-						 1, &dma_spec);
-		if (ret) {
-			dev_err(dev, "Can't parse dmas property\n");
-			return -ENODEV;
-		}
-		ssp->drcmr_tx = dma_spec.args[0];
-		of_node_put(dma_spec.np);
-	} else {
-		res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-		if (res == NULL) {
-			dev_err(dev, "no SSP RX DRCMR defined\n");
-			return -ENODEV;
-		}
-		ssp->drcmr_rx = res->start;
-
-		res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-		if (res == NULL) {
-			dev_err(dev, "no SSP TX DRCMR defined\n");
-			return -ENODEV;
-		}
-		ssp->drcmr_tx = res->start;
-	}
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
 		dev_err(dev, "no memory resource defined\n");
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 8461b18e4608..03a7ca46735b 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -212,8 +212,6 @@ struct ssp_device {
 	int		type;
 	int		use_count;
 	int		irq;
-	int		drcmr_rx;
-	int		drcmr_tx;
 
 	struct device_node	*of_node;
 };
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 6fc986080130..0b441338bdd4 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -105,9 +105,8 @@ static int pxa_ssp_startup(struct snd_pcm_substream *substream,
 	dma = kzalloc(sizeof(struct snd_dmaengine_dai_dma_data), GFP_KERNEL);
 	if (!dma)
 		return -ENOMEM;
-
-	dma->filter_data = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
-				&ssp->drcmr_tx : &ssp->drcmr_rx;
+	dma->chan_name = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+		"tx" : "rx";
 
 	snd_soc_dai_set_dma_data(cpu_dai, substream, dma);
 
-- 
cgit v1.2.3


From b8042b3da925f390c1482bf9dc0898dc0b3ea7b5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 18 Jun 2018 22:39:29 +0200
Subject: ieee80211: bump IEEE80211_MAX_AMPDU_BUF to support HE

Bump the IEEE80211_MAX_AMPDU_BUF size to 0x100 for HE support
and - for now - use IEEE80211_MAX_AMPDU_BUF_HT everywhere.

This is derived from my internal patch, parts of which Luca
had sent upstream.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/realtek/rtlwifi/base.c |  2 +-
 drivers/staging/rtl8188eu/include/wifi.h    |  1 -
 drivers/staging/rtl8712/wifi.h              |  1 -
 drivers/staging/rtl8723bs/include/wifi.h    |  1 -
 drivers/staging/rtlwifi/base.c              |  2 +-
 include/linux/ieee80211.h                   | 10 ++++++----
 net/mac80211/agg-rx.c                       |  4 ++--
 net/mac80211/agg-tx.c                       |  2 +-
 net/mac80211/ht.c                           |  2 +-
 net/mac80211/main.c                         |  4 ++--
 10 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index 39c817eddd78..31bd6f714052 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -1904,7 +1904,7 @@ void rtl_rx_ampdu_apply(struct rtl_priv *rtlpriv)
 		 reject_agg, ctrl_agg_size, agg_size);
 
 	rtlpriv->hw->max_rx_aggregation_subframes =
-		(ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF);
+		(ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF_HT);
 }
 EXPORT_SYMBOL(rtl_rx_ampdu_apply);
 
diff --git a/drivers/staging/rtl8188eu/include/wifi.h b/drivers/staging/rtl8188eu/include/wifi.h
index 084a246eec19..6790b7c8cfb1 100644
--- a/drivers/staging/rtl8188eu/include/wifi.h
+++ b/drivers/staging/rtl8188eu/include/wifi.h
@@ -575,7 +575,6 @@ enum ht_cap_ampdu_factor {
  * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
  */
 #define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
 
 
 #define OP_MODE_PURE                    0
diff --git a/drivers/staging/rtl8712/wifi.h b/drivers/staging/rtl8712/wifi.h
index 0ed2f44ab4e9..00a4302e9983 100644
--- a/drivers/staging/rtl8712/wifi.h
+++ b/drivers/staging/rtl8712/wifi.h
@@ -574,7 +574,6 @@ struct ieee80211_ht_addt_info {
  * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
  */
 #define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
 
 
 /* Spatial Multiplexing Power Save Modes */
diff --git a/drivers/staging/rtl8723bs/include/wifi.h b/drivers/staging/rtl8723bs/include/wifi.h
index 08bc79840b23..559bf2606fb7 100644
--- a/drivers/staging/rtl8723bs/include/wifi.h
+++ b/drivers/staging/rtl8723bs/include/wifi.h
@@ -799,7 +799,6 @@ enum HT_CAP_AMPDU_FACTOR {
  * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
  */
 #define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
 
 
 /* Spatial Multiplexing Power Save Modes */
diff --git a/drivers/staging/rtlwifi/base.c b/drivers/staging/rtlwifi/base.c
index e46e47d93d7d..094827c1879a 100644
--- a/drivers/staging/rtlwifi/base.c
+++ b/drivers/staging/rtlwifi/base.c
@@ -1838,7 +1838,7 @@ void rtl_rx_ampdu_apply(struct rtl_priv *rtlpriv)
 		 reject_agg, ctrl_agg_size, agg_size);
 
 	rtlpriv->hw->max_rx_aggregation_subframes =
-		(ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF);
+		(ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF_HT);
 }
 
 /*********************************************************
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index e6a6503bfa33..9c03a7d5e400 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1433,11 +1433,13 @@ struct ieee80211_ht_operation {
 #define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800
 
 /*
- * A-PMDU buffer sizes
- * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
+ * A-MPDU buffer sizes
+ * According to HT size varies from 8 to 64 frames
+ * HE adds the ability to have up to 256 frames.
  */
-#define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
+#define IEEE80211_MIN_AMPDU_BUF		0x8
+#define IEEE80211_MAX_AMPDU_BUF_HT	0x40
+#define IEEE80211_MAX_AMPDU_BUF		0x100
 
 
 /* Spatial Multiplexing Power Save Modes (for capability) */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index e83c19d4c292..3ffd853b483f 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -274,7 +274,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 	/* XXX: check own ht delayed BA capability?? */
 	if (((ba_policy != 1) &&
 	     (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) ||
-	    (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
+	    (buf_size > IEEE80211_MAX_AMPDU_BUF_HT)) {
 		status = WLAN_STATUS_INVALID_QOS_PARAM;
 		ht_dbg_ratelimited(sta->sdata,
 				   "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n",
@@ -283,7 +283,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 	}
 	/* determine default buffer size */
 	if (buf_size == 0)
-		buf_size = IEEE80211_MAX_AMPDU_BUF;
+		buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
 
 	/* make sure the size doesn't exceed the maximum supported by the hw */
 	if (buf_size > sta->sta.max_rx_aggregation_subframes)
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index ac4295296514..86c6bc0432ba 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -514,7 +514,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	/* send AddBA request */
 	ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
 				     tid_tx->dialog_token, params.ssn,
-				     IEEE80211_MAX_AMPDU_BUF,
+				     IEEE80211_MAX_AMPDU_BUF_HT,
 				     tid_tx->timeout);
 }
 
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 26a7ba3b698f..f849ea814993 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -352,7 +352,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
 		    test_and_clear_bit(tid,
 				       sta->ampdu_mlme.tid_rx_manage_offl))
 			___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
-							 IEEE80211_MAX_AMPDU_BUF,
+							 IEEE80211_MAX_AMPDU_BUF_HT,
 							 false, true);
 
 		if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a6f8e3a646d4..070f77862014 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -597,8 +597,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 	local->hw.queues = 1;
 	local->hw.max_rates = 1;
 	local->hw.max_report_rates = 0;
-	local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
-	local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+	local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT;
+	local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT;
 	local->hw.offchannel_tx_hw_queue = IEEE80211_INVAL_HW_QUEUE;
 	local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
 	local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
-- 
cgit v1.2.3


From 0e622e80191e75c99b6ecc265c140a37d81e7a63 Mon Sep 17 00:00:00 2001
From: Sricharan R <sricharan@codeaurora.org>
Date: Mon, 4 Jun 2018 13:30:35 -0700
Subject: remoteproc: qcom: mdt_loader: Make the firmware authentication
 optional

qcom_mdt_load function loads the mdt type firmware and
initialises the secure memory as well. Make the initialisation only
when requested by the caller, so that the function can be used
by self-authenticating remoteproc as well.

Acked-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/soc/qcom/mdt_loader.c       | 87 ++++++++++++++++++++++++++-----------
 include/linux/soc/qcom/mdt_loader.h |  4 ++
 2 files changed, 66 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c
index dc09d7ac905f..1c488024c698 100644
--- a/drivers/soc/qcom/mdt_loader.c
+++ b/drivers/soc/qcom/mdt_loader.c
@@ -74,23 +74,10 @@ ssize_t qcom_mdt_get_size(const struct firmware *fw)
 }
 EXPORT_SYMBOL_GPL(qcom_mdt_get_size);
 
-/**
- * qcom_mdt_load() - load the firmware which header is loaded as fw
- * @dev:	device handle to associate resources with
- * @fw:		firmware object for the mdt file
- * @firmware:	name of the firmware, for construction of segment file names
- * @pas_id:	PAS identifier
- * @mem_region:	allocated memory region to load firmware into
- * @mem_phys:	physical address of allocated memory region
- * @mem_size:	size of the allocated memory region
- * @reloc_base:	adjusted physical address after relocation
- *
- * Returns 0 on success, negative errno otherwise.
- */
-int qcom_mdt_load(struct device *dev, const struct firmware *fw,
-		  const char *firmware, int pas_id, void *mem_region,
-		  phys_addr_t mem_phys, size_t mem_size,
-		  phys_addr_t *reloc_base)
+static int __qcom_mdt_load(struct device *dev, const struct firmware *fw,
+			   const char *firmware, int pas_id, void *mem_region,
+			   phys_addr_t mem_phys, size_t mem_size,
+			   phys_addr_t *reloc_base, bool pas_init)
 {
 	const struct elf32_phdr *phdrs;
 	const struct elf32_phdr *phdr;
@@ -121,10 +108,12 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 	if (!fw_name)
 		return -ENOMEM;
 
-	ret = qcom_scm_pas_init_image(pas_id, fw->data, fw->size);
-	if (ret) {
-		dev_err(dev, "invalid firmware metadata\n");
-		goto out;
+	if (pas_init) {
+		ret = qcom_scm_pas_init_image(pas_id, fw->data, fw->size);
+		if (ret) {
+			dev_err(dev, "invalid firmware metadata\n");
+			goto out;
+		}
 	}
 
 	for (i = 0; i < ehdr->e_phnum; i++) {
@@ -144,10 +133,13 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 	}
 
 	if (relocate) {
-		ret = qcom_scm_pas_mem_setup(pas_id, mem_phys, max_addr - min_addr);
-		if (ret) {
-			dev_err(dev, "unable to setup relocation\n");
-			goto out;
+		if (pas_init) {
+			ret = qcom_scm_pas_mem_setup(pas_id, mem_phys,
+						     max_addr - min_addr);
+			if (ret) {
+				dev_err(dev, "unable to setup relocation\n");
+				goto out;
+			}
 		}
 
 		/*
@@ -202,7 +194,52 @@ out:
 
 	return ret;
 }
+
+/**
+ * qcom_mdt_load() - load the firmware which header is loaded as fw
+ * @dev:	device handle to associate resources with
+ * @fw:		firmware object for the mdt file
+ * @firmware:	name of the firmware, for construction of segment file names
+ * @pas_id:	PAS identifier
+ * @mem_region:	allocated memory region to load firmware into
+ * @mem_phys:	physical address of allocated memory region
+ * @mem_size:	size of the allocated memory region
+ * @reloc_base:	adjusted physical address after relocation
+ *
+ * Returns 0 on success, negative errno otherwise.
+ */
+int qcom_mdt_load(struct device *dev, const struct firmware *fw,
+		  const char *firmware, int pas_id, void *mem_region,
+		  phys_addr_t mem_phys, size_t mem_size,
+		  phys_addr_t *reloc_base)
+{
+	return __qcom_mdt_load(dev, fw, firmware, pas_id, mem_region, mem_phys,
+			       mem_size, reloc_base, true);
+}
 EXPORT_SYMBOL_GPL(qcom_mdt_load);
 
+/**
+ * qcom_mdt_load_no_init() - load the firmware which header is loaded as fw
+ * @dev:	device handle to associate resources with
+ * @fw:		firmware object for the mdt file
+ * @firmware:	name of the firmware, for construction of segment file names
+ * @pas_id:	PAS identifier
+ * @mem_region:	allocated memory region to load firmware into
+ * @mem_phys:	physical address of allocated memory region
+ * @mem_size:	size of the allocated memory region
+ * @reloc_base:	adjusted physical address after relocation
+ *
+ * Returns 0 on success, negative errno otherwise.
+ */
+int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw,
+			  const char *firmware, int pas_id,
+			  void *mem_region, phys_addr_t mem_phys,
+			  size_t mem_size, phys_addr_t *reloc_base)
+{
+	return __qcom_mdt_load(dev, fw, firmware, pas_id, mem_region, mem_phys,
+			       mem_size, reloc_base, false);
+}
+EXPORT_SYMBOL_GPL(qcom_mdt_load_no_init);
+
 MODULE_DESCRIPTION("Firmware parser for Qualcomm MDT format");
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h
index 5b98bbdabc25..944b06aefb0f 100644
--- a/include/linux/soc/qcom/mdt_loader.h
+++ b/include/linux/soc/qcom/mdt_loader.h
@@ -17,4 +17,8 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 		  phys_addr_t mem_phys, size_t mem_size,
 		  phys_addr_t *reloc_base);
 
+int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw,
+			  const char *fw_name, int pas_id, void *mem_region,
+			  phys_addr_t mem_phys, size_t mem_size,
+			  phys_addr_t *reloc_base);
 #endif
-- 
cgit v1.2.3


From f7859590d97614815b35a755c8213dfb8f2766bd Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 5 Jun 2018 19:20:39 -0400
Subject: audit: eliminate audit_enabled magic number comparison

Remove comparison of audit_enabled to magic numbers outside of audit.

Related: https://github.com/linux-audit/audit-kernel/issues/86

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 drivers/tty/tty_audit.c      | 2 +-
 include/linux/audit.h        | 5 ++++-
 include/net/xfrm.h           | 2 +-
 kernel/audit.c               | 3 ---
 net/netfilter/xt_AUDIT.c     | 2 +-
 net/netlabel/netlabel_user.c | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c
index e30aa6bf9ff9..50f567b6a66e 100644
--- a/drivers/tty/tty_audit.c
+++ b/drivers/tty/tty_audit.c
@@ -92,7 +92,7 @@ static void tty_audit_buf_push(struct tty_audit_buf *buf)
 {
 	if (buf->valid == 0)
 		return;
-	if (audit_enabled == 0) {
+	if (audit_enabled == AUDIT_OFF) {
 		buf->valid = 0;
 		return;
 	}
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 69c78477590b..9334fbef7bae 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -117,6 +117,9 @@ struct filename;
 
 extern void audit_log_session_info(struct audit_buffer *ab);
 
+#define AUDIT_OFF	0
+#define AUDIT_ON	1
+#define AUDIT_LOCKED	2
 #ifdef CONFIG_AUDIT
 /* These are defined in audit.c */
 				/* Public API */
@@ -202,7 +205,7 @@ static inline int audit_log_task_context(struct audit_buffer *ab)
 static inline void audit_log_task_info(struct audit_buffer *ab,
 				       struct task_struct *tsk)
 { }
-#define audit_enabled 0
+#define audit_enabled AUDIT_OFF
 #endif /* CONFIG_AUDIT */
 
 #ifdef CONFIG_AUDIT_COMPAT_GENERIC
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 557122846e0e..f7f297727ed8 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -735,7 +735,7 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op)
 {
 	struct audit_buffer *audit_buf = NULL;
 
-	if (audit_enabled == 0)
+	if (audit_enabled == AUDIT_OFF)
 		return NULL;
 	audit_buf = audit_log_start(audit_context(), GFP_ATOMIC,
 				    AUDIT_MAC_IPSEC_EVENT);
diff --git a/kernel/audit.c b/kernel/audit.c
index 5c0a1d7b0c7b..0f3222e4edde 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -83,9 +83,6 @@
 #define AUDIT_INITIALIZED	1
 static int	audit_initialized;
 
-#define AUDIT_OFF	0
-#define AUDIT_ON	1
-#define AUDIT_LOCKED	2
 u32		audit_enabled = AUDIT_OFF;
 bool		audit_ever_enabled = !!AUDIT_OFF;
 
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index f368ee6741db..af883f1b64f9 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -72,7 +72,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct audit_buffer *ab;
 	int fam = -1;
 
-	if (audit_enabled == 0)
+	if (audit_enabled == AUDIT_OFF)
 		goto errout;
 	ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
 	if (ab == NULL)
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 2f328af91a52..4676f5bb16ae 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -101,7 +101,7 @@ struct audit_buffer *netlbl_audit_start_common(int type,
 	char *secctx;
 	u32 secctx_len;
 
-	if (audit_enabled == 0)
+	if (audit_enabled == AUDIT_OFF)
 		return NULL;
 
 	audit_buf = audit_log_start(audit_context(), GFP_ATOMIC, type);
-- 
cgit v1.2.3


From 6362f0a290023bafd7f991089e81dd9278f154b8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 19 Jun 2018 10:12:48 -0600
Subject: libata: add command iterator helpers

Now that we have the internal tag as a special (higher) value tag,
it gets a bit tricky to iterate the internal commands as some loops
will exceed ATA_MAX_QUEUE. Add explicit helpers for iterating pending
commands, both inflight and internal.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/libata.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8b8946dd63b9..a2257e380789 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1495,6 +1495,29 @@ static inline bool ata_tag_valid(unsigned int tag)
 	return tag < ATA_MAX_QUEUE || ata_tag_internal(tag);
 }
 
+#define __ata_qc_for_each(ap, qc, tag, max_tag, fn)		\
+	for ((tag) = 0; (tag) < (max_tag) &&			\
+	     ({ qc = fn((ap), (tag)); 1; }); (tag)++)		\
+
+/*
+ * Internal use only, iterate commands ignoring error handling and
+ * status of 'qc'.
+ */
+#define ata_qc_for_each_raw(ap, qc, tag)					\
+	__ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, __ata_qc_from_tag)
+
+/*
+ * Iterate all potential commands that can be queued
+ */
+#define ata_qc_for_each(ap, qc, tag)					\
+	__ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, ata_qc_from_tag)
+
+/*
+ * Like ata_qc_for_each, but with the internal tag included
+ */
+#define ata_qc_for_each_with_internal(ap, qc, tag)			\
+	__ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE + 1, ata_qc_from_tag)
+
 /*
  * device helpers
  */
-- 
cgit v1.2.3


From 38b7ca927d6a12bf4466be22a90b7d998f5ae69a Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 8 Mar 2018 14:36:27 +0200
Subject: net/mlx5: Expose DEVX specification

This patch updates the mlx5_ifc structures and
command interface to support DEVX.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c |  4 ++
 include/linux/mlx5/device.h                   |  3 ++
 include/linux/mlx5/mlx5_ifc.h                 | 67 ++++++++++++++++++++++++++-
 3 files changed, 73 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index b99d6df3905b..d07f24de8fa3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -310,6 +310,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
 	case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
 	case MLX5_CMD_OP_FPGA_DESTROY_QP:
+	case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -427,6 +428,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_FPGA_MODIFY_QP:
 	case MLX5_CMD_OP_FPGA_QUERY_QP:
 	case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
+	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -599,6 +601,8 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
 	MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
 	MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
+	MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 02f72ebf31a7..f8671c0a43aa 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1071,6 +1071,9 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_GEN(mdev, cap) \
 	MLX5_GET(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
 
+#define MLX5_CAP_GEN_64(mdev, cap) \
+	MLX5_GET64(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+
 #define MLX5_CAP_GEN_MAX(mdev, cap) \
 	MLX5_GET(cmd_hca_cap, mdev->caps.hca_max[MLX5_CAP_GENERAL], cap)
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 27134c4fcb76..f810772e80c0 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -75,6 +75,15 @@ enum {
 	MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
 };
 
+enum {
+	MLX5_GENERAL_OBJ_TYPES_CAP_UCTX = (1ULL << 4),
+	MLX5_GENERAL_OBJ_TYPES_CAP_UMEM = (1ULL << 5),
+};
+
+enum {
+	MLX5_OBJ_TYPE_UCTX = 0x0004,
+};
+
 enum {
 	MLX5_CMD_OP_QUERY_HCA_CAP                 = 0x100,
 	MLX5_CMD_OP_QUERY_ADAPTER                 = 0x101,
@@ -242,6 +251,8 @@ enum {
 	MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
 	MLX5_CMD_OP_FPGA_DESTROY_QP               = 0x963,
 	MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS        = 0x964,
+	MLX5_CMD_OP_CREATE_GENERAL_OBJECT         = 0xa00,
+	MLX5_CMD_OP_DESTROY_GENERAL_OBJECT        = 0xa03,
 	MLX5_CMD_OP_MAX
 };
 
@@ -1113,7 +1124,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_3f8[0x3];
 	u8         log_max_current_uc_list[0x5];
 
-	u8         reserved_at_400[0x80];
+	u8         general_obj_types[0x40];
+
+	u8         reserved_at_440[0x40];
 
 	u8         reserved_at_480[0x3];
 	u8         log_max_l2_table[0x5];
@@ -9115,4 +9128,56 @@ struct mlx5_ifc_dealloc_memic_out_bits {
 	u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
+	u8         opcode[0x10];
+	u8         uid[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         obj_type[0x10];
+
+	u8         obj_id[0x20];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         obj_id[0x20];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_umem_bits {
+	u8         modify_field_select[0x40];
+
+	u8         reserved_at_40[0x5b];
+	u8         log_page_size[0x5];
+
+	u8         page_offset[0x20];
+
+	u8         num_of_mtt[0x40];
+
+	struct mlx5_ifc_mtt_bits  mtt[0];
+};
+
+struct mlx5_ifc_uctx_bits {
+	u8         modify_field_select[0x40];
+
+	u8         reserved_at_40[0x1c0];
+};
+
+struct mlx5_ifc_create_umem_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits   hdr;
+	struct mlx5_ifc_umem_bits                     umem;
+};
+
+struct mlx5_ifc_create_uctx_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits   hdr;
+	struct mlx5_ifc_uctx_bits                     uctx;
+};
+
 #endif /* MLX5_IFC_H */
-- 
cgit v1.2.3


From e662e14d801b01a976e58bc3f8d9fe49b9fcec3a Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 13:00:02 +0300
Subject: IB/mlx5: Add DEVX support for modify and query commands

Add support in DEVX for modify and query commands, the required lock is
taken (i.e. READ/WRITE) by the KABI infrastructure accordingly.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c             | 350 +++++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c |   5 +
 include/linux/mlx5/mlx5_ifc.h                 |   3 +
 include/uapi/rdma/mlx5_user_ioctl_cmds.h      |  14 ++
 4 files changed, 370 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 87116a3b7916..bb5e40a9edd8 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -73,6 +73,161 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
 	mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
 
+static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+	u32 obj_id;
+
+	switch (opcode) {
+	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+		obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+		break;
+	case MLX5_CMD_OP_QUERY_MKEY:
+		obj_id = MLX5_GET(query_mkey_in, in, mkey_index);
+		break;
+	case MLX5_CMD_OP_QUERY_CQ:
+		obj_id = MLX5_GET(query_cq_in, in, cqn);
+		break;
+	case MLX5_CMD_OP_MODIFY_CQ:
+		obj_id = MLX5_GET(modify_cq_in, in, cqn);
+		break;
+	case MLX5_CMD_OP_QUERY_SQ:
+		obj_id = MLX5_GET(query_sq_in, in, sqn);
+		break;
+	case MLX5_CMD_OP_MODIFY_SQ:
+		obj_id = MLX5_GET(modify_sq_in, in, sqn);
+		break;
+	case MLX5_CMD_OP_QUERY_RQ:
+		obj_id = MLX5_GET(query_rq_in, in, rqn);
+		break;
+	case MLX5_CMD_OP_MODIFY_RQ:
+		obj_id = MLX5_GET(modify_rq_in, in, rqn);
+		break;
+	case MLX5_CMD_OP_QUERY_RMP:
+		obj_id = MLX5_GET(query_rmp_in, in, rmpn);
+		break;
+	case MLX5_CMD_OP_MODIFY_RMP:
+		obj_id = MLX5_GET(modify_rmp_in, in, rmpn);
+		break;
+	case MLX5_CMD_OP_QUERY_RQT:
+		obj_id = MLX5_GET(query_rqt_in, in, rqtn);
+		break;
+	case MLX5_CMD_OP_MODIFY_RQT:
+		obj_id = MLX5_GET(modify_rqt_in, in, rqtn);
+		break;
+	case MLX5_CMD_OP_QUERY_TIR:
+		obj_id = MLX5_GET(query_tir_in, in, tirn);
+		break;
+	case MLX5_CMD_OP_MODIFY_TIR:
+		obj_id = MLX5_GET(modify_tir_in, in, tirn);
+		break;
+	case MLX5_CMD_OP_QUERY_TIS:
+		obj_id = MLX5_GET(query_tis_in, in, tisn);
+		break;
+	case MLX5_CMD_OP_MODIFY_TIS:
+		obj_id = MLX5_GET(modify_tis_in, in, tisn);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+		obj_id = MLX5_GET(query_flow_table_in, in, table_id);
+		break;
+	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+		obj_id = MLX5_GET(modify_flow_table_in, in, table_id);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+		obj_id = MLX5_GET(query_flow_group_in, in, group_id);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+		obj_id = MLX5_GET(query_fte_in, in, flow_index);
+		break;
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+		obj_id = MLX5_GET(set_fte_in, in, flow_index);
+		break;
+	case MLX5_CMD_OP_QUERY_Q_COUNTER:
+		obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+		obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id);
+		break;
+	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+		obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+		break;
+	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+		obj_id = MLX5_GET(query_scheduling_element_in, in,
+				  scheduling_element_id);
+		break;
+	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+		obj_id = MLX5_GET(modify_scheduling_element_in, in,
+				  scheduling_element_id);
+		break;
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+		obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+		break;
+	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+		obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index);
+		break;
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+		obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+		break;
+	case MLX5_CMD_OP_QUERY_QP:
+		obj_id = MLX5_GET(query_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_RST2INIT_QP:
+		obj_id = MLX5_GET(rst2init_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_INIT2RTR_QP:
+		obj_id = MLX5_GET(init2rtr_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_RTR2RTS_QP:
+		obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_RTS2RTS_QP:
+		obj_id = MLX5_GET(rts2rts_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_SQERR2RTS_QP:
+		obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_2ERR_QP:
+		obj_id = MLX5_GET(qp_2err_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_2RST_QP:
+		obj_id = MLX5_GET(qp_2rst_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_QUERY_DCT:
+		obj_id = MLX5_GET(query_dct_in, in, dctn);
+		break;
+	case MLX5_CMD_OP_QUERY_XRQ:
+		obj_id = MLX5_GET(query_xrq_in, in, xrqn);
+		break;
+	case MLX5_CMD_OP_QUERY_XRC_SRQ:
+		obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn);
+		break;
+	case MLX5_CMD_OP_ARM_XRC_SRQ:
+		obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn);
+		break;
+	case MLX5_CMD_OP_QUERY_SRQ:
+		obj_id = MLX5_GET(query_srq_in, in, srqn);
+		break;
+	case MLX5_CMD_OP_ARM_RQ:
+		obj_id = MLX5_GET(arm_rq_in, in, srq_number);
+		break;
+	case MLX5_CMD_OP_DRAIN_DCT:
+	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+		obj_id = MLX5_GET(drain_dct_in, in, dctn);
+		break;
+	case MLX5_CMD_OP_ARM_XRQ:
+		obj_id = MLX5_GET(arm_xrq_in, in, xrqn);
+		break;
+	default:
+		return false;
+	}
+
+	if (obj_id == obj->obj_id)
+		return true;
+
+	return false;
+}
+
 static bool devx_is_obj_create_cmd(const void *in)
 {
 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
@@ -118,7 +273,83 @@ static bool devx_is_obj_create_cmd(const void *in)
 	}
 }
 
-static bool devx_is_general_cmd(const void *in)
+static bool devx_is_obj_modify_cmd(const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_MODIFY_CQ:
+	case MLX5_CMD_OP_MODIFY_RMP:
+	case MLX5_CMD_OP_MODIFY_SQ:
+	case MLX5_CMD_OP_MODIFY_RQ:
+	case MLX5_CMD_OP_MODIFY_RQT:
+	case MLX5_CMD_OP_MODIFY_TIR:
+	case MLX5_CMD_OP_MODIFY_TIS:
+	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_RST2INIT_QP:
+	case MLX5_CMD_OP_INIT2RTR_QP:
+	case MLX5_CMD_OP_RTR2RTS_QP:
+	case MLX5_CMD_OP_RTS2RTS_QP:
+	case MLX5_CMD_OP_SQERR2RTS_QP:
+	case MLX5_CMD_OP_2ERR_QP:
+	case MLX5_CMD_OP_2RST_QP:
+	case MLX5_CMD_OP_ARM_XRC_SRQ:
+	case MLX5_CMD_OP_ARM_RQ:
+	case MLX5_CMD_OP_DRAIN_DCT:
+	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+	case MLX5_CMD_OP_ARM_XRQ:
+		return true;
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+	{
+		u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+
+		if (op_mod == 1)
+			return true;
+		return false;
+	}
+	default:
+		return false;
+	}
+}
+
+static bool devx_is_obj_query_cmd(const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_QUERY_MKEY:
+	case MLX5_CMD_OP_QUERY_CQ:
+	case MLX5_CMD_OP_QUERY_RMP:
+	case MLX5_CMD_OP_QUERY_SQ:
+	case MLX5_CMD_OP_QUERY_RQ:
+	case MLX5_CMD_OP_QUERY_RQT:
+	case MLX5_CMD_OP_QUERY_TIR:
+	case MLX5_CMD_OP_QUERY_TIS:
+	case MLX5_CMD_OP_QUERY_Q_COUNTER:
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_QUERY_QP:
+	case MLX5_CMD_OP_QUERY_SRQ:
+	case MLX5_CMD_OP_QUERY_XRC_SRQ:
+	case MLX5_CMD_OP_QUERY_DCT:
+	case MLX5_CMD_OP_QUERY_XRQ:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool devx_is_general_cmd(void *in)
 {
 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
 
@@ -430,6 +661,89 @@ obj_free:
 	return err;
 }
 
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(struct ib_device *ib_dev,
+				   struct ib_uverbs_file *file,
+				   struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
+	int cmd_out_len = uverbs_attr_get_len(attrs,
+					MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+							  MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
+	void *cmd_out;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	if (!devx_is_obj_modify_cmd(cmd_in))
+		return -EINVAL;
+
+	if (!devx_is_valid_obj_id(uobj->object, cmd_in))
+		return -EINVAL;
+
+	cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
+	if (!cmd_out)
+		return -ENOMEM;
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd_in,
+			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
+			    cmd_out, cmd_out_len);
+	if (err)
+		goto other_cmd_free;
+
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+			     cmd_out, cmd_out_len);
+
+other_cmd_free:
+	kvfree(cmd_out);
+	return err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(struct ib_device *ib_dev,
+				   struct ib_uverbs_file *file,
+				   struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
+	int cmd_out_len = uverbs_attr_get_len(attrs,
+					      MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+							  MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
+	void *cmd_out;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	if (!devx_is_obj_query_cmd(cmd_in))
+		return -EINVAL;
+
+	if (!devx_is_valid_obj_id(uobj->object, cmd_in))
+		return -EINVAL;
+
+	cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
+	if (!cmd_out)
+		return -ENOMEM;
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd_in,
+			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
+			    cmd_out, cmd_out_len);
+	if (err)
+		goto other_cmd_free;
+
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, cmd_out, cmd_out_len);
+
+other_cmd_free:
+	kvfree(cmd_out);
+	return err;
+}
+
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
 	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
 			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
@@ -463,13 +777,45 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
 			 UVERBS_ACCESS_DESTROY,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
+			 MLX5_IB_OBJECT_DEVX_OBJ,
+			 UVERBS_ACCESS_WRITE,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+					UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	&UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+				UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+				UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+			 MLX5_IB_OBJECT_DEVX_OBJ,
+			 UVERBS_ACCESS_READ,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+					UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	&UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+				UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+				UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+
 static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER));
 
 static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
 	&UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
-		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY));
+		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
+		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
+		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
 
 static DECLARE_UVERBS_OBJECT_TREE(devx_objects,
 	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d07f24de8fa3..9d03a202abb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -429,6 +429,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_FPGA_QUERY_QP:
 	case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
 	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -603,6 +605,9 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
 	MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
 	MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f810772e80c0..ac24ed87c67e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -246,12 +246,15 @@ enum {
 	MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
 	MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
 	MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
+	MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
 	MLX5_CMD_OP_FPGA_CREATE_QP                = 0x960,
 	MLX5_CMD_OP_FPGA_MODIFY_QP                = 0x961,
 	MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
 	MLX5_CMD_OP_FPGA_DESTROY_QP               = 0x963,
 	MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS        = 0x964,
 	MLX5_CMD_OP_CREATE_GENERAL_OBJECT         = 0xa00,
+	MLX5_CMD_OP_MODIFY_GENERAL_OBJECT         = 0xa01,
+	MLX5_CMD_OP_QUERY_GENERAL_OBJECT          = 0xa02,
 	MLX5_CMD_OP_DESTROY_GENERAL_OBJECT        = 0xa03,
 	MLX5_CMD_OP_MAX
 };
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 8d285f4555cd..97d216b8d053 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -64,9 +64,23 @@ enum mlx5_ib_devx_obj_destroy_attrs {
 	MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
 };
 
+enum mlx5_ib_devx_obj_modify_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_query_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+};
+
 enum mlx5_ib_devx_obj_methods {
 	MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT),
 	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
 };
 
 enum mlx5_ib_devx_objects {
-- 
cgit v1.2.3


From aeae94579caf77406a8a235ea33fdb67abe9a57e Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 13:00:04 +0300
Subject: IB/mlx5: Add DEVX support for memory registration

Add support to register a memory with the firmware via the DEVX
interface.

The driver translates a given user address to ib_umem then it will
register the physical addresses with the firmware and get a unique id
for this registration to be used for this virtual address.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c        | 199 ++++++++++++++++++++++++++++++-
 include/linux/mlx5/mlx5_ifc.h            |   1 +
 include/uapi/rdma/mlx5_user_ioctl_cmds.h |  18 +++
 3 files changed, 217 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 9b1804eb9924..162321f486eb 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -24,6 +24,22 @@ struct devx_obj {
 	u32			dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
 };
 
+struct devx_umem {
+	struct mlx5_core_dev		*mdev;
+	struct ib_umem			*umem;
+	u32				page_offset;
+	int				page_shift;
+	int				ncont;
+	u32				dinlen;
+	u32				dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
+};
+
+struct devx_umem_reg_cmd {
+	void				*in;
+	u32				inlen;
+	u32				out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+};
+
 static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
 {
 	return to_mucontext(ib_uverbs_get_ucontext(file));
@@ -788,6 +804,181 @@ other_cmd_free:
 	return err;
 }
 
+static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
+			 struct uverbs_attr_bundle *attrs,
+			 struct devx_umem *obj)
+{
+	u64 addr;
+	size_t size;
+	int access;
+	int npages;
+	int err;
+	u32 page_mask;
+
+	if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
+	    uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN) ||
+	    uverbs_copy_from(&access, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS))
+		return -EFAULT;
+
+	err = ib_check_mr_access(access);
+	if (err)
+		return err;
+
+	obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
+	if (IS_ERR(obj->umem))
+		return PTR_ERR(obj->umem);
+
+	mlx5_ib_cont_pages(obj->umem, obj->umem->address,
+			   MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
+			   &obj->page_shift, &obj->ncont, NULL);
+
+	if (!npages) {
+		ib_umem_release(obj->umem);
+		return -EINVAL;
+	}
+
+	page_mask = (1 << obj->page_shift) - 1;
+	obj->page_offset = obj->umem->address & page_mask;
+
+	return 0;
+}
+
+static int devx_umem_reg_cmd_alloc(struct devx_umem *obj,
+				   struct devx_umem_reg_cmd *cmd)
+{
+	cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
+		    (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
+	cmd->in = kvzalloc(cmd->inlen, GFP_KERNEL);
+	return cmd->in ? 0 : -ENOMEM;
+}
+
+static void devx_umem_reg_cmd_free(struct devx_umem_reg_cmd *cmd)
+{
+	kvfree(cmd->in);
+}
+
+static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
+				    struct devx_umem *obj,
+				    struct devx_umem_reg_cmd *cmd)
+{
+	void *umem;
+	__be64 *mtt;
+
+	umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
+	mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM);
+	MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
+	MLX5_SET(umem, umem, log_page_size, obj->page_shift -
+					    MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET(umem, umem, page_offset, obj->page_offset);
+	mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
+			     (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
+			     MLX5_IB_MTT_READ);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(struct ib_device *ib_dev,
+				 struct ib_uverbs_file *file,
+				 struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	struct devx_umem_reg_cmd cmd;
+	struct devx_umem *obj;
+	struct ib_uobject *uobj;
+	u32 obj_id;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
+	obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
+	if (err)
+		goto err_obj_free;
+
+	err = devx_umem_reg_cmd_alloc(obj, &cmd);
+	if (err)
+		goto err_umem_release;
+
+	devx_umem_reg_cmd_build(dev, obj, &cmd);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
+			    sizeof(cmd.out));
+	if (err)
+		goto err_umem_reg_cmd_free;
+
+	obj->mdev = dev->mdev;
+	uobj->object = obj;
+	devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id));
+	if (err)
+		goto err_umem_destroy;
+
+	devx_umem_reg_cmd_free(&cmd);
+
+	return 0;
+
+err_umem_destroy:
+	mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out));
+err_umem_reg_cmd_free:
+	devx_umem_reg_cmd_free(&cmd);
+err_umem_release:
+	ib_umem_release(obj->umem);
+err_obj_free:
+	kfree(obj);
+	return err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_DEREG)(struct ib_device *ib_dev,
+				   struct ib_uverbs_file *file,
+				   struct uverbs_attr_bundle *attrs)
+{
+	return 0;
+}
+
+static int devx_umem_cleanup(struct ib_uobject *uobject,
+			     enum rdma_remove_reason why)
+{
+	struct devx_umem *obj = uobject->object;
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	int err;
+
+	err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+	if (err && why == RDMA_REMOVE_DESTROY)
+		return err;
+
+	ib_umem_release(obj->umem);
+	kfree(obj);
+	return 0;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG,
+	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
+			 MLX5_IB_OBJECT_DEVX_UMEM,
+			 UVERBS_ACCESS_NEW,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, UVERBS_ATTR_TYPE(u64),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, UVERBS_ATTR_TYPE(u64),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, UVERBS_ATTR_TYPE(u32),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32),
+			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
+			 MLX5_IB_OBJECT_DEVX_UMEM,
+			 UVERBS_ACCESS_DESTROY,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR,
 	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32),
 			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
@@ -868,6 +1059,12 @@ static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
 
+static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
+	&UVERBS_TYPE_ALLOC_IDR(0, devx_umem_cleanup),
+	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
+	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
+
 static DECLARE_UVERBS_OBJECT_TREE(devx_objects,
 	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
-	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ));
+	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
+	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ac24ed87c67e..00b539303f5d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -82,6 +82,7 @@ enum {
 
 enum {
 	MLX5_OBJ_TYPE_UCTX = 0x0004,
+	MLX5_OBJ_TYPE_UMEM = 0x0005,
 };
 
 enum {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 1252695cd94b..dbc549a7bf50 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -89,9 +89,27 @@ enum mlx5_ib_devx_obj_methods {
 	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
 };
 
+enum mlx5_ib_devx_umem_reg_attrs {
+	MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+};
+
+enum mlx5_ib_devx_umem_dereg_attrs {
+	MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_umem_methods {
+	MLX5_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+};
+
 enum mlx5_ib_devx_objects {
 	MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
 	MLX5_IB_OBJECT_DEVX_OBJ,
+	MLX5_IB_OBJECT_DEVX_UMEM,
 };
 
 #endif
-- 
cgit v1.2.3


From 9fba738a53dda20e748d6ee240b6c017c8146b4b Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 19 Jun 2018 16:41:41 +0200
Subject: clk: add duty cycle support

Add the possibility to apply and query the clock signal duty cycle ratio.

This is useful when the duty cycle of the clock signal depends on some
other parameters controlled by the clock framework.

For example, the duty cycle of a divider may depends on the raw divider
setting (ratio = N / div) , which is controlled by the CCF. In such case,
going through the pwm framework to control the duty cycle ratio of this
clock would be a burden.

A clock provider is not required to implement the operation to set and get
the duty cycle. If it does not implement .get_duty_cycle(), the ratio is
assumed to be 50%.

This change also adds a new flag, CLK_DUTY_CYCLE_PARENT. This flag should
be used to indicate that a clock, such as gates and muxes, may inherit
the duty cycle ratio of its parent clock. If a clock does not provide a
get_duty_cycle() callback and has CLK_DUTY_CYCLE_PARENT, then the call
will be directly forwarded to its parent clock, if any. For
set_duty_cycle(), the clock should also have CLK_SET_RATE_PARENT for the
call to be forwarded

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/20180619144141.8506-1-jbrunet@baylibre.com
---
 drivers/clk/clk.c            | 199 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/clk-provider.h |  26 ++++++
 include/linux/clk.h          |  33 +++++++
 include/trace/events/clk.h   |  36 ++++++++
 4 files changed, 289 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 9760b526ca31..b0a2719d86f3 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -68,6 +68,7 @@ struct clk_core {
 	unsigned long		max_rate;
 	unsigned long		accuracy;
 	int			phase;
+	struct clk_duty		duty;
 	struct hlist_head	children;
 	struct hlist_node	child_node;
 	struct hlist_head	clks;
@@ -2402,6 +2403,172 @@ int clk_get_phase(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_get_phase);
 
+static void clk_core_reset_duty_cycle_nolock(struct clk_core *core)
+{
+	/* Assume a default value of 50% */
+	core->duty.num = 1;
+	core->duty.den = 2;
+}
+
+static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core);
+
+static int clk_core_update_duty_cycle_nolock(struct clk_core *core)
+{
+	struct clk_duty *duty = &core->duty;
+	int ret = 0;
+
+	if (!core->ops->get_duty_cycle)
+		return clk_core_update_duty_cycle_parent_nolock(core);
+
+	ret = core->ops->get_duty_cycle(core->hw, duty);
+	if (ret)
+		goto reset;
+
+	/* Don't trust the clock provider too much */
+	if (duty->den == 0 || duty->num > duty->den) {
+		ret = -EINVAL;
+		goto reset;
+	}
+
+	return 0;
+
+reset:
+	clk_core_reset_duty_cycle_nolock(core);
+	return ret;
+}
+
+static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core)
+{
+	int ret = 0;
+
+	if (core->parent &&
+	    core->flags & CLK_DUTY_CYCLE_PARENT) {
+		ret = clk_core_update_duty_cycle_nolock(core->parent);
+		memcpy(&core->duty, &core->parent->duty, sizeof(core->duty));
+	} else {
+		clk_core_reset_duty_cycle_nolock(core);
+	}
+
+	return ret;
+}
+
+static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core,
+						 struct clk_duty *duty);
+
+static int clk_core_set_duty_cycle_nolock(struct clk_core *core,
+					  struct clk_duty *duty)
+{
+	int ret;
+
+	lockdep_assert_held(&prepare_lock);
+
+	if (clk_core_rate_is_protected(core))
+		return -EBUSY;
+
+	trace_clk_set_duty_cycle(core, duty);
+
+	if (!core->ops->set_duty_cycle)
+		return clk_core_set_duty_cycle_parent_nolock(core, duty);
+
+	ret = core->ops->set_duty_cycle(core->hw, duty);
+	if (!ret)
+		memcpy(&core->duty, duty, sizeof(*duty));
+
+	trace_clk_set_duty_cycle_complete(core, duty);
+
+	return ret;
+}
+
+static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core,
+						 struct clk_duty *duty)
+{
+	int ret = 0;
+
+	if (core->parent &&
+	    core->flags & (CLK_DUTY_CYCLE_PARENT | CLK_SET_RATE_PARENT)) {
+		ret = clk_core_set_duty_cycle_nolock(core->parent, duty);
+		memcpy(&core->duty, &core->parent->duty, sizeof(core->duty));
+	}
+
+	return ret;
+}
+
+/**
+ * clk_set_duty_cycle - adjust the duty cycle ratio of a clock signal
+ * @clk: clock signal source
+ * @num: numerator of the duty cycle ratio to be applied
+ * @den: denominator of the duty cycle ratio to be applied
+ *
+ * Apply the duty cycle ratio if the ratio is valid and the clock can
+ * perform this operation
+ *
+ * Returns (0) on success, a negative errno otherwise.
+ */
+int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den)
+{
+	int ret;
+	struct clk_duty duty;
+
+	if (!clk)
+		return 0;
+
+	/* sanity check the ratio */
+	if (den == 0 || num > den)
+		return -EINVAL;
+
+	duty.num = num;
+	duty.den = den;
+
+	clk_prepare_lock();
+
+	if (clk->exclusive_count)
+		clk_core_rate_unprotect(clk->core);
+
+	ret = clk_core_set_duty_cycle_nolock(clk->core, &duty);
+
+	if (clk->exclusive_count)
+		clk_core_rate_protect(clk->core);
+
+	clk_prepare_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(clk_set_duty_cycle);
+
+static int clk_core_get_scaled_duty_cycle(struct clk_core *core,
+					  unsigned int scale)
+{
+	struct clk_duty *duty = &core->duty;
+	int ret;
+
+	clk_prepare_lock();
+
+	ret = clk_core_update_duty_cycle_nolock(core);
+	if (!ret)
+		ret = mult_frac(scale, duty->num, duty->den);
+
+	clk_prepare_unlock();
+
+	return ret;
+}
+
+/**
+ * clk_get_scaled_duty_cycle - return the duty cycle ratio of a clock signal
+ * @clk: clock signal source
+ * @scale: scaling factor to be applied to represent the ratio as an integer
+ *
+ * Returns the duty cycle ratio of a clock node multiplied by the provided
+ * scaling factor, or negative errno on error.
+ */
+int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale)
+{
+	if (!clk)
+		return 0;
+
+	return clk_core_get_scaled_duty_cycle(clk->core, scale);
+}
+EXPORT_SYMBOL_GPL(clk_get_scaled_duty_cycle);
+
 /**
  * clk_is_match - check if two clk's point to the same hardware clock
  * @p: clk compared against q
@@ -2455,12 +2622,13 @@ static void clk_summary_show_one(struct seq_file *s, struct clk_core *c,
 	if (!c)
 		return;
 
-	seq_printf(s, "%*s%-*s %7d %8d %8d %11lu %10lu %-3d\n",
+	seq_printf(s, "%*s%-*s %7d %8d %8d %11lu %10lu %5d %6d\n",
 		   level * 3 + 1, "",
 		   30 - level * 3, c->name,
 		   c->enable_count, c->prepare_count, c->protect_count,
 		   clk_core_get_rate(c), clk_core_get_accuracy(c),
-		   clk_core_get_phase(c));
+		   clk_core_get_phase(c),
+		   clk_core_get_scaled_duty_cycle(c, 100000));
 }
 
 static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c,
@@ -2482,9 +2650,9 @@ static int clk_summary_show(struct seq_file *s, void *data)
 	struct clk_core *c;
 	struct hlist_head **lists = (struct hlist_head **)s->private;
 
-	seq_puts(s, "                                 enable  prepare  protect                               \n");
-	seq_puts(s, "   clock                          count    count    count        rate   accuracy   phase\n");
-	seq_puts(s, "----------------------------------------------------------------------------------------\n");
+	seq_puts(s, "                                 enable  prepare  protect                                duty\n");
+	seq_puts(s, "   clock                          count    count    count        rate   accuracy phase  cycle\n");
+	seq_puts(s, "---------------------------------------------------------------------------------------------\n");
 
 	clk_prepare_lock();
 
@@ -2511,6 +2679,8 @@ static void clk_dump_one(struct seq_file *s, struct clk_core *c, int level)
 	seq_printf(s, "\"rate\": %lu,", clk_core_get_rate(c));
 	seq_printf(s, "\"accuracy\": %lu,", clk_core_get_accuracy(c));
 	seq_printf(s, "\"phase\": %d", clk_core_get_phase(c));
+	seq_printf(s, "\"duty_cycle\": %u",
+		   clk_core_get_scaled_duty_cycle(c, 100000));
 }
 
 static void clk_dump_subtree(struct seq_file *s, struct clk_core *c, int level)
@@ -2572,6 +2742,7 @@ static const struct {
 	ENTRY(CLK_SET_RATE_UNGATE),
 	ENTRY(CLK_IS_CRITICAL),
 	ENTRY(CLK_OPS_PARENT_ENABLE),
+	ENTRY(CLK_DUTY_CYCLE_PARENT),
 #undef ENTRY
 };
 
@@ -2610,6 +2781,17 @@ static int possible_parents_show(struct seq_file *s, void *data)
 }
 DEFINE_SHOW_ATTRIBUTE(possible_parents);
 
+static int clk_duty_cycle_show(struct seq_file *s, void *data)
+{
+	struct clk_core *core = s->private;
+	struct clk_duty *duty = &core->duty;
+
+	seq_printf(s, "%u/%u\n", duty->num, duty->den);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(clk_duty_cycle);
+
 static void clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
 {
 	struct dentry *root;
@@ -2628,6 +2810,8 @@ static void clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
 	debugfs_create_u32("clk_enable_count", 0444, root, &core->enable_count);
 	debugfs_create_u32("clk_protect_count", 0444, root, &core->protect_count);
 	debugfs_create_u32("clk_notifier_count", 0444, root, &core->notifier_count);
+	debugfs_create_file("clk_duty_cycle", 0444, root, core,
+			    &clk_duty_cycle_fops);
 
 	if (core->num_parents > 1)
 		debugfs_create_file("clk_possible_parents", 0444, root, core,
@@ -2845,6 +3029,11 @@ static int __clk_core_init(struct clk_core *core)
 	else
 		core->phase = 0;
 
+	/*
+	 * Set clk's duty cycle.
+	 */
+	clk_core_update_duty_cycle_nolock(core);
+
 	/*
 	 * Set clk's rate.  The preferred method is to use .recalc_rate.  For
 	 * simple clocks and lazy developers the default fallback is to use the
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index b7cfa037e593..08b1aa70a38d 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -38,6 +38,8 @@
 #define CLK_IS_CRITICAL		BIT(11) /* do not gate, ever */
 /* parents need enable during gate/ungate, set rate and re-parent */
 #define CLK_OPS_PARENT_ENABLE	BIT(12)
+/* duty cycle call may be forwarded to the parent clock */
+#define CLK_DUTY_CYCLE_PARENT	BIT(13)
 
 struct clk;
 struct clk_hw;
@@ -66,6 +68,17 @@ struct clk_rate_request {
 	struct clk_hw *best_parent_hw;
 };
 
+/**
+ * struct clk_duty - Struture encoding the duty cycle ratio of a clock
+ *
+ * @num:	Numerator of the duty cycle ratio
+ * @den:	Denominator of the duty cycle ratio
+ */
+struct clk_duty {
+	unsigned int num;
+	unsigned int den;
+};
+
 /**
  * struct clk_ops -  Callback operations for hardware clocks; these are to
  * be provided by the clock implementation, and will be called by drivers
@@ -169,6 +182,15 @@ struct clk_rate_request {
  *		by the second argument. Valid values for degrees are
  *		0-359. Return 0 on success, otherwise -EERROR.
  *
+ * @get_duty_cycle: Queries the hardware to get the current duty cycle ratio
+ *              of a clock. Returned values denominator cannot be 0 and must be
+ *              superior or equal to the numerator.
+ *
+ * @set_duty_cycle: Apply the duty cycle ratio to this clock signal specified by
+ *              the numerator (2nd argurment) and denominator (3rd  argument).
+ *              Argument must be a valid ratio (denominator > 0
+ *              and >= numerator) Return 0 on success, otherwise -EERROR.
+ *
  * @init:	Perform platform-specific initialization magic.
  *		This is not not used by any of the basic clock types.
  *		Please consider other ways of solving initialization problems
@@ -218,6 +240,10 @@ struct clk_ops {
 					   unsigned long parent_accuracy);
 	int		(*get_phase)(struct clk_hw *hw);
 	int		(*set_phase)(struct clk_hw *hw, int degrees);
+	int		(*get_duty_cycle)(struct clk_hw *hw,
+					  struct clk_duty *duty);
+	int		(*set_duty_cycle)(struct clk_hw *hw,
+					  struct clk_duty *duty);
 	void		(*init)(struct clk_hw *hw);
 	void		(*debug_init)(struct clk_hw *hw, struct dentry *dentry);
 };
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 0dbd0885b2c2..4f750c481b82 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -141,6 +141,27 @@ int clk_set_phase(struct clk *clk, int degrees);
  */
 int clk_get_phase(struct clk *clk);
 
+/**
+ * clk_set_duty_cycle - adjust the duty cycle ratio of a clock signal
+ * @clk: clock signal source
+ * @num: numerator of the duty cycle ratio to be applied
+ * @den: denominator of the duty cycle ratio to be applied
+ *
+ * Adjust the duty cycle of a clock signal by the specified ratio. Returns 0 on
+ * success, -EERROR otherwise.
+ */
+int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den);
+
+/**
+ * clk_get_duty_cycle - return the duty cycle ratio of a clock signal
+ * @clk: clock signal source
+ * @scale: scaling factor to be applied to represent the ratio as an integer
+ *
+ * Returns the duty cycle ratio multiplied by the scale provided, otherwise
+ * returns -EERROR.
+ */
+int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale);
+
 /**
  * clk_is_match - check if two clk's point to the same hardware clock
  * @p: clk compared against q
@@ -183,6 +204,18 @@ static inline long clk_get_phase(struct clk *clk)
 	return -ENOTSUPP;
 }
 
+static inline int clk_set_duty_cycle(struct clk *clk, unsigned int num,
+				     unsigned int den)
+{
+	return -ENOTSUPP;
+}
+
+static inline unsigned int clk_get_scaled_duty_cycle(struct clk *clk,
+						     unsigned int scale)
+{
+	return 0;
+}
+
 static inline bool clk_is_match(const struct clk *p, const struct clk *q)
 {
 	return p == q;
diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h
index 2cd449328aee..9004ffff7f32 100644
--- a/include/trace/events/clk.h
+++ b/include/trace/events/clk.h
@@ -192,6 +192,42 @@ DEFINE_EVENT(clk_phase, clk_set_phase_complete,
 	TP_ARGS(core, phase)
 );
 
+DECLARE_EVENT_CLASS(clk_duty_cycle,
+
+	TP_PROTO(struct clk_core *core, struct clk_duty *duty),
+
+	TP_ARGS(core, duty),
+
+	TP_STRUCT__entry(
+		__string(        name,           core->name              )
+		__field( unsigned int,           num                     )
+		__field( unsigned int,           den                     )
+	),
+
+	TP_fast_assign(
+		__assign_str(name, core->name);
+		__entry->num = duty->num;
+		__entry->den = duty->den;
+	),
+
+	TP_printk("%s %u/%u", __get_str(name), (unsigned int)__entry->num,
+		  (unsigned int)__entry->den)
+);
+
+DEFINE_EVENT(clk_duty_cycle, clk_set_duty_cycle,
+
+	TP_PROTO(struct clk_core *core, struct clk_duty *duty),
+
+	TP_ARGS(core, duty)
+);
+
+DEFINE_EVENT(clk_duty_cycle, clk_set_duty_cycle_complete,
+
+	TP_PROTO(struct clk_core *core, struct clk_duty *duty),
+
+	TP_ARGS(core, duty)
+);
+
 #endif /* _TRACE_CLK_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 19e0c58f6552638c86395f0717210326fdf14fd2 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Jun 2018 15:10:56 -0700
Subject: iomap: generic inline data handling

Add generic inline data handling by adding a pointer to the inline data
region to struct iomap.  When handling a buffered IOMAP_INLINE write,
iomap_write_begin will copy the current inline data from the inline data
region into the page cache, and iomap_write_end will copy the changes in
the page cache back to the inline data region.

This doesn't cover inline data reads and direct I/O yet because so far,
we have no users.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[hch: small cleanups to better fit in with other iomap work]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 62 +++++++++++++++++++++++++++++++++++++++++++++------
 include/linux/iomap.h |  1 +
 2 files changed, 56 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/iomap.c b/fs/iomap.c
index 9c454459a1e9..4aecd7c5dbd8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -103,6 +103,26 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static void
+iomap_read_inline_data(struct inode *inode, struct page *page,
+		struct iomap *iomap)
+{
+	size_t size = i_size_read(inode);
+	void *addr;
+
+	if (PageUptodate(page))
+		return;
+
+	BUG_ON(page->index);
+	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+	addr = kmap_atomic(page);
+	memcpy(addr, iomap->inline_data, size);
+	memset(addr + size, 0, PAGE_SIZE - size);
+	kunmap_atomic(addr);
+	SetPageUptodate(page);
+}
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
@@ -133,7 +153,11 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	if (!page)
 		return -ENOMEM;
 
-	status = __block_write_begin_int(page, pos, len, NULL, iomap);
+	if (iomap->type == IOMAP_INLINE)
+		iomap_read_inline_data(inode, page, iomap);
+	else
+		status = __block_write_begin_int(page, pos, len, NULL, iomap);
+
 	if (unlikely(status)) {
 		unlock_page(page);
 		put_page(page);
@@ -146,14 +170,37 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	return status;
 }
 
+static int
+iomap_write_end_inline(struct inode *inode, struct page *page,
+		struct iomap *iomap, loff_t pos, unsigned copied)
+{
+	void *addr;
+
+	WARN_ON_ONCE(!PageUptodate(page));
+	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+	addr = kmap_atomic(page);
+	memcpy(iomap->inline_data + pos, addr + pos, copied);
+	kunmap_atomic(addr);
+
+	mark_inode_dirty(inode);
+	__generic_write_end(inode, pos, copied, page);
+	return copied;
+}
+
 static int
 iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
-		unsigned copied, struct page *page)
+		unsigned copied, struct page *page, struct iomap *iomap)
 {
 	int ret;
 
-	ret = generic_write_end(NULL, inode->i_mapping, pos, len,
-			copied, page, NULL);
+	if (iomap->type == IOMAP_INLINE) {
+		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
+	} else {
+		ret = generic_write_end(NULL, inode->i_mapping, pos, len,
+				copied, page, NULL);
+	}
+
 	if (ret < len)
 		iomap_write_failed(inode, pos, len);
 	return ret;
@@ -208,7 +255,8 @@ again:
 
 		flush_dcache_page(page);
 
-		status = iomap_write_end(inode, pos, bytes, copied, page);
+		status = iomap_write_end(inode, pos, bytes, copied, page,
+				iomap);
 		if (unlikely(status < 0))
 			break;
 		copied = status;
@@ -302,7 +350,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 		WARN_ON_ONCE(!PageUptodate(page));
 
-		status = iomap_write_end(inode, pos, bytes, bytes, page);
+		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 		if (unlikely(status <= 0)) {
 			if (WARN_ON_ONCE(status == 0))
 				return -EIO;
@@ -354,7 +402,7 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
 	zero_user(page, offset, bytes);
 	mark_page_accessed(page);
 
-	return iomap_write_end(inode, pos, bytes, bytes, page);
+	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 }
 
 static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index a044a824da85..10d6cff7f69a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -55,6 +55,7 @@ struct iomap {
 	u16			flags;	/* flags for mapping */
 	struct block_device	*bdev;	/* block device for I/O */
 	struct dax_device	*dax_dev; /* dax_dev for dax operations */
+	void			*inline_data;
 };
 
 /*
-- 
cgit v1.2.3


From 63899c6f8851c32214b19390254fa1ae90b582df Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 15:10:56 -0700
Subject: iomap: add a page_done callback

This will be used by gfs2 to attach data to transactions for the journaled
data mode.  But the concept is generic enough that we might be able to
use it for other purposes like encryption/integrity post-processing in the
future.

Based on a patch from Andreas Gruenbacher.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 3 +++
 include/linux/iomap.h | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/fs/iomap.c b/fs/iomap.c
index 4aecd7c5dbd8..a1f71e64ea49 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -201,6 +201,9 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 				copied, page, NULL);
 	}
 
+	if (iomap->page_done)
+		iomap->page_done(inode, pos, copied, page, iomap);
+
 	if (ret < len)
 		iomap_write_failed(inode, pos, len);
 	return ret;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 10d6cff7f69a..45f43865b0f0 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -9,6 +9,7 @@ struct fiemap_extent_info;
 struct inode;
 struct iov_iter;
 struct kiocb;
+struct page;
 struct vm_area_struct;
 struct vm_fault;
 
@@ -56,6 +57,14 @@ struct iomap {
 	struct block_device	*bdev;	/* block device for I/O */
 	struct dax_device	*dax_dev; /* dax_dev for dax operations */
 	void			*inline_data;
+
+	/*
+	 * Called when finished processing a page in the mapping returned in
+	 * this iomap.  At least for now this is only supported in the buffered
+	 * write path.
+	 */
+	void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
+			struct page *page, struct iomap *iomap);
 };
 
 /*
-- 
cgit v1.2.3


From e184fde6f3f5353040dd4b5637f823fc87436e55 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Jun 2018 15:10:57 -0700
Subject: iomap: add private pointer to struct iomap

Add a private pointer to struct iomap to allow filesystems to pass data
from iomap_begin to iomap_end.  Will be used by gfs2 for passing on the
on-disk inode buffer head.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/linux/iomap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 45f43865b0f0..1f36523d448a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -57,6 +57,7 @@ struct iomap {
 	struct block_device	*bdev;	/* block device for I/O */
 	struct dax_device	*dax_dev; /* dax_dev for dax operations */
 	void			*inline_data;
+	void			*private; /* filesystem private */
 
 	/*
 	 * Called when finished processing a page in the mapping returned in
-- 
cgit v1.2.3


From 72b4daa241295440f98e80ae21294a67b27ca091 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 15:10:57 -0700
Subject: iomap: add an iomap-based readpage and readpages implementation

Simply use iomap_apply to iterate over the file and a submit a bio for
each non-uptodate but mapped region and zero everything else.  Note that
as-is this can not be used for file systems with a blocksize smaller than
the page size, but that support will be added later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 214 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/iomap.h |   3 +
 2 files changed, 216 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/iomap.c b/fs/iomap.c
index a1f71e64ea49..4f10c6b1cf6d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016 Christoph Hellwig.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
@@ -123,6 +124,217 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
 	SetPageUptodate(page);
 }
 
+static void
+iomap_read_end_io(struct bio *bio)
+{
+	int error = blk_status_to_errno(bio->bi_status);
+	struct bio_vec *bvec;
+	int i;
+
+	bio_for_each_segment_all(bvec, bio, i)
+		page_endio(bvec->bv_page, false, error);
+	bio_put(bio);
+}
+
+struct iomap_readpage_ctx {
+	struct page		*cur_page;
+	bool			cur_page_in_bio;
+	bool			is_readahead;
+	struct bio		*bio;
+	struct list_head	*pages;
+};
+
+static loff_t
+iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+		struct iomap *iomap)
+{
+	struct iomap_readpage_ctx *ctx = data;
+	struct page *page = ctx->cur_page;
+	unsigned poff = pos & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	bool is_contig = false;
+	sector_t sector;
+
+	/* we don't support blocksize < PAGE_SIZE quite yet. */
+	WARN_ON_ONCE(pos != page_offset(page));
+	WARN_ON_ONCE(plen != PAGE_SIZE);
+
+	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
+		zero_user(page, poff, plen);
+		SetPageUptodate(page);
+		goto done;
+	}
+
+	ctx->cur_page_in_bio = true;
+
+	/*
+	 * Try to merge into a previous segment if we can.
+	 */
+	sector = iomap_sector(iomap, pos);
+	if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
+		if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+			goto done;
+		is_contig = true;
+	}
+
+	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
+		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+		if (ctx->bio)
+			submit_bio(ctx->bio);
+
+		if (ctx->is_readahead) /* same as readahead_gfp_mask */
+			gfp |= __GFP_NORETRY | __GFP_NOWARN;
+		ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+		ctx->bio->bi_opf = REQ_OP_READ;
+		if (ctx->is_readahead)
+			ctx->bio->bi_opf |= REQ_RAHEAD;
+		ctx->bio->bi_iter.bi_sector = sector;
+		bio_set_dev(ctx->bio, iomap->bdev);
+		ctx->bio->bi_end_io = iomap_read_end_io;
+	}
+
+	__bio_add_page(ctx->bio, page, plen, poff);
+done:
+	return plen;
+}
+
+int
+iomap_readpage(struct page *page, const struct iomap_ops *ops)
+{
+	struct iomap_readpage_ctx ctx = { .cur_page = page };
+	struct inode *inode = page->mapping->host;
+	unsigned poff;
+	loff_t ret;
+
+	WARN_ON_ONCE(page_has_buffers(page));
+
+	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
+		ret = iomap_apply(inode, page_offset(page) + poff,
+				PAGE_SIZE - poff, 0, ops, &ctx,
+				iomap_readpage_actor);
+		if (ret <= 0) {
+			WARN_ON_ONCE(ret == 0);
+			SetPageError(page);
+			break;
+		}
+	}
+
+	if (ctx.bio) {
+		submit_bio(ctx.bio);
+		WARN_ON_ONCE(!ctx.cur_page_in_bio);
+	} else {
+		WARN_ON_ONCE(ctx.cur_page_in_bio);
+		unlock_page(page);
+	}
+
+	/*
+	 * Just like mpage_readpages and block_read_full_page we always
+	 * return 0 and just mark the page as PageError on errors.  This
+	 * should be cleaned up all through the stack eventually.
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_readpage);
+
+static struct page *
+iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
+		loff_t length, loff_t *done)
+{
+	while (!list_empty(pages)) {
+		struct page *page = lru_to_page(pages);
+
+		if (page_offset(page) >= (u64)pos + length)
+			break;
+
+		list_del(&page->lru);
+		if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
+				GFP_NOFS))
+			return page;
+
+		/*
+		 * If we already have a page in the page cache at index we are
+		 * done.  Upper layers don't care if it is uptodate after the
+		 * readpages call itself as every page gets checked again once
+		 * actually needed.
+		 */
+		*done += PAGE_SIZE;
+		put_page(page);
+	}
+
+	return NULL;
+}
+
+static loff_t
+iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
+		void *data, struct iomap *iomap)
+{
+	struct iomap_readpage_ctx *ctx = data;
+	loff_t done, ret;
+
+	for (done = 0; done < length; done += ret) {
+		if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
+			if (!ctx->cur_page_in_bio)
+				unlock_page(ctx->cur_page);
+			put_page(ctx->cur_page);
+			ctx->cur_page = NULL;
+		}
+		if (!ctx->cur_page) {
+			ctx->cur_page = iomap_next_page(inode, ctx->pages,
+					pos, length, &done);
+			if (!ctx->cur_page)
+				break;
+			ctx->cur_page_in_bio = false;
+		}
+		ret = iomap_readpage_actor(inode, pos + done, length - done,
+				ctx, iomap);
+	}
+
+	return done;
+}
+
+int
+iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops)
+{
+	struct iomap_readpage_ctx ctx = {
+		.pages		= pages,
+		.is_readahead	= true,
+	};
+	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
+	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
+	loff_t length = last - pos + PAGE_SIZE, ret = 0;
+
+	while (length > 0) {
+		ret = iomap_apply(mapping->host, pos, length, 0, ops,
+				&ctx, iomap_readpages_actor);
+		if (ret <= 0) {
+			WARN_ON_ONCE(ret == 0);
+			goto done;
+		}
+		pos += ret;
+		length -= ret;
+	}
+	ret = 0;
+done:
+	if (ctx.bio)
+		submit_bio(ctx.bio);
+	if (ctx.cur_page) {
+		if (!ctx.cur_page_in_bio)
+			unlock_page(ctx.cur_page);
+		put_page(ctx.cur_page);
+	}
+
+	/*
+	 * Check that we didn't lose a page due to the arcance calling
+	 * conventions..
+	 */
+	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iomap_readpages);
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 1f36523d448a..30d314407f66 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -99,6 +99,9 @@ struct iomap_ops {
 
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
+int iomap_readpage(struct page *page, const struct iomap_ops *ops);
+int iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-- 
cgit v1.2.3


From 693ba15c9202fe0283404abe4066e1b986e284eb Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 12 Jun 2018 12:05:45 -0700
Subject: scsi: Remove percpu_ida

With its one user gone, remove the library code.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/percpu_ida.h |  83 ----------
 lib/Makefile               |   2 +-
 lib/percpu_ida.c           | 370 ---------------------------------------------
 3 files changed, 1 insertion(+), 454 deletions(-)
 delete mode 100644 include/linux/percpu_ida.h
 delete mode 100644 lib/percpu_ida.c

(limited to 'include/linux')

diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
deleted file mode 100644
index 07d78e4653bc..000000000000
--- a/include/linux/percpu_ida.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PERCPU_IDA_H__
-#define __PERCPU_IDA_H__
-
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/spinlock_types.h>
-#include <linux/wait.h>
-#include <linux/cpumask.h>
-
-struct percpu_ida_cpu;
-
-struct percpu_ida {
-	/*
-	 * number of tags available to be allocated, as passed to
-	 * percpu_ida_init()
-	 */
-	unsigned			nr_tags;
-	unsigned			percpu_max_size;
-	unsigned			percpu_batch_size;
-
-	struct percpu_ida_cpu __percpu	*tag_cpu;
-
-	/*
-	 * Bitmap of cpus that (may) have tags on their percpu freelists:
-	 * steal_tags() uses this to decide when to steal tags, and which cpus
-	 * to try stealing from.
-	 *
-	 * It's ok for a freelist to be empty when its bit is set - steal_tags()
-	 * will just keep looking - but the bitmap _must_ be set whenever a
-	 * percpu freelist does have tags.
-	 */
-	cpumask_t			cpus_have_tags;
-
-	struct {
-		spinlock_t		lock;
-		/*
-		 * When we go to steal tags from another cpu (see steal_tags()),
-		 * we want to pick a cpu at random. Cycling through them every
-		 * time we steal is a bit easier and more or less equivalent:
-		 */
-		unsigned		cpu_last_stolen;
-
-		/* For sleeping on allocation failure */
-		wait_queue_head_t	wait;
-
-		/*
-		 * Global freelist - it's a stack where nr_free points to the
-		 * top
-		 */
-		unsigned		nr_free;
-		unsigned		*freelist;
-	} ____cacheline_aligned_in_smp;
-};
-
-/*
- * Number of tags we move between the percpu freelist and the global freelist at
- * a time
- */
-#define IDA_DEFAULT_PCPU_BATCH_MOVE	32U
-/* Max size of percpu freelist, */
-#define IDA_DEFAULT_PCPU_SIZE	((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2)
-
-int percpu_ida_alloc(struct percpu_ida *pool, int state);
-void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
-
-void percpu_ida_destroy(struct percpu_ida *pool);
-int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
-	unsigned long max_size, unsigned long batch_size);
-static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
-{
-	return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE,
-		IDA_DEFAULT_PCPU_BATCH_MOVE);
-}
-
-typedef int (*percpu_ida_cb)(unsigned, void *);
-int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
-	void *data);
-
-unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu);
-#endif /* __PERCPU_IDA_H__ */
diff --git a/lib/Makefile b/lib/Makefile
index 956b320292fe..055420101965 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -40,7 +40,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
 	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
-	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
+	 percpu-refcount.o rhashtable.o reciprocal_div.o \
 	 once.o refcount.o usercopy.o errseq.o bucket_locks.o
 obj-$(CONFIG_STRING_SELFTEST) += test_string.o
 obj-y += string_helpers.o
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
deleted file mode 100644
index 9bbd9c5d375a..000000000000
--- a/lib/percpu_ida.c
+++ /dev/null
@@ -1,370 +0,0 @@
-/*
- * Percpu IDA library
- *
- * Copyright (C) 2013 Datera, Inc. Kent Overstreet
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-#include <linux/bug.h>
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/percpu.h>
-#include <linux/sched/signal.h>
-#include <linux/string.h>
-#include <linux/spinlock.h>
-#include <linux/percpu_ida.h>
-
-struct percpu_ida_cpu {
-	/*
-	 * Even though this is percpu, we need a lock for tag stealing by remote
-	 * CPUs:
-	 */
-	spinlock_t			lock;
-
-	/* nr_free/freelist form a stack of free IDs */
-	unsigned			nr_free;
-	unsigned			freelist[];
-};
-
-static inline void move_tags(unsigned *dst, unsigned *dst_nr,
-			     unsigned *src, unsigned *src_nr,
-			     unsigned nr)
-{
-	*src_nr -= nr;
-	memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr);
-	*dst_nr += nr;
-}
-
-/*
- * Try to steal tags from a remote cpu's percpu freelist.
- *
- * We first check how many percpu freelists have tags
- *
- * Then we iterate through the cpus until we find some tags - we don't attempt
- * to find the "best" cpu to steal from, to keep cacheline bouncing to a
- * minimum.
- */
-static inline void steal_tags(struct percpu_ida *pool,
-			      struct percpu_ida_cpu *tags)
-{
-	unsigned cpus_have_tags, cpu = pool->cpu_last_stolen;
-	struct percpu_ida_cpu *remote;
-
-	for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
-	     cpus_have_tags; cpus_have_tags--) {
-		cpu = cpumask_next(cpu, &pool->cpus_have_tags);
-
-		if (cpu >= nr_cpu_ids) {
-			cpu = cpumask_first(&pool->cpus_have_tags);
-			if (cpu >= nr_cpu_ids)
-				BUG();
-		}
-
-		pool->cpu_last_stolen = cpu;
-		remote = per_cpu_ptr(pool->tag_cpu, cpu);
-
-		cpumask_clear_cpu(cpu, &pool->cpus_have_tags);
-
-		if (remote == tags)
-			continue;
-
-		spin_lock(&remote->lock);
-
-		if (remote->nr_free) {
-			memcpy(tags->freelist,
-			       remote->freelist,
-			       sizeof(unsigned) * remote->nr_free);
-
-			tags->nr_free = remote->nr_free;
-			remote->nr_free = 0;
-		}
-
-		spin_unlock(&remote->lock);
-
-		if (tags->nr_free)
-			break;
-	}
-}
-
-/*
- * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto
- * our percpu freelist:
- */
-static inline void alloc_global_tags(struct percpu_ida *pool,
-				     struct percpu_ida_cpu *tags)
-{
-	move_tags(tags->freelist, &tags->nr_free,
-		  pool->freelist, &pool->nr_free,
-		  min(pool->nr_free, pool->percpu_batch_size));
-}
-
-/**
- * percpu_ida_alloc - allocate a tag
- * @pool: pool to allocate from
- * @state: task state for prepare_to_wait
- *
- * Returns a tag - an integer in the range [0..nr_tags) (passed to
- * tag_pool_init()), or otherwise -ENOSPC on allocation failure.
- *
- * Safe to be called from interrupt context (assuming it isn't passed
- * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course).
- *
- * @gfp indicates whether or not to wait until a free id is available (it's not
- * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep
- * however long it takes until another thread frees an id (same semantics as a
- * mempool).
- *
- * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE.
- */
-int percpu_ida_alloc(struct percpu_ida *pool, int state)
-{
-	DEFINE_WAIT(wait);
-	struct percpu_ida_cpu *tags;
-	unsigned long flags;
-	int tag = -ENOSPC;
-
-	tags = raw_cpu_ptr(pool->tag_cpu);
-	spin_lock_irqsave(&tags->lock, flags);
-
-	/* Fastpath */
-	if (likely(tags->nr_free >= 0)) {
-		tag = tags->freelist[--tags->nr_free];
-		spin_unlock_irqrestore(&tags->lock, flags);
-		return tag;
-	}
-	spin_unlock_irqrestore(&tags->lock, flags);
-
-	while (1) {
-		spin_lock_irqsave(&pool->lock, flags);
-		tags = this_cpu_ptr(pool->tag_cpu);
-
-		/*
-		 * prepare_to_wait() must come before steal_tags(), in case
-		 * percpu_ida_free() on another cpu flips a bit in
-		 * cpus_have_tags
-		 *
-		 * global lock held and irqs disabled, don't need percpu lock
-		 */
-		if (state != TASK_RUNNING)
-			prepare_to_wait(&pool->wait, &wait, state);
-
-		if (!tags->nr_free)
-			alloc_global_tags(pool, tags);
-		if (!tags->nr_free)
-			steal_tags(pool, tags);
-
-		if (tags->nr_free) {
-			tag = tags->freelist[--tags->nr_free];
-			if (tags->nr_free)
-				cpumask_set_cpu(smp_processor_id(),
-						&pool->cpus_have_tags);
-		}
-
-		spin_unlock_irqrestore(&pool->lock, flags);
-
-		if (tag >= 0 || state == TASK_RUNNING)
-			break;
-
-		if (signal_pending_state(state, current)) {
-			tag = -ERESTARTSYS;
-			break;
-		}
-
-		schedule();
-	}
-	if (state != TASK_RUNNING)
-		finish_wait(&pool->wait, &wait);
-
-	return tag;
-}
-EXPORT_SYMBOL_GPL(percpu_ida_alloc);
-
-/**
- * percpu_ida_free - free a tag
- * @pool: pool @tag was allocated from
- * @tag: a tag previously allocated with percpu_ida_alloc()
- *
- * Safe to be called from interrupt context.
- */
-void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
-{
-	struct percpu_ida_cpu *tags;
-	unsigned long flags;
-	unsigned nr_free;
-
-	BUG_ON(tag >= pool->nr_tags);
-
-	tags = raw_cpu_ptr(pool->tag_cpu);
-
-	spin_lock_irqsave(&tags->lock, flags);
-	tags->freelist[tags->nr_free++] = tag;
-
-	nr_free = tags->nr_free;
-
-	if (nr_free == 1) {
-		cpumask_set_cpu(smp_processor_id(),
-				&pool->cpus_have_tags);
-		wake_up(&pool->wait);
-	}
-	spin_unlock_irqrestore(&tags->lock, flags);
-
-	if (nr_free == pool->percpu_max_size) {
-		spin_lock_irqsave(&pool->lock, flags);
-		spin_lock(&tags->lock);
-
-		if (tags->nr_free == pool->percpu_max_size) {
-			move_tags(pool->freelist, &pool->nr_free,
-				  tags->freelist, &tags->nr_free,
-				  pool->percpu_batch_size);
-
-			wake_up(&pool->wait);
-		}
-		spin_unlock(&tags->lock);
-		spin_unlock_irqrestore(&pool->lock, flags);
-	}
-}
-EXPORT_SYMBOL_GPL(percpu_ida_free);
-
-/**
- * percpu_ida_destroy - release a tag pool's resources
- * @pool: pool to free
- *
- * Frees the resources allocated by percpu_ida_init().
- */
-void percpu_ida_destroy(struct percpu_ida *pool)
-{
-	free_percpu(pool->tag_cpu);
-	free_pages((unsigned long) pool->freelist,
-		   get_order(pool->nr_tags * sizeof(unsigned)));
-}
-EXPORT_SYMBOL_GPL(percpu_ida_destroy);
-
-/**
- * percpu_ida_init - initialize a percpu tag pool
- * @pool: pool to initialize
- * @nr_tags: number of tags that will be available for allocation
- *
- * Initializes @pool so that it can be used to allocate tags - integers in the
- * range [0, nr_tags). Typically, they'll be used by driver code to refer to a
- * preallocated array of tag structures.
- *
- * Allocation is percpu, but sharding is limited by nr_tags - for best
- * performance, the workload should not span more cpus than nr_tags / 128.
- */
-int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
-	unsigned long max_size, unsigned long batch_size)
-{
-	unsigned i, cpu, order;
-
-	memset(pool, 0, sizeof(*pool));
-
-	init_waitqueue_head(&pool->wait);
-	spin_lock_init(&pool->lock);
-	pool->nr_tags = nr_tags;
-	pool->percpu_max_size = max_size;
-	pool->percpu_batch_size = batch_size;
-
-	/* Guard against overflow */
-	if (nr_tags > (unsigned) INT_MAX + 1) {
-		pr_err("percpu_ida_init(): nr_tags too large\n");
-		return -EINVAL;
-	}
-
-	order = get_order(nr_tags * sizeof(unsigned));
-	pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order);
-	if (!pool->freelist)
-		return -ENOMEM;
-
-	for (i = 0; i < nr_tags; i++)
-		pool->freelist[i] = i;
-
-	pool->nr_free = nr_tags;
-
-	pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
-				       pool->percpu_max_size * sizeof(unsigned),
-				       sizeof(unsigned));
-	if (!pool->tag_cpu)
-		goto err;
-
-	for_each_possible_cpu(cpu)
-		spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock);
-
-	return 0;
-err:
-	percpu_ida_destroy(pool);
-	return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(__percpu_ida_init);
-
-/**
- * percpu_ida_for_each_free - iterate free ids of a pool
- * @pool: pool to iterate
- * @fn: interate callback function
- * @data: parameter for @fn
- *
- * Note, this doesn't guarantee to iterate all free ids restrictly. Some free
- * ids might be missed, some might be iterated duplicated, and some might
- * be iterated and not free soon.
- */
-int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
-	void *data)
-{
-	unsigned long flags;
-	struct percpu_ida_cpu *remote;
-	unsigned cpu, i, err = 0;
-
-	for_each_possible_cpu(cpu) {
-		remote = per_cpu_ptr(pool->tag_cpu, cpu);
-		spin_lock_irqsave(&remote->lock, flags);
-		for (i = 0; i < remote->nr_free; i++) {
-			err = fn(remote->freelist[i], data);
-			if (err)
-				break;
-		}
-		spin_unlock_irqrestore(&remote->lock, flags);
-		if (err)
-			goto out;
-	}
-
-	spin_lock_irqsave(&pool->lock, flags);
-	for (i = 0; i < pool->nr_free; i++) {
-		err = fn(pool->freelist[i], data);
-		if (err)
-			break;
-	}
-	spin_unlock_irqrestore(&pool->lock, flags);
-out:
-	return err;
-}
-EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
-
-/**
- * percpu_ida_free_tags - return free tags number of a specific cpu or global pool
- * @pool: pool related
- * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids
- *
- * Note: this just returns a snapshot of free tags number.
- */
-unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu)
-{
-	struct percpu_ida_cpu *remote;
-	if (cpu == nr_cpu_ids)
-		return pool->nr_free;
-	remote = per_cpu_ptr(pool->tag_cpu, cpu);
-	return remote->nr_free;
-}
-EXPORT_SYMBOL_GPL(percpu_ida_free_tags);
-- 
cgit v1.2.3


From 2fa4a32613c9182b00e46872755b0662374424a7 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Thu, 10 May 2018 11:05:16 +0800
Subject: scsi: libsas: dynamically allocate and free ata host

Commit 2623c7a5f2 ("libata: add refcounting to ata_host") v4.17+ introduced
refcounting to ata_host and will increase or decrease the refcount when
adding or deleting transport ATA port.

Now the ata host for libsas is embedded in domain_device, and the ->kref
member is not initialized. Afer we add ata transport class, ata_host_get()
will be called when adding transport ATA port and a warning will be
triggered as below:

refcount_t: increment on 0; use-after-free.
WARNING: CPU: 2 PID: 103 at
lib/refcount.c:153 refcount_inc+0x40/0x48 ......  Call trace:
 refcount_inc+0x40/0x48
 ata_host_get+0x10/0x18
 ata_tport_add+0x40/0x120
 ata_sas_tport_add+0xc/0x14
 sas_ata_init+0x7c/0xc8
 sas_discover_domain+0x380/0x53c
 process_one_work+0x12c/0x288
 worker_thread+0x58/0x3f0
 kthread+0xfc/0x128
 ret_from_fork+0x10/0x18

And also when removing transport ATA port ata_host_put() will be called and
another similar warning will be triggered. If the refcount decreased to
zero, the ata host will be freed. But this ata host is only part of
domain_device, it cannot be freed directly.

So we have to change this embedded static ata host to a dynamically
allocated ata host and initialize the ->kref member. To use ata_host_get()
and ata_host_put() in libsas, we need to move the declaration of these
functions to the public libata.h and export them.

Fixes: b6240a4df018 ("scsi: libsas: add transport class for ATA devices")
Signed-off-by: Jason Yan <yanaijie@huawei.com>
CC: John Garry <john.garry@huawei.com>
CC: Taras Kondratiuk <takondra@cisco.com>
CC: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ata/libata-core.c          |  3 +++
 drivers/ata/libata.h               |  2 --
 drivers/scsi/libsas/sas_ata.c      | 40 +++++++++++++++++++++++++-------------
 drivers/scsi/libsas/sas_discover.c |  2 ++
 include/linux/libata.h             |  2 ++
 include/scsi/libsas.h              |  2 +-
 6 files changed, 34 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 27d15ed7fa3d..89cb4872a09c 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6421,6 +6421,7 @@ void ata_host_init(struct ata_host *host, struct device *dev,
 	host->n_tags = ATA_MAX_QUEUE;
 	host->dev = dev;
 	host->ops = ops;
+	kref_init(&host->kref);
 }
 
 void __ata_port_probe(struct ata_port *ap)
@@ -7388,3 +7389,5 @@ EXPORT_SYMBOL_GPL(ata_cable_80wire);
 EXPORT_SYMBOL_GPL(ata_cable_unknown);
 EXPORT_SYMBOL_GPL(ata_cable_ignore);
 EXPORT_SYMBOL_GPL(ata_cable_sata);
+EXPORT_SYMBOL_GPL(ata_host_get);
+EXPORT_SYMBOL_GPL(ata_host_put);
\ No newline at end of file
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 9e21c49cf6be..f953cb4bb1ba 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -100,8 +100,6 @@ extern int ata_port_probe(struct ata_port *ap);
 extern void __ata_port_probe(struct ata_port *ap);
 extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
 				      u8 page, void *buf, unsigned int sectors);
-extern void ata_host_get(struct ata_host *host);
-extern void ata_host_put(struct ata_host *host);
 
 #define to_ata_port(d) container_of(d, struct ata_port, tdev)
 
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 2ac7395112b4..64a958a99f6a 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -552,34 +552,46 @@ int sas_ata_init(struct domain_device *found_dev)
 {
 	struct sas_ha_struct *ha = found_dev->port->ha;
 	struct Scsi_Host *shost = ha->core.shost;
+	struct ata_host *ata_host;
 	struct ata_port *ap;
 	int rc;
 
-	ata_host_init(&found_dev->sata_dev.ata_host, ha->dev, &sas_sata_ops);
-	ap = ata_sas_port_alloc(&found_dev->sata_dev.ata_host,
-				&sata_port_info,
-				shost);
+	ata_host = kzalloc(sizeof(*ata_host), GFP_KERNEL);
+	if (!ata_host)	{
+		SAS_DPRINTK("ata host alloc failed.\n");
+		return -ENOMEM;
+	}
+
+	ata_host_init(ata_host, ha->dev, &sas_sata_ops);
+
+	ap = ata_sas_port_alloc(ata_host, &sata_port_info, shost);
 	if (!ap) {
 		SAS_DPRINTK("ata_sas_port_alloc failed.\n");
-		return -ENODEV;
+		rc = -ENODEV;
+		goto free_host;
 	}
 
 	ap->private_data = found_dev;
 	ap->cbl = ATA_CBL_SATA;
 	ap->scsi_host = shost;
 	rc = ata_sas_port_init(ap);
-	if (rc) {
-		ata_sas_port_destroy(ap);
-		return rc;
-	}
-	rc = ata_sas_tport_add(found_dev->sata_dev.ata_host.dev, ap);
-	if (rc) {
-		ata_sas_port_destroy(ap);
-		return rc;
-	}
+	if (rc)
+		goto destroy_port;
+
+	rc = ata_sas_tport_add(ata_host->dev, ap);
+	if (rc)
+		goto destroy_port;
+
+	found_dev->sata_dev.ata_host = ata_host;
 	found_dev->sata_dev.ap = ap;
 
 	return 0;
+
+destroy_port:
+	ata_sas_port_destroy(ap);
+free_host:
+	ata_host_put(ata_host);
+	return rc;
 }
 
 void sas_ata_task_abort(struct sas_task *task)
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 1ffca28fe6a8..0148ae62a52a 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -316,6 +316,8 @@ void sas_free_device(struct kref *kref)
 	if (dev_is_sata(dev) && dev->sata_dev.ap) {
 		ata_sas_tport_delete(dev->sata_dev.ap);
 		ata_sas_port_destroy(dev->sata_dev.ap);
+		ata_host_put(dev->sata_dev.ata_host);
+		dev->sata_dev.ata_host = NULL;
 		dev->sata_dev.ap = NULL;
 	}
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8b8946dd63b9..33e9718397e2 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1110,6 +1110,8 @@ extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports);
 extern struct ata_host *ata_host_alloc_pinfo(struct device *dev,
 			const struct ata_port_info * const * ppi, int n_ports);
 extern int ata_slave_link_init(struct ata_port *ap);
+extern void ata_host_get(struct ata_host *host);
+extern void ata_host_put(struct ata_host *host);
 extern int ata_host_start(struct ata_host *host);
 extern int ata_host_register(struct ata_host *host,
 			     struct scsi_host_template *sht);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 225ab7783dfd..3de3b10da19a 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -161,7 +161,7 @@ struct sata_device {
 	u8     port_no;        /* port number, if this is a PM (Port) */
 
 	struct ata_port *ap;
-	struct ata_host ata_host;
+	struct ata_host *ata_host;
 	struct smp_resp rps_resp ____cacheline_aligned; /* report_phy_sata_resp */
 	u8     fis[ATA_RESP_FIS_SIZE];
 };
-- 
cgit v1.2.3


From 6519750210d9d3330e85d12e0128652edde448d2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 12 Jun 2018 10:34:50 +0200
Subject: sched/swait: Remove __prepare_to_swait

There is no public user of this API, remove it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: bigeasy@linutronix.de
Cc: oleg@redhat.com
Cc: paulmck@linux.vnet.ibm.com
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/20180612083909.157076812@infradead.org
---
 include/linux/swait.h | 1 -
 kernel/sched/swait.c  | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swait.h b/include/linux/swait.h
index bf8cb0dee23c..d6a5e949e4ca 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -161,7 +161,6 @@ extern void swake_up(struct swait_queue_head *q);
 extern void swake_up_all(struct swait_queue_head *q);
 extern void swake_up_locked(struct swait_queue_head *q);
 
-extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
 extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
 extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
 
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index b6fb2c3b3ff7..e68bb1398b05 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -69,7 +69,7 @@ void swake_up_all(struct swait_queue_head *q)
 }
 EXPORT_SYMBOL(swake_up_all);
 
-void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
+static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
 {
 	wait->task = current;
 	if (list_empty(&wait->task_list))
-- 
cgit v1.2.3


From 0abf17bc7790dd0467ed0e38522242f23c5da1c4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 12 Jun 2018 10:34:51 +0200
Subject: sched/swait: Switch to full exclusive mode

Linus noted that swait basically implements exclusive mode -- because
swake_up() only wakes a single waiter. And because of that it should
take care to properly deal with the interruptible case.

In short, the problem is that swake_up() can race with a signal. In
this this case it is possible the swake_up() 'wakes' the waiter that
is already on the way out because it just got a signal and the wakeup
gets lost.

The normal wait code is very careful and avoids this situation, make
sure we do too.

Copy the exact exclusive semantics from wait.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: bigeasy@linutronix.de
Cc: oleg@redhat.com
Cc: paulmck@linux.vnet.ibm.com
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/20180612083909.209762413@infradead.org
---
 include/linux/swait.h | 11 ++++++-----
 kernel/sched/swait.c  | 22 +++++++++++++++++-----
 2 files changed, 23 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swait.h b/include/linux/swait.h
index d6a5e949e4ca..dd032061112d 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -38,8 +38,8 @@
  *    all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
  *    sleeper state.
  *
- *  - the exclusive mode; because this requires preserving the list order
- *    and this is hard.
+ *  - the !exclusive mode; because that leads to O(n) wakeups, everything is
+ *    exclusive.
  *
  *  - custom wake callback functions; because you cannot give any guarantees
  *    about random code. This also allows swait to be used in RT, such that
@@ -167,9 +167,10 @@ extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queu
 extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 
-/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
+/* as per ___wait_event() but for swait, therefore "exclusive == 1" */
 #define ___swait_event(wq, condition, state, ret, cmd)			\
 ({									\
+	__label__ __out;						\
 	struct swait_queue __wait;					\
 	long __ret = ret;						\
 									\
@@ -182,13 +183,13 @@ extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 									\
 		if (___wait_is_interruptible(state) && __int) {		\
 			__ret = __int;					\
-			break;						\
+			goto __out;					\
 		}							\
 									\
 		cmd;							\
 	}								\
 	finish_swait(&wq, &__wait);					\
-	__ret;								\
+__out:	__ret;								\
 })
 
 #define __swait_event(wq, condition)					\
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index e68bb1398b05..66890de93ee5 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -73,7 +73,7 @@ static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *w
 {
 	wait->task = current;
 	if (list_empty(&wait->task_list))
-		list_add(&wait->task_list, &q->task_list);
+		list_add_tail(&wait->task_list, &q->task_list);
 }
 
 void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
@@ -89,12 +89,24 @@ EXPORT_SYMBOL(prepare_to_swait);
 
 long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
 {
-	if (signal_pending_state(state, current))
-		return -ERESTARTSYS;
+	unsigned long flags;
+	long ret = 0;
 
-	prepare_to_swait(q, wait, state);
+	raw_spin_lock_irqsave(&q->lock, flags);
+	if (unlikely(signal_pending_state(state, current))) {
+		/*
+		 * See prepare_to_wait_event(). TL;DR, subsequent swake_up()
+		 * must not see us.
+		 */
+		list_del_init(&wait->task_list);
+		ret = -ERESTARTSYS;
+	} else {
+		__prepare_to_swait(q, wait);
+		set_current_state(state);
+	}
+	raw_spin_unlock_irqrestore(&q->lock, flags);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(prepare_to_swait_event);
 
-- 
cgit v1.2.3


From b3dae109fa89d67334bf3349babab3ad9b6f233f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 12 Jun 2018 10:34:52 +0200
Subject: sched/swait: Rename to exclusive

Since swait basically implemented exclusive waits only, make sure
the API reflects that.

  $ git grep -l -e "\<swake_up\>"
		-e "\<swait_event[^ (]*"
		-e "\<prepare_to_swait\>" | while read file;
    do
	sed -i -e 's/\<swake_up\>/&_one/g'
	       -e 's/\<swait_event[^ (]*/&_exclusive/g'
	       -e 's/\<prepare_to_swait\>/&_exclusive/g' $file;
    done

With a few manual touch-ups.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: bigeasy@linutronix.de
Cc: oleg@redhat.com
Cc: paulmck@linux.vnet.ibm.com
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/20180612083909.261946548@infradead.org
---
 arch/mips/kvm/mips.c         |  4 ++--
 arch/powerpc/kvm/book3s_hv.c |  6 +++---
 arch/s390/kvm/interrupt.c    |  2 +-
 arch/x86/kernel/kvm.c        |  4 ++--
 arch/x86/kvm/lapic.c         |  2 +-
 include/linux/swait.h        | 24 ++++++++++++------------
 kernel/power/suspend.c       |  4 ++--
 kernel/rcu/srcutiny.c        |  4 ++--
 kernel/rcu/tree.c            |  8 ++++----
 kernel/rcu/tree_exp.h        |  4 ++--
 kernel/rcu/tree_plugin.h     | 12 ++++++------
 kernel/sched/swait.c         | 10 +++++-----
 virt/kvm/arm/arm.c           |  4 ++--
 virt/kvm/arm/psci.c          |  2 +-
 virt/kvm/async_pf.c          |  2 +-
 virt/kvm/kvm_main.c          |  4 ++--
 16 files changed, 48 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 7cd76f93a438..f7ea8e21656b 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -515,7 +515,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 	dvcpu->arch.wait = 0;
 
 	if (swq_has_sleeper(&dvcpu->wq))
-		swake_up(&dvcpu->wq);
+		swake_up_one(&dvcpu->wq);
 
 	return 0;
 }
@@ -1204,7 +1204,7 @@ static void kvm_mips_comparecount_func(unsigned long data)
 
 	vcpu->arch.wait = 0;
 	if (swq_has_sleeper(&vcpu->wq))
-		swake_up(&vcpu->wq);
+		swake_up_one(&vcpu->wq);
 }
 
 /* low level hrtimer wake routine */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index de686b340f4a..ee4a8854985e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -216,7 +216,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
 	if (swq_has_sleeper(wqp)) {
-		swake_up(wqp);
+		swake_up_one(wqp);
 		++vcpu->stat.halt_wakeup;
 	}
 
@@ -3188,7 +3188,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 		}
 	}
 
-	prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
 
 	if (kvmppc_vcore_check_block(vc)) {
 		finish_swait(&vc->wq, &wait);
@@ -3311,7 +3311,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
-			swake_up(&vc->wq);
+			swake_up_one(&vc->wq);
 		}
 
 	}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index daa09f89ca2d..fcb55b02990e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1145,7 +1145,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 		 * yield-candidate.
 		 */
 		vcpu->preempted = true;
-		swake_up(&vcpu->wq);
+		swake_up_one(&vcpu->wq);
 		vcpu->stat.halt_wakeup++;
 	}
 	/*
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5b2300b818af..a37bda38d205 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -154,7 +154,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
 
 	for (;;) {
 		if (!n.halted)
-			prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+			prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
 		if (hlist_unhashed(&n.link))
 			break;
 
@@ -188,7 +188,7 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
 	if (n->halted)
 		smp_send_reschedule(n->cpu);
 	else if (swq_has_sleeper(&n->wq))
-		swake_up(&n->wq);
+		swake_up_one(&n->wq);
 }
 
 static void apf_task_wake_all(void)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b5cd8465d44f..d536d457517b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1379,7 +1379,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
 	 * using swait_active() is safe.
 	 */
 	if (swait_active(q))
-		swake_up(q);
+		swake_up_one(q);
 
 	if (apic_lvtt_tscdeadline(apic))
 		ktimer->expired_tscdeadline = ktimer->tscdeadline;
diff --git a/include/linux/swait.h b/include/linux/swait.h
index dd032061112d..73e06e9986d4 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -16,7 +16,7 @@
  * wait-queues, but the semantics are actually completely different, and
  * every single user we have ever had has been buggy (or pointless).
  *
- * A "swake_up()" only wakes up _one_ waiter, which is not at all what
+ * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what
  * "wake_up()" does, and has led to problems. In other cases, it has
  * been fine, because there's only ever one waiter (kvm), but in that
  * case gthe whole "simple" wait-queue is just pointless to begin with,
@@ -115,7 +115,7 @@ extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name
  *      CPU0 - waker                    CPU1 - waiter
  *
  *                                      for (;;) {
- *      @cond = true;                     prepare_to_swait(&wq_head, &wait, state);
+ *      @cond = true;                     prepare_to_swait_exclusive(&wq_head, &wait, state);
  *      smp_mb();                         // smp_mb() from set_current_state()
  *      if (swait_active(wq_head))        if (@cond)
  *        wake_up(wq_head);                      break;
@@ -157,11 +157,11 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
 	return swait_active(wq);
 }
 
-extern void swake_up(struct swait_queue_head *q);
+extern void swake_up_one(struct swait_queue_head *q);
 extern void swake_up_all(struct swait_queue_head *q);
 extern void swake_up_locked(struct swait_queue_head *q);
 
-extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
+extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
 extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
 
 extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
@@ -196,7 +196,7 @@ __out:	__ret;								\
 	(void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,	\
 			    schedule())
 
-#define swait_event(wq, condition)					\
+#define swait_event_exclusive(wq, condition)				\
 do {									\
 	if (condition)							\
 		break;							\
@@ -208,7 +208,7 @@ do {									\
 		      TASK_UNINTERRUPTIBLE, timeout,			\
 		      __ret = schedule_timeout(__ret))
 
-#define swait_event_timeout(wq, condition, timeout)			\
+#define swait_event_timeout_exclusive(wq, condition, timeout)		\
 ({									\
 	long __ret = timeout;						\
 	if (!___wait_cond_timeout(condition))				\
@@ -220,7 +220,7 @@ do {									\
 	___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0,		\
 		      schedule())
 
-#define swait_event_interruptible(wq, condition)			\
+#define swait_event_interruptible_exclusive(wq, condition)		\
 ({									\
 	int __ret = 0;							\
 	if (!(condition))						\
@@ -233,7 +233,7 @@ do {									\
 		      TASK_INTERRUPTIBLE, timeout,			\
 		      __ret = schedule_timeout(__ret))
 
-#define swait_event_interruptible_timeout(wq, condition, timeout)	\
+#define swait_event_interruptible_timeout_exclusive(wq, condition, timeout)\
 ({									\
 	long __ret = timeout;						\
 	if (!___wait_cond_timeout(condition))				\
@@ -246,7 +246,7 @@ do {									\
 	(void)___swait_event(wq, condition, TASK_IDLE, 0, schedule())
 
 /**
- * swait_event_idle - wait without system load contribution
+ * swait_event_idle_exclusive - wait without system load contribution
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
@@ -257,7 +257,7 @@ do {									\
  * condition and doesn't want to contribute to system load. Signals are
  * ignored.
  */
-#define swait_event_idle(wq, condition)					\
+#define swait_event_idle_exclusive(wq, condition)			\
 do {									\
 	if (condition)							\
 		break;							\
@@ -270,7 +270,7 @@ do {									\
 		       __ret = schedule_timeout(__ret))
 
 /**
- * swait_event_idle_timeout - wait up to timeout without load contribution
+ * swait_event_idle_timeout_exclusive - wait up to timeout without load contribution
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout at which we'll give up in jiffies
@@ -288,7 +288,7 @@ do {									\
  * or the remaining jiffies (at least 1) if the @condition evaluated
  * to %true before the @timeout elapsed.
  */
-#define swait_event_idle_timeout(wq, condition, timeout)		\
+#define swait_event_idle_timeout_exclusive(wq, condition, timeout)	\
 ({									\
 	long __ret = timeout;						\
 	if (!___wait_cond_timeout(condition))				\
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 87331565e505..70178f6ffdc4 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -92,7 +92,7 @@ static void s2idle_enter(void)
 	/* Push all the CPUs into the idle loop. */
 	wake_up_all_idle_cpus();
 	/* Make the current CPU wait so it can enter the idle loop too. */
-	swait_event(s2idle_wait_head,
+	swait_event_exclusive(s2idle_wait_head,
 		    s2idle_state == S2IDLE_STATE_WAKE);
 
 	cpuidle_pause();
@@ -160,7 +160,7 @@ void s2idle_wake(void)
 	raw_spin_lock_irqsave(&s2idle_lock, flags);
 	if (s2idle_state > S2IDLE_STATE_NONE) {
 		s2idle_state = S2IDLE_STATE_WAKE;
-		swake_up(&s2idle_wait_head);
+		swake_up_one(&s2idle_wait_head);
 	}
 	raw_spin_unlock_irqrestore(&s2idle_lock, flags);
 }
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 622792abe41a..04fc2ed71af8 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -110,7 +110,7 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 
 	WRITE_ONCE(sp->srcu_lock_nesting[idx], newval);
 	if (!newval && READ_ONCE(sp->srcu_gp_waiting))
-		swake_up(&sp->srcu_wq);
+		swake_up_one(&sp->srcu_wq);
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
@@ -140,7 +140,7 @@ void srcu_drive_gp(struct work_struct *wp)
 	idx = sp->srcu_idx;
 	WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx);
 	WRITE_ONCE(sp->srcu_gp_waiting, true);  /* srcu_read_unlock() wakes! */
-	swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
+	swait_event_exclusive(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
 	WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
 
 	/* Invoke the callbacks we removed above. */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index aa7cade1b9f3..91f888d3b23a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1727,7 +1727,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
 	    !READ_ONCE(rsp->gp_flags) ||
 	    !rsp->gp_kthread)
 		return;
-	swake_up(&rsp->gp_wq);
+	swake_up_one(&rsp->gp_wq);
 }
 
 /*
@@ -2002,7 +2002,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 }
 
 /*
- * Helper function for swait_event_idle() wakeup at force-quiescent-state
+ * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state
  * time.
  */
 static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
@@ -2144,7 +2144,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					       READ_ONCE(rsp->gpnum),
 					       TPS("reqwait"));
 			rsp->gp_state = RCU_GP_WAIT_GPS;
-			swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
+			swait_event_idle_exclusive(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
 						     RCU_GP_FLAG_INIT);
 			rsp->gp_state = RCU_GP_DONE_GPS;
 			/* Locking provides needed memory barrier. */
@@ -2176,7 +2176,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					       READ_ONCE(rsp->gpnum),
 					       TPS("fqswait"));
 			rsp->gp_state = RCU_GP_WAIT_FQS;
-			ret = swait_event_idle_timeout(rsp->gp_wq,
+			ret = swait_event_idle_timeout_exclusive(rsp->gp_wq,
 					rcu_gp_fqs_check_wake(rsp, &gf), j);
 			rsp->gp_state = RCU_GP_DOING_FQS;
 			/* Locking provides needed memory barriers. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index d40708e8c5d6..d428cc1064c8 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -212,7 +212,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			if (wake) {
 				smp_mb(); /* EGP done before wake_up(). */
-				swake_up(&rsp->expedited_wq);
+				swake_up_one(&rsp->expedited_wq);
 			}
 			break;
 		}
@@ -518,7 +518,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 	jiffies_start = jiffies;
 
 	for (;;) {
-		ret = swait_event_timeout(
+		ret = swait_event_timeout_exclusive(
 				rsp->expedited_wq,
 				sync_rcu_preempt_exp_done_unlocked(rnp_root),
 				jiffies_stall);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7fd12039e512..ad53d133f709 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1854,8 +1854,8 @@ static void __wake_nocb_leader(struct rcu_data *rdp, bool force,
 		WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
 		del_timer(&rdp->nocb_timer);
 		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
-		smp_mb(); /* ->nocb_leader_sleep before swake_up(). */
-		swake_up(&rdp_leader->nocb_wq);
+		smp_mb(); /* ->nocb_leader_sleep before swake_up_one(). */
+		swake_up_one(&rdp_leader->nocb_wq);
 	} else {
 		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 	}
@@ -2082,7 +2082,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 	 */
 	trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
 	for (;;) {
-		swait_event_interruptible(
+		swait_event_interruptible_exclusive(
 			rnp->nocb_gp_wq[c & 0x1],
 			(d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
 		if (likely(d))
@@ -2111,7 +2111,7 @@ wait_again:
 	/* Wait for callbacks to appear. */
 	if (!rcu_nocb_poll) {
 		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep"));
-		swait_event_interruptible(my_rdp->nocb_wq,
+		swait_event_interruptible_exclusive(my_rdp->nocb_wq,
 				!READ_ONCE(my_rdp->nocb_leader_sleep));
 		raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
 		my_rdp->nocb_leader_sleep = true;
@@ -2176,7 +2176,7 @@ wait_again:
 		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 		if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
 			/* List was empty, so wake up the follower.  */
-			swake_up(&rdp->nocb_wq);
+			swake_up_one(&rdp->nocb_wq);
 		}
 	}
 
@@ -2193,7 +2193,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
 {
 	for (;;) {
 		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep"));
-		swait_event_interruptible(rdp->nocb_wq,
+		swait_event_interruptible_exclusive(rdp->nocb_wq,
 					 READ_ONCE(rdp->nocb_follower_head));
 		if (smp_load_acquire(&rdp->nocb_follower_head)) {
 			/* ^^^ Ensure CB invocation follows _head test. */
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 66890de93ee5..66b59ac77c22 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -32,7 +32,7 @@ void swake_up_locked(struct swait_queue_head *q)
 }
 EXPORT_SYMBOL(swake_up_locked);
 
-void swake_up(struct swait_queue_head *q)
+void swake_up_one(struct swait_queue_head *q)
 {
 	unsigned long flags;
 
@@ -40,7 +40,7 @@ void swake_up(struct swait_queue_head *q)
 	swake_up_locked(q);
 	raw_spin_unlock_irqrestore(&q->lock, flags);
 }
-EXPORT_SYMBOL(swake_up);
+EXPORT_SYMBOL(swake_up_one);
 
 /*
  * Does not allow usage from IRQ disabled, since we must be able to
@@ -76,7 +76,7 @@ static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *w
 		list_add_tail(&wait->task_list, &q->task_list);
 }
 
-void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
+void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state)
 {
 	unsigned long flags;
 
@@ -85,7 +85,7 @@ void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int
 	set_current_state(state);
 	raw_spin_unlock_irqrestore(&q->lock, flags);
 }
-EXPORT_SYMBOL(prepare_to_swait);
+EXPORT_SYMBOL(prepare_to_swait_exclusive);
 
 long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
 {
@@ -95,7 +95,7 @@ long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait
 	raw_spin_lock_irqsave(&q->lock, flags);
 	if (unlikely(signal_pending_state(state, current))) {
 		/*
-		 * See prepare_to_wait_event(). TL;DR, subsequent swake_up()
+		 * See prepare_to_wait_event(). TL;DR, subsequent swake_up_one()
 		 * must not see us.
 		 */
 		list_del_init(&wait->task_list);
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 04e554cae3a2..108250e4d376 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -604,7 +604,7 @@ void kvm_arm_resume_guest(struct kvm *kvm)
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		vcpu->arch.pause = false;
-		swake_up(kvm_arch_vcpu_wq(vcpu));
+		swake_up_one(kvm_arch_vcpu_wq(vcpu));
 	}
 }
 
@@ -612,7 +612,7 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
 {
 	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 
-	swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
 				       (!vcpu->arch.pause)));
 
 	if (vcpu->arch.power_off || vcpu->arch.pause) {
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index c95ab4c5a475..9b73d3ad918a 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -155,7 +155,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	smp_mb();		/* Make sure the above is visible */
 
 	wq = kvm_arch_vcpu_wq(vcpu);
-	swake_up(wq);
+	swake_up_one(wq);
 
 	return PSCI_RET_SUCCESS;
 }
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 57bcb27dcf30..23c2519c5b32 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -107,7 +107,7 @@ static void async_pf_execute(struct work_struct *work)
 	trace_kvm_async_pf_completed(addr, gva);
 
 	if (swq_has_sleeper(&vcpu->wq))
-		swake_up(&vcpu->wq);
+		swake_up_one(&vcpu->wq);
 
 	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ada21f47f22b..940a4aed5b2d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2167,7 +2167,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	kvm_arch_vcpu_blocking(vcpu);
 
 	for (;;) {
-		prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 		if (kvm_vcpu_check_block(vcpu) < 0)
 			break;
@@ -2209,7 +2209,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
 	if (swq_has_sleeper(wqp)) {
-		swake_up(wqp);
+		swake_up_one(wqp);
 		++vcpu->stat.halt_wakeup;
 		return true;
 	}
-- 
cgit v1.2.3


From c03cea42149de56fbae2301d7123daaa2cfe80e2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 15:10:58 -0700
Subject: iomap: add initial support for writes without buffer heads

For now just limited to blocksize == PAGE_SIZE, where we can simply read
in the full page in write begin, and just set the whole page dirty after
copying data into it.  This code is enabled by default and XFS will now
be feed pages without buffer heads in ->writepage and ->writepages.

If a file system sets the IOMAP_F_BUFFER_HEAD flag on the iomap the old
path will still be used, this both helps the transition in XFS and
prepares for the gfs2 migration to the iomap infrastructure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 115 +++++++++++++++++++++++++++++++++++++++++++++++---
 fs/xfs/xfs_iomap.c    |   6 ++-
 include/linux/iomap.h |   2 +
 3 files changed, 114 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/iomap.c b/fs/iomap.c
index 4f10c6b1cf6d..2ebff76039b5 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -348,6 +348,48 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 		truncate_pagecache_range(inode, max(pos, i_size), pos + len);
 }
 
+static int
+iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+		unsigned poff, unsigned plen, unsigned from, unsigned to,
+		struct iomap *iomap)
+{
+	struct bio_vec bvec;
+	struct bio bio;
+
+	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
+		zero_user_segments(page, poff, from, to, poff + plen);
+		return 0;
+	}
+
+	bio_init(&bio, &bvec, 1);
+	bio.bi_opf = REQ_OP_READ;
+	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
+	bio_set_dev(&bio, iomap->bdev);
+	__bio_add_page(&bio, page, plen, poff);
+	return submit_bio_wait(&bio);
+}
+
+static int
+__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+		struct page *page, struct iomap *iomap)
+{
+	loff_t block_size = i_blocksize(inode);
+	loff_t block_start = pos & ~(block_size - 1);
+	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+	unsigned poff = block_start & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
+	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
+
+	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+
+	if (PageUptodate(page))
+		return 0;
+	if (from <= poff && to >= poff + plen)
+		return 0;
+	return iomap_read_page_sync(inode, block_start, page,
+			poff, plen, from, to, iomap);
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap)
@@ -367,9 +409,10 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 
 	if (iomap->type == IOMAP_INLINE)
 		iomap_read_inline_data(inode, page, iomap);
-	else
+	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(page, pos, len, NULL, iomap);
-
+	else
+		status = __iomap_write_begin(inode, pos, len, page, iomap);
 	if (unlikely(status)) {
 		unlock_page(page);
 		put_page(page);
@@ -382,6 +425,57 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	return status;
 }
 
+int
+iomap_set_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+	int newly_dirty;
+
+	if (unlikely(!mapping))
+		return !TestSetPageDirty(page);
+
+	/*
+	 * Lock out page->mem_cgroup migration to keep PageDirty
+	 * synchronized with per-memcg dirty page counters.
+	 */
+	lock_page_memcg(page);
+	newly_dirty = !TestSetPageDirty(page);
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 0);
+	unlock_page_memcg(page);
+
+	if (newly_dirty)
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	return newly_dirty;
+}
+EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
+
+static int
+__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+		unsigned copied, struct page *page, struct iomap *iomap)
+{
+	flush_dcache_page(page);
+
+	/*
+	 * The blocks that were entirely written will now be uptodate, so we
+	 * don't have to worry about a readpage reading them and overwriting a
+	 * partial write.  However if we have encountered a short write and only
+	 * partially written into a block, it will not be marked uptodate, so a
+	 * readpage might come in and destroy our partial write.
+	 *
+	 * Do the simplest thing, and just treat any short write to a non
+	 * uptodate page as a zero-length write, and force the caller to redo
+	 * the whole thing.
+	 */
+	if (unlikely(copied < len && !PageUptodate(page))) {
+		copied = 0;
+	} else {
+		SetPageUptodate(page);
+		iomap_set_page_dirty(page);
+	}
+	return __generic_write_end(inode, pos, copied, page);
+}
+
 static int
 iomap_write_end_inline(struct inode *inode, struct page *page,
 		struct iomap *iomap, loff_t pos, unsigned copied)
@@ -408,9 +502,11 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 
 	if (iomap->type == IOMAP_INLINE) {
 		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
-	} else {
+	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
 		ret = generic_write_end(NULL, inode->i_mapping, pos, len,
 				copied, page, NULL);
+	} else {
+		ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
 	}
 
 	if (iomap->page_done)
@@ -703,11 +799,16 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 	struct page *page = data;
 	int ret;
 
-	ret = __block_write_begin_int(page, pos, length, NULL, iomap);
-	if (ret)
-		return ret;
+	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+		if (ret)
+			return ret;
+		block_commit_write(page, 0, length);
+	} else {
+		WARN_ON_ONCE(!PageUptodate(page));
+		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	}
 
-	block_commit_write(page, 0, length);
 	return length;
 }
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 49f5492eed3b..8a3613d576af 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -626,7 +626,7 @@ retry:
 	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
 	 * them out if the write happens to fail.
 	 */
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1019,6 +1019,8 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
+	iomap->flags |= IOMAP_F_BUFFER_HEAD;
+
 	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
 			!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		/* Reserve delalloc blocks for regular writeback. */
@@ -1119,7 +1121,7 @@ xfs_file_iomap_begin(
 	if (error)
 		return error;
 
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 
 out_finish:
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 30d314407f66..5eb9ca8d7ce5 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -30,6 +30,7 @@ struct vm_fault;
  */
 #define IOMAP_F_NEW		0x01	/* blocks have been newly allocated */
 #define IOMAP_F_DIRTY		0x02	/* uncommitted metadata */
+#define IOMAP_F_BUFFER_HEAD	0x04	/* file system requires buffer heads */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
@@ -102,6 +103,7 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
 int iomap_readpages(struct address_space *mapping, struct list_head *pages,
 		unsigned nr_pages, const struct iomap_ops *ops);
+int iomap_set_page_dirty(struct page *page);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-- 
cgit v1.2.3


From 17dbca119312b4e8173d4e25ff64262119fcef38 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:26 -0700
Subject: x86/speculation/l1tf: Add sysfs reporting for l1tf

L1TF core kernel workarounds are cheap and normally always enabled, However
they still should be reported in sysfs if the system is vulnerable or
mitigated. Add the necessary CPU feature/bug bits.

- Extend the existing checks for Meltdowns to determine if the system is
  vulnerable. All CPUs which are not vulnerable to Meltdown are also not
  vulnerable to L1TF

- Check for 32bit non PAE and emit a warning as there is no practical way
  for mitigation due to the limited physical address bits

- If the system has more than MAX_PA/2 physical memory the invert page
  workarounds don't protect the system against the L1TF attack anymore,
  because an inverted physical address will also point to valid
  memory. Print a warning in this case and report that the system is
  vulnerable.

Add a function which returns the PFN limit for the L1TF mitigation, which
will be used in follow up patches for sanity and range checks.

[ tglx: Renamed the CPU feature bit to L1TF_PTEINV ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/include/asm/cpufeatures.h |  2 ++
 arch/x86/include/asm/processor.h   |  5 +++++
 arch/x86/kernel/cpu/bugs.c         | 40 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c       | 20 +++++++++++++++++++
 drivers/base/cpu.c                 |  8 ++++++++
 include/linux/cpu.h                |  2 ++
 6 files changed, 77 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5701f5cecd31..f41cf9df4a83 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -219,6 +219,7 @@
 #define X86_FEATURE_IBPB		( 7*32+26) /* Indirect Branch Prediction Barrier */
 #define X86_FEATURE_STIBP		( 7*32+27) /* Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ZEN			( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+#define X86_FEATURE_L1TF_PTEINV		( 7*32+29) /* "" L1TF workaround PTE inversion */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -373,5 +374,6 @@
 #define X86_BUG_SPECTRE_V1		X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
 #define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 #define X86_BUG_SPEC_STORE_BYPASS	X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF			X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cfd29ee8c3da..7e3ac5eedcd6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -181,6 +181,11 @@ extern const struct seq_operations cpuinfo_op;
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
+static inline unsigned long l1tf_pfn_limit(void)
+{
+	return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
+}
+
 extern void early_cpu_init(void);
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index cd0fda1fff6d..3ce9674e4887 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -27,9 +27,11 @@
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 #include <asm/intel-family.h>
+#include <asm/e820/api.h>
 
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
+static void __init l1tf_select_mitigation(void);
 
 /*
  * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
@@ -81,6 +83,8 @@ void __init check_bugs(void)
 	 */
 	ssb_select_mitigation();
 
+	l1tf_select_mitigation();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -205,6 +209,32 @@ static void x86_amd_ssb_disable(void)
 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
 }
 
+static void __init l1tf_select_mitigation(void)
+{
+	u64 half_pa;
+
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return;
+
+#if CONFIG_PGTABLE_LEVELS == 2
+	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
+	return;
+#endif
+
+	/*
+	 * This is extremely unlikely to happen because almost all
+	 * systems have far more MAX_PA/2 than RAM can be fit into
+	 * DIMM slots.
+	 */
+	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+	if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
+		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
+		return;
+	}
+
+	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
+}
+
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
 
@@ -678,6 +708,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 	case X86_BUG_SPEC_STORE_BYPASS:
 		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
 
+	case X86_BUG_L1TF:
+		if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
+			return sprintf(buf, "Mitigation: Page Table Inversion\n");
+		break;
+
 	default:
 		break;
 	}
@@ -704,4 +739,9 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
 }
+
+ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
+}
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0df7151cfef4..9baf684e2b55 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -984,6 +984,21 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
 	{}
 };
 
+static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
+	/* in addition to cpu_no_speculation */
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT1	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT2	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_AIRMONT		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MERRIFIELD	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MOOREFIELD	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GOLDMONT	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_DENVERTON	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GEMINI_LAKE	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNL		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNM		},
+	{}
+};
+
 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = 0;
@@ -1010,6 +1025,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 		return;
 
 	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+	if (x86_match_cpu(cpu_no_l1tf))
+		return;
+
+	setup_force_cpu_bug(X86_BUG_L1TF);
 }
 
 /*
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 30cc9c877ebb..eb9443d5bae1 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -540,16 +540,24 @@ ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
 	return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_l1tf(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
 static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
+static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
 	&dev_attr_spectre_v1.attr,
 	&dev_attr_spectre_v2.attr,
 	&dev_attr_spec_store_bypass.attr,
+	&dev_attr_l1tf.attr,
 	NULL
 };
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a97a63eef59f..d3da5184aa1c 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -55,6 +55,8 @@ extern ssize_t cpu_show_spectre_v2(struct device *dev,
 				   struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
 					  struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_l1tf(struct device *dev,
+			     struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
-- 
cgit v1.2.3


From 377eeaa8e11fe815b1d07c81c4a0e2843a8c15eb Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:28 -0700
Subject: x86/speculation/l1tf: Limit swap file size to MAX_PA/2

For the L1TF workaround its necessary to limit the swap file size to below
MAX_PA/2, so that the higher bits of the swap offset inverted never point
to valid memory.

Add a mechanism for the architecture to override the swap file size check
in swapfile.c and add a x86 specific max swapfile check function that
enforces that limit.

The check is only enabled if the CPU is vulnerable to L1TF.

In VMs with 42bit MAX_PA the typical limit is 2TB now, on a native system
with 46bit PA it is 32TB. The limit is only per individual swap file, so
it's always possible to exceed these limits with multiple swap files or
partitions.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/mm/init.c       | 15 +++++++++++++++
 include/linux/swapfile.h |  2 ++
 mm/swapfile.c            | 46 ++++++++++++++++++++++++++++++----------------
 3 files changed, 47 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index cee58a972cb2..1b530197d114 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -4,6 +4,8 @@
 #include <linux/swap.h>
 #include <linux/memblock.h>
 #include <linux/bootmem.h>	/* for max_low_pfn */
+#include <linux/swapfile.h>
+#include <linux/swapops.h>
 
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
@@ -880,3 +882,16 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 	__cachemode2pte_tbl[cache] = __cm_idx2pte(entry);
 	__pte2cachemode_tbl[entry] = cache;
 }
+
+unsigned long max_swapfile_size(void)
+{
+	unsigned long pages;
+
+	pages = generic_max_swapfile_size();
+
+	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+		pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
+	}
+	return pages;
+}
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index 06bd7b096167..e06febf62978 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -10,5 +10,7 @@ extern spinlock_t swap_lock;
 extern struct plist_head swap_active_head;
 extern struct swap_info_struct *swap_info[];
 extern int try_to_unuse(unsigned int, bool, unsigned long);
+extern unsigned long generic_max_swapfile_size(void);
+extern unsigned long max_swapfile_size(void);
 
 #endif /* _LINUX_SWAPFILE_H */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2cc2972eedaf..18185ae4f223 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2909,6 +2909,35 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
 	return 0;
 }
 
+
+/*
+ * Find out how many pages are allowed for a single swap device. There
+ * are two limiting factors:
+ * 1) the number of bits for the swap offset in the swp_entry_t type, and
+ * 2) the number of bits in the swap pte, as defined by the different
+ * architectures.
+ *
+ * In order to find the largest possible bit mask, a swap entry with
+ * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
+ * decoded to a swp_entry_t again, and finally the swap offset is
+ * extracted.
+ *
+ * This will mask all the bits from the initial ~0UL mask that can't
+ * be encoded in either the swp_entry_t or the architecture definition
+ * of a swap pte.
+ */
+unsigned long generic_max_swapfile_size(void)
+{
+	return swp_offset(pte_to_swp_entry(
+			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+}
+
+/* Can be overridden by an architecture for additional checks. */
+__weak unsigned long max_swapfile_size(void)
+{
+	return generic_max_swapfile_size();
+}
+
 static unsigned long read_swap_header(struct swap_info_struct *p,
 					union swap_header *swap_header,
 					struct inode *inode)
@@ -2944,22 +2973,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	p->cluster_next = 1;
 	p->cluster_nr = 0;
 
-	/*
-	 * Find out how many pages are allowed for a single swap
-	 * device. There are two limiting factors: 1) the number
-	 * of bits for the swap offset in the swp_entry_t type, and
-	 * 2) the number of bits in the swap pte as defined by the
-	 * different architectures. In order to find the
-	 * largest possible bit mask, a swap entry with swap type 0
-	 * and swap offset ~0UL is created, encoded to a swap pte,
-	 * decoded to a swp_entry_t again, and finally the swap
-	 * offset is extracted. This will mask all the bits from
-	 * the initial ~0UL mask that can't be encoded in either
-	 * the swp_entry_t or the architecture definition of a
-	 * swap pte.
-	 */
-	maxpages = swp_offset(pte_to_swp_entry(
-			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+	maxpages = max_swapfile_size();
 	last_page = swap_header->info.last_page;
 	if (!last_page) {
 		pr_warn("Empty swap-file\n");
-- 
cgit v1.2.3


From 5a6cf77f5e35e7af35d36a1e7dc21a42f6412e4f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 20 Jun 2018 01:05:07 +0900
Subject: kprobes: Remove jprobe API implementation

Remove functionally empty jprobe API implementations and test cases.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-arch@vger.kernel.org
Link: https://lore.kernel.org/lkml/152942430705.15209.2307050500995264322.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kprobes.h |  3 --
 kernel/kprobes.c        | 78 +---------------------------------------
 kernel/test_kprobes.c   | 94 -------------------------------------------------
 lib/Kconfig.debug       |  2 +-
 4 files changed, 2 insertions(+), 175 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 9440a2fc8893..b520baa65682 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -389,9 +389,6 @@ int register_kprobe(struct kprobe *p);
 void unregister_kprobe(struct kprobe *p);
 int register_kprobes(struct kprobe **kps, int num);
 void unregister_kprobes(struct kprobe **kps, int num);
-int setjmp_pre_handler(struct kprobe *, struct pt_regs *);
-int longjmp_break_handler(struct kprobe *, struct pt_regs *);
-void jprobe_return(void);
 unsigned long arch_deref_entry_point(void *);
 
 int register_kretprobe(struct kretprobe *rp);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ea619021d901..69de130595f7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1272,7 +1272,7 @@ NOKPROBE_SYMBOL(cleanup_rp_inst);
 
 /*
 * Add the new probe to ap->list. Fail if this is the
-* second jprobe at the address - two jprobes can't coexist
+* second break_handler at the address
 */
 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
@@ -1812,77 +1812,6 @@ unsigned long __weak arch_deref_entry_point(void *entry)
 	return (unsigned long)entry;
 }
 
-#if 0
-int register_jprobes(struct jprobe **jps, int num)
-{
-	int ret = 0, i;
-
-	if (num <= 0)
-		return -EINVAL;
-
-	for (i = 0; i < num; i++) {
-		ret = register_jprobe(jps[i]);
-
-		if (ret < 0) {
-			if (i > 0)
-				unregister_jprobes(jps, i);
-			break;
-		}
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(register_jprobes);
-
-int register_jprobe(struct jprobe *jp)
-{
-	unsigned long addr, offset;
-	struct kprobe *kp = &jp->kp;
-
-	/*
-	 * Verify probepoint as well as the jprobe handler are
-	 * valid function entry points.
-	 */
-	addr = arch_deref_entry_point(jp->entry);
-
-	if (kallsyms_lookup_size_offset(addr, NULL, &offset) && offset == 0 &&
-	    kprobe_on_func_entry(kp->addr, kp->symbol_name, kp->offset)) {
-		kp->pre_handler = setjmp_pre_handler;
-		kp->break_handler = longjmp_break_handler;
-		return register_kprobe(kp);
-	}
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(register_jprobe);
-
-void unregister_jprobe(struct jprobe *jp)
-{
-	unregister_jprobes(&jp, 1);
-}
-EXPORT_SYMBOL_GPL(unregister_jprobe);
-
-void unregister_jprobes(struct jprobe **jps, int num)
-{
-	int i;
-
-	if (num <= 0)
-		return;
-	mutex_lock(&kprobe_mutex);
-	for (i = 0; i < num; i++)
-		if (__unregister_kprobe_top(&jps[i]->kp) < 0)
-			jps[i]->kp.addr = NULL;
-	mutex_unlock(&kprobe_mutex);
-
-	synchronize_sched();
-	for (i = 0; i < num; i++) {
-		if (jps[i]->kp.addr)
-			__unregister_kprobe_bottom(&jps[i]->kp);
-	}
-}
-EXPORT_SYMBOL_GPL(unregister_jprobes);
-#endif
-
 #ifdef CONFIG_KRETPROBES
 /*
  * This kprobe pre_handler is registered with every kretprobe. When probe
@@ -2329,8 +2258,6 @@ static void report_probe(struct seq_file *pi, struct kprobe *p,
 
 	if (p->pre_handler == pre_handler_kretprobe)
 		kprobe_type = "r";
-	else if (p->pre_handler == setjmp_pre_handler)
-		kprobe_type = "j";
 	else
 		kprobe_type = "k";
 
@@ -2637,6 +2564,3 @@ late_initcall(debugfs_kprobe_init);
 #endif /* CONFIG_DEBUG_FS */
 
 module_init(init_kprobes);
-
-/* defined in arch/.../kernel/kprobes.c */
-EXPORT_SYMBOL_GPL(jprobe_return);
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index dd53e354f630..7bca480151b0 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -162,90 +162,6 @@ static int test_kprobes(void)
 
 }
 
-#if 0
-static u32 jph_val;
-
-static u32 j_kprobe_target(u32 value)
-{
-	if (preemptible()) {
-		handler_errors++;
-		pr_err("jprobe-handler is preemptible\n");
-	}
-	if (value != rand1) {
-		handler_errors++;
-		pr_err("incorrect value in jprobe handler\n");
-	}
-
-	jph_val = rand1;
-	jprobe_return();
-	return 0;
-}
-
-static struct jprobe jp = {
-	.entry		= j_kprobe_target,
-	.kp.symbol_name = "kprobe_target"
-};
-
-static int test_jprobe(void)
-{
-	int ret;
-
-	ret = register_jprobe(&jp);
-	if (ret < 0) {
-		pr_err("register_jprobe returned %d\n", ret);
-		return ret;
-	}
-
-	ret = target(rand1);
-	unregister_jprobe(&jp);
-	if (jph_val == 0) {
-		pr_err("jprobe handler not called\n");
-		handler_errors++;
-	}
-
-	return 0;
-}
-
-static struct jprobe jp2 = {
-	.entry          = j_kprobe_target,
-	.kp.symbol_name = "kprobe_target2"
-};
-
-static int test_jprobes(void)
-{
-	int ret;
-	struct jprobe *jps[2] = {&jp, &jp2};
-
-	/* addr and flags should be cleard for reusing kprobe. */
-	jp.kp.addr = NULL;
-	jp.kp.flags = 0;
-	ret = register_jprobes(jps, 2);
-	if (ret < 0) {
-		pr_err("register_jprobes returned %d\n", ret);
-		return ret;
-	}
-
-	jph_val = 0;
-	ret = target(rand1);
-	if (jph_val == 0) {
-		pr_err("jprobe handler not called\n");
-		handler_errors++;
-	}
-
-	jph_val = 0;
-	ret = target2(rand1);
-	if (jph_val == 0) {
-		pr_err("jprobe handler2 not called\n");
-		handler_errors++;
-	}
-	unregister_jprobes(jps, 2);
-
-	return 0;
-}
-#else
-#define test_jprobe() (0)
-#define test_jprobes() (0)
-#endif
 #ifdef CONFIG_KRETPROBES
 static u32 krph_val;
 
@@ -383,16 +299,6 @@ int init_test_probes(void)
 	if (ret < 0)
 		errors++;
 
-	num_tests++;
-	ret = test_jprobe();
-	if (ret < 0)
-		errors++;
-
-	num_tests++;
-	ret = test_jprobes();
-	if (ret < 0)
-		errors++;
-
 #ifdef CONFIG_KRETPROBES
 	num_tests++;
 	ret = test_kretprobe();
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8838d1158d19..0b066b3c9284 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1718,7 +1718,7 @@ config KPROBES_SANITY_TEST
 	default n
 	help
 	  This option provides for testing basic kprobes functionality on
-	  boot. A sample kprobe, jprobe and kretprobe are inserted and
+	  boot. Samples of kprobe and kretprobe are inserted and
 	  verified for functionality.
 
 	  Say N if you are unsure.
-- 
cgit v1.2.3


From 4de58696de076d9bd2745d1cbe0930635c3f5ac9 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 20 Jun 2018 01:17:15 +0900
Subject: kprobes: Remove jprobe stub API

Remove jprobe stub APIs from linux/kprobes.h since
the jprobe implementation was completely gone.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-arch@vger.kernel.org
Link: https://lore.kernel.org/lkml/152942503572.15209.1652552217914694917.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kprobes.h | 50 -------------------------------------------------
 1 file changed, 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index b520baa65682..e909413e4e38 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -63,7 +63,6 @@ struct pt_regs;
 struct kretprobe;
 struct kretprobe_instance;
 typedef int (*kprobe_pre_handler_t) (struct kprobe *, struct pt_regs *);
-typedef int (*kprobe_break_handler_t) (struct kprobe *, struct pt_regs *);
 typedef void (*kprobe_post_handler_t) (struct kprobe *, struct pt_regs *,
 				       unsigned long flags);
 typedef int (*kprobe_fault_handler_t) (struct kprobe *, struct pt_regs *,
@@ -101,12 +100,6 @@ struct kprobe {
 	 */
 	kprobe_fault_handler_t fault_handler;
 
-	/*
-	 * ... called if breakpoint trap occurs in probe handler.
-	 * Return 1 if it handled break, otherwise kernel will see it.
-	 */
-	kprobe_break_handler_t break_handler;
-
 	/* Saved opcode (which has been replaced with breakpoint) */
 	kprobe_opcode_t opcode;
 
@@ -154,24 +147,6 @@ static inline int kprobe_ftrace(struct kprobe *p)
 	return p->flags & KPROBE_FLAG_FTRACE;
 }
 
-/*
- * Special probe type that uses setjmp-longjmp type tricks to resume
- * execution at a specified entry with a matching prototype corresponding
- * to the probed function - a trick to enable arguments to become
- * accessible seamlessly by probe handling logic.
- * Note:
- * Because of the way compilers allocate stack space for local variables
- * etc upfront, regardless of sub-scopes within a function, this mirroring
- * principle currently works only for probes placed on function entry points.
- */
-struct jprobe {
-	struct kprobe kp;
-	void *entry;	/* probe handling code to jump to */
-};
-
-/* For backward compatibility with old code using JPROBE_ENTRY() */
-#define JPROBE_ENTRY(handler)	(handler)
-
 /*
  * Function-return probe -
  * Note:
@@ -436,9 +411,6 @@ static inline void unregister_kprobe(struct kprobe *p)
 static inline void unregister_kprobes(struct kprobe **kps, int num)
 {
 }
-static inline void jprobe_return(void)
-{
-}
 static inline int register_kretprobe(struct kretprobe *rp)
 {
 	return -ENOSYS;
@@ -465,20 +437,6 @@ static inline int enable_kprobe(struct kprobe *kp)
 	return -ENOSYS;
 }
 #endif /* CONFIG_KPROBES */
-static inline int register_jprobe(struct jprobe *p)
-{
-	return -ENOSYS;
-}
-static inline int register_jprobes(struct jprobe **jps, int num)
-{
-	return -ENOSYS;
-}
-static inline void unregister_jprobe(struct jprobe *p)
-{
-}
-static inline void unregister_jprobes(struct jprobe **jps, int num)
-{
-}
 static inline int disable_kretprobe(struct kretprobe *rp)
 {
 	return disable_kprobe(&rp->kp);
@@ -487,14 +445,6 @@ static inline int enable_kretprobe(struct kretprobe *rp)
 {
 	return enable_kprobe(&rp->kp);
 }
-static inline int disable_jprobe(struct jprobe *jp)
-{
-	return -ENOSYS;
-}
-static inline int enable_jprobe(struct jprobe *jp)
-{
-	return -ENOSYS;
-}
 
 #ifndef CONFIG_KPROBES
 static inline bool is_kprobe_insn_slot(unsigned long addr)
-- 
cgit v1.2.3


From 8bd9cb51daac89337295b6f037b0486911e1b408 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 19 Jun 2018 13:53:08 +0100
Subject: locking/atomics, asm-generic: Move some macros from <linux/bitops.h>
 to a new <linux/bits.h> file

In preparation for implementing the asm-generic atomic bitops in terms
of atomic_long_*(), we need to prevent <asm/atomic.h> implementations from
pulling in <linux/bitops.h>. A common reason for this include is for the
BITS_PER_BYTE definition, so move this and some other BIT() and masking
macros into a new header file, <linux/bits.h>.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arm-kernel@lists.infradead.org
Cc: yamada.masahiro@socionext.com
Link: https://lore.kernel.org/lkml/1529412794-17720-4-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/bitops.h | 22 +---------------------
 include/linux/bits.h   | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 21 deletions(-)
 create mode 100644 include/linux/bits.h

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 4cac4e1a72ff..af419012d77d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -2,29 +2,9 @@
 #ifndef _LINUX_BITOPS_H
 #define _LINUX_BITOPS_H
 #include <asm/types.h>
+#include <linux/bits.h>
 
-#ifdef	__KERNEL__
-#define BIT(nr)			(1UL << (nr))
-#define BIT_ULL(nr)		(1ULL << (nr))
-#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
-#define BIT_ULL_MASK(nr)	(1ULL << ((nr) % BITS_PER_LONG_LONG))
-#define BIT_ULL_WORD(nr)	((nr) / BITS_PER_LONG_LONG)
-#define BITS_PER_BYTE		8
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
-#endif
-
-/*
- * Create a contiguous bitmask starting at bit position @l and ending at
- * position @h. For example
- * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
- */
-#define GENMASK(h, l) \
-	(((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
-
-#define GENMASK_ULL(h, l) \
-	(((~0ULL) - (1ULL << (l)) + 1) & \
-	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
 
 extern unsigned int __sw_hweight8(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
diff --git a/include/linux/bits.h b/include/linux/bits.h
new file mode 100644
index 000000000000..2b7b532c1d51
--- /dev/null
+++ b/include/linux/bits.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_BITS_H
+#define __LINUX_BITS_H
+#include <asm/bitsperlong.h>
+
+#define BIT(nr)			(1UL << (nr))
+#define BIT_ULL(nr)		(1ULL << (nr))
+#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BIT_ULL_MASK(nr)	(1ULL << ((nr) % BITS_PER_LONG_LONG))
+#define BIT_ULL_WORD(nr)	((nr) / BITS_PER_LONG_LONG)
+#define BITS_PER_BYTE		8
+
+/*
+ * Create a contiguous bitmask starting at bit position @l and ending at
+ * position @h. For example
+ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
+ */
+#define GENMASK(h, l) \
+	(((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+
+#define GENMASK_ULL(h, l) \
+	(((~0ULL) - (1ULL << (l)) + 1) & \
+	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
+
+#endif	/* __LINUX_BITS_H */
-- 
cgit v1.2.3


From 05736e4ac13c08a4a9b1ef2de26dd31a32cbee57 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 29 May 2018 17:48:27 +0200
Subject: cpu/hotplug: Provide knobs to control SMT

Provide a command line and a sysfs knob to control SMT.

The command line options are:

 'nosmt':	Enumerate secondary threads, but do not online them

 'nosmt=force': Ignore secondary threads completely during enumeration
 		via MP table and ACPI/MADT.

The sysfs control file has the following states (read/write):

 'on':		 SMT is enabled. Secondary threads can be freely onlined
 'off':		 SMT is disabled. Secondary threads, even if enumerated
 		 cannot be onlined
 'forceoff':	 SMT is permanentely disabled. Writes to the control
 		 file are rejected.
 'notsupported': SMT is not supported by the CPU

The command line option 'nosmt' sets the sysfs control to 'off'. This
can be changed to 'on' to reenable SMT during runtime.

The command line option 'nosmt=force' sets the sysfs control to
'forceoff'. This cannot be changed during runtime.

When SMT is 'on' and the control file is changed to 'off' then all online
secondary threads are offlined and attempts to online a secondary thread
later on are rejected.

When SMT is 'off' and the control file is changed to 'on' then secondary
threads can be onlined again. The 'off' -> 'on' transition does not
automatically online the secondary threads.

When the control file is set to 'forceoff', the behaviour is the same as
setting it to 'off', but the operation is irreversible and later writes to
the control file are rejected.

When the control status is 'notsupported' then writes to the control file
are rejected.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/ABI/testing/sysfs-devices-system-cpu |  20 +++
 Documentation/admin-guide/kernel-parameters.txt    |   8 +
 arch/Kconfig                                       |   3 +
 arch/x86/Kconfig                                   |   1 +
 include/linux/cpu.h                                |  13 ++
 kernel/cpu.c                                       | 170 +++++++++++++++++++++
 6 files changed, 215 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 9c5e7732d249..65d9b844ecfd 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -487,3 +487,23 @@ Description:	Information about CPU vulnerabilities
 		"Not affected"	  CPU is not affected by the vulnerability
 		"Vulnerable"	  CPU is affected and no mitigation in effect
 		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
+
+What:		/sys/devices/system/cpu/smt
+		/sys/devices/system/cpu/smt/active
+		/sys/devices/system/cpu/smt/control
+Date:		June 2018
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Control Symetric Multi Threading (SMT)
+
+		active:  Tells whether SMT is active (enabled and siblings online)
+
+		control: Read/write interface to control SMT. Possible
+			 values:
+
+			 "on"		SMT is enabled
+			 "off"		SMT is disabled
+			 "forceoff"	SMT is force disabled. Cannot be changed.
+			 "notsupported" SMT is not supported by the CPU
+
+			 If control status is "forceoff" or "notsupported" writes
+			 are rejected.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index efc7aa7a0670..8e29c4b6756f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2687,6 +2687,14 @@
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
 
+			[KNL,x86] Disable symmetric multithreading (SMT).
+			nosmt=force: Force disable SMT, similar to disabling
+				     it in the BIOS except that some of the
+				     resource partitioning effects which are
+				     caused by having SMT enabled in the BIOS
+				     cannot be undone. Depending on the CPU
+				     type this might have a performance impact.
+
 	nospectre_v2	[X86] Disable all mitigations for the Spectre variant 2
 			(indirect branch prediction) vulnerability. System may
 			allow data leaks with this option, which is equivalent
diff --git a/arch/Kconfig b/arch/Kconfig
index 1aa59063f1fd..d1f2ed462ac8 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -13,6 +13,9 @@ config KEXEC_CORE
 config HAVE_IMA_KEXEC
 	bool
 
+config HOTPLUG_SMT
+	bool
+
 config OPROFILE
 	tristate "OProfile system profiling"
 	depends on PROFILING
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1dbb4ee19d7..7a34fdf8daf0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -187,6 +187,7 @@ config X86
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_USER_RETURN_NOTIFIER
+	select HOTPLUG_SMT			if SMP
 	select IRQ_FORCED_THREADING
 	select NEED_SG_DMA_LENGTH
 	select PCI_LOCKLESS_CONFIG
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d3da5184aa1c..7532cbf27b1d 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -168,4 +168,17 @@ void cpuhp_report_idle_dead(void);
 static inline void cpuhp_report_idle_dead(void) { }
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
+enum cpuhp_smt_control {
+	CPU_SMT_ENABLED,
+	CPU_SMT_DISABLED,
+	CPU_SMT_FORCE_DISABLED,
+	CPU_SMT_NOT_SUPPORTED,
+};
+
+#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
+extern enum cpuhp_smt_control cpu_smt_control;
+#else
+# define cpu_smt_control		(CPU_SMT_ENABLED)
+#endif
+
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e266cb529c1a..d29fdd7e57bb 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -933,6 +933,29 @@ EXPORT_SYMBOL(cpu_down);
 #define takedown_cpu		NULL
 #endif /*CONFIG_HOTPLUG_CPU*/
 
+#ifdef CONFIG_HOTPLUG_SMT
+enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+
+static int __init smt_cmdline_disable(char *str)
+{
+	cpu_smt_control = CPU_SMT_DISABLED;
+	if (str && !strcmp(str, "force")) {
+		pr_info("SMT: Force disabled\n");
+		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
+	}
+	return 0;
+}
+early_param("nosmt", smt_cmdline_disable);
+
+static inline bool cpu_smt_allowed(unsigned int cpu)
+{
+	return cpu_smt_control == CPU_SMT_ENABLED ||
+		topology_is_primary_thread(cpu);
+}
+#else
+static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+#endif
+
 /**
  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
  * @cpu: cpu that just started
@@ -1056,6 +1079,10 @@ static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
 		err = -EBUSY;
 		goto out;
 	}
+	if (!cpu_smt_allowed(cpu)) {
+		err = -EPERM;
+		goto out;
+	}
 
 	err = _cpu_up(cpu, 0, target);
 out:
@@ -1904,10 +1931,153 @@ static const struct attribute_group cpuhp_cpu_root_attr_group = {
 	NULL
 };
 
+#ifdef CONFIG_HOTPLUG_SMT
+
+static const char *smt_states[] = {
+	[CPU_SMT_ENABLED]		= "on",
+	[CPU_SMT_DISABLED]		= "off",
+	[CPU_SMT_FORCE_DISABLED]	= "forceoff",
+	[CPU_SMT_NOT_SUPPORTED]		= "notsupported",
+};
+
+static ssize_t
+show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
+}
+
+static void cpuhp_offline_cpu_device(unsigned int cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
+
+	dev->offline = true;
+	/* Tell user space about the state change */
+	kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
+}
+
+static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
+{
+	int cpu, ret = 0;
+
+	cpu_maps_update_begin();
+	for_each_online_cpu(cpu) {
+		if (topology_is_primary_thread(cpu))
+			continue;
+		ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
+		if (ret)
+			break;
+		/*
+		 * As this needs to hold the cpu maps lock it's impossible
+		 * to call device_offline() because that ends up calling
+		 * cpu_down() which takes cpu maps lock. cpu maps lock
+		 * needs to be held as this might race against in kernel
+		 * abusers of the hotplug machinery (thermal management).
+		 *
+		 * So nothing would update device:offline state. That would
+		 * leave the sysfs entry stale and prevent onlining after
+		 * smt control has been changed to 'off' again. This is
+		 * called under the sysfs hotplug lock, so it is properly
+		 * serialized against the regular offline usage.
+		 */
+		cpuhp_offline_cpu_device(cpu);
+	}
+	if (!ret)
+		cpu_smt_control = ctrlval;
+	cpu_maps_update_done();
+	return ret;
+}
+
+static void cpuhp_smt_enable(void)
+{
+	cpu_maps_update_begin();
+	cpu_smt_control = CPU_SMT_ENABLED;
+	cpu_maps_update_done();
+}
+
+static ssize_t
+store_smt_control(struct device *dev, struct device_attribute *attr,
+		  const char *buf, size_t count)
+{
+	int ctrlval, ret;
+
+	if (sysfs_streq(buf, "on"))
+		ctrlval = CPU_SMT_ENABLED;
+	else if (sysfs_streq(buf, "off"))
+		ctrlval = CPU_SMT_DISABLED;
+	else if (sysfs_streq(buf, "forceoff"))
+		ctrlval = CPU_SMT_FORCE_DISABLED;
+	else
+		return -EINVAL;
+
+	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
+		return -EPERM;
+
+	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+		return -ENODEV;
+
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
+
+	if (ctrlval != cpu_smt_control) {
+		switch (ctrlval) {
+		case CPU_SMT_ENABLED:
+			cpuhp_smt_enable();
+			break;
+		case CPU_SMT_DISABLED:
+		case CPU_SMT_FORCE_DISABLED:
+			ret = cpuhp_smt_disable(ctrlval);
+			break;
+		}
+	}
+
+	unlock_device_hotplug();
+	return ret ? ret : count;
+}
+static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
+
+static ssize_t
+show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool active = topology_max_smt_threads() > 1;
+
+	return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
+}
+static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
+
+static struct attribute *cpuhp_smt_attrs[] = {
+	&dev_attr_control.attr,
+	&dev_attr_active.attr,
+	NULL
+};
+
+static const struct attribute_group cpuhp_smt_attr_group = {
+	.attrs = cpuhp_smt_attrs,
+	.name = "smt",
+	NULL
+};
+
+static int __init cpu_smt_state_init(void)
+{
+	if (!topology_smt_supported())
+		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+
+	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
+				  &cpuhp_smt_attr_group);
+}
+
+#else
+static inline int cpu_smt_state_init(void) { return 0; }
+#endif
+
 static int __init cpuhp_sysfs_init(void)
 {
 	int cpu, ret;
 
+	ret = cpu_smt_state_init();
+	if (ret)
+		return ret;
+
 	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
 				 &cpuhp_cpu_root_attr_group);
 	if (ret)
-- 
cgit v1.2.3


From bfc18e389c7a09fbbbed6bf4032396685b14246e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:04 +0100
Subject: atomics/treewide: Rename __atomic_add_unless() =>
 atomic_fetch_add_unless()

While __atomic_add_unless() was originally intended as a building-block
for atomic_add_unless(), it's now used in a number of places around the
kernel. It's the only common atomic operation named __atomic*(), rather
than atomic_*(), and for consistency it would be better named
atomic_fetch_add_unless().

This lack of consistency is slightly confusing, and gets in the way of
scripting atomics. Given that, let's clean things up and promote it to
an official part of the atomics API, in the form of
atomic_fetch_add_unless().

This patch converts definitions and invocations over to the new name,
including the instrumented version, using the following script:

  ----
  git grep -w __atomic_add_unless | while read line; do
  sed -i '{s/\<__atomic_add_unless\>/atomic_fetch_add_unless/}' "${line%%:*}";
  done
  git grep -w __arch_atomic_add_unless | while read line; do
  sed -i '{s/\<__arch_atomic_add_unless\>/arch_atomic_fetch_add_unless/}' "${line%%:*}";
  done
  ----

Note that we do not have atomic{64,_long}_fetch_add_unless(), which will
be introduced by later patches.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-2-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           | 4 ++--
 arch/arc/include/asm/atomic.h             | 4 ++--
 arch/arm/include/asm/atomic.h             | 4 ++--
 arch/arm64/include/asm/atomic.h           | 2 +-
 arch/h8300/include/asm/atomic.h           | 2 +-
 arch/hexagon/include/asm/atomic.h         | 4 ++--
 arch/ia64/include/asm/atomic.h            | 2 +-
 arch/m68k/include/asm/atomic.h            | 2 +-
 arch/mips/include/asm/atomic.h            | 4 ++--
 arch/openrisc/include/asm/atomic.h        | 4 ++--
 arch/parisc/include/asm/atomic.h          | 4 ++--
 arch/powerpc/include/asm/atomic.h         | 8 ++++----
 arch/riscv/include/asm/atomic.h           | 4 ++--
 arch/s390/include/asm/atomic.h            | 2 +-
 arch/sh/include/asm/atomic.h              | 4 ++--
 arch/sparc/include/asm/atomic_32.h        | 2 +-
 arch/sparc/include/asm/atomic_64.h        | 2 +-
 arch/sparc/lib/atomic32.c                 | 4 ++--
 arch/x86/include/asm/atomic.h             | 4 ++--
 arch/xtensa/include/asm/atomic.h          | 4 ++--
 drivers/block/rbd.c                       | 2 +-
 drivers/infiniband/core/rdma_core.c       | 2 +-
 fs/afs/rxrpc.c                            | 2 +-
 include/asm-generic/atomic-instrumented.h | 4 ++--
 include/asm-generic/atomic.h              | 4 ++--
 include/linux/atomic.h                    | 2 +-
 kernel/bpf/syscall.c                      | 4 ++--
 net/rxrpc/call_object.c                   | 2 +-
 net/rxrpc/conn_object.c                   | 4 ++--
 net/rxrpc/local_object.c                  | 2 +-
 net/rxrpc/peer_object.c                   | 2 +-
 31 files changed, 50 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 767bfdd42992..392b15a4dd4f 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -206,7 +206,7 @@ ATOMIC_OPS(xor, xor)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -214,7 +214,7 @@ ATOMIC_OPS(xor, xor)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, new, old;
 	smp_mb();
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 11859287c52a..67121b5ff3a3 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -309,7 +309,7 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 #undef ATOMIC_OP
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -317,7 +317,7 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v
  */
-#define __atomic_add_unless(v, a, u)					\
+#define atomic_fetch_add_unless(v, a, u)					\
 ({									\
 	int c, old;							\
 									\
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 66d0e215a773..9d56d0727c9b 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -130,7 +130,7 @@ static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
 }
 #define atomic_cmpxchg_relaxed		atomic_cmpxchg_relaxed
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int oldval, newval;
 	unsigned long tmp;
@@ -215,7 +215,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index c0235e0ff849..264d20339f74 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -125,7 +125,7 @@
 #define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 #define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
 #define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)
-#define __atomic_add_unless(v, a, u)	___atomic_add_unless(v, a, u,)
+#define atomic_fetch_add_unless(v, a, u)	___atomic_add_unless(v, a, u,)
 #define atomic_andnot			atomic_andnot
 
 /*
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index b174dec099bf..5c856887fdf2 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -94,7 +94,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
 	h8300flags flags;
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index fb3dfb2a667e..287aa9f394f3 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -164,7 +164,7 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer to value
  * @a: amount to add
  * @u: unless value is equal to u
@@ -173,7 +173,7 @@ ATOMIC_OPS(xor)
  *
  */
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int __oldval;
 	register int tmp;
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 2524fb60fbc2..9d2ddde5f9d5 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -215,7 +215,7 @@ ATOMIC64_FETCH_OP(xor, ^)
 	(cmpxchg(&((v)->counter), old, new))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index e993e2860ee1..8022d9ea1213 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -211,7 +211,7 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 	return c != 0;
 }
 
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 0ab176bdb8e8..02fc1553cf9b 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -275,7 +275,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -283,7 +283,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h
index 146e1660f00e..b589fac39b92 100644
--- a/arch/openrisc/include/asm/atomic.h
+++ b/arch/openrisc/include/asm/atomic.h
@@ -100,7 +100,7 @@ ATOMIC_OP(xor)
  *
  * This is often used through atomic_inc_not_zero()
  */
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int old, tmp;
 
@@ -119,7 +119,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 
 	return old;
 }
-#define __atomic_add_unless	__atomic_add_unless
+#define atomic_fetch_add_unless	atomic_fetch_add_unless
 
 #include <asm-generic/atomic.h>
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 88bae6676c9b..7748abced766 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -78,7 +78,7 @@ static __inline__ int atomic_read(const atomic_t *v)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -86,7 +86,7 @@ static __inline__ int atomic_read(const atomic_t *v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 682b3e6a1e21..1483261080a1 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -218,7 +218,7 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
 #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -226,13 +226,13 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int t;
 
 	__asm__ __volatile__ (
 	PPC_ATOMIC_ENTRY_BARRIER
-"1:	lwarx	%0,0,%1		# __atomic_add_unless\n\
+"1:	lwarx	%0,0,%1		# atomic_fetch_add_unless\n\
 	cmpw	0,%0,%3 \n\
 	beq	2f \n\
 	add	%0,%2,%0 \n"
@@ -538,7 +538,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 
 	__asm__ __volatile__ (
 	PPC_ATOMIC_ENTRY_BARRIER
-"1:	ldarx	%0,0,%1		# __atomic_add_unless\n\
+"1:	ldarx	%0,0,%1		# atomic_fetch_add_unless\n\
 	cmpd	0,%0,%3 \n\
 	beq	2f \n\
 	add	%0,%2,%0 \n"
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 855115ace98c..739e810c857e 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -332,7 +332,7 @@ ATOMIC_OP(dec_and_test, dec, ==, 0, 64)
 #undef ATOMIC_OP
 
 /* This is required to provide a full barrier on success. */
-static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
        int prev, rc;
 
@@ -381,7 +381,7 @@ static __always_inline int atomic64_add_unless(atomic64_t *v, long a, long u)
  */
 static __always_inline int atomic_inc_not_zero(atomic_t *v)
 {
-        return __atomic_add_unless(v, 1, 0);
+        return atomic_fetch_add_unless(v, 1, 0);
 }
 
 #ifndef CONFIG_GENERIC_ATOMIC64
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 4b55532f15c4..c2858cdd8c29 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -90,7 +90,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return __atomic_cmpxchg(&v->counter, old, new);
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 0fd0099f43cc..ef45931ebac5 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -46,7 +46,7 @@
 #define atomic_cmpxchg(v, o, n)		(cmpxchg(&((v)->counter), (o), (n)))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -54,7 +54,7 @@
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index d13ce517f4b9..a58f4b43bcc7 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -27,7 +27,7 @@ int atomic_fetch_or(int, atomic_t *);
 int atomic_fetch_xor(int, atomic_t *);
 int atomic_cmpxchg(atomic_t *, int, int);
 int atomic_xchg(atomic_t *, int);
-int __atomic_add_unless(atomic_t *, int, int);
+int atomic_fetch_add_unless(atomic_t *, int, int);
 void atomic_set(atomic_t *, int);
 
 #define atomic_set_release(v, i)	atomic_set((v), (i))
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 28db058d471b..f416fd3d2708 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -89,7 +89,7 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	return xchg(&v->counter, new);
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 465a901a0ada..281fa634bb1a 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -95,7 +95,7 @@ int atomic_cmpxchg(atomic_t *v, int old, int new)
 }
 EXPORT_SYMBOL(atomic_cmpxchg);
 
-int __atomic_add_unless(atomic_t *v, int a, int u)
+int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
 	unsigned long flags;
@@ -107,7 +107,7 @@ int __atomic_add_unless(atomic_t *v, int a, int u)
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
 	return ret;
 }
-EXPORT_SYMBOL(__atomic_add_unless);
+EXPORT_SYMBOL(atomic_fetch_add_unless);
 
 /* Atomic operations are already serializing */
 void atomic_set(atomic_t *v, int i)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 0db6bec95489..84ed0bd76aef 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -254,7 +254,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
 }
 
 /**
- * __arch_atomic_add_unless - add unless the number is already a given value
+ * arch_atomic_fetch_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -262,7 +262,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
  * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns the old value of @v.
  */
-static __always_inline int __arch_atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c = arch_atomic_read(v);
 
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index e7a23f2a519a..4188e56c06c9 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -275,7 +275,7 @@ ATOMIC_OPS(xor)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -283,7 +283,7 @@ ATOMIC_OPS(xor)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index fa0729c1e776..d81c653b9bf6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -61,7 +61,7 @@ static int atomic_inc_return_safe(atomic_t *v)
 {
 	unsigned int counter;
 
-	counter = (unsigned int)__atomic_add_unless(v, 1, 0);
+	counter = (unsigned int)atomic_fetch_add_unless(v, 1, 0);
 	if (counter <= (unsigned int)INT_MAX)
 		return (int)counter;
 
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index a6e904973ba8..475910ffbcb6 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -121,7 +121,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
 	 * this lock.
 	 */
 	if (!exclusive)
-		return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+		return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
 			-EBUSY : 0;
 
 	/* lock is either WRITE or DESTROY - should be exclusive */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a1b18082991b..183cc5418722 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -648,7 +648,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
 	trace_afs_notify_call(rxcall, call);
 	call->need_attention = true;
 
-	u = __atomic_add_unless(&call->usage, 1, 0);
+	u = atomic_fetch_add_unless(&call->usage, 1, 0);
 	if (u != 0) {
 		trace_afs_call(call, afs_call_trace_wake, u,
 			       atomic_read(&call->net->nr_outstanding_calls),
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index ec07f23678ea..b8b14cc2df6c 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -84,10 +84,10 @@ static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 ne
 }
 #endif
 
-static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	kasan_check_write(v, sizeof(*v));
-	return __arch_atomic_add_unless(v, a, u);
+	return arch_atomic_fetch_add_unless(v, a, u);
 }
 
 
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index abe6dd9ca2a8..10051ed6d088 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -221,8 +221,8 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
-#ifndef __atomic_add_unless
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+#ifndef atomic_fetch_add_unless
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 01ce3997cb42..9cc982936675 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -530,7 +530,7 @@
  */
 static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
-	return __atomic_add_unless(v, a, u) != u;
+	return atomic_fetch_add_unless(v, a, u) != u;
 }
 
 /**
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35dc466641f2..f12db70d3bf3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -575,7 +575,7 @@ static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
 {
 	int refold;
 
-	refold = __atomic_add_unless(&map->refcnt, 1, 0);
+	refold = atomic_fetch_add_unless(&map->refcnt, 1, 0);
 
 	if (refold >= BPF_MAX_REFCNT) {
 		__bpf_map_put(map, false);
@@ -1142,7 +1142,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
 {
 	int refold;
 
-	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);
+	refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0);
 
 	if (refold >= BPF_MAX_REFCNT) {
 		__bpf_prog_put(prog, false);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f6734d8cb01a..9486293fef5c 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -415,7 +415,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
 bool rxrpc_queue_call(struct rxrpc_call *call)
 {
 	const void *here = __builtin_return_address(0);
-	int n = __atomic_add_unless(&call->usage, 1, 0);
+	int n = atomic_fetch_add_unless(&call->usage, 1, 0);
 	if (n == 0)
 		return false;
 	if (rxrpc_queue_work(&call->processor))
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 4c77a78a252a..77440a356b14 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -266,7 +266,7 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn)
 bool rxrpc_queue_conn(struct rxrpc_connection *conn)
 {
 	const void *here = __builtin_return_address(0);
-	int n = __atomic_add_unless(&conn->usage, 1, 0);
+	int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
 	if (n == 0)
 		return false;
 	if (rxrpc_queue_work(&conn->processor))
@@ -309,7 +309,7 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
 	const void *here = __builtin_return_address(0);
 
 	if (conn) {
-		int n = __atomic_add_unless(&conn->usage, 1, 0);
+		int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
 		if (n > 0)
 			trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here);
 		else
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index b493e6b62740..777c3ed4cfc0 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -305,7 +305,7 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
 	const void *here = __builtin_return_address(0);
 
 	if (local) {
-		int n = __atomic_add_unless(&local->usage, 1, 0);
+		int n = atomic_fetch_add_unless(&local->usage, 1, 0);
 		if (n > 0)
 			trace_rxrpc_local(local, rxrpc_local_got, n + 1, here);
 		else
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 1b7e8107b3ae..1cf3b408017a 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -406,7 +406,7 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
 	const void *here = __builtin_return_address(0);
 
 	if (peer) {
-		int n = __atomic_add_unless(&peer->usage, 1, 0);
+		int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
 		if (n > 0)
 			trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here);
 		else
-- 
cgit v1.2.3


From f74445b6dd6b8f33ed34e005d19ecbb49171dabf Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:06 +0100
Subject: atomics/treewide: Remove atomic_inc_not_zero_hint()

While documentation suggests atomic_inc_not_zero_hint() will perform better
than atomic_inc_not_zero(), this is unlikely to be the case. No architectures
implement atomic_inc_not_zero_hint() directly, and thus it either falls back to
atomic_inc_not_zero(), or a loop using atomic_cmpxchg().

Whenever the hint does not match the value in memory, the repeated use of
atomic_cmpxchg() will be more expensive than the read that
atomic_inc_not_zero_hint() attempts to avoid. For architectures with LL/SC
atomics, a read cannot be avoided, and it would always be better to use
atomic_inc_not_zero() directly. For other architectures, their own
atomic_inc_not_zero() is likely to be more optimal than an atomic_cmpxchg()
loop regardless.

Generally, atomic_inc_not_zero_hint() is liable to perform worse than
atomic_inc_not_zero(). Further, atomic_inc_not_zero_hint() only exists
for atomic_t, and not atomic64_t or atomic_long_t, and there is only one
user in the kernel tree.

Given all this, let's remove atomic_inc_not_zero_hint(), and migrate the
existing user over to atomic_inc_not_zero().

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-4-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/atomic.h | 32 --------------------------------
 net/atm/pppoatm.c      |  2 +-
 2 files changed, 1 insertion(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 9cc982936675..5c5620ae5a35 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -571,38 +571,6 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v)
 }
 #endif
 
-/**
- * atomic_inc_not_zero_hint - increment if not null
- * @v: pointer of type atomic_t
- * @hint: probable value of the atomic before the increment
- *
- * This version of atomic_inc_not_zero() gives a hint of probable
- * value of the atomic. This helps processor to not read the memory
- * before doing the atomic read/modify/write cycle, lowering
- * number of bus transactions on some arches.
- *
- * Returns: 0 if increment was not done, 1 otherwise.
- */
-#ifndef atomic_inc_not_zero_hint
-static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint)
-{
-	int val, c = hint;
-
-	/* sanity test, should be removed by compiler if hint is a constant */
-	if (!hint)
-		return atomic_inc_not_zero(v);
-
-	do {
-		val = atomic_cmpxchg(v, c, c + 1);
-		if (val == c)
-			return 1;
-		c = val;
-	} while (c);
-
-	return 0;
-}
-#endif
-
 #ifndef atomic_inc_unless_negative
 static inline int atomic_inc_unless_negative(atomic_t *p)
 {
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index af8c4b38b746..d84227d75717 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -244,7 +244,7 @@ static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
 	 * the packet count limit, so...
 	 */
 	if (atm_may_send(pvcc->atmvcc, size) &&
-	    atomic_inc_not_zero_hint(&pvcc->inflight, NONE_INFLIGHT))
+	    atomic_inc_not_zero(&pvcc->inflight))
 		return 1;
 
 	/*
-- 
cgit v1.2.3


From ade5ef9280c33993099199c51e2e27c2c4013afd Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:07 +0100
Subject: atomics: Make conditional ops return 'bool'

Some of the atomics return a status value, which is a boolean value
describing whether the operation was performed. To make it clear that
this is a boolean value, let's update the common fallbacks to return
bool, fixing up the return values and comments likewise.

At the same time, let's simplify the description of the operations in
their respective comments.

The instrumented atomics and generic atomic64 implementation are updated
accordingly.

Note that atomic64_dec_if_positive() doesn't follow the usual test op
pattern, and returns the would-be decremented value. This is not
changed.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-5-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-instrumented.h |  2 +-
 include/asm-generic/atomic64.h            |  3 ++-
 include/linux/atomic.h                    | 24 +++++++++++++-----------
 lib/atomic64.c                            |  6 +++---
 4 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index b8b14cc2df6c..497faa4a05e3 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -205,7 +205,7 @@ static __always_inline s64 atomic64_dec_return(atomic64_t *v)
 	return arch_atomic64_dec_return(v);
 }
 
-static __always_inline s64 atomic64_inc_not_zero(atomic64_t *v)
+static __always_inline bool atomic64_inc_not_zero(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_inc_not_zero(v);
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 8d28eb010d0d..a951a721e1bb 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -11,6 +11,7 @@
  */
 #ifndef _ASM_GENERIC_ATOMIC64_H
 #define _ASM_GENERIC_ATOMIC64_H
+#include <linux/types.h>
 
 typedef struct {
 	long long counter;
@@ -52,7 +53,7 @@ ATOMIC64_OPS(xor)
 extern long long atomic64_dec_if_positive(atomic64_t *v);
 extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
 extern long long atomic64_xchg(atomic64_t *v, long long new);
-extern int	 atomic64_add_unless(atomic64_t *v, long long a, long long u);
+extern bool	 atomic64_add_unless(atomic64_t *v, long long a, long long u);
 
 #define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 5c5620ae5a35..307a7f6d619a 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -2,6 +2,8 @@
 /* Atomic operations usable in machine independent code */
 #ifndef _LINUX_ATOMIC_H
 #define _LINUX_ATOMIC_H
+#include <linux/types.h>
+
 #include <asm/atomic.h>
 #include <asm/barrier.h>
 
@@ -525,10 +527,10 @@
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
  *
- * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns true if the addition was done.
  */
-static inline int atomic_add_unless(atomic_t *v, int a, int u)
+static inline bool atomic_add_unless(atomic_t *v, int a, int u)
 {
 	return atomic_fetch_add_unless(v, a, u) != u;
 }
@@ -537,8 +539,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
  * atomic_inc_not_zero - increment unless the number is zero
  * @v: pointer of type atomic_t
  *
- * Atomically increments @v by 1, so long as @v is non-zero.
- * Returns non-zero if @v was non-zero, and zero otherwise.
+ * Atomically increments @v by 1, if @v is non-zero.
+ * Returns true if the increment was done.
  */
 #ifndef atomic_inc_not_zero
 #define atomic_inc_not_zero(v)		atomic_add_unless((v), 1, 0)
@@ -572,28 +574,28 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_inc_unless_negative
-static inline int atomic_inc_unless_negative(atomic_t *p)
+static inline bool atomic_inc_unless_negative(atomic_t *p)
 {
 	int v, v1;
 	for (v = 0; v >= 0; v = v1) {
 		v1 = atomic_cmpxchg(p, v, v + 1);
 		if (likely(v1 == v))
-			return 1;
+			return true;
 	}
-	return 0;
+	return false;
 }
 #endif
 
 #ifndef atomic_dec_unless_positive
-static inline int atomic_dec_unless_positive(atomic_t *p)
+static inline bool atomic_dec_unless_positive(atomic_t *p)
 {
 	int v, v1;
 	for (v = 0; v <= 0; v = v1) {
 		v1 = atomic_cmpxchg(p, v, v - 1);
 		if (likely(v1 == v))
-			return 1;
+			return true;
 	}
-	return 0;
+	return false;
 }
 #endif
 
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 53c2d5edc826..4230f4b8906c 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -178,16 +178,16 @@ long long atomic64_xchg(atomic64_t *v, long long new)
 }
 EXPORT_SYMBOL(atomic64_xchg);
 
-int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
-	int ret = 0;
+	bool ret = false;
 
 	raw_spin_lock_irqsave(lock, flags);
 	if (v->counter != u) {
 		v->counter += a;
-		ret = 1;
+		ret = true;
 	}
 	raw_spin_unlock_irqrestore(lock, flags);
 	return ret;
-- 
cgit v1.2.3


From bef828204a1bc7a0fd3a24551c4265e9c2ab95ed Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:08 +0100
Subject: atomics/treewide: Make atomic64_inc_not_zero() optional

We define a trivial fallback for atomic_inc_not_zero(), but don't do
the same for atomic64_inc_not_zero(), leading most architectures to
define the same boilerplate.

Let's add a fallback in <linux/atomic.h>, and remove the redundant
implementations. Note that atomic64_add_unless() is always defined in
<linux/atomic.h>, and promotes its arguments to the requisite types, so
we need not do this explicitly.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-6-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           |  2 --
 arch/arc/include/asm/atomic.h             |  1 -
 arch/arm/include/asm/atomic.h             |  1 -
 arch/arm64/include/asm/atomic.h           |  2 --
 arch/ia64/include/asm/atomic.h            |  2 --
 arch/mips/include/asm/atomic.h            |  2 --
 arch/parisc/include/asm/atomic.h          |  2 --
 arch/powerpc/include/asm/atomic.h         |  1 +
 arch/riscv/include/asm/atomic.h           |  7 -------
 arch/s390/include/asm/atomic.h            |  1 -
 arch/sparc/include/asm/atomic_64.h        |  2 --
 arch/x86/include/asm/atomic64_32.h        |  2 +-
 arch/x86/include/asm/atomic64_64.h        |  2 --
 include/asm-generic/atomic-instrumented.h |  3 +++
 include/asm-generic/atomic64.h            |  1 -
 include/linux/atomic.h                    | 11 +++++++++++
 16 files changed, 16 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 392b15a4dd4f..eb0f25e4c5dd 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -296,8 +296,6 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return old - 1;
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
 #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
 #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
 
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index cecdf3403caf..1406825b5e7d 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -603,7 +603,6 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
-#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
 
 #endif	/* !CONFIG_GENERIC_ATOMIC64 */
 
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 9d56d0727c9b..02f3894faa48 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -534,7 +534,6 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1LL, (v))
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
-#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
 
 #endif /* !CONFIG_GENERIC_ATOMIC64 */
 #endif
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 264d20339f74..ad50412889c5 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -204,7 +204,5 @@
 #define atomic64_add_unless(v, a, u)	(___atomic_add_unless(v, a, u, 64) != u)
 #define atomic64_andnot			atomic64_andnot
 
-#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
-
 #endif
 #endif
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 9d2ddde5f9d5..93d48b823220 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -246,8 +246,6 @@ static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u)
 	return c != (u);
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
 static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 {
 	long c, old, dec;
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 02fc1553cf9b..502e691c6393 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -644,8 +644,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 	return c != (u);
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
 #define atomic64_dec_return(v) atomic64_sub_return(1, (v))
 #define atomic64_inc_return(v) atomic64_add_return(1, (v))
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 7748abced766..3fd0243bf405 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -305,8 +305,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 	return c != (u);
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
 /*
  * atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 1483261080a1..e59620ee4f6b 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -582,6 +582,7 @@ static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
 
 	return t1 != 0;
 }
+#define atomic64_inc_not_zero(v) atomic64_inc_not_zero((v))
 
 #endif /* __powerpc64__ */
 
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 0e27e050ba14..18259e90f57e 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -375,13 +375,6 @@ static __always_inline int atomic64_add_unless(atomic64_t *v, long a, long u)
 }
 #endif
 
-#ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
-{
-        return atomic64_add_unless(v, 1, 0);
-}
-#endif
-
 /*
  * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
  * {cmp,}xchg and the operations that return, so they need a full barrier.
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index c2858cdd8c29..66dac30a4fe1 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -212,6 +212,5 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 #define atomic64_dec(_v)		atomic64_sub(1, _v)
 #define atomic64_dec_return(_v)		atomic64_sub_return(1, _v)
 #define atomic64_dec_and_test(_v)	(atomic64_sub_return(1, _v) == 0)
-#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
 
 #endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index f416fd3d2708..07830a316464 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -123,8 +123,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
 	return c != (u);
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
 long atomic64_dec_if_positive(atomic64_t *v);
 
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 92212bf0484f..2a33cc17801b 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -295,7 +295,7 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
 	return (int)a;
 }
 
-
+#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
 static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
 {
 	int r;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6106b59d3260..6f95023894b7 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -207,8 +207,6 @@ static inline bool arch_atomic64_add_unless(atomic64_t *v, long a, long u)
 	return true;
 }
 
-#define arch_atomic64_inc_not_zero(v) arch_atomic64_add_unless((v), 1, 0)
-
 /*
  * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 497faa4a05e3..83bb88d791c4 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -205,11 +205,14 @@ static __always_inline s64 atomic64_dec_return(atomic64_t *v)
 	return arch_atomic64_dec_return(v);
 }
 
+#ifdef arch_atomic64_inc_not_zero
+#define atomic64_inc_not_zero atomic64_inc_not_zero
 static __always_inline bool atomic64_inc_not_zero(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_inc_not_zero(v);
 }
+#endif
 
 static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v)
 {
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index a951a721e1bb..5105275ac825 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -63,6 +63,5 @@ extern bool	 atomic64_add_unless(atomic64_t *v, long long a, long long u);
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
-#define atomic64_inc_not_zero(v) 	atomic64_add_unless((v), 1LL, 0LL)
 
 #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 307a7f6d619a..ae3f30923d05 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -1019,6 +1019,17 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #define atomic64_try_cmpxchg_release	atomic64_try_cmpxchg
 #endif /* atomic64_try_cmpxchg */
 
+/**
+ * atomic64_inc_not_zero - increment unless the number is zero
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically increments @v by 1, if @v is non-zero.
+ * Returns true if the increment was done.
+ */
+#ifndef atomic64_inc_not_zero
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
+#endif
+
 #ifndef atomic64_andnot
 static inline void atomic64_andnot(long long i, atomic64_t *v)
 {
-- 
cgit v1.2.3


From eccc2da8c03f316bba202e15af2be4615f461900 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:09 +0100
Subject: atomics/treewide: Make atomic_fetch_add_unless() optional

Several architectures these have a near-identical implementation based
on atomic_read() and atomic_cmpxchg() which we can instead define in
<linux/atomic.h>, so let's do so, using something close to the existing
x86 implementation with try_cmpxchg().

Where an architecture provides its own atomic_fetch_add_unless(), it
must define a preprocessor symbol for it. The instrumented atomics are
updated accordingly.

Note that arch/arc's existing atomic_fetch_add_unless() had redundant
barriers, as these are already present in its atomic_cmpxchg()
implementation.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vineet Gupta <vgupta@synopsys.com>
Link: https://lore.kernel.org/lkml/20180621121321.4761-7-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           |  2 +-
 arch/arc/include/asm/atomic.h             | 28 ----------------------------
 arch/arm/include/asm/atomic.h             | 11 +----------
 arch/arm64/include/asm/atomic.h           |  1 -
 arch/h8300/include/asm/atomic.h           |  1 +
 arch/hexagon/include/asm/atomic.h         |  1 +
 arch/ia64/include/asm/atomic.h            | 16 ----------------
 arch/m68k/include/asm/atomic.h            | 15 ---------------
 arch/mips/include/asm/atomic.h            | 24 ------------------------
 arch/parisc/include/asm/atomic.h          | 24 ------------------------
 arch/powerpc/include/asm/atomic.h         |  1 +
 arch/riscv/include/asm/atomic.h           |  1 +
 arch/s390/include/asm/atomic.h            | 15 ---------------
 arch/sh/include/asm/atomic.h              | 25 -------------------------
 arch/sparc/include/asm/atomic_32.h        |  2 ++
 arch/sparc/include/asm/atomic_64.h        | 15 ---------------
 arch/x86/include/asm/atomic.h             | 21 ---------------------
 arch/xtensa/include/asm/atomic.h          | 24 ------------------------
 include/asm-generic/atomic-instrumented.h |  4 +++-
 include/asm-generic/atomic.h              | 11 -----------
 include/linux/atomic.h                    | 23 +++++++++++++++++++++++
 21 files changed, 34 insertions(+), 231 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index eb0f25e4c5dd..4a800a3424a3 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -235,7 +235,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 	smp_mb();
 	return old;
 }
-
+#define atomic_fetch_add_unless atomic_fetch_add_unless
 
 /**
  * atomic64_add_unless - add unless the number is a given value
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 1406825b5e7d..60da80481c5d 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -308,34 +308,6 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v
- */
-#define atomic_fetch_add_unless(v, a, u)					\
-({									\
-	int c, old;							\
-									\
-	/*								\
-	 * Explicit full memory barrier needed before/after as		\
-	 * LLOCK/SCOND thmeselves don't provide any such semantics	\
-	 */								\
-	smp_mb();							\
-									\
-	c = atomic_read(v);						\
-	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\
-		c = old;						\
-									\
-	smp_mb();							\
-									\
-	c;								\
-})
-
 #define atomic_inc(v)			atomic_add(1, v)
 #define atomic_dec(v)			atomic_sub(1, v)
 
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 02f3894faa48..74460aa00fa0 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -156,6 +156,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 
 	return oldval;
 }
+#define atomic_fetch_add_unless		atomic_fetch_add_unless
 
 #else /* ARM_ARCH_6 */
 
@@ -215,16 +216,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-
-	c = atomic_read(v);
-	while (c != u && (old = atomic_cmpxchg((v), c, c + a)) != c)
-		c = old;
-	return c;
-}
-
 #endif /* __LINUX_ARM_ARCH__ */
 
 #define ATOMIC_OPS(op, c_op, asm_op)					\
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index ad50412889c5..22c8c43d6689 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -125,7 +125,6 @@
 #define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 #define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
 #define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)
-#define atomic_fetch_add_unless(v, a, u)	___atomic_add_unless(v, a, u,)
 #define atomic_andnot			atomic_andnot
 
 /*
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 5c856887fdf2..710364946308 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -106,5 +106,6 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 	arch_local_irq_restore(flags);
 	return ret;
 }
+#define atomic_fetch_add_unless		atomic_fetch_add_unless
 
 #endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index d2feeba93c44..86c67e9adbfa 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -196,6 +196,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 	);
 	return __oldval;
 }
+#define atomic_fetch_add_unless atomic_fetch_add_unless
 
 #define atomic_inc(v) atomic_add(1, (v))
 #define atomic_dec(v) atomic_sub(1, (v))
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 93d48b823220..cfe44086338e 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -215,22 +215,6 @@ ATOMIC64_FETCH_OP(xor, ^)
 	(cmpxchg(&((v)->counter), old, new))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
-
 static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u)
 {
 	long c, old;
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 8022d9ea1213..596882cda224 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -211,19 +211,4 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 	return c != 0;
 }
 
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #endif /* __ARCH_M68K_ATOMIC __ */
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 502e691c6393..794734e730d9 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -274,30 +274,6 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #define atomic_dec_return(v) atomic_sub_return(1, (v))
 #define atomic_inc_return(v) atomic_add_return(1, (v))
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 3fd0243bf405..b2b6261d05e7 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -77,30 +77,6 @@ static __inline__ int atomic_read(const atomic_t *v)
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #define ATOMIC_OP(op, c_op)						\
 static __inline__ void atomic_##op(int i, atomic_t *v)			\
 {									\
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index e59620ee4f6b..b5646c079c16 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -248,6 +248,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 
 	return t;
 }
+#define atomic_fetch_add_unless atomic_fetch_add_unless
 
 /**
  * atomic_inc_not_zero - increment unless the number is zero
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 18259e90f57e..5f161daefcd2 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -349,6 +349,7 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 		: "memory");
 	return prev;
 }
+#define atomic_fetch_add_unless atomic_fetch_add_unless
 
 #ifndef CONFIG_GENERIC_ATOMIC64
 static __always_inline long __atomic64_add_unless(atomic64_t *v, long a, long u)
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 66dac30a4fe1..26c6b713a7a3 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -90,21 +90,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return __atomic_cmpxchg(&v->counter, old, new);
 }
 
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == u))
-			break;
-		old = atomic_cmpxchg(v, c, c + a);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #define ATOMIC64_INIT(i)  { (i) }
 
 static inline long atomic64_read(const atomic64_t *v)
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index ef45931ebac5..422fac764ca1 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -45,31 +45,6 @@
 #define atomic_xchg(v, new)		(xchg(&((v)->counter), new))
 #define atomic_cmpxchg(v, o, n)		(cmpxchg(&((v)->counter), (o), (n)))
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-
-	return c;
-}
-
 #endif /* CONFIG_CPU_J2 */
 
 #endif /* __ASM_SH_ATOMIC_H */
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index a58f4b43bcc7..9d7a15acc0c5 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -30,6 +30,8 @@ int atomic_xchg(atomic_t *, int);
 int atomic_fetch_add_unless(atomic_t *, int, int);
 void atomic_set(atomic_t *, int);
 
+#define atomic_fetch_add_unless	atomic_fetch_add_unless
+
 #define atomic_set_release(v, i)	atomic_set((v), (i))
 
 #define atomic_read(v)          READ_ONCE((v)->counter)
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 07830a316464..e4f1c93db31f 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -89,21 +89,6 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	return xchg(&v->counter, new);
 }
 
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #define atomic64_cmpxchg(v, o, n) \
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 84ed0bd76aef..616327ac9d39 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -253,27 +253,6 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
 	return val;
 }
 
-/**
- * arch_atomic_fetch_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns the old value of @v.
- */
-static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c = arch_atomic_read(v);
-
-	do {
-		if (unlikely(c == u))
-			break;
-	} while (!arch_atomic_try_cmpxchg(v, &c, c + a));
-
-	return c;
-}
-
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
 #else
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 4188e56c06c9..f4c9f82c40c6 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -274,30 +274,6 @@ ATOMIC_OPS(xor)
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #endif /* __KERNEL__ */
 
 #endif /* _XTENSA_ATOMIC_H */
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 83bb88d791c4..1f9b2a767d3c 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -84,12 +84,14 @@ static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 ne
 }
 #endif
 
+#ifdef arch_atomic_fetch_add_unless
+#define atomic_fetch_add_unless atomic_fetch_add_unless
 static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_fetch_add_unless(v, a, u);
 }
-
+#endif
 
 static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 10051ed6d088..757e45821220 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -221,15 +221,4 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
-#ifndef atomic_fetch_add_unless
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c)
-		c = old;
-	return c;
-}
-#endif
-
 #endif /* __ASM_GENERIC_ATOMIC_H */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index ae3f30923d05..b89ba36cab94 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -521,6 +521,29 @@
 #endif
 #endif /* xchg_relaxed */
 
+/**
+ * atomic_fetch_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns the original value of @v.
+ */
+#ifndef atomic_fetch_add_unless
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+	int c = atomic_read(v);
+
+	do {
+		if (unlikely(c == u))
+			break;
+	} while (!atomic_try_cmpxchg(v, &c, c + a));
+
+	return c;
+}
+#endif
+
 /**
  * atomic_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
-- 
cgit v1.2.3


From 0ae1d994020d75ac065fd42ac4cbf5ac6ce9b255 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:10 +0100
Subject: atomics: Prepare for atomic64_fetch_add_unless()

Currently all architectures must implement atomic_fetch_add_unless(),
with common code providing atomic_add_unless(). Architectures must also
implement atomic64_add_unless() directly, with no corresponding
atomic64_fetch_add_unless().

This divergence is unfortunate, and means that the APIs for atomic_t,
atomic64_t, and atomic_long_t differ.

In preparation for unifying things, with architectures providing
atomic64_fetch_add_unless, this patch adds a generic
atomic64_add_unless() which will use atomic64_fetch_add_unless(). The
instrumented atomics are updated to take this case into account.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Albert Ou <albert@sifive.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vineet Gupta <vgupta@synopsys.com>
Link: https://lore.kernel.org/lkml/20180621121321.4761-8-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-instrumented.h |  9 +++++++++
 include/linux/atomic.h                    | 16 ++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 1f9b2a767d3c..444bf2f9d54d 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -93,11 +93,20 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 }
 #endif
 
+#ifdef arch_atomic64_fetch_add_unless
+#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_add_unless(v, a, u);
+}
+#else
 static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_add_unless(v, a, u);
 }
+#endif
 
 static __always_inline void atomic_inc(atomic_t *v)
 {
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index b89ba36cab94..3c03de648007 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -1042,6 +1042,22 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #define atomic64_try_cmpxchg_release	atomic64_try_cmpxchg
 #endif /* atomic64_try_cmpxchg */
 
+/**
+ * atomic64_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns true if the addition was done.
+ */
+#ifdef atomic64_fetch_add_unless
+static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+	return atomic64_fetch_add_unless(v, a, u) != u;
+}
+#endif
+
 /**
  * atomic64_inc_not_zero - increment unless the number is zero
  * @v: pointer of type atomic64_t
-- 
cgit v1.2.3


From 356701329fb391184618eda7b7fb68cb35271506 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:17 +0100
Subject: atomics/treewide: Make atomic64_fetch_add_unless() optional

Architectures with atomic64_fetch_add_unless() provide a preprocessor
symbol if they do so, and all other architectures have trivial C
implementations of atomic64_add_unless() which are near-identical.

Let's unify the trivial definitions of atomic64_fetch_add_unless() in
<linux/atomic.h>, so that we always have both
atomic64_fetch_add_unless() and atomic64_add_unless() with less
boilerplate code.

This means that atomic64_add_unless() is always implemented in core
code, and the instrumented atomics are updated accordingly.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-15-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm64/include/asm/atomic.h           | 12 ------------
 arch/ia64/include/asm/atomic.h            | 15 ---------------
 arch/mips/include/asm/atomic.h            | 24 ------------------------
 arch/parisc/include/asm/atomic.h          | 24 ------------------------
 arch/s390/include/asm/atomic.h            | 16 ----------------
 arch/sparc/include/asm/atomic_64.h        | 15 ---------------
 arch/x86/include/asm/atomic64_64.h        | 19 -------------------
 include/asm-generic/atomic-instrumented.h |  6 ------
 include/linux/atomic.h                    | 26 ++++++++++++++++++++++++--
 9 files changed, 24 insertions(+), 133 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 22c8c43d6689..82db0e4febd4 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -40,17 +40,6 @@
 
 #include <asm/cmpxchg.h>
 
-#define ___atomic_add_unless(v, a, u, sfx)				\
-({									\
-	typeof((v)->counter) c, old;					\
-									\
-	c = atomic##sfx##_read(v);					\
-	while (c != (u) &&						\
-	      (old = atomic##sfx##_cmpxchg((v), c, c + (a))) != c)	\
-		c = old;						\
-	c;								\
- })
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)			READ_ONCE((v)->counter)
@@ -200,7 +189,6 @@
 #define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
 #define atomic64_sub_and_test(i, v)	(atomic64_sub_return((i), (v)) == 0)
 #define atomic64_add_negative(i, v)	(atomic64_add_return((i), (v)) < 0)
-#define atomic64_add_unless(v, a, u)	(___atomic_add_unless(v, a, u, 64) != u)
 #define atomic64_andnot			atomic64_andnot
 
 #endif
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index cfe44086338e..0f80a3eafaba 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -215,21 +215,6 @@ ATOMIC64_FETCH_OP(xor, ^)
 	(cmpxchg(&((v)->counter), old, new))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
-}
-
 static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 {
 	long c, old, dec;
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 794734e730d9..d42b27df1548 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -596,30 +596,6 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), (new)))
 
-/**
- * atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns true iff @v was not @u.
- */
-static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
-}
-
 #define atomic64_dec_return(v) atomic64_sub_return(1, (v))
 #define atomic64_inc_return(v) atomic64_add_return(1, (v))
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index b2b6261d05e7..f53ba2d6ff67 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -257,30 +257,6 @@ atomic64_read(const atomic64_t *v)
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-/**
- * atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
-}
-
 /*
  * atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 26c6b713a7a3..eb9329741bad 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -153,22 +153,6 @@ ATOMIC64_OPS(xor)
 
 #undef ATOMIC64_OPS
 
-static inline int atomic64_add_unless(atomic64_t *v, long i, long u)
-{
-	long c, old;
-
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == u))
-			break;
-		old = atomic64_cmpxchg(v, c, c + i);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != u;
-}
-
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
 	long c, old, dec;
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index e4f1c93db31f..458783e99997 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -93,21 +93,6 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
-}
-
 long atomic64_dec_if_positive(atomic64_t *v);
 
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6f95023894b7..7e04b294e6eb 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -188,25 +188,6 @@ static inline long arch_atomic64_xchg(atomic64_t *v, long new)
 	return xchg(&v->counter, new);
 }
 
-/**
- * arch_atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static inline bool arch_atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	s64 c = arch_atomic64_read(v);
-	do {
-		if (unlikely(c == u))
-			return false;
-	} while (!arch_atomic64_try_cmpxchg(v, &c, c + a));
-	return true;
-}
-
 /*
  * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 444bf2f9d54d..2b487f28ef35 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -100,12 +100,6 @@ static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_fetch_add_unless(v, a, u);
 }
-#else
-static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
-{
-	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_add_unless(v, a, u);
-}
 #endif
 
 static __always_inline void atomic_inc(atomic_t *v)
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 3c03de648007..530562ac7909 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -1042,6 +1042,30 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #define atomic64_try_cmpxchg_release	atomic64_try_cmpxchg
 #endif /* atomic64_try_cmpxchg */
 
+/**
+ * atomic64_fetch_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns the original value of @v.
+ */
+#ifndef atomic64_fetch_add_unless
+static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
+						  long long u)
+{
+	long long c = atomic64_read(v);
+
+	do {
+		if (unlikely(c == u))
+			break;
+	} while (!atomic64_try_cmpxchg(v, &c, c + a));
+
+	return c;
+}
+#endif
+
 /**
  * atomic64_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
@@ -1051,12 +1075,10 @@ static inline int atomic_dec_if_positive(atomic_t *v)
  * Atomically adds @a to @v, if @v was not already @u.
  * Returns true if the addition was done.
  */
-#ifdef atomic64_fetch_add_unless
 static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
 	return atomic64_fetch_add_unless(v, a, u) != u;
 }
-#endif
 
 /**
  * atomic64_inc_not_zero - increment unless the number is zero
-- 
cgit v1.2.3


From 18cc1814d4e7560412c9c8c6d28f9d6782c8b402 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:18 +0100
Subject: atomics/treewide: Make test ops optional

Some of the atomics return the result of a test applied after the atomic
operation, and almost all architectures implement these as trivial
wrappers around the underlying atomic. Specifically:

 * <atomic>_inc_and_test(v)    is (<atomic>_inc_return(v)    == 0)
 * <atomic>_dec_and_test(v)    is (<atomic>_dec_return(v)    == 0)
 * <atomic>_sub_and_test(i, v) is (<atomic>_sub_return(i, v) == 0)
 * <atomic>_add_negative(i, v) is (<atomic>_add_return(i, v)  < 0)

Rather than have these definitions duplicated in all architectures, with
minor inconsistencies in formatting and documentation, let's make these
operations optional, with default fallbacks as above. Implementations
must now provide a preprocessor symbol.

The instrumented atomics are updated accordingly.

Both x86 and m68k have custom implementations, which are left as-is,
given preprocessor symbols to avoid being overridden.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-16-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           |  12 ---
 arch/arc/include/asm/atomic.h             |  10 ---
 arch/arm/include/asm/atomic.h             |   9 ---
 arch/arm64/include/asm/atomic.h           |   8 --
 arch/h8300/include/asm/atomic.h           |   5 --
 arch/hexagon/include/asm/atomic.h         |   5 --
 arch/ia64/include/asm/atomic.h            |  23 ------
 arch/m68k/include/asm/atomic.h            |   4 +
 arch/mips/include/asm/atomic.h            |  84 --------------------
 arch/parisc/include/asm/atomic.h          |  22 ------
 arch/powerpc/include/asm/atomic.h         |  30 --------
 arch/riscv/include/asm/atomic.h           |  46 -----------
 arch/s390/include/asm/atomic.h            |   8 --
 arch/sh/include/asm/atomic.h              |   4 -
 arch/sparc/include/asm/atomic_32.h        |  15 ----
 arch/sparc/include/asm/atomic_64.h        |  20 -----
 arch/x86/include/asm/atomic.h             |   4 +
 arch/x86/include/asm/atomic64_32.h        |  54 -------------
 arch/x86/include/asm/atomic64_64.h        |   4 +
 arch/xtensa/include/asm/atomic.h          |  42 ----------
 include/asm-generic/atomic-instrumented.h |  24 ++++++
 include/asm-generic/atomic.h              |   9 ---
 include/asm-generic/atomic64.h            |   4 -
 include/linux/atomic.h                    | 124 ++++++++++++++++++++++++++++++
 24 files changed, 160 insertions(+), 410 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index cc486dbb3837..25f8693c5a42 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -297,24 +297,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return old - 1;
 }
 
-#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
-#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
-
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic64_dec_return(v) atomic64_sub_return(1,(v))
 
 #define atomic_inc_return(v) atomic_add_return(1,(v))
 #define atomic64_inc_return(v) atomic64_add_return(1,(v))
 
-#define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0)
-#define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0)
-
-#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
-#define atomic64_inc_and_test(v) (atomic64_add_return(1, (v)) == 0)
-
-#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
-#define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0)
-
 #define atomic_inc(v) atomic_add(1,(v))
 #define atomic64_inc(v) atomic64_add(1,(v))
 
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 4917ffa61579..4222e726f84c 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -311,14 +311,8 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 #define atomic_inc(v)			atomic_add(1, v)
 #define atomic_dec(v)			atomic_sub(1, v)
 
-#define atomic_inc_and_test(v)		(atomic_add_return(1, v) == 0)
-#define atomic_dec_and_test(v)		(atomic_sub_return(1, v) == 0)
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_sub_and_test(i, v)	(atomic_sub_return(i, v) == 0)
-
-#define atomic_add_negative(i, v)	(atomic_add_return(i, v) < 0)
-
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 
@@ -566,14 +560,10 @@ static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
 }
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
 #define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
-#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
-#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
-#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 
 #endif	/* !CONFIG_GENERIC_ATOMIC64 */
 
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 852e1fee72b0..35fb7f504daa 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -248,13 +248,8 @@ ATOMIC_OPS(xor, ^=, eor)
 #define atomic_inc(v)		atomic_add(1, v)
 #define atomic_dec(v)		atomic_sub(1, v)
 
-#define atomic_inc_and_test(v)	(atomic_add_return(1, v) == 0)
-#define atomic_dec_and_test(v)	(atomic_sub_return(1, v) == 0)
 #define atomic_inc_return_relaxed(v)    (atomic_add_return_relaxed(1, v))
 #define atomic_dec_return_relaxed(v)    (atomic_sub_return_relaxed(1, v))
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
-
-#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
 
 #ifndef CONFIG_GENERIC_ATOMIC64
 typedef struct {
@@ -517,14 +512,10 @@ static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
 }
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
 #define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1LL, (v))
-#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
-#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1LL, (v))
-#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 
 #endif /* !CONFIG_GENERIC_ATOMIC64 */
 #endif
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 82db0e4febd4..edbe53fa3106 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -110,10 +110,6 @@
 
 #define atomic_inc(v)			atomic_add(1, (v))
 #define atomic_dec(v)			atomic_sub(1, (v))
-#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
-#define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
-#define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
-#define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)
 #define atomic_andnot			atomic_andnot
 
 /*
@@ -185,10 +181,6 @@
 
 #define atomic64_inc(v)			atomic64_add(1, (v))
 #define atomic64_dec(v)			atomic64_sub(1, (v))
-#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
-#define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
-#define atomic64_sub_and_test(i, v)	(atomic64_sub_return((i), (v)) == 0)
-#define atomic64_add_negative(i, v)	(atomic64_add_return((i), (v)) < 0)
 #define atomic64_andnot			atomic64_andnot
 
 #endif
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 710364946308..8977b5157c8f 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -69,17 +69,12 @@ ATOMIC_OPS(sub, -=)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-#define atomic_sub_and_test(i, v)	(atomic_sub_return(i, v) == 0)
-
 #define atomic_inc_return(v)		atomic_add_return(1, v)
 #define atomic_dec_return(v)		atomic_sub_return(1, v)
 
 #define atomic_inc(v)			(void)atomic_inc_return(v)
-#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
 
 #define atomic_dec(v)			(void)atomic_dec_return(v)
-#define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 86c67e9adbfa..31638f511674 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -201,11 +201,6 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 #define atomic_inc(v) atomic_add(1, (v))
 #define atomic_dec(v) atomic_sub(1, (v))
 
-#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
-#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, (v)) == 0)
-#define atomic_add_negative(i, v) (atomic_add_return(i, (v)) < 0)
-
 #define atomic_inc_return(v) (atomic_add_return(1, v))
 #define atomic_dec_return(v) (atomic_sub_return(1, v))
 
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 0f80a3eafaba..e4143c462e65 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -231,34 +231,11 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
-/*
- * Atomically add I to V and return TRUE if the resulting value is
- * negative.
- */
-static __inline__ int
-atomic_add_negative (int i, atomic_t *v)
-{
-	return atomic_add_return(i, v) < 0;
-}
-
-static __inline__ long
-atomic64_add_negative (__s64 i, atomic64_t *v)
-{
-	return atomic64_add_return(i, v) < 0;
-}
-
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
 #define atomic64_inc_return(v)		atomic64_add_return(1, (v))
 
-#define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
-#define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) == 0)
-#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i), (v)) == 0)
-#define atomic64_dec_and_test(v)	(atomic64_sub_return(1, (v)) == 0)
-#define atomic64_inc_and_test(v)	(atomic64_add_return(1, (v)) == 0)
-
 #define atomic_add(i,v)			(void)atomic_add_return((i), (v))
 #define atomic_sub(i,v)			(void)atomic_sub_return((i), (v))
 #define atomic_inc(v)			atomic_add(1, (v))
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 596882cda224..9df09c876fa2 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -138,6 +138,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
 	__asm__ __volatile__("subql #1,%1; seq %0" : "=d" (c), "+m" (*v));
 	return c != 0;
 }
+#define atomic_dec_and_test atomic_dec_and_test
 
 static inline int atomic_dec_and_test_lt(atomic_t *v)
 {
@@ -155,6 +156,7 @@ static inline int atomic_inc_and_test(atomic_t *v)
 	__asm__ __volatile__("addql #1,%1; seq %0" : "=d" (c), "+m" (*v));
 	return c != 0;
 }
+#define atomic_inc_and_test atomic_inc_and_test
 
 #ifdef CONFIG_RMW_INSNS
 
@@ -201,6 +203,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v)
 			     : ASM_DI (i));
 	return c != 0;
 }
+#define atomic_sub_and_test atomic_sub_and_test
 
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
@@ -210,5 +213,6 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 			     : ASM_DI (i));
 	return c != 0;
 }
+#define atomic_add_negative atomic_add_negative
 
 #endif /* __ARCH_M68K_ATOMIC __ */
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index d42b27df1548..fd3008ae164c 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -277,37 +277,6 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 #define atomic_dec_return(v) atomic_sub_return(1, (v))
 #define atomic_inc_return(v) atomic_add_return(1, (v))
 
-/*
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-/*
- * atomic_dec_and_test - decrement by 1 and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
-
 /*
  * atomic_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
@@ -330,17 +299,6 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
  */
 #define atomic_dec(v) atomic_sub(1, (v))
 
-/*
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-#define atomic_add_negative(i, v) (atomic_add_return(i, (v)) < 0)
-
 #ifdef CONFIG_64BIT
 
 #define ATOMIC64_INIT(i)    { (i) }
@@ -599,37 +557,6 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
 #define atomic64_dec_return(v) atomic64_sub_return(1, (v))
 #define atomic64_inc_return(v) atomic64_add_return(1, (v))
 
-/*
- * atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0)
-
-/*
- * atomic64_inc_and_test - increment and test
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
-
-/*
- * atomic64_dec_and_test - decrement by 1 and test
- * @v: pointer of type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-#define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0)
-
 /*
  * atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic64_t
@@ -652,17 +579,6 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
  */
 #define atomic64_dec(v) atomic64_sub(1, (v))
 
-/*
- * atomic64_add_negative - add and test if negative
- * @v: pointer of type atomic64_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-#define atomic64_add_negative(i, v) (atomic64_add_return(i, (v)) < 0)
-
 #endif /* CONFIG_64BIT */
 
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index f53ba2d6ff67..f85844ff6336 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -142,22 +142,6 @@ ATOMIC_OPS(xor, ^=)
 #define atomic_inc_return(v)	(atomic_add_return(   1,(v)))
 #define atomic_dec_return(v)	(atomic_add_return(  -1,(v)))
 
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-#define atomic_dec_and_test(v)	(atomic_dec_return(v) == 0)
-
-#define atomic_sub_and_test(i,v)	(atomic_sub_return((i),(v)) == 0)
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef CONFIG_64BIT
@@ -246,12 +230,6 @@ atomic64_read(const atomic64_t *v)
 #define atomic64_inc_return(v)		(atomic64_add_return(   1,(v)))
 #define atomic64_dec_return(v)		(atomic64_add_return(  -1,(v)))
 
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
-
-#define atomic64_inc_and_test(v) 	(atomic64_inc_return(v) == 0)
-#define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
-#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i),(v)) == 0)
-
 /* exported interface */
 #define atomic64_cmpxchg(v, o, n) \
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 233dbf31911c..5d76f05d2be3 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -129,8 +129,6 @@ ATOMIC_OPS(xor, xor)
 #undef ATOMIC_OP_RETURN_RELAXED
 #undef ATOMIC_OP
 
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
 static __inline__ void atomic_inc(atomic_t *v)
 {
 	int t;
@@ -163,16 +161,6 @@ static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
 	return t;
 }
 
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
 static __inline__ void atomic_dec(atomic_t *v)
 {
 	int t;
@@ -281,9 +269,6 @@ static __inline__ int atomic_inc_not_zero(atomic_t *v)
 }
 #define atomic_inc_not_zero(v) atomic_inc_not_zero((v))
 
-#define atomic_sub_and_test(a, v)	(atomic_sub_return((a), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_dec_return((v)) == 0)
-
 /*
  * Atomically test *v and decrement if it is greater than 0.
  * The function returns the old value of *v minus 1, even if
@@ -413,8 +398,6 @@ ATOMIC64_OPS(xor, xor)
 #undef ATOMIC64_OP_RETURN_RELAXED
 #undef ATOMIC64_OP
 
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
-
 static __inline__ void atomic64_inc(atomic64_t *v)
 {
 	long t;
@@ -445,16 +428,6 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
 	return t;
 }
 
-/*
- * atomic64_inc_and_test - increment and test
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
-
 static __inline__ void atomic64_dec(atomic64_t *v)
 {
 	long t;
@@ -488,9 +461,6 @@ static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
 #define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
 #define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
 
-#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
-#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
-
 /*
  * Atomically test *v and decrement if it is greater than 0.
  * The function returns the old value of *v minus 1.
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index d959bbaaad41..68eef0a805ca 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -209,36 +209,6 @@ ATOMIC_OPS(xor, xor, i)
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 
-/*
- * The extra atomic operations that are constructed from one of the core
- * AMO-based operations above (aside from sub, which is easier to fit above).
- * These are required to perform a full barrier, but they're OK this way
- * because atomic_*_return is also required to perform a full barrier.
- *
- */
-#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix)		\
-static __always_inline							\
-bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)		\
-{									\
-	return atomic##prefix##_##func_op##_return(i, v) comp_op I;	\
-}
-
-#ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, func_op, comp_op, I)				\
-        ATOMIC_OP(op, func_op, comp_op, I,  int,   )
-#else
-#define ATOMIC_OPS(op, func_op, comp_op, I)				\
-        ATOMIC_OP(op, func_op, comp_op, I,  int,   )			\
-        ATOMIC_OP(op, func_op, comp_op, I, long, 64)
-#endif
-
-ATOMIC_OPS(add_and_test, add, ==, 0)
-ATOMIC_OPS(sub_and_test, sub, ==, 0)
-ATOMIC_OPS(add_negative, add,  <, 0)
-
-#undef ATOMIC_OP
-#undef ATOMIC_OPS
-
 #define ATOMIC_OP(op, func_op, I, c_type, prefix)			\
 static __always_inline							\
 void atomic##prefix##_##op(atomic##prefix##_t *v)			\
@@ -315,22 +285,6 @@ ATOMIC_OPS(dec, add, +, -1)
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 
-#define ATOMIC_OP(op, func_op, comp_op, I, prefix)			\
-static __always_inline							\
-bool atomic##prefix##_##op(atomic##prefix##_t *v)			\
-{									\
-	return atomic##prefix##_##func_op##_return(v) comp_op I;	\
-}
-
-ATOMIC_OP(inc_and_test, inc, ==, 0,   )
-ATOMIC_OP(dec_and_test, dec, ==, 0,   )
-#ifndef CONFIG_GENERIC_ATOMIC64
-ATOMIC_OP(inc_and_test, inc, ==, 0, 64)
-ATOMIC_OP(dec_and_test, dec, ==, 0, 64)
-#endif
-
-#undef ATOMIC_OP
-
 /* This is required to provide a full barrier on success. */
 static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index eb9329741bad..7f5fbd595f01 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -55,17 +55,13 @@ static inline void atomic_add(int i, atomic_t *v)
 	__atomic_add(i, &v->counter);
 }
 
-#define atomic_add_negative(_i, _v)	(atomic_add_return(_i, _v) < 0)
 #define atomic_inc(_v)			atomic_add(1, _v)
 #define atomic_inc_return(_v)		atomic_add_return(1, _v)
-#define atomic_inc_and_test(_v)		(atomic_add_return(1, _v) == 0)
 #define atomic_sub(_i, _v)		atomic_add(-(int)(_i), _v)
 #define atomic_sub_return(_i, _v)	atomic_add_return(-(int)(_i), _v)
 #define atomic_fetch_sub(_i, _v)	atomic_fetch_add(-(int)(_i), _v)
-#define atomic_sub_and_test(_i, _v)	(atomic_sub_return(_i, _v) == 0)
 #define atomic_dec(_v)			atomic_sub(1, _v)
 #define atomic_dec_return(_v)		atomic_sub_return(1, _v)
-#define atomic_dec_and_test(_v)		(atomic_sub_return(1, _v) == 0)
 
 #define ATOMIC_OPS(op)							\
 static inline void atomic_##op(int i, atomic_t *v)			\
@@ -170,16 +166,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
-#define atomic64_add_negative(_i, _v)	(atomic64_add_return(_i, _v) < 0)
 #define atomic64_inc(_v)		atomic64_add(1, _v)
 #define atomic64_inc_return(_v)		atomic64_add_return(1, _v)
-#define atomic64_inc_and_test(_v)	(atomic64_add_return(1, _v) == 0)
 #define atomic64_sub_return(_i, _v)	atomic64_add_return(-(long)(_i), _v)
 #define atomic64_fetch_sub(_i, _v)	atomic64_fetch_add(-(long)(_i), _v)
 #define atomic64_sub(_i, _v)		atomic64_add(-(long)(_i), _v)
-#define atomic64_sub_and_test(_i, _v)	(atomic64_sub_return(_i, _v) == 0)
 #define atomic64_dec(_v)		atomic64_sub(1, _v)
 #define atomic64_dec_return(_v)		atomic64_sub_return(1, _v)
-#define atomic64_dec_and_test(_v)	(atomic64_sub_return(1, _v) == 0)
 
 #endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 422fac764ca1..d438494fa112 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -32,12 +32,8 @@
 #include <asm/atomic-irq.h>
 #endif
 
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
-#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
-#define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
 
 #define atomic_inc(v)			atomic_add(1, (v))
 #define atomic_dec(v)			atomic_sub(1, (v))
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 9d7a15acc0c5..3a26573790c6 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -51,19 +51,4 @@ void atomic_set(atomic_t *, int);
 #define atomic_inc_return(v)	(atomic_add_return(        1, (v)))
 #define atomic_dec_return(v)	(atomic_add_return(       -1, (v)))
 
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
-
 #endif /* !(__ARCH_SPARC_ATOMIC__) */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 458783e99997..634508282aea 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -56,32 +56,12 @@ ATOMIC_OPS(xor)
 #define atomic_inc_return(v)   atomic_add_return(1, v)
 #define atomic64_inc_return(v) atomic64_add_return(1, v)
 
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
-
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
-#define atomic64_sub_and_test(i, v) (atomic64_sub_return(i, v) == 0)
-
-#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
-#define atomic64_dec_and_test(v) (atomic64_sub_return(1, v) == 0)
-
 #define atomic_inc(v) atomic_add(1, v)
 #define atomic64_inc(v) atomic64_add(1, v)
 
 #define atomic_dec(v) atomic_sub(1, v)
 #define atomic64_dec(v) atomic64_sub(1, v)
 
-#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
-#define atomic64_add_negative(i, v) (atomic64_add_return(i, v) < 0)
-
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 
 static inline int atomic_xchg(atomic_t *v, int new)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 616327ac9d39..73bda4abe180 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -80,6 +80,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
 static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
@@ -117,6 +118,7 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
 static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
@@ -130,6 +132,7 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
 static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
@@ -144,6 +147,7 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
+#define arch_atomic_add_negative arch_atomic_add_negative
 static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 2a33cc17801b..a26810d005e0 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -197,20 +197,6 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
 	return i;
 }
 
-/**
- * arch_atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static inline int arch_atomic64_sub_and_test(long long i, atomic64_t *v)
-{
-	return arch_atomic64_sub_return(i, v) == 0;
-}
-
 /**
  * arch_atomic64_inc - increment atomic64 variable
  * @v: pointer to type atomic64_t
@@ -235,46 +221,6 @@ static inline void arch_atomic64_dec(atomic64_t *v)
 			       "S" (v) : "memory", "eax", "ecx", "edx");
 }
 
-/**
- * arch_atomic64_dec_and_test - decrement and test
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static inline int arch_atomic64_dec_and_test(atomic64_t *v)
-{
-	return arch_atomic64_dec_return(v) == 0;
-}
-
-/**
- * atomic64_inc_and_test - increment and test
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static inline int arch_atomic64_inc_and_test(atomic64_t *v)
-{
-	return arch_atomic64_inc_return(v) == 0;
-}
-
-/**
- * arch_atomic64_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static inline int arch_atomic64_add_negative(long long i, atomic64_t *v)
-{
-	return arch_atomic64_add_return(i, v) < 0;
-}
-
 /**
  * arch_atomic64_add_unless - add unless the number is a given value
  * @v: pointer of type atomic64_t
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 7e04b294e6eb..6a65228a3db6 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -71,6 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
 static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
@@ -110,6 +111,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
 static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
@@ -123,6 +125,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
 static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
@@ -137,6 +140,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
+#define arch_atomic64_add_negative arch_atomic64_add_negative
 static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index f4c9f82c40c6..332ae4eca737 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -197,17 +197,6 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0)
-
 /**
  * atomic_inc - increment atomic variable
  * @v: pointer of type atomic_t
@@ -240,37 +229,6 @@ ATOMIC_OPS(xor)
  */
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-#define atomic_dec_and_test(v) (atomic_sub_return(1,(v)) == 0)
-
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_add_return(1,(v)) == 0)
-
-/**
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-#define atomic_add_negative(i,v) (atomic_add_return((i),(v)) < 0)
-
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 2b487f28ef35..6b64c200de73 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -225,29 +225,41 @@ static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v)
 	return arch_atomic64_dec_if_positive(v);
 }
 
+#ifdef arch_atomic_dec_and_test
+#define atomic_dec_and_test atomic_dec_and_test
 static __always_inline bool atomic_dec_and_test(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_dec_and_test(v);
 }
+#endif
 
+#ifdef arch_atomic64_dec_and_test
+#define atomic64_dec_and_test atomic64_dec_and_test
 static __always_inline bool atomic64_dec_and_test(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_dec_and_test(v);
 }
+#endif
 
+#ifdef arch_atomic_inc_and_test
+#define atomic_inc_and_test atomic_inc_and_test
 static __always_inline bool atomic_inc_and_test(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_inc_and_test(v);
 }
+#endif
 
+#ifdef arch_atomic64_inc_and_test
+#define atomic64_inc_and_test atomic64_inc_and_test
 static __always_inline bool atomic64_inc_and_test(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_inc_and_test(v);
 }
+#endif
 
 static __always_inline int atomic_add_return(int i, atomic_t *v)
 {
@@ -333,29 +345,41 @@ static __always_inline s64 atomic64_fetch_xor(s64 i, atomic64_t *v)
 	return arch_atomic64_fetch_xor(i, v);
 }
 
+#ifdef arch_atomic_sub_and_test
+#define atomic_sub_and_test atomic_sub_and_test
 static __always_inline bool atomic_sub_and_test(int i, atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_sub_and_test(i, v);
 }
+#endif
 
+#ifdef arch_atomic64_sub_and_test
+#define atomic64_sub_and_test atomic64_sub_and_test
 static __always_inline bool atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_sub_and_test(i, v);
 }
+#endif
 
+#ifdef arch_atomic_add_negative
+#define atomic_add_negative atomic_add_negative
 static __always_inline bool atomic_add_negative(int i, atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_add_negative(i, v);
 }
+#endif
 
+#ifdef arch_atomic64_add_negative
+#define atomic64_add_negative atomic64_add_negative
 static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_add_negative(i, v);
 }
+#endif
 
 static __always_inline unsigned long
 cmpxchg_size(volatile void *ptr, unsigned long old, unsigned long new, int size)
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 757e45821220..40cab858aaaa 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -186,11 +186,6 @@ ATOMIC_OP(xor, ^)
 
 #include <linux/irqflags.h>
 
-static inline int atomic_add_negative(int i, atomic_t *v)
-{
-	return atomic_add_return(i, v) < 0;
-}
-
 static inline void atomic_add(int i, atomic_t *v)
 {
 	atomic_add_return(i, v);
@@ -214,10 +209,6 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
 
-#define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
-#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
-
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 49460107b29a..d3827ab97aa4 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -56,13 +56,9 @@ extern long long atomic64_xchg(atomic64_t *v, long long new);
 extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u);
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
 #define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
-#define atomic64_inc_and_test(v) 	(atomic64_inc_return(v) == 0)
-#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
-#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 
 #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 530562ac7909..3ee8da9023cd 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -569,6 +569,68 @@ static inline bool atomic_add_unless(atomic_t *v, int a, int u)
 #define atomic_inc_not_zero(v)		atomic_add_unless((v), 1, 0)
 #endif
 
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#ifndef atomic_inc_and_test
+static inline bool atomic_inc_and_test(atomic_t *v)
+{
+	return atomic_inc_return(v) == 0;
+}
+#endif
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+#ifndef atomic_dec_and_test
+static inline bool atomic_dec_and_test(atomic_t *v)
+{
+	return atomic_dec_return(v) == 0;
+}
+#endif
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+#ifndef atomic_sub_and_test
+static inline bool atomic_sub_and_test(int i, atomic_t *v)
+{
+	return atomic_sub_return(i, v) == 0;
+}
+#endif
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+#ifndef atomic_add_negative
+static inline bool atomic_add_negative(int i, atomic_t *v)
+{
+	return atomic_add_return(i, v) < 0;
+}
+#endif
+
 #ifndef atomic_andnot
 static inline void atomic_andnot(int i, atomic_t *v)
 {
@@ -1091,6 +1153,68 @@ static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
 #endif
 
+/**
+ * atomic64_inc_and_test - increment and test
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#ifndef atomic64_inc_and_test
+static inline bool atomic64_inc_and_test(atomic64_t *v)
+{
+	return atomic64_inc_return(v) == 0;
+}
+#endif
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+#ifndef atomic64_dec_and_test
+static inline bool atomic64_dec_and_test(atomic64_t *v)
+{
+	return atomic64_dec_return(v) == 0;
+}
+#endif
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+#ifndef atomic64_sub_and_test
+static inline bool atomic64_sub_and_test(long long i, atomic64_t *v)
+{
+	return atomic64_sub_return(i, v) == 0;
+}
+#endif
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+#ifndef atomic64_add_negative
+static inline bool atomic64_add_negative(long long i, atomic64_t *v)
+{
+	return atomic64_add_return(i, v) < 0;
+}
+#endif
+
 #ifndef atomic64_andnot
 static inline void atomic64_andnot(long long i, atomic64_t *v)
 {
-- 
cgit v1.2.3


From 9837559d8eb01ce834e56fc9a567c1d94ebd3698 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:19 +0100
Subject: atomics/treewide: Make unconditional inc/dec ops optional

Many of the inc/dec ops are mandatory, but for most architectures inc/dec are
simply trivial wrappers around their corresponding add/sub ops.

Let's make all the inc/dec ops optional, so that we can get rid of these
boilerplate wrappers.

The instrumented atomics are updated accordingly.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-17-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           | 12 -----
 arch/arc/include/asm/atomic.h             | 11 -----
 arch/arm/include/asm/atomic.h             | 11 -----
 arch/arm64/include/asm/atomic.h           | 24 ----------
 arch/h8300/include/asm/atomic.h           |  7 ---
 arch/hexagon/include/asm/atomic.h         |  6 ---
 arch/ia64/include/asm/atomic.h            |  9 ----
 arch/m68k/include/asm/atomic.h            |  5 +-
 arch/mips/include/asm/atomic.h            | 38 ----------------
 arch/parisc/include/asm/atomic.h          | 12 -----
 arch/powerpc/include/asm/atomic.h         |  4 ++
 arch/riscv/include/asm/atomic.h           | 76 -------------------------------
 arch/s390/include/asm/atomic.h            |  8 ----
 arch/sh/include/asm/atomic.h              |  6 ---
 arch/sparc/include/asm/atomic_32.h        |  5 --
 arch/sparc/include/asm/atomic_64.h        | 12 -----
 arch/x86/include/asm/atomic.h             |  5 +-
 arch/x86/include/asm/atomic64_32.h        |  4 ++
 arch/x86/include/asm/atomic64_64.h        |  5 +-
 arch/xtensa/include/asm/atomic.h          | 32 -------------
 include/asm-generic/atomic-instrumented.h | 24 ++++++++++
 include/asm-generic/atomic.h              | 13 ------
 include/asm-generic/atomic64.h            |  5 --
 include/linux/atomic.h                    | 48 +++++++++++++++++++
 24 files changed, 86 insertions(+), 296 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 25f8693c5a42..f6410cb68058 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -297,16 +297,4 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return old - 1;
 }
 
-#define atomic_dec_return(v) atomic_sub_return(1,(v))
-#define atomic64_dec_return(v) atomic64_sub_return(1,(v))
-
-#define atomic_inc_return(v) atomic_add_return(1,(v))
-#define atomic64_inc_return(v) atomic64_add_return(1,(v))
-
-#define atomic_inc(v) atomic_add(1,(v))
-#define atomic64_inc(v) atomic64_add(1,(v))
-
-#define atomic_dec(v) atomic_sub(1,(v))
-#define atomic64_dec(v) atomic64_sub(1,(v))
-
 #endif /* _ALPHA_ATOMIC_H */
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 4222e726f84c..27b95a928c1e 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -308,12 +308,6 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_inc(v)			atomic_add(1, v)
-#define atomic_dec(v)			atomic_sub(1, v)
-
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-
 #ifdef CONFIG_GENERIC_ATOMIC64
 
 #include <asm-generic/atomic64.h>
@@ -560,11 +554,6 @@ static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
 }
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_inc(v)			atomic64_add(1LL, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
-#define atomic64_dec(v)			atomic64_sub(1LL, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
-
 #endif	/* !CONFIG_GENERIC_ATOMIC64 */
 
 #endif	/* !__ASSEMBLY__ */
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 35fb7f504daa..5a58d061d3d2 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -245,12 +245,6 @@ ATOMIC_OPS(xor, ^=, eor)
 
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
-#define atomic_inc(v)		atomic_add(1, v)
-#define atomic_dec(v)		atomic_sub(1, v)
-
-#define atomic_inc_return_relaxed(v)    (atomic_add_return_relaxed(1, v))
-#define atomic_dec_return_relaxed(v)    (atomic_sub_return_relaxed(1, v))
-
 #ifndef CONFIG_GENERIC_ATOMIC64
 typedef struct {
 	long long counter;
@@ -512,11 +506,6 @@ static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
 }
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_inc(v)			atomic64_add(1LL, (v))
-#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1LL, (v))
-#define atomic64_dec(v)			atomic64_sub(1LL, (v))
-#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1LL, (v))
-
 #endif /* !CONFIG_GENERIC_ATOMIC64 */
 #endif
 #endif
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index edbe53fa3106..078f785cd97f 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -50,21 +50,11 @@
 #define atomic_add_return_release	atomic_add_return_release
 #define atomic_add_return		atomic_add_return
 
-#define atomic_inc_return_relaxed(v)	atomic_add_return_relaxed(1, (v))
-#define atomic_inc_return_acquire(v)	atomic_add_return_acquire(1, (v))
-#define atomic_inc_return_release(v)	atomic_add_return_release(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-
 #define atomic_sub_return_relaxed	atomic_sub_return_relaxed
 #define atomic_sub_return_acquire	atomic_sub_return_acquire
 #define atomic_sub_return_release	atomic_sub_return_release
 #define atomic_sub_return		atomic_sub_return
 
-#define atomic_dec_return_relaxed(v)	atomic_sub_return_relaxed(1, (v))
-#define atomic_dec_return_acquire(v)	atomic_sub_return_acquire(1, (v))
-#define atomic_dec_return_release(v)	atomic_sub_return_release(1, (v))
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-
 #define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
 #define atomic_fetch_add_acquire	atomic_fetch_add_acquire
 #define atomic_fetch_add_release	atomic_fetch_add_release
@@ -108,8 +98,6 @@
 	cmpxchg_release(&((v)->counter), (old), (new))
 #define atomic_cmpxchg(v, old, new)	cmpxchg(&((v)->counter), (old), (new))
 
-#define atomic_inc(v)			atomic_add(1, (v))
-#define atomic_dec(v)			atomic_sub(1, (v))
 #define atomic_andnot			atomic_andnot
 
 /*
@@ -124,21 +112,11 @@
 #define atomic64_add_return_release	atomic64_add_return_release
 #define atomic64_add_return		atomic64_add_return
 
-#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1, (v))
-#define atomic64_inc_return_acquire(v)	atomic64_add_return_acquire(1, (v))
-#define atomic64_inc_return_release(v)	atomic64_add_return_release(1, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
-
 #define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
 #define atomic64_sub_return_acquire	atomic64_sub_return_acquire
 #define atomic64_sub_return_release	atomic64_sub_return_release
 #define atomic64_sub_return		atomic64_sub_return
 
-#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1, (v))
-#define atomic64_dec_return_acquire(v)	atomic64_sub_return_acquire(1, (v))
-#define atomic64_dec_return_release(v)	atomic64_sub_return_release(1, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
-
 #define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
 #define atomic64_fetch_add_acquire	atomic64_fetch_add_acquire
 #define atomic64_fetch_add_release	atomic64_fetch_add_release
@@ -179,8 +157,6 @@
 #define atomic64_cmpxchg_release	atomic_cmpxchg_release
 #define atomic64_cmpxchg		atomic_cmpxchg
 
-#define atomic64_inc(v)			atomic64_add(1, (v))
-#define atomic64_dec(v)			atomic64_sub(1, (v))
 #define atomic64_andnot			atomic64_andnot
 
 #endif
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 8977b5157c8f..c6b6a06231b2 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -69,13 +69,6 @@ ATOMIC_OPS(sub, -=)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_inc_return(v)		atomic_add_return(1, v)
-#define atomic_dec_return(v)		atomic_sub_return(1, v)
-
-#define atomic_inc(v)			(void)atomic_inc_return(v)
-
-#define atomic_dec(v)			(void)atomic_dec_return(v)
-
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 31638f511674..311b9894ccc8 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -198,10 +198,4 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 }
 #define atomic_fetch_add_unless atomic_fetch_add_unless
 
-#define atomic_inc(v) atomic_add(1, (v))
-#define atomic_dec(v) atomic_sub(1, (v))
-
-#define atomic_inc_return(v) (atomic_add_return(1, v))
-#define atomic_dec_return(v) (atomic_sub_return(1, v))
-
 #endif
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index e4143c462e65..46a15a974bed 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -231,19 +231,10 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
-
 #define atomic_add(i,v)			(void)atomic_add_return((i), (v))
 #define atomic_sub(i,v)			(void)atomic_sub_return((i), (v))
-#define atomic_inc(v)			atomic_add(1, (v))
-#define atomic_dec(v)			atomic_sub(1, (v))
 
 #define atomic64_add(i,v)		(void)atomic64_add_return((i), (v))
 #define atomic64_sub(i,v)		(void)atomic64_sub_return((i), (v))
-#define atomic64_inc(v)			atomic64_add(1, (v))
-#define atomic64_dec(v)			atomic64_sub(1, (v))
 
 #endif /* _ASM_IA64_ATOMIC_H */
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 9df09c876fa2..47228b0d4163 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -126,11 +126,13 @@ static inline void atomic_inc(atomic_t *v)
 {
 	__asm__ __volatile__("addql #1,%0" : "+m" (*v));
 }
+#define atomic_inc atomic_inc
 
 static inline void atomic_dec(atomic_t *v)
 {
 	__asm__ __volatile__("subql #1,%0" : "+m" (*v));
 }
+#define atomic_dec atomic_dec
 
 static inline int atomic_dec_and_test(atomic_t *v)
 {
@@ -192,9 +194,6 @@ static inline int atomic_xchg(atomic_t *v, int new)
 
 #endif /* !CONFIG_RMW_INSNS */
 
-#define atomic_dec_return(v)	atomic_sub_return(1, (v))
-#define atomic_inc_return(v)	atomic_add_return(1, (v))
-
 static inline int atomic_sub_and_test(int i, atomic_t *v)
 {
 	char c;
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index fd3008ae164c..79be687de4ab 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -274,31 +274,12 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
 
-#define atomic_dec_return(v) atomic_sub_return(1, (v))
-#define atomic_inc_return(v) atomic_add_return(1, (v))
-
 /*
  * atomic_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
  */
 #define atomic_dec_if_positive(v)	atomic_sub_if_positive(1, v)
 
-/*
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
-#define atomic_inc(v) atomic_add(1, (v))
-
-/*
- * atomic_dec - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
-#define atomic_dec(v) atomic_sub(1, (v))
-
 #ifdef CONFIG_64BIT
 
 #define ATOMIC64_INIT(i)    { (i) }
@@ -554,31 +535,12 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), (new)))
 
-#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
-#define atomic64_inc_return(v) atomic64_add_return(1, (v))
-
 /*
  * atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic64_t
  */
 #define atomic64_dec_if_positive(v)	atomic64_sub_if_positive(1, v)
 
-/*
- * atomic64_inc - increment atomic variable
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1.
- */
-#define atomic64_inc(v) atomic64_add(1, (v))
-
-/*
- * atomic64_dec - decrement and test
- * @v: pointer of type atomic64_t
- *
- * Atomically decrements @v by 1.
- */
-#define atomic64_dec(v) atomic64_sub(1, (v))
-
 #endif /* CONFIG_64BIT */
 
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index f85844ff6336..10bc490327c1 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -136,12 +136,6 @@ ATOMIC_OPS(xor, ^=)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_inc(v)	(atomic_add(   1,(v)))
-#define atomic_dec(v)	(atomic_add(  -1,(v)))
-
-#define atomic_inc_return(v)	(atomic_add_return(   1,(v)))
-#define atomic_dec_return(v)	(atomic_add_return(  -1,(v)))
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef CONFIG_64BIT
@@ -224,12 +218,6 @@ atomic64_read(const atomic64_t *v)
 	return READ_ONCE((v)->counter);
 }
 
-#define atomic64_inc(v)		(atomic64_add(   1,(v)))
-#define atomic64_dec(v)		(atomic64_add(  -1,(v)))
-
-#define atomic64_inc_return(v)		(atomic64_add_return(   1,(v)))
-#define atomic64_dec_return(v)		(atomic64_add_return(  -1,(v)))
-
 /* exported interface */
 #define atomic64_cmpxchg(v, o, n) \
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 5d76f05d2be3..ebaefdee4a57 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -143,6 +143,7 @@ static __inline__ void atomic_inc(atomic_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
+#define atomic_inc atomic_inc
 
 static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
 {
@@ -175,6 +176,7 @@ static __inline__ void atomic_dec(atomic_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
+#define atomic_dec atomic_dec
 
 static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
 {
@@ -411,6 +413,7 @@ static __inline__ void atomic64_inc(atomic64_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
+#define atomic64_inc atomic64_inc
 
 static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
 {
@@ -441,6 +444,7 @@ static __inline__ void atomic64_dec(atomic64_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
+#define atomic64_dec atomic64_dec
 
 static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
 {
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 68eef0a805ca..512b89485790 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -209,82 +209,6 @@ ATOMIC_OPS(xor, xor, i)
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 
-#define ATOMIC_OP(op, func_op, I, c_type, prefix)			\
-static __always_inline							\
-void atomic##prefix##_##op(atomic##prefix##_t *v)			\
-{									\
-	atomic##prefix##_##func_op(I, v);				\
-}
-
-#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix)			\
-static __always_inline							\
-c_type atomic##prefix##_fetch_##op##_relaxed(atomic##prefix##_t *v)	\
-{									\
-	return atomic##prefix##_fetch_##func_op##_relaxed(I, v);	\
-}									\
-static __always_inline							\
-c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v)		\
-{									\
-	return atomic##prefix##_fetch_##func_op(I, v);			\
-}
-
-#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix)		\
-static __always_inline							\
-c_type atomic##prefix##_##op##_return_relaxed(atomic##prefix##_t *v)	\
-{									\
-        return atomic##prefix##_fetch_##op##_relaxed(v) c_op I;		\
-}									\
-static __always_inline							\
-c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v)		\
-{									\
-        return atomic##prefix##_fetch_##op(v) c_op I;			\
-}
-
-#ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I)					\
-        ATOMIC_OP(       op, asm_op,       I,  int,   )			\
-        ATOMIC_FETCH_OP( op, asm_op,       I,  int,   )			\
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )
-#else
-#define ATOMIC_OPS(op, asm_op, c_op, I)					\
-        ATOMIC_OP(       op, asm_op,       I,  int,   )			\
-        ATOMIC_FETCH_OP( op, asm_op,       I,  int,   )			\
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )			\
-        ATOMIC_OP(       op, asm_op,       I, long, 64)			\
-        ATOMIC_FETCH_OP( op, asm_op,       I, long, 64)			\
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
-#endif
-
-ATOMIC_OPS(inc, add, +,  1)
-ATOMIC_OPS(dec, add, +, -1)
-
-#define atomic_inc_return_relaxed	atomic_inc_return_relaxed
-#define atomic_dec_return_relaxed	atomic_dec_return_relaxed
-#define atomic_inc_return		atomic_inc_return
-#define atomic_dec_return		atomic_dec_return
-
-#define atomic_fetch_inc_relaxed	atomic_fetch_inc_relaxed
-#define atomic_fetch_dec_relaxed	atomic_fetch_dec_relaxed
-#define atomic_fetch_inc		atomic_fetch_inc
-#define atomic_fetch_dec		atomic_fetch_dec
-
-#ifndef CONFIG_GENERIC_ATOMIC64
-#define atomic64_inc_return_relaxed	atomic64_inc_return_relaxed
-#define atomic64_dec_return_relaxed	atomic64_dec_return_relaxed
-#define atomic64_inc_return		atomic64_inc_return
-#define atomic64_dec_return		atomic64_dec_return
-
-#define atomic64_fetch_inc_relaxed	atomic64_fetch_inc_relaxed
-#define atomic64_fetch_dec_relaxed	atomic64_fetch_dec_relaxed
-#define atomic64_fetch_inc		atomic64_fetch_inc
-#define atomic64_fetch_dec		atomic64_fetch_dec
-#endif
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP_RETURN
-
 /* This is required to provide a full barrier on success. */
 static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 7f5fbd595f01..376e64af951f 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -55,13 +55,9 @@ static inline void atomic_add(int i, atomic_t *v)
 	__atomic_add(i, &v->counter);
 }
 
-#define atomic_inc(_v)			atomic_add(1, _v)
-#define atomic_inc_return(_v)		atomic_add_return(1, _v)
 #define atomic_sub(_i, _v)		atomic_add(-(int)(_i), _v)
 #define atomic_sub_return(_i, _v)	atomic_add_return(-(int)(_i), _v)
 #define atomic_fetch_sub(_i, _v)	atomic_fetch_add(-(int)(_i), _v)
-#define atomic_dec(_v)			atomic_sub(1, _v)
-#define atomic_dec_return(_v)		atomic_sub_return(1, _v)
 
 #define ATOMIC_OPS(op)							\
 static inline void atomic_##op(int i, atomic_t *v)			\
@@ -166,12 +162,8 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
-#define atomic64_inc(_v)		atomic64_add(1, _v)
-#define atomic64_inc_return(_v)		atomic64_add_return(1, _v)
 #define atomic64_sub_return(_i, _v)	atomic64_add_return(-(long)(_i), _v)
 #define atomic64_fetch_sub(_i, _v)	atomic64_fetch_add(-(long)(_i), _v)
 #define atomic64_sub(_i, _v)		atomic64_add(-(long)(_i), _v)
-#define atomic64_dec(_v)		atomic64_sub(1, _v)
-#define atomic64_dec_return(_v)		atomic64_sub_return(1, _v)
 
 #endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index d438494fa112..f37b95a80232 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -32,12 +32,6 @@
 #include <asm/atomic-irq.h>
 #endif
 
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-
-#define atomic_inc(v)			atomic_add(1, (v))
-#define atomic_dec(v)			atomic_sub(1, (v))
-
 #define atomic_xchg(v, new)		(xchg(&((v)->counter), new))
 #define atomic_cmpxchg(v, o, n)		(cmpxchg(&((v)->counter), (o), (n)))
 
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 3a26573790c6..94c930f0bc62 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -38,8 +38,6 @@ void atomic_set(atomic_t *, int);
 
 #define atomic_add(i, v)	((void)atomic_add_return( (int)(i), (v)))
 #define atomic_sub(i, v)	((void)atomic_add_return(-(int)(i), (v)))
-#define atomic_inc(v)		((void)atomic_add_return(        1, (v)))
-#define atomic_dec(v)		((void)atomic_add_return(       -1, (v)))
 
 #define atomic_and(i, v)	((void)atomic_fetch_and((i), (v)))
 #define atomic_or(i, v)		((void)atomic_fetch_or((i), (v)))
@@ -48,7 +46,4 @@ void atomic_set(atomic_t *, int);
 #define atomic_sub_return(i, v)	(atomic_add_return(-(int)(i), (v)))
 #define atomic_fetch_sub(i, v)  (atomic_fetch_add (-(int)(i), (v)))
 
-#define atomic_inc_return(v)	(atomic_add_return(        1, (v)))
-#define atomic_dec_return(v)	(atomic_add_return(       -1, (v)))
-
 #endif /* !(__ARCH_SPARC_ATOMIC__) */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 634508282aea..304865c7cdbb 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -50,18 +50,6 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_dec_return(v)   atomic_sub_return(1, v)
-#define atomic64_dec_return(v) atomic64_sub_return(1, v)
-
-#define atomic_inc_return(v)   atomic_add_return(1, v)
-#define atomic64_inc_return(v) atomic64_add_return(1, v)
-
-#define atomic_inc(v) atomic_add(1, v)
-#define atomic64_inc(v) atomic64_add(1, v)
-
-#define atomic_dec(v) atomic_sub(1, v)
-#define atomic64_dec(v) atomic64_sub(1, v)
-
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 
 static inline int atomic_xchg(atomic_t *v, int new)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 73bda4abe180..823fd2f320cf 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -92,6 +92,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
  *
  * Atomically increments @v by 1.
  */
+#define arch_atomic_inc arch_atomic_inc
 static __always_inline void arch_atomic_inc(atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "incl %0"
@@ -104,6 +105,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
  *
  * Atomically decrements @v by 1.
  */
+#define arch_atomic_dec arch_atomic_dec
 static __always_inline void arch_atomic_dec(atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "decl %0"
@@ -177,9 +179,6 @@ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
 	return arch_atomic_add_return(-i, v);
 }
 
-#define arch_atomic_inc_return(v)  (arch_atomic_add_return(1, v))
-#define arch_atomic_dec_return(v)  (arch_atomic_sub_return(1, v))
-
 static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
 {
 	return xadd(&v->counter, i);
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index a26810d005e0..472c7af0ed48 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -158,6 +158,7 @@ static inline long long arch_atomic64_inc_return(atomic64_t *v)
 			     "S" (v) : "memory", "ecx");
 	return a;
 }
+#define arch_atomic64_inc_return arch_atomic64_inc_return
 
 static inline long long arch_atomic64_dec_return(atomic64_t *v)
 {
@@ -166,6 +167,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v)
 			     "S" (v) : "memory", "ecx");
 	return a;
 }
+#define arch_atomic64_dec_return arch_atomic64_dec_return
 
 /**
  * arch_atomic64_add - add integer to atomic64 variable
@@ -203,6 +205,7 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
  *
  * Atomically increments @v by 1.
  */
+#define arch_atomic64_inc arch_atomic64_inc
 static inline void arch_atomic64_inc(atomic64_t *v)
 {
 	__alternative_atomic64(inc, inc_return, /* no output */,
@@ -215,6 +218,7 @@ static inline void arch_atomic64_inc(atomic64_t *v)
  *
  * Atomically decrements @v by 1.
  */
+#define arch_atomic64_dec arch_atomic64_dec
 static inline void arch_atomic64_dec(atomic64_t *v)
 {
 	__alternative_atomic64(dec, dec_return, /* no output */,
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6a65228a3db6..1b282272a801 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -83,6 +83,7 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
  *
  * Atomically increments @v by 1.
  */
+#define arch_atomic64_inc arch_atomic64_inc
 static __always_inline void arch_atomic64_inc(atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "incq %0"
@@ -96,6 +97,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
  *
  * Atomically decrements @v by 1.
  */
+#define arch_atomic64_dec arch_atomic64_dec
 static __always_inline void arch_atomic64_dec(atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "decq %0"
@@ -173,9 +175,6 @@ static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
 	return xadd(&v->counter, -i);
 }
 
-#define arch_atomic64_inc_return(v)  (arch_atomic64_add_return(1, (v)))
-#define arch_atomic64_dec_return(v)  (arch_atomic64_sub_return(1, (v)))
-
 static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
 {
 	return arch_cmpxchg(&v->counter, old, new);
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 332ae4eca737..7de0149e1cf7 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -197,38 +197,6 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
-#define atomic_inc(v) atomic_add(1,(v))
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
-#define atomic_inc_return(v) atomic_add_return(1,(v))
-
-/**
- * atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
-#define atomic_dec(v) atomic_sub(1,(v))
-
-/**
- * atomic_dec_return - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
-#define atomic_dec_return(v) atomic_sub_return(1,(v))
-
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 6b64c200de73..12f9634750d7 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -102,29 +102,41 @@ static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u
 }
 #endif
 
+#ifdef arch_atomic_inc
+#define atomic_inc atomic_inc
 static __always_inline void atomic_inc(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	arch_atomic_inc(v);
 }
+#endif
 
+#ifdef arch_atomic64_inc
+#define atomic64_inc atomic64_inc
 static __always_inline void atomic64_inc(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	arch_atomic64_inc(v);
 }
+#endif
 
+#ifdef arch_atomic_dec
+#define atomic_dec atomic_dec
 static __always_inline void atomic_dec(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	arch_atomic_dec(v);
 }
+#endif
 
+#ifdef atch_atomic64_dec
+#define atomic64_dec
 static __always_inline void atomic64_dec(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	arch_atomic64_dec(v);
 }
+#endif
 
 static __always_inline void atomic_add(int i, atomic_t *v)
 {
@@ -186,29 +198,41 @@ static __always_inline void atomic64_xor(s64 i, atomic64_t *v)
 	arch_atomic64_xor(i, v);
 }
 
+#ifdef arch_atomic_inc_return
+#define atomic_inc_return atomic_inc_return
 static __always_inline int atomic_inc_return(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_inc_return(v);
 }
+#endif
 
+#ifdef arch_atomic64_in_return
+#define atomic64_inc_return atomic64_inc_return
 static __always_inline s64 atomic64_inc_return(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_inc_return(v);
 }
+#endif
 
+#ifdef arch_atomic_dec_return
+#define atomic_dec_return atomic_dec_return
 static __always_inline int atomic_dec_return(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_dec_return(v);
 }
+#endif
 
+#ifdef arch_atomic64_dec_return
+#define atomic64_dec_return atomic64_dec_return
 static __always_inline s64 atomic64_dec_return(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_dec_return(v);
 }
+#endif
 
 #ifdef arch_atomic64_inc_not_zero
 #define atomic64_inc_not_zero atomic64_inc_not_zero
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 40cab858aaaa..13324aa828eb 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -196,19 +196,6 @@ static inline void atomic_sub(int i, atomic_t *v)
 	atomic_sub_return(i, v);
 }
 
-static inline void atomic_inc(atomic_t *v)
-{
-	atomic_add_return(1, v);
-}
-
-static inline void atomic_dec(atomic_t *v)
-{
-	atomic_sub_return(1, v);
-}
-
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index d3827ab97aa4..242b79ae0b57 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -56,9 +56,4 @@ extern long long atomic64_xchg(atomic64_t *v, long long new);
 extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u);
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_inc(v)			atomic64_add(1LL, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
-#define atomic64_dec(v)			atomic64_sub(1LL, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
-
 #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 3ee8da9023cd..24f345df7ba6 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -97,11 +97,23 @@
 #endif
 #endif /* atomic_add_return_relaxed */
 
+#ifndef atomic_inc
+#define atomic_inc(v)			atomic_add(1, (v))
+#endif
+
 /* atomic_inc_return_relaxed */
 #ifndef atomic_inc_return_relaxed
+
+#ifndef atomic_inc_return
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+#define atomic_inc_return_relaxed(v)	atomic_add_return_relaxed(1, (v))
+#define atomic_inc_return_acquire(v)	atomic_add_return_acquire(1, (v))
+#define atomic_inc_return_release(v)	atomic_add_return_release(1, (v))
+#else /* atomic_inc_return */
 #define  atomic_inc_return_relaxed	atomic_inc_return
 #define  atomic_inc_return_acquire	atomic_inc_return
 #define  atomic_inc_return_release	atomic_inc_return
+#endif /* atomic_inc_return */
 
 #else /* atomic_inc_return_relaxed */
 
@@ -145,11 +157,23 @@
 #endif
 #endif /* atomic_sub_return_relaxed */
 
+#ifndef atomic_dec
+#define atomic_dec(v)			atomic_sub(1, (v))
+#endif
+
 /* atomic_dec_return_relaxed */
 #ifndef atomic_dec_return_relaxed
+
+#ifndef atomic_dec_return
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+#define atomic_dec_return_relaxed(v)	atomic_sub_return_relaxed(1, (v))
+#define atomic_dec_return_acquire(v)	atomic_sub_return_acquire(1, (v))
+#define atomic_dec_return_release(v)	atomic_sub_return_release(1, (v))
+#else /* atomic_dec_return */
 #define  atomic_dec_return_relaxed	atomic_dec_return
 #define  atomic_dec_return_acquire	atomic_dec_return
 #define  atomic_dec_return_release	atomic_dec_return
+#endif /* atomic_dec_return */
 
 #else /* atomic_dec_return_relaxed */
 
@@ -748,11 +772,23 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #endif
 #endif /* atomic64_add_return_relaxed */
 
+#ifndef atomic64_inc
+#define atomic64_inc(v)			atomic64_add(1, (v))
+#endif
+
 /* atomic64_inc_return_relaxed */
 #ifndef atomic64_inc_return_relaxed
+
+#ifndef atomic64_inc_return
+#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
+#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1, (v))
+#define atomic64_inc_return_acquire(v)	atomic64_add_return_acquire(1, (v))
+#define atomic64_inc_return_release(v)	atomic64_add_return_release(1, (v))
+#else /* atomic64_inc_return */
 #define  atomic64_inc_return_relaxed	atomic64_inc_return
 #define  atomic64_inc_return_acquire	atomic64_inc_return
 #define  atomic64_inc_return_release	atomic64_inc_return
+#endif /* atomic64_inc_return */
 
 #else /* atomic64_inc_return_relaxed */
 
@@ -797,11 +833,23 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #endif
 #endif /* atomic64_sub_return_relaxed */
 
+#ifndef atomic64_dec
+#define atomic64_dec(v)			atomic64_sub(1, (v))
+#endif
+
 /* atomic64_dec_return_relaxed */
 #ifndef atomic64_dec_return_relaxed
+
+#ifndef atomic64_dec_return
+#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
+#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1, (v))
+#define atomic64_dec_return_acquire(v)	atomic64_sub_return_acquire(1, (v))
+#define atomic64_dec_return_release(v)	atomic64_sub_return_release(1, (v))
+#else /* atomic64_dec_return */
 #define  atomic64_dec_return_relaxed	atomic64_dec_return
 #define  atomic64_dec_return_acquire	atomic64_dec_return
 #define  atomic64_dec_return_release	atomic64_dec_return
+#endif /* atomic64_dec_return */
 
 #else /* atomic64_dec_return_relaxed */
 
-- 
cgit v1.2.3


From b3a2a05f9111de0b79312e577608a27b0318c0a1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:20 +0100
Subject: atomics/treewide: Make conditional inc/dec ops optional

The conditional inc/dec ops differ for atomic_t and atomic64_t:

- atomic_inc_unless_positive() is optional for atomic_t, and doesn't exist for atomic64_t.
- atomic_dec_unless_negative() is optional for atomic_t, and doesn't exist for atomic64_t.
- atomic_dec_if_positive is optional for atomic_t, and is mandatory for atomic64_t.

Let's make these consistently optional for both. At the same time, let's
clean up the existing fallbacks to use atomic_try_cmpxchg().

The instrumented atomics are updated accordingly.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-18-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           |  1 +
 arch/arc/include/asm/atomic.h             |  1 +
 arch/arm/include/asm/atomic.h             |  1 +
 arch/arm64/include/asm/atomic.h           |  2 +
 arch/ia64/include/asm/atomic.h            | 16 -----
 arch/parisc/include/asm/atomic.h          | 23 --------
 arch/powerpc/include/asm/atomic.h         |  1 +
 arch/s390/include/asm/atomic.h            | 17 ------
 arch/sparc/include/asm/atomic_64.h        |  1 +
 arch/x86/include/asm/atomic64_32.h        |  1 +
 arch/x86/include/asm/atomic64_64.h        | 18 ------
 include/asm-generic/atomic-instrumented.h |  3 +
 include/asm-generic/atomic64.h            |  1 +
 include/linux/atomic.h                    | 97 +++++++++++++++++++++++--------
 14 files changed, 85 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index f6410cb68058..4a6a8f58c9c9 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -296,5 +296,6 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	smp_mb();
 	return old - 1;
 }
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 
 #endif /* _ALPHA_ATOMIC_H */
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 27b95a928c1e..8f64f3b79b8a 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -517,6 +517,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 
 	return val;
 }
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 
 /**
  * atomic64_fetch_add_unless - add unless the number is a given value
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 5a58d061d3d2..884c241424fe 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -474,6 +474,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 
 	return result;
 }
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 
 static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
 						  long long u)
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 078f785cd97f..9bca54dda75c 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -159,5 +159,7 @@
 
 #define atomic64_andnot			atomic64_andnot
 
+#define atomic64_dec_if_positive	atomic64_dec_if_positive
+
 #endif
 #endif
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 46a15a974bed..206530d0751b 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -215,22 +215,6 @@ ATOMIC64_FETCH_OP(xor, ^)
 	(cmpxchg(&((v)->counter), old, new))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
-{
-	long c, old, dec;
-	c = atomic64_read(v);
-	for (;;) {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-		old = atomic64_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return dec;
-}
-
 #define atomic_add(i,v)			(void)atomic_add_return((i), (v))
 #define atomic_sub(i,v)			(void)atomic_sub_return((i), (v))
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 10bc490327c1..118953d41763 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -223,29 +223,6 @@ atomic64_read(const atomic64_t *v)
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-/*
- * atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-static inline long atomic64_dec_if_positive(atomic64_t *v)
-{
-	long c, old, dec;
-	c = atomic64_read(v);
-	for (;;) {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-		old = atomic64_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return dec;
-}
-
 #endif /* !CONFIG_64BIT */
 
 
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index ebaefdee4a57..a0156cb43d1f 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -488,6 +488,7 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 
 	return t;
 }
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 
 #define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_cmpxchg_relaxed(v, o, n) \
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 376e64af951f..fd20ab5d4cf7 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -145,23 +145,6 @@ ATOMIC64_OPS(xor)
 
 #undef ATOMIC64_OPS
 
-static inline long atomic64_dec_if_positive(atomic64_t *v)
-{
-	long c, old, dec;
-
-	c = atomic64_read(v);
-	for (;;) {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-		old = atomic64_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return dec;
-}
-
 #define atomic64_sub_return(_i, _v)	atomic64_add_return(-(long)(_i), _v)
 #define atomic64_fetch_sub(_i, _v)	atomic64_fetch_add(-(long)(_i), _v)
 #define atomic64_sub(_i, _v)		atomic64_add(-(long)(_i), _v)
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 304865c7cdbb..6963482c81d8 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -62,5 +62,6 @@ static inline int atomic_xchg(atomic_t *v, int new)
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
 long atomic64_dec_if_positive(atomic64_t *v);
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 472c7af0ed48..ef959f02d070 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -254,6 +254,7 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
 	return r;
 }
 
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	long long r;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 1b282272a801..849f1c566a11 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -191,24 +191,6 @@ static inline long arch_atomic64_xchg(atomic64_t *v, long new)
 	return xchg(&v->counter, new);
 }
 
-/*
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
-{
-	s64 dec, c = arch_atomic64_read(v);
-	do {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-	} while (!arch_atomic64_try_cmpxchg(v, &c, dec));
-	return dec;
-}
-
 static inline void arch_atomic64_and(long i, atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "andq %1,%0"
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 12f9634750d7..3c64e95d5ed0 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -243,11 +243,14 @@ static __always_inline bool atomic64_inc_not_zero(atomic64_t *v)
 }
 #endif
 
+#ifdef arch_atomic64_dec_if_positive
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_dec_if_positive(v);
 }
+#endif
 
 #ifdef arch_atomic_dec_and_test
 #define atomic_dec_and_test atomic_dec_and_test
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 242b79ae0b57..97b28b7f1f29 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -51,6 +51,7 @@ ATOMIC64_OPS(xor)
 #undef ATOMIC64_OP
 
 extern long long atomic64_dec_if_positive(atomic64_t *v);
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
 extern long long atomic64_xchg(atomic64_t *v, long long new);
 extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u);
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 24f345df7ba6..93fe5b4041e1 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -683,28 +683,30 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_inc_unless_negative
-static inline bool atomic_inc_unless_negative(atomic_t *p)
+static inline bool atomic_inc_unless_negative(atomic_t *v)
 {
-	int v, v1;
-	for (v = 0; v >= 0; v = v1) {
-		v1 = atomic_cmpxchg(p, v, v + 1);
-		if (likely(v1 == v))
-			return true;
-	}
-	return false;
+	int c = atomic_read(v);
+
+	do {
+		if (unlikely(c < 0))
+			return false;
+	} while (!atomic_try_cmpxchg(v, &c, c + 1));
+
+	return true;
 }
 #endif
 
 #ifndef atomic_dec_unless_positive
-static inline bool atomic_dec_unless_positive(atomic_t *p)
+static inline bool atomic_dec_unless_positive(atomic_t *v)
 {
-	int v, v1;
-	for (v = 0; v <= 0; v = v1) {
-		v1 = atomic_cmpxchg(p, v, v - 1);
-		if (likely(v1 == v))
-			return true;
-	}
-	return false;
+	int c = atomic_read(v);
+
+	do {
+		if (unlikely(c > 0))
+			return false;
+	} while (!atomic_try_cmpxchg(v, &c, c - 1));
+
+	return true;
 }
 #endif
 
@@ -718,17 +720,14 @@ static inline bool atomic_dec_unless_positive(atomic_t *p)
 #ifndef atomic_dec_if_positive
 static inline int atomic_dec_if_positive(atomic_t *v)
 {
-	int c, old, dec;
-	c = atomic_read(v);
-	for (;;) {
+	int dec, c = atomic_read(v);
+
+	do {
 		dec = c - 1;
 		if (unlikely(dec < 0))
 			break;
-		old = atomic_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
+	} while (!atomic_try_cmpxchg(v, &c, dec));
+
 	return dec;
 }
 #endif
@@ -1290,6 +1289,56 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v
 }
 #endif
 
+#ifndef atomic64_inc_unless_negative
+static inline bool atomic64_inc_unless_negative(atomic64_t *v)
+{
+	long long c = atomic64_read(v);
+
+	do {
+		if (unlikely(c < 0))
+			return false;
+	} while (!atomic64_try_cmpxchg(v, &c, c + 1));
+
+	return true;
+}
+#endif
+
+#ifndef atomic64_dec_unless_positive
+static inline bool atomic64_dec_unless_positive(atomic64_t *v)
+{
+	long long c = atomic64_read(v);
+
+	do {
+		if (unlikely(c > 0))
+			return false;
+	} while (!atomic64_try_cmpxchg(v, &c, c - 1));
+
+	return true;
+}
+#endif
+
+/*
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic64_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic64 variable, v, was not decremented.
+ */
+#ifndef atomic64_dec_if_positive
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long long dec, c = atomic64_read(v);
+
+	do {
+		dec = c - 1;
+		if (unlikely(dec < 0))
+			break;
+	} while (!atomic64_try_cmpxchg(v, &c, dec));
+
+	return dec;
+}
+#endif
+
 #define atomic64_cond_read_relaxed(v, c)	smp_cond_load_relaxed(&(v)->counter, (c))
 #define atomic64_cond_read_acquire(v, c)	smp_cond_load_acquire(&(v)->counter, (c))
 
-- 
cgit v1.2.3


From 7cc7eaad49c30ac165ecf84d95b26f7e0d53bd97 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:21 +0100
Subject: atomics/treewide: Clean up '*_andnot()' ifdeffery

The ifdeffery for atomic*_{fetch_,}andnot() is unlike that for all the
other atomics. If atomic*_andnot() is not defined, the corresponding
atomic*_fetch_andnot() is assumed to not be defined.

Additionally, the fallbacks for the various ordering cases are written
much later in atomic.h as static inlines.

This isn't problematic today, but gets in the way of scripting the
generation of atomics. To prepare for scripting, this patch:

* Switches to separate ifdefs for atomic*_andnot() and
  atomic*_fetch_andnot(), updating implementations as appropriate.

* Moves the fallbacks into the standards ifdefs, as macro expansions
  rather than static inlines.

* Removes trivial andnot implementations from architectures, where these
  are superseded by core code.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-19-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arc/include/asm/atomic.h |  8 ++--
 arch/arm/include/asm/atomic.h |  2 +
 include/linux/atomic.h        | 96 ++++++++++++++-----------------------------
 3 files changed, 36 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 8f64f3b79b8a..4e0072730241 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -187,7 +187,8 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-#define atomic_andnot atomic_andnot
+#define atomic_andnot		atomic_andnot
+#define atomic_fetch_andnot	atomic_fetch_andnot
 
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, c_op, asm_op)					\
@@ -296,8 +297,6 @@ ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
 	ATOMIC_FETCH_OP(op, c_op, asm_op)
 
 ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
-#define atomic_andnot(mask, v) atomic_and(~(mask), (v))
-#define atomic_fetch_andnot(mask, v) atomic_fetch_and(~(mask), (v))
 ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
 ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 
@@ -430,7 +429,8 @@ static inline long long atomic64_fetch_##op(long long a, atomic64_t *v)	\
 	ATOMIC64_OP_RETURN(op, op1, op2)				\
 	ATOMIC64_FETCH_OP(op, op1, op2)
 
-#define atomic64_andnot atomic64_andnot
+#define atomic64_andnot		atomic64_andnot
+#define atomic64_fetch_andnot	atomic64_fetch_andnot
 
 ATOMIC64_OPS(add, add.f, adc)
 ATOMIC64_OPS(sub, sub.f, sbc)
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 884c241424fe..f74756641410 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -216,6 +216,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+#define atomic_fetch_andnot		atomic_fetch_andnot
+
 #endif /* __LINUX_ARM_ARCH__ */
 
 #define ATOMIC_OPS(op, c_op, asm_op)					\
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 93fe5b4041e1..8e04f1f69bd9 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -354,12 +354,22 @@
 #endif
 #endif /* atomic_fetch_and_relaxed */
 
-#ifdef atomic_andnot
-/* atomic_fetch_andnot_relaxed */
+#ifndef atomic_andnot
+#define atomic_andnot(i, v)		atomic_and(~(int)(i), (v))
+#endif
+
 #ifndef atomic_fetch_andnot_relaxed
-#define atomic_fetch_andnot_relaxed	atomic_fetch_andnot
-#define atomic_fetch_andnot_acquire	atomic_fetch_andnot
-#define atomic_fetch_andnot_release	atomic_fetch_andnot
+
+#ifndef atomic_fetch_andnot
+#define atomic_fetch_andnot(i, v)		atomic_fetch_and(~(int)(i), (v))
+#define atomic_fetch_andnot_relaxed(i, v)	atomic_fetch_and_relaxed(~(int)(i), (v))
+#define atomic_fetch_andnot_acquire(i, v)	atomic_fetch_and_acquire(~(int)(i), (v))
+#define atomic_fetch_andnot_release(i, v)	atomic_fetch_and_release(~(int)(i), (v))
+#else /* atomic_fetch_andnot */
+#define atomic_fetch_andnot_relaxed		atomic_fetch_andnot
+#define atomic_fetch_andnot_acquire		atomic_fetch_andnot
+#define atomic_fetch_andnot_release		atomic_fetch_andnot
+#endif /* atomic_fetch_andnot */
 
 #else /* atomic_fetch_andnot_relaxed */
 
@@ -378,7 +388,6 @@
 	__atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__)
 #endif
 #endif /* atomic_fetch_andnot_relaxed */
-#endif /* atomic_andnot */
 
 /* atomic_fetch_xor_relaxed */
 #ifndef atomic_fetch_xor_relaxed
@@ -655,33 +664,6 @@ static inline bool atomic_add_negative(int i, atomic_t *v)
 }
 #endif
 
-#ifndef atomic_andnot
-static inline void atomic_andnot(int i, atomic_t *v)
-{
-	atomic_and(~i, v);
-}
-
-static inline int atomic_fetch_andnot(int i, atomic_t *v)
-{
-	return atomic_fetch_and(~i, v);
-}
-
-static inline int atomic_fetch_andnot_relaxed(int i, atomic_t *v)
-{
-	return atomic_fetch_and_relaxed(~i, v);
-}
-
-static inline int atomic_fetch_andnot_acquire(int i, atomic_t *v)
-{
-	return atomic_fetch_and_acquire(~i, v);
-}
-
-static inline int atomic_fetch_andnot_release(int i, atomic_t *v)
-{
-	return atomic_fetch_and_release(~i, v);
-}
-#endif
-
 #ifndef atomic_inc_unless_negative
 static inline bool atomic_inc_unless_negative(atomic_t *v)
 {
@@ -1029,12 +1011,22 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #endif
 #endif /* atomic64_fetch_and_relaxed */
 
-#ifdef atomic64_andnot
-/* atomic64_fetch_andnot_relaxed */
+#ifndef atomic64_andnot
+#define atomic64_andnot(i, v)		atomic64_and(~(long long)(i), (v))
+#endif
+
 #ifndef atomic64_fetch_andnot_relaxed
-#define atomic64_fetch_andnot_relaxed	atomic64_fetch_andnot
-#define atomic64_fetch_andnot_acquire	atomic64_fetch_andnot
-#define atomic64_fetch_andnot_release	atomic64_fetch_andnot
+
+#ifndef atomic64_fetch_andnot
+#define atomic64_fetch_andnot(i, v)		atomic64_fetch_and(~(long long)(i), (v))
+#define atomic64_fetch_andnot_relaxed(i, v)	atomic64_fetch_and_relaxed(~(long long)(i), (v))
+#define atomic64_fetch_andnot_acquire(i, v)	atomic64_fetch_and_acquire(~(long long)(i), (v))
+#define atomic64_fetch_andnot_release(i, v)	atomic64_fetch_and_release(~(long long)(i), (v))
+#else /* atomic64_fetch_andnot */
+#define atomic64_fetch_andnot_relaxed		atomic64_fetch_andnot
+#define atomic64_fetch_andnot_acquire		atomic64_fetch_andnot
+#define atomic64_fetch_andnot_release		atomic64_fetch_andnot
+#endif /* atomic64_fetch_andnot */
 
 #else /* atomic64_fetch_andnot_relaxed */
 
@@ -1053,7 +1045,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 	__atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__)
 #endif
 #endif /* atomic64_fetch_andnot_relaxed */
-#endif /* atomic64_andnot */
 
 /* atomic64_fetch_xor_relaxed */
 #ifndef atomic64_fetch_xor_relaxed
@@ -1262,33 +1253,6 @@ static inline bool atomic64_add_negative(long long i, atomic64_t *v)
 }
 #endif
 
-#ifndef atomic64_andnot
-static inline void atomic64_andnot(long long i, atomic64_t *v)
-{
-	atomic64_and(~i, v);
-}
-
-static inline long long atomic64_fetch_andnot(long long i, atomic64_t *v)
-{
-	return atomic64_fetch_and(~i, v);
-}
-
-static inline long long atomic64_fetch_andnot_relaxed(long long i, atomic64_t *v)
-{
-	return atomic64_fetch_and_relaxed(~i, v);
-}
-
-static inline long long atomic64_fetch_andnot_acquire(long long i, atomic64_t *v)
-{
-	return atomic64_fetch_and_acquire(~i, v);
-}
-
-static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v)
-{
-	return atomic64_fetch_and_release(~i, v);
-}
-#endif
-
 #ifndef atomic64_inc_unless_negative
 static inline bool atomic64_inc_unless_negative(atomic64_t *v)
 {
-- 
cgit v1.2.3


From 75a040ff14d9a99fc041f5e1d8f09541cab13ba4 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 1 Apr 2018 01:00:36 +0300
Subject: locking/refcounts: Include fewer headers in <linux/refcount.h>

Debloat <linux/refcount.h>'s dependencies:

- <linux/kernel.h> is not needed, but <linux/compiler.h> is.
- <linux/mutex.h> is not needed, only a forward declaration of "struct mutex".
- <linux/spinlock.h> is not needed, <linux/spinlock_types.h> is enough.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: https://lkml.kernel.org/lkml/20180331220036.GA7676@avx2
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/refcount.h | 1 +
 include/linux/refcount.h        | 7 ++++---
 lib/refcount.c                  | 2 ++
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4cf11d88d3b3..19b90521954c 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -5,6 +5,7 @@
  * PaX/grsecurity.
  */
 #include <linux/refcount.h>
+#include <asm/bug.h>
 
 /*
  * This is the first portion of the refcount error handling, which lives in
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 4193c41e383a..c36addd27dd5 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -3,9 +3,10 @@
 #define _LINUX_REFCOUNT_H
 
 #include <linux/atomic.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/spinlock_types.h>
+
+struct mutex;
 
 /**
  * struct refcount_t - variant of atomic_t specialized for reference counts
diff --git a/lib/refcount.c b/lib/refcount.c
index 0eb48353abe3..4bd842f20749 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -35,7 +35,9 @@
  *
  */
 
+#include <linux/mutex.h>
 #include <linux/refcount.h>
+#include <linux/spinlock.h>
 #include <linux/bug.h>
 
 #ifdef CONFIG_REFCOUNT_FULL
-- 
cgit v1.2.3


From 0af5107cd0640ee3424e337b492e4b11b450ce28 Mon Sep 17 00:00:00 2001
From: Talat Batheesh <talatb@mellanox.com>
Date: Thu, 17 May 2018 11:14:18 +0300
Subject: net/mlx5: Add RoCE RX ICRC encapsulated counter

Add capability bit in PCAM register and RoCE ICRC error counter
to PPCNT register.

Signed-off-by: Talat Batheesh <talatb@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f810772e80c0..deb3a459a22e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1681,7 +1681,11 @@ struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
 
 	u8         rx_buffer_full_low[0x20];
 
-	u8         reserved_at_1c0[0x600];
+	u8         rx_icrc_encapsulated_high[0x20];
+
+	u8         rx_icrc_encapsulated_low[0x20];
+
+	u8         reserved_at_200[0x5c0];
 };
 
 struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits {
@@ -8044,8 +8048,9 @@ struct mlx5_ifc_peir_reg_bits {
 };
 
 struct mlx5_ifc_pcam_enhanced_features_bits {
-	u8         reserved_at_0[0x76];
-
+	u8         reserved_at_0[0x6d];
+	u8         rx_icrc_encapsulated_counter[0x1];
+	u8	   reserved_at_6e[0x8];
 	u8         pfcc_mask[0x1];
 	u8         reserved_at_77[0x4];
 	u8         rx_buffer_fullness_counters[0x1];
-- 
cgit v1.2.3


From 0eb71a9da5796851fa87ddc1a534066c0fe54055 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 12:52:50 +1000
Subject: rhashtable: split rhashtable.h

Due to the use of rhashtables in net namespaces,
rhashtable.h is included in lots of the kernel,
so a small changes can required a large recompilation.
This makes development painful.

This patch splits out rhashtable-types.h which just includes
the major type declarations, and does not include (non-trivial)
inline code.  rhashtable.h is no longer included by anything
in the include/ directory.
Common include files only include rhashtable-types.h so a large
recompilation is only triggered when that changes.

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                                |   2 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h |   1 +
 include/linux/ipc.h                        |   2 +-
 include/linux/ipc_namespace.h              |   2 +-
 include/linux/mroute_base.h                |   2 +-
 include/linux/rhashtable-types.h           | 139 +++++++++++++++++++++++++++++
 include/linux/rhashtable.h                 | 127 +-------------------------
 include/net/inet_frag.h                    |   2 +-
 include/net/netfilter/nf_flow_table.h      |   2 +-
 include/net/sctp/structs.h                 |   2 +-
 include/net/seg6.h                         |   2 +-
 include/net/seg6_hmac.h                    |   2 +-
 ipc/msg.c                                  |   1 +
 ipc/sem.c                                  |   1 +
 ipc/shm.c                                  |   1 +
 ipc/util.c                                 |   1 +
 lib/rhashtable.c                           |   1 +
 net/ipv4/inet_fragment.c                   |   1 +
 net/ipv4/ipmr.c                            |   1 +
 net/ipv4/ipmr_base.c                       |   1 +
 net/ipv6/ip6mr.c                           |   1 +
 net/ipv6/seg6.c                            |   1 +
 net/ipv6/seg6_hmac.c                       |   1 +
 net/netfilter/nf_tables_api.c              |   1 +
 net/sctp/input.c                           |   1 +
 net/sctp/socket.c                          |   1 +
 26 files changed, 166 insertions(+), 133 deletions(-)
 create mode 100644 include/linux/rhashtable-types.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index edf3cf5ea691..99e5cef8172e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12162,7 +12162,9 @@ M:	Herbert Xu <herbert@gondor.apana.org.au>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	lib/rhashtable.c
+F:	lib/test_rhashtable.c
 F:	include/linux/rhashtable.h
+F:	include/linux/rhashtable-types.h
 
 RICOH R5C592 MEMORYSTICK DRIVER
 M:	Maxim Levitsky <maximlevitsky@gmail.com>
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 0dbe2d9e22d6..1adb968b8354 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -46,6 +46,7 @@
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/vmalloc.h>
+#include <linux/rhashtable.h>
 #include <linux/etherdevice.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 6cc2df7f7ac9..e1c9eea6015b 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -4,7 +4,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/uidgid.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <uapi/linux/ipc.h>
 #include <linux/refcount.h>
 
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index b5630c8eb2f3..6cea726612b7 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -9,7 +9,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ns_common.h>
 #include <linux/refcount.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 struct user_namespace;
 
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index d633f737b3c6..fd436cdd4725 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -2,7 +2,7 @@
 #define __LINUX_MROUTE_BASE_H
 
 #include <linux/netdevice.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/spinlock.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
new file mode 100644
index 000000000000..9740063ff13b
--- /dev/null
+++ b/include/linux/rhashtable-types.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Simple structures that might be needed in include
+ * files.
+ */
+
+#ifndef _LINUX_RHASHTABLE_TYPES_H
+#define _LINUX_RHASHTABLE_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+
+struct rhash_head {
+	struct rhash_head __rcu		*next;
+};
+
+struct rhlist_head {
+	struct rhash_head		rhead;
+	struct rhlist_head __rcu	*next;
+};
+
+struct bucket_table;
+
+/**
+ * struct rhashtable_compare_arg - Key for the function rhashtable_compare
+ * @ht: Hash table
+ * @key: Key to compare against
+ */
+struct rhashtable_compare_arg {
+	struct rhashtable *ht;
+	const void *key;
+};
+
+typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
+			       const void *obj);
+
+/**
+ * struct rhashtable_params - Hash table construction parameters
+ * @nelem_hint: Hint on number of elements, should be 75% of desired size
+ * @key_len: Length of key
+ * @key_offset: Offset of key in struct to be hashed
+ * @head_offset: Offset of rhash_head in struct to be hashed
+ * @max_size: Maximum size while expanding
+ * @min_size: Minimum size while shrinking
+ * @locks_mul: Number of bucket locks to allocate per cpu (default: 32)
+ * @automatic_shrinking: Enable automatic shrinking of tables
+ * @nulls_base: Base value to generate nulls marker
+ * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
+ * @obj_hashfn: Function to hash object
+ * @obj_cmpfn: Function to compare key with object
+ */
+struct rhashtable_params {
+	u16			nelem_hint;
+	u16			key_len;
+	u16			key_offset;
+	u16			head_offset;
+	unsigned int		max_size;
+	u16			min_size;
+	bool			automatic_shrinking;
+	u8			locks_mul;
+	u32			nulls_base;
+	rht_hashfn_t		hashfn;
+	rht_obj_hashfn_t	obj_hashfn;
+	rht_obj_cmpfn_t		obj_cmpfn;
+};
+
+/**
+ * struct rhashtable - Hash table handle
+ * @tbl: Bucket table
+ * @key_len: Key length for hashfn
+ * @max_elems: Maximum number of elements in table
+ * @p: Configuration parameters
+ * @rhlist: True if this is an rhltable
+ * @run_work: Deferred worker to expand/shrink asynchronously
+ * @mutex: Mutex to protect current/future table swapping
+ * @lock: Spin lock to protect walker list
+ * @nelems: Number of elements in table
+ */
+struct rhashtable {
+	struct bucket_table __rcu	*tbl;
+	unsigned int			key_len;
+	unsigned int			max_elems;
+	struct rhashtable_params	p;
+	bool				rhlist;
+	struct work_struct		run_work;
+	struct mutex                    mutex;
+	spinlock_t			lock;
+	atomic_t			nelems;
+};
+
+/**
+ * struct rhltable - Hash table with duplicate objects in a list
+ * @ht: Underlying rhtable
+ */
+struct rhltable {
+	struct rhashtable ht;
+};
+
+/**
+ * struct rhashtable_walker - Hash table walker
+ * @list: List entry on list of walkers
+ * @tbl: The table that we were walking over
+ */
+struct rhashtable_walker {
+	struct list_head list;
+	struct bucket_table *tbl;
+};
+
+/**
+ * struct rhashtable_iter - Hash table iterator
+ * @ht: Table to iterate through
+ * @p: Current pointer
+ * @list: Current hash list pointer
+ * @walker: Associated rhashtable walker
+ * @slot: Current slot
+ * @skip: Number of entries to skip in slot
+ */
+struct rhashtable_iter {
+	struct rhashtable *ht;
+	struct rhash_head *p;
+	struct rhlist_head *list;
+	struct rhashtable_walker walker;
+	unsigned int slot;
+	unsigned int skip;
+	bool end_of_table;
+};
+
+int rhashtable_init(struct rhashtable *ht,
+		    const struct rhashtable_params *params);
+int rhltable_init(struct rhltable *hlt,
+		  const struct rhashtable_params *params);
+
+#endif /* _LINUX_RHASHTABLE_TYPES_H */
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 4e1f535c2034..48754ab07cdf 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
@@ -17,16 +18,14 @@
 #ifndef _LINUX_RHASHTABLE_H
 #define _LINUX_RHASHTABLE_H
 
-#include <linux/atomic.h>
-#include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/jhash.h>
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
-#include <linux/mutex.h>
 #include <linux/rculist.h>
 
+#include <linux/rhashtable-types.h>
 /*
  * The end of the chain is marked with a special nulls marks which has
  * the following format:
@@ -64,15 +63,6 @@
  */
 #define RHT_ELASTICITY	16u
 
-struct rhash_head {
-	struct rhash_head __rcu		*next;
-};
-
-struct rhlist_head {
-	struct rhash_head		rhead;
-	struct rhlist_head __rcu	*next;
-};
-
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
@@ -102,114 +92,6 @@ struct bucket_table {
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
-/**
- * struct rhashtable_compare_arg - Key for the function rhashtable_compare
- * @ht: Hash table
- * @key: Key to compare against
- */
-struct rhashtable_compare_arg {
-	struct rhashtable *ht;
-	const void *key;
-};
-
-typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
-			       const void *obj);
-
-struct rhashtable;
-
-/**
- * struct rhashtable_params - Hash table construction parameters
- * @nelem_hint: Hint on number of elements, should be 75% of desired size
- * @key_len: Length of key
- * @key_offset: Offset of key in struct to be hashed
- * @head_offset: Offset of rhash_head in struct to be hashed
- * @max_size: Maximum size while expanding
- * @min_size: Minimum size while shrinking
- * @locks_mul: Number of bucket locks to allocate per cpu (default: 32)
- * @automatic_shrinking: Enable automatic shrinking of tables
- * @nulls_base: Base value to generate nulls marker
- * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
- * @obj_hashfn: Function to hash object
- * @obj_cmpfn: Function to compare key with object
- */
-struct rhashtable_params {
-	u16			nelem_hint;
-	u16			key_len;
-	u16			key_offset;
-	u16			head_offset;
-	unsigned int		max_size;
-	u16			min_size;
-	bool			automatic_shrinking;
-	u8			locks_mul;
-	u32			nulls_base;
-	rht_hashfn_t		hashfn;
-	rht_obj_hashfn_t	obj_hashfn;
-	rht_obj_cmpfn_t		obj_cmpfn;
-};
-
-/**
- * struct rhashtable - Hash table handle
- * @tbl: Bucket table
- * @key_len: Key length for hashfn
- * @max_elems: Maximum number of elements in table
- * @p: Configuration parameters
- * @rhlist: True if this is an rhltable
- * @run_work: Deferred worker to expand/shrink asynchronously
- * @mutex: Mutex to protect current/future table swapping
- * @lock: Spin lock to protect walker list
- * @nelems: Number of elements in table
- */
-struct rhashtable {
-	struct bucket_table __rcu	*tbl;
-	unsigned int			key_len;
-	unsigned int			max_elems;
-	struct rhashtable_params	p;
-	bool				rhlist;
-	struct work_struct		run_work;
-	struct mutex                    mutex;
-	spinlock_t			lock;
-	atomic_t			nelems;
-};
-
-/**
- * struct rhltable - Hash table with duplicate objects in a list
- * @ht: Underlying rhtable
- */
-struct rhltable {
-	struct rhashtable ht;
-};
-
-/**
- * struct rhashtable_walker - Hash table walker
- * @list: List entry on list of walkers
- * @tbl: The table that we were walking over
- */
-struct rhashtable_walker {
-	struct list_head list;
-	struct bucket_table *tbl;
-};
-
-/**
- * struct rhashtable_iter - Hash table iterator
- * @ht: Table to iterate through
- * @p: Current pointer
- * @list: Current hash list pointer
- * @walker: Associated rhashtable walker
- * @slot: Current slot
- * @skip: Number of entries to skip in slot
- */
-struct rhashtable_iter {
-	struct rhashtable *ht;
-	struct rhash_head *p;
-	struct rhlist_head *list;
-	struct rhashtable_walker walker;
-	unsigned int slot;
-	unsigned int skip;
-	bool end_of_table;
-};
-
 static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
 {
 	return NULLS_MARKER(ht->p.nulls_base + hash);
@@ -376,11 +258,6 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
 }
 #endif /* CONFIG_PROVE_LOCKING */
 
-int rhashtable_init(struct rhashtable *ht,
-		    const struct rhashtable_params *params);
-int rhltable_init(struct rhltable *hlt,
-		  const struct rhashtable_params *params);
-
 void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			     struct rhash_head *obj);
 
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index ed07e3786d98..f4272a29dc44 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -2,7 +2,7 @@
 #ifndef __NET_FRAG_H__
 #define __NET_FRAG_H__
 
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 struct netns_frags {
 	/* sysctls */
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index ba9fa4592f2b..0e355f4a3d76 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -4,7 +4,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/rcupdate.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/dst.h>
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index dbe1b911a24d..e0f962d27386 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -48,7 +48,7 @@
 #define __sctp_structs_h__
 
 #include <linux/ktime.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/socket.h>	/* linux/in.h needs this!!    */
 #include <linux/in.h>		/* We get struct sockaddr_in. */
 #include <linux/in6.h>		/* We get struct in6_addr     */
diff --git a/include/net/seg6.h b/include/net/seg6.h
index e029e301faa5..2567941a2f32 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -18,7 +18,7 @@
 #include <linux/ipv6.h>
 #include <net/lwtunnel.h>
 #include <linux/seg6.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 static inline void update_csum_diff4(struct sk_buff *skb, __be32 from,
 				     __be32 to)
diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h
index 69c3a106056b..7fda469e2758 100644
--- a/include/net/seg6_hmac.h
+++ b/include/net/seg6_hmac.h
@@ -22,7 +22,7 @@
 #include <linux/route.h>
 #include <net/seg6.h>
 #include <linux/seg6_hmac.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 #define SEG6_HMAC_MAX_DIGESTSIZE	160
 #define SEG6_HMAC_RING_SIZE		256
diff --git a/ipc/msg.c b/ipc/msg.c
index 3b6545302598..203281198079 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -38,6 +38,7 @@
 #include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include <linux/rhashtable.h>
 
 #include <asm/current.h>
 #include <linux/uaccess.h>
diff --git a/ipc/sem.c b/ipc/sem.c
index 5af1943ad782..29c0347ef11d 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -86,6 +86,7 @@
 #include <linux/ipc_namespace.h>
 #include <linux/sched/wake_q.h>
 #include <linux/nospec.h>
+#include <linux/rhashtable.h>
 
 #include <linux/uaccess.h>
 #include "util.h"
diff --git a/ipc/shm.c b/ipc/shm.c
index 051a3e1fb8df..d4daf78df6da 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -43,6 +43,7 @@
 #include <linux/nsproxy.h>
 #include <linux/mount.h>
 #include <linux/ipc_namespace.h>
+#include <linux/rhashtable.h>
 
 #include <linux/uaccess.h>
 
diff --git a/ipc/util.c b/ipc/util.c
index 4e81182fa0ac..fdffff41f65b 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -63,6 +63,7 @@
 #include <linux/rwsem.h>
 #include <linux/memory.h>
 #include <linux/ipc_namespace.h>
+#include <linux/rhashtable.h>
 
 #include <asm/unistd.h>
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 9427b5766134..c9fafea7dc6e 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -28,6 +28,7 @@
 #include <linux/rhashtable.h>
 #include <linux/err.h>
 #include <linux/export.h>
+#include <linux/rhashtable.h>
 
 #define HASH_DEFAULT_SIZE	64UL
 #define HASH_MIN_SIZE		4U
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c9e35b81d093..316518f87294 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -20,6 +20,7 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <net/sock.h>
 #include <net/inet_frag.h>
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9f79b9803a16..82f914122f1b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -60,6 +60,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/compat.h>
 #include <linux/export.h>
+#include <linux/rhashtable.h>
 #include <net/ip_tunnels.h>
 #include <net/checksum.h>
 #include <net/netlink.h>
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index cafb0506c8c9..1ad9aa62a97b 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -2,6 +2,7 @@
  * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
  */
 
+#include <linux/rhashtable.h>
 #include <linux/mroute_base.h>
 
 /* Sets everything common except 'dev', since that is done under locking */
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0d0f0053bb11..d0b7e0249c13 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -32,6 +32,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/compat.h>
+#include <linux/rhashtable.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
 #include <net/raw.h>
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 0fdf2a55e746..8d0ba757a46c 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -17,6 +17,7 @@
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <net/ipv6.h>
 #include <net/protocol.h>
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 33fb35cbfac1..b1791129a875 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -22,6 +22,7 @@
 #include <linux/icmpv6.h>
 #include <linux/mroute6.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 896d4a36081d..3f211e1025c1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -14,6 +14,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/vmalloc.h>
+#include <linux/rhashtable.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nf_tables.h>
diff --git a/net/sctp/input.c b/net/sctp/input.c
index ba8a6e6c36fa..9bbc5f92c941 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -56,6 +56,7 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/checksum.h>
 #include <net/net_namespace.h>
+#include <linux/rhashtable.h>
 
 /* Forward declarations for internal helpers. */
 static int sctp_rcv_ootb(struct sk_buff *);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d20f7addee19..0e91e83eea5a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -66,6 +66,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/compat.h>
+#include <linux/rhashtable.h>
 
 #include <net/ip.h>
 #include <net/icmp.h>
-- 
cgit v1.2.3


From 9f9a707738aa7a8b9f78a641b83927ada256a626 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 12:52:50 +1000
Subject: rhashtable: remove nulls_base and related code.

This "feature" is unused, undocumented, and untested and so doesn't
really belong.  A patch is under development to properly implement
support for detecting when a search gets diverted down a different
chain, which the common purpose of nulls markers.

This patch actually fixes a bug too.  The table resizing allows a
table to grow to 2^31 buckets, but the hash is truncated to 27 bits -
any growth beyond 2^27 is wasteful an ineffective.

This patch results in NULLS_MARKER(0) being used for all chains,
and leaves the use of rht_is_a_null() to test for it.

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable-types.h |  2 --
 include/linux/rhashtable.h       | 33 +++------------------------------
 lib/rhashtable.c                 |  8 --------
 lib/test_rhashtable.c            |  5 +----
 net/core/xdp.c                   |  4 ++--
 5 files changed, 6 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
index 9740063ff13b..763d613ce2c2 100644
--- a/include/linux/rhashtable-types.h
+++ b/include/linux/rhashtable-types.h
@@ -50,7 +50,6 @@ typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
  * @min_size: Minimum size while shrinking
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 32)
  * @automatic_shrinking: Enable automatic shrinking of tables
- * @nulls_base: Base value to generate nulls marker
  * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
  * @obj_hashfn: Function to hash object
  * @obj_cmpfn: Function to compare key with object
@@ -64,7 +63,6 @@ struct rhashtable_params {
 	u16			min_size;
 	bool			automatic_shrinking;
 	u8			locks_mul;
-	u32			nulls_base;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
 	rht_obj_cmpfn_t		obj_cmpfn;
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 48754ab07cdf..d9f719af7936 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -28,25 +28,8 @@
 #include <linux/rhashtable-types.h>
 /*
  * The end of the chain is marked with a special nulls marks which has
- * the following format:
- *
- * +-------+-----------------------------------------------------+-+
- * | Base  |                      Hash                           |1|
- * +-------+-----------------------------------------------------+-+
- *
- * Base (4 bits) : Reserved to distinguish between multiple tables.
- *                 Specified via &struct rhashtable_params.nulls_base.
- * Hash (27 bits): Full hash (unmasked) of first element added to bucket
- * 1 (1 bit)     : Nulls marker (always set)
- *
- * The remaining bits of the next pointer remain unused for now.
+ * the least significant bit set.
  */
-#define RHT_BASE_BITS		4
-#define RHT_HASH_BITS		27
-#define RHT_BASE_SHIFT		RHT_HASH_BITS
-
-/* Base bits plus 1 bit for nulls marker */
-#define RHT_HASH_RESERVED_SPACE	(RHT_BASE_BITS + 1)
 
 /* Maximum chain length before rehash
  *
@@ -92,24 +75,14 @@ struct bucket_table {
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
-static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
-{
-	return NULLS_MARKER(ht->p.nulls_base + hash);
-}
-
 #define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
-	((ptr) = (typeof(ptr)) rht_marker(ht, hash))
+	((ptr) = (typeof(ptr)) NULLS_MARKER(0))
 
 static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
 {
 	return ((unsigned long) ptr & 1);
 }
 
-static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr)
-{
-	return ((unsigned long) ptr) >> 1;
-}
-
 static inline void *rht_obj(const struct rhashtable *ht,
 			    const struct rhash_head *he)
 {
@@ -119,7 +92,7 @@ static inline void *rht_obj(const struct rhashtable *ht,
 static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
 					    unsigned int hash)
 {
-	return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
+	return hash & (tbl->size - 1);
 }
 
 static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index c9fafea7dc6e..688693c919be 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -995,7 +995,6 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
  *	.key_offset = offsetof(struct test_obj, key),
  *	.key_len = sizeof(int),
  *	.hashfn = jhash,
- *	.nulls_base = (1U << RHT_BASE_SHIFT),
  * };
  *
  * Configuration Example 2: Variable length keys
@@ -1029,9 +1028,6 @@ int rhashtable_init(struct rhashtable *ht,
 	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
 
-	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
-		return -EINVAL;
-
 	memset(ht, 0, sizeof(*ht));
 	mutex_init(&ht->mutex);
 	spin_lock_init(&ht->lock);
@@ -1096,10 +1092,6 @@ int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
 {
 	int err;
 
-	/* No rhlist NULLs marking for now. */
-	if (params->nulls_base)
-		return -EINVAL;
-
 	err = rhashtable_init(&hlt->ht, params);
 	hlt->ht.rhlist = true;
 	return err;
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 6ca59ffcacbe..82ac39ce5310 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -83,7 +83,7 @@ static u32 my_hashfn(const void *data, u32 len, u32 seed)
 {
 	const struct test_obj_rhl *obj = data;
 
-	return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE;
+	return (obj->value.id % 10);
 }
 
 static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
@@ -99,7 +99,6 @@ static struct rhashtable_params test_rht_params = {
 	.key_offset = offsetof(struct test_obj, value),
 	.key_len = sizeof(struct test_obj_val),
 	.hashfn = jhash,
-	.nulls_base = (3U << RHT_BASE_SHIFT),
 };
 
 static struct rhashtable_params test_rht_params_dup = {
@@ -296,8 +295,6 @@ static int __init test_rhltable(unsigned int entries)
 	if (!obj_in_table)
 		goto out_free;
 
-	/* nulls_base not supported in rhlist interface */
-	test_rht_params.nulls_base = 0;
 	err = rhltable_init(&rhlt, &test_rht_params);
 	if (WARN_ON(err))
 		goto out_free;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9d1f22072d5d..31c58719b5a9 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -45,8 +45,8 @@ static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
 	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
 		     != sizeof(u32));
 
-	/* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
-	return key << RHT_HASH_RESERVED_SPACE;
+	/* Use cyclic increasing ID as direct hash key */
+	return key;
 }
 
 static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
-- 
cgit v1.2.3


From 9b4f64a227b6f462482a8cc68c7134dc6e26f1c1 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 12:52:50 +1000
Subject: rhashtable: simplify INIT_RHT_NULLS_HEAD()

The 'ht' and 'hash' arguments to INIT_RHT_NULLS_HEAD() are
no longer used - so drop them.  This allows us to also
remove the nhash argument from nested_table_alloc().

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  2 +-
 lib/rhashtable.c           | 15 ++++++---------
 2 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index d9f719af7936..3f3a182bd0b4 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -75,7 +75,7 @@ struct bucket_table {
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
-#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
+#define INIT_RHT_NULLS_HEAD(ptr)	\
 	((ptr) = (typeof(ptr)) NULLS_MARKER(0))
 
 static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 688693c919be..a81cd27d518c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -116,8 +116,7 @@ static void bucket_table_free_rcu(struct rcu_head *head)
 
 static union nested_table *nested_table_alloc(struct rhashtable *ht,
 					      union nested_table __rcu **prev,
-					      unsigned int shifted,
-					      unsigned int nhash)
+					      unsigned int shifted)
 {
 	union nested_table *ntbl;
 	int i;
@@ -130,8 +129,7 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht,
 
 	if (ntbl && shifted) {
 		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
-			INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
-					    (i << shifted) | nhash);
+			INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
 	}
 
 	rcu_assign_pointer(*prev, ntbl);
@@ -157,7 +155,7 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
 		return NULL;
 
 	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
-				0, 0)) {
+				0)) {
 		kfree(tbl);
 		return NULL;
 	}
@@ -207,7 +205,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	tbl->hash_rnd = get_random_u32();
 
 	for (i = 0; i < nbuckets; i++)
-		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
+		INIT_RHT_NULLS_HEAD(tbl->buckets[i]);
 
 	return tbl;
 }
@@ -1217,7 +1215,7 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
 	nhash = index;
 	shifted = tbl->nest;
 	ntbl = nested_table_alloc(ht, &ntbl[index].table,
-				  size <= (1 << shift) ? shifted : 0, nhash);
+				  size <= (1 << shift) ? shifted : 0);
 
 	while (ntbl && size > (1 << shift)) {
 		index = hash & ((1 << shift) - 1);
@@ -1226,8 +1224,7 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
 		nhash |= index << shifted;
 		shifted += shift;
 		ntbl = nested_table_alloc(ht, &ntbl[index].table,
-					  size <= (1 << shift) ? shifted : 0,
-					  nhash);
+					  size <= (1 << shift) ? shifted : 0);
 	}
 
 	if (!ntbl)
-- 
cgit v1.2.3


From c0690016a73fe6bd456887bbbe6e10c7f0096554 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 12:52:50 +1000
Subject: rhashtable: clean up dereference of ->future_tbl.

Using rht_dereference_bucket() to dereference
->future_tbl looks like a type error, and could be confusing.
Using rht_dereference_rcu() to test a pointer for NULL
adds an unnecessary barrier - rcu_access_pointer() is preferred
for NULL tests when no lock is held.

This uses 3 different ways to access ->future_tbl.
- if we know the mutex is held, use rht_dereference()
- if we don't hold the mutex, and are only testing for NULL,
  use rcu_access_pointer()
- otherwise (using RCU protection for true dereference),
  use rht_dereference_rcu().

Note that this includes a simplification of the call to
rhashtable_last_table() - we don't do an extra dereference
before the call any more.

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 2 +-
 lib/rhashtable.c           | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 3f3a182bd0b4..eb7111039247 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -595,7 +595,7 @@ static inline void *__rhashtable_insert_fast(
 	lock = rht_bucket_lock(tbl, hash);
 	spin_lock_bh(lock);
 
-	if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) {
+	if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
 slow_path:
 		spin_unlock_bh(lock);
 		rcu_read_unlock();
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 52ec83212856..0e04947b7e0c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -226,8 +226,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
-	struct bucket_table *new_tbl = rhashtable_last_table(ht,
-		rht_dereference_rcu(old_tbl->future_tbl, ht));
+	struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
 	struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
 	int err = -EAGAIN;
 	struct rhash_head *head, *next, *entry;
@@ -467,7 +466,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
 
 fail:
 	/* Do not fail the insert if someone else did a rehash. */
-	if (likely(rcu_dereference_raw(tbl->future_tbl)))
+	if (likely(rcu_access_pointer(tbl->future_tbl)))
 		return 0;
 
 	/* Schedule async rehash to retry allocation in process context. */
@@ -540,7 +539,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 	if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
 		return ERR_CAST(data);
 
-	new_tbl = rcu_dereference(tbl->future_tbl);
+	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	if (new_tbl)
 		return new_tbl;
 
@@ -599,7 +598,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
 			break;
 
 		spin_unlock_bh(lock);
-		tbl = rcu_dereference(tbl->future_tbl);
+		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	}
 
 	data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
-- 
cgit v1.2.3


From 3f6c65d6255a872846c44182c82c78d3dc6239f5 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 19 Jun 2018 21:42:50 -0700
Subject: tcp: ignore rcv_rtt sample with old ts ecr value

When receiving multiple packets with the same ts ecr value, only try
to compute rcv_rtt sample with the earliest received packet.
This is because the rcv_rtt calculated by later received packets
could possibly include long idle time or other types of delay.
For example:
(1) server sends last packet of reply with TS val V1
(2) client ACKs last packet of reply with TS ecr V1
(3) long idle time passes
(4) client sends next request data packet with TS ecr V1 (again!)
At this time, the rcv_rtt computed on server with TS ecr V1 will be
inflated with the idle time and should get ignored.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h  |  1 +
 net/ipv4/tcp.c       |  1 +
 net/ipv4/tcp_input.c | 14 +++++++++++---
 3 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 72705eaf4b84..3dbea6610304 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -350,6 +350,7 @@ struct tcp_sock {
 #endif
 
 /* Receiver side RTT estimation */
+	u32 rcv_rtt_last_tsecr;
 	struct {
 		u32	rtt_us;
 		u32	seq;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 141acd92e58a..47c45d5be9f9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2563,6 +2563,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	sk->sk_shutdown = 0;
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->srtt_us = 0;
+	tp->rcv_rtt_last_tsecr = 0;
 	tp->write_seq += tp->max_window + 2;
 	if (tp->write_seq == 0)
 		tp->write_seq = 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 355d3dffd021..76ca88f63b70 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -582,9 +582,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->rx_opt.rcv_tsecr &&
-	    (TCP_SKB_CB(skb)->end_seq -
-	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+	if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
+		return;
+	tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
+
+	if (TCP_SKB_CB(skb)->end_seq -
+	    TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
 		u32 delta_us;
 
@@ -5475,6 +5478,11 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 				tcp_ack(sk, skb, 0);
 				__kfree_skb(skb);
 				tcp_data_snd_check(sk);
+				/* When receiving pure ack in fast path, update
+				 * last ts ecr directly instead of calling
+				 * tcp_rcv_rtt_measure_ts()
+				 */
+				tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
 				return;
 			} else { /* Header too small */
 				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
-- 
cgit v1.2.3


From 63a67a926e214dac94e29147c0f3d11499f655a1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jun 2018 17:16:44 -0400
Subject: kill dentry_update_name_case()

the last user is gone

Spotted-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 27 ---------------------------
 include/linux/dcache.h |  2 --
 2 files changed, 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..d9323af88ed0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2676,33 +2676,6 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
 }
 EXPORT_SYMBOL(d_exact_alias);
 
-/**
- * dentry_update_name_case - update case insensitive dentry with a new name
- * @dentry: dentry to be updated
- * @name: new name
- *
- * Update a case insensitive dentry with new case of name.
- *
- * dentry must have been returned by d_lookup with name @name. Old and new
- * name lengths must match (ie. no d_compare which allows mismatched name
- * lengths).
- *
- * Parent inode i_mutex must be held over d_lookup and into this call (to
- * keep renames and concurrent inserts, and readdir(2) away).
- */
-void dentry_update_name_case(struct dentry *dentry, const struct qstr *name)
-{
-	BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
-	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
-
-	spin_lock(&dentry->d_lock);
-	write_seqcount_begin(&dentry->d_seq);
-	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
-	write_seqcount_end(&dentry->d_seq);
-	spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(dentry_update_name_case);
-
 static void swap_names(struct dentry *dentry, struct dentry *target)
 {
 	if (unlikely(dname_external(target))) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 66c6e17e61e5..cee70bf207fc 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -271,8 +271,6 @@ extern void d_rehash(struct dentry *);
  
 extern void d_add(struct dentry *, struct inode *);
 
-extern void dentry_update_name_case(struct dentry *, const struct qstr *);
-
 /* used for rename() and baskets */
 extern void d_move(struct dentry *, struct dentry *);
 extern void d_exchange(struct dentry *, struct dentry *);
-- 
cgit v1.2.3


From d0dd63a8aee1ef89f2e48e554b796b9f9e4fcadb Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 16 Jun 2018 22:11:42 -0700
Subject: time: Introduce struct __kernel_itimerspec

struct itimerspec is not y2038-safe.

Introduce a new struct __kernel_itimerspec based on the kernel internal
y2038-safe struct itimerspec64.

The definition of struct __kernel_itimerspec includes two struct
__kernel_timespec.

Since struct __kernel_timespec has the same representation in native and
compat modes, so does struct __kernel_itimerspec. This helps have a common
entry point for syscalls using struct __kernel_itimerspec.

New y2038-safe syscalls will use this new type. Since most of the new
syscalls are just an update to the native syscalls with the type update,
place the new definition under CONFIG_64BIT_TIME. This helps architectures
that do not support the above config to keep using the old definition of
struct itimerspec.

Also change the get/put_itimerspec64 to use struct__kernel_itimerspec.
This will help 32 bit architectures to use the new syscalls when
architectures select CONFIG_64BIT_TIME.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: viro@zeniv.linux.org.uk
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: y2038@lists.linaro.org
Link: https://lkml.kernel.org/r/20180617051144.29756-2-deepa.kernel@gmail.com
---
 include/linux/time.h      | 4 ++--
 include/linux/time64.h    | 1 +
 include/uapi/linux/time.h | 7 +++++++
 kernel/time/time.c        | 4 ++--
 4 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index aed74463592d..27d83fd2ae61 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -14,9 +14,9 @@ int get_timespec64(struct timespec64 *ts,
 int put_timespec64(const struct timespec64 *ts,
 		struct __kernel_timespec __user *uts);
 int get_itimerspec64(struct itimerspec64 *it,
-			const struct itimerspec __user *uit);
+			const struct __kernel_itimerspec __user *uit);
 int put_itimerspec64(const struct itimerspec64 *it,
-			struct itimerspec __user *uit);
+			struct __kernel_itimerspec __user *uit);
 
 extern time64_t mktime64(const unsigned int year, const unsigned int mon,
 			const unsigned int day, const unsigned int hour,
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 0a7b2f79cec7..05634afba0db 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -12,6 +12,7 @@ typedef __u64 timeu64_t;
  */
 #ifndef CONFIG_64BIT_TIME
 #define __kernel_timespec timespec
+#define __kernel_itimerspec itimerspec
 #endif
 
 #include <uapi/linux/time.h>
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index fcf936656493..6b56a2208be7 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -49,6 +49,13 @@ struct __kernel_timespec {
 };
 #endif
 
+#ifndef __kernel_itimerspec
+struct __kernel_itimerspec {
+	struct __kernel_timespec it_interval;    /* timer period */
+	struct __kernel_timespec it_value;       /* timer expiration */
+};
+#endif
+
 /*
  * legacy timeval structure, only embedded in structures that
  * traditionally used 'timeval' to pass time intervals (not absolute
diff --git a/kernel/time/time.c b/kernel/time/time.c
index b1225db61eb2..c0195225fdce 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -927,7 +927,7 @@ int compat_put_timespec64(const struct timespec64 *ts, void __user *uts)
 EXPORT_SYMBOL_GPL(compat_put_timespec64);
 
 int get_itimerspec64(struct itimerspec64 *it,
-			const struct itimerspec __user *uit)
+			const struct __kernel_itimerspec __user *uit)
 {
 	int ret;
 
@@ -942,7 +942,7 @@ int get_itimerspec64(struct itimerspec64 *it,
 EXPORT_SYMBOL_GPL(get_itimerspec64);
 
 int put_itimerspec64(const struct itimerspec64 *it,
-			struct itimerspec __user *uit)
+			struct __kernel_itimerspec __user *uit)
 {
 	int ret;
 
-- 
cgit v1.2.3


From afef05cf238cfcecdc5ea9b06f31027b13ce6214 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 16 Jun 2018 22:11:43 -0700
Subject: time: Enable get/put_compat_itimerspec64 always

This will aid in enabling the compat syscalls on 32-bit architectures later
on.

Also move compat_itimerspec and related defines to compat_time.h.  The
compat_time.h file will eventually be deleted.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: viro@zeniv.linux.org.uk
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: y2038@lists.linaro.org
Link: https://lkml.kernel.org/r/20180617051144.29756-3-deepa.kernel@gmail.com
---
 include/linux/compat.h      |  9 ---------
 include/linux/compat_time.h |  9 +++++++++
 kernel/compat.c             | 29 -----------------------------
 kernel/time/time.c          | 21 +++++++++++++++++++++
 4 files changed, 30 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index b1a5562b3215..18a13f2df14b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -109,11 +109,6 @@ typedef	compat_ulong_t		compat_aio_context_t;
 struct compat_sel_arg_struct;
 struct rusage;
 
-struct compat_itimerspec {
-	struct compat_timespec it_interval;
-	struct compat_timespec it_value;
-};
-
 struct compat_utimbuf {
 	compat_time_t		actime;
 	compat_time_t		modtime;
@@ -294,10 +289,6 @@ extern int compat_get_timespec(struct timespec *, const void __user *);
 extern int compat_put_timespec(const struct timespec *, void __user *);
 extern int compat_get_timeval(struct timeval *, const void __user *);
 extern int compat_put_timeval(const struct timeval *, void __user *);
-extern int get_compat_itimerspec64(struct itimerspec64 *its,
-			const struct compat_itimerspec __user *uits);
-extern int put_compat_itimerspec64(const struct itimerspec64 *its,
-			struct compat_itimerspec __user *uits);
 
 struct compat_iovec {
 	compat_uptr_t	iov_base;
diff --git a/include/linux/compat_time.h b/include/linux/compat_time.h
index 31f2774f1994..e70bfd1d2c3f 100644
--- a/include/linux/compat_time.h
+++ b/include/linux/compat_time.h
@@ -17,7 +17,16 @@ struct compat_timeval {
 	s32		tv_usec;
 };
 
+struct compat_itimerspec {
+	struct compat_timespec it_interval;
+	struct compat_timespec it_value;
+};
+
 extern int compat_get_timespec64(struct timespec64 *, const void __user *);
 extern int compat_put_timespec64(const struct timespec64 *, void __user *);
+extern int get_compat_itimerspec64(struct itimerspec64 *its,
+			const struct compat_itimerspec __user *uits);
+extern int put_compat_itimerspec64(const struct itimerspec64 *its,
+			struct compat_itimerspec __user *uits);
 
 #endif /* _LINUX_COMPAT_TIME_H */
diff --git a/kernel/compat.c b/kernel/compat.c
index 702aa846ddac..8e40efc2928a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -324,35 +324,6 @@ COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t,  pid, unsigned int, len,
 	return ret;
 }
 
-/* Todo: Delete these extern declarations when get/put_compat_itimerspec64()
- * are moved to kernel/time/time.c .
- */
-extern int __compat_get_timespec64(struct timespec64 *ts64,
-				   const struct compat_timespec __user *cts);
-extern int __compat_put_timespec64(const struct timespec64 *ts64,
-				   struct compat_timespec __user *cts);
-
-int get_compat_itimerspec64(struct itimerspec64 *its,
-			const struct compat_itimerspec __user *uits)
-{
-
-	if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) ||
-	    __compat_get_timespec64(&its->it_value, &uits->it_value))
-		return -EFAULT;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(get_compat_itimerspec64);
-
-int put_compat_itimerspec64(const struct itimerspec64 *its,
-			struct compat_itimerspec __user *uits)
-{
-	if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) ||
-	    __compat_put_timespec64(&its->it_value, &uits->it_value))
-		return -EFAULT;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(put_compat_itimerspec64);
-
 /*
  * We currently only need the following fields from the sigevent
  * structure: sigev_value, sigev_signo, sig_notify and (sometimes
diff --git a/kernel/time/time.c b/kernel/time/time.c
index c0195225fdce..72caf051901c 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -955,3 +955,24 @@ int put_itimerspec64(const struct itimerspec64 *it,
 	return ret;
 }
 EXPORT_SYMBOL_GPL(put_itimerspec64);
+
+int get_compat_itimerspec64(struct itimerspec64 *its,
+			const struct compat_itimerspec __user *uits)
+{
+
+	if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) ||
+	    __compat_get_timespec64(&its->it_value, &uits->it_value))
+		return -EFAULT;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(get_compat_itimerspec64);
+
+int put_compat_itimerspec64(const struct itimerspec64 *its,
+			struct compat_itimerspec __user *uits)
+{
+	if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) ||
+	    __compat_put_timespec64(&its->it_value, &uits->it_value))
+		return -EFAULT;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(put_compat_itimerspec64);
-- 
cgit v1.2.3


From 6ff84735070276d72af716e21c3214ee20d60e70 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 16 Jun 2018 22:11:44 -0700
Subject: time: Change types to new y2038 safe __kernel_itimerspec

timer_set/gettime and timerfd_set/get apis use struct itimerspec at the
user interface layer.  struct itimerspec is not y2038-safe.  Change these
interfaces to use y2038-safe struct __kernel_itimerspec instead.  This will
help define new syscalls when 32bit architectures select CONFIG_64BIT_TIME.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: viro@zeniv.linux.org.uk
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: y2038@lists.linaro.org
Link: https://lkml.kernel.org/r/20180617051144.29756-4-deepa.kernel@gmail.com
---
 fs/timerfd.c               |  8 ++++----
 include/linux/syscalls.h   | 10 +++++-----
 kernel/time/posix-timers.c | 12 +++++++-----
 3 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/timerfd.c b/fs/timerfd.c
index d84a2bee4f82..8bb926253f88 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -533,8 +533,8 @@ static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
 }
 
 SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
-		const struct itimerspec __user *, utmr,
-		struct itimerspec __user *, otmr)
+		const struct __kernel_itimerspec __user *, utmr,
+		struct __kernel_itimerspec __user *, otmr)
 {
 	struct itimerspec64 new, old;
 	int ret;
@@ -550,7 +550,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	return ret;
 }
 
-SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
+SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr)
 {
 	struct itimerspec64 kotmr;
 	int ret = do_timerfd_gettime(ufd, &kotmr);
@@ -559,7 +559,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
 	return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 		const struct compat_itimerspec __user *, utmr,
 		struct compat_itimerspec __user *, otmr)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 73810808cdf2..38b9ec152024 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -501,9 +501,9 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
 /* fs/timerfd.c */
 asmlinkage long sys_timerfd_create(int clockid, int flags);
 asmlinkage long sys_timerfd_settime(int ufd, int flags,
-				    const struct itimerspec __user *utmr,
-				    struct itimerspec __user *otmr);
-asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
+				    const struct __kernel_itimerspec __user *utmr,
+				    struct __kernel_itimerspec __user *otmr);
+asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *otmr);
 
 /* fs/utimes.c */
 asmlinkage long sys_utimensat(int dfd, const char __user *filename,
@@ -568,10 +568,10 @@ asmlinkage long sys_timer_create(clockid_t which_clock,
 				 struct sigevent __user *timer_event_spec,
 				 timer_t __user * created_timer_id);
 asmlinkage long sys_timer_gettime(timer_t timer_id,
-				struct itimerspec __user *setting);
+				struct __kernel_itimerspec __user *setting);
 asmlinkage long sys_timer_getoverrun(timer_t timer_id);
 asmlinkage long sys_timer_settime(timer_t timer_id, int flags,
-				const struct itimerspec __user *new_setting,
+				const struct __kernel_itimerspec __user *new_setting,
 				struct itimerspec __user *old_setting);
 asmlinkage long sys_timer_delete(timer_t timer_id);
 asmlinkage long sys_clock_settime(clockid_t which_clock,
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index fcf90a10c43a..80d59333c76e 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -743,7 +743,7 @@ static int do_timer_gettime(timer_t timer_id,  struct itimerspec64 *setting)
 
 /* Get the time remaining on a POSIX.1b interval timer. */
 SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
-		struct itimerspec __user *, setting)
+		struct __kernel_itimerspec __user *, setting)
 {
 	struct itimerspec64 cur_setting;
 
@@ -755,7 +755,8 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 	return ret;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
+
 COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 		       struct compat_itimerspec __user *, setting)
 {
@@ -768,6 +769,7 @@ COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 	}
 	return ret;
 }
+
 #endif
 
 /*
@@ -906,8 +908,8 @@ retry:
 
 /* Set a POSIX.1b interval timer */
 SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
-		const struct itimerspec __user *, new_setting,
-		struct itimerspec __user *, old_setting)
+		const struct __kernel_itimerspec __user *, new_setting,
+		struct __kernel_itimerspec __user *, old_setting)
 {
 	struct itimerspec64 new_spec, old_spec;
 	struct itimerspec64 *rtn = old_setting ? &old_spec : NULL;
@@ -927,7 +929,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 	return error;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 		       struct compat_itimerspec __user *, new,
 		       struct compat_itimerspec __user *, old)
-- 
cgit v1.2.3


From faef87723ace12e5f685437c1e68cbecb69a7a89 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 15 Jun 2018 13:08:51 +0200
Subject: dma-noncoherent: add a arch_sync_dma_for_cpu_all hook

The MIPS bmips platform needs a global flush when transferring ownership
back to the CPU.  Add a hook for that to the dma-noncoherent
implementation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Patchwork: https://patchwork.linux-mips.org/patch/19549/
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Tom Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: iommu@lists.linux-foundation.org
Cc: linux-mips@linux-mips.org
---
 include/linux/dma-noncoherent.h | 8 ++++++++
 kernel/dma/noncoherent.c        | 8 ++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index 10b2654d549b..a0aa00cc909d 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -44,4 +44,12 @@ static inline void arch_sync_dma_for_cpu(struct device *dev,
 }
 #endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */
 
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
+void arch_sync_dma_for_cpu_all(struct device *dev);
+#else
+static inline void arch_sync_dma_for_cpu_all(struct device *dev)
+{
+}
+#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */
+
 #endif /* _LINUX_DMA_NONCOHERENT_H */
diff --git a/kernel/dma/noncoherent.c b/kernel/dma/noncoherent.c
index 79e9a757387f..031fe235d958 100644
--- a/kernel/dma/noncoherent.c
+++ b/kernel/dma/noncoherent.c
@@ -49,11 +49,13 @@ static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
 	return nents;
 }
 
-#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
 static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
 		dma_addr_t addr, size_t size, enum dma_data_direction dir)
 {
 	arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
+	arch_sync_dma_for_cpu_all(dev);
 }
 
 static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
@@ -64,6 +66,7 @@ static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
 
 	for_each_sg(sgl, sg, nents, i)
 		arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+	arch_sync_dma_for_cpu_all(dev);
 }
 
 static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
@@ -89,7 +92,8 @@ const struct dma_map_ops dma_noncoherent_ops = {
 	.sync_sg_for_device	= dma_noncoherent_sync_sg_for_device,
 	.map_page		= dma_noncoherent_map_page,
 	.map_sg			= dma_noncoherent_map_sg,
-#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
 	.sync_single_for_cpu	= dma_noncoherent_sync_single_for_cpu,
 	.sync_sg_for_cpu	= dma_noncoherent_sync_sg_for_cpu,
 	.unmap_page		= dma_noncoherent_unmap_page,
-- 
cgit v1.2.3


From 996302c5e85650722f1e5aeaeaaac12f9f362bf8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 24 Jun 2018 00:37:44 +0900
Subject: module: replace VMLINUX_SYMBOL_STR() with __stringify() or string
 literal

With the special case handling for Blackfin and Metag was removed by
commit 94e58e0ac312 ("export.h: remove code for prefixing symbols with
underscore"), VMLINUX_SYMBOL_STR() is now equivalent to __stringify().

Replace the remaining usages to prepare for the entire removal of
VMLINUX_SYMBOL_STR().

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 include/linux/module.h | 4 ++--
 kernel/module.c        | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index d44df9b2c131..f807f15bebbe 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -266,7 +266,7 @@ extern int modules_disabled; /* for sysctl */
 /* Get/put a kernel symbol (calls must be symmetric) */
 void *__symbol_get(const char *symbol);
 void *__symbol_get_gpl(const char *symbol);
-#define symbol_get(x) ((typeof(&x))(__symbol_get(VMLINUX_SYMBOL_STR(x))))
+#define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x))))
 
 /* modules using other modules: kdb wants to see this. */
 struct module_use {
@@ -575,7 +575,7 @@ extern void __noreturn __module_put_and_exit(struct module *mod,
 #ifdef CONFIG_MODULE_UNLOAD
 int module_refcount(struct module *mod);
 void __symbol_put(const char *symbol);
-#define symbol_put(x) __symbol_put(VMLINUX_SYMBOL_STR(x))
+#define symbol_put(x) __symbol_put(__stringify(x))
 void symbol_put_addr(void *addr);
 
 /* Sometimes we know we already have a refcount, and it's easier not
diff --git a/kernel/module.c b/kernel/module.c
index b6b3a3c58af1..ba45a84e4287 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1341,14 +1341,12 @@ static inline int check_modstruct_version(const struct load_info *info,
 	 * locking is necessary -- use preempt_disable() to placate lockdep.
 	 */
 	preempt_disable();
-	if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL,
-			 &crc, true, false)) {
+	if (!find_symbol("module_layout", NULL, &crc, true, false)) {
 		preempt_enable();
 		BUG();
 	}
 	preempt_enable();
-	return check_version(info, VMLINUX_SYMBOL_STR(module_layout),
-			     mod, crc);
+	return check_version(info, "module_layout", mod, crc);
 }
 
 /* First part is kernel version, which we ignore if module has crcs. */
-- 
cgit v1.2.3


From 6136f97cd2dd2b5a88d62bd73cc663a803124c09 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Thu, 21 Jun 2018 19:27:17 -0700
Subject: HID: i2c-hid: Add vddl regulator control

Some wacom w9013 devices have a vddl supply for "low valtage"
requirements. Add support in this driver to turn on this low voltage
supply. We can also drop a handful of error messages because the
regulator core is already printing an error when bulk regulators fail to
enable or disable.

Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Dmitry Torokhov <dtor@chromium.org>
Cc: Doug Anderson <dianders@chromium.org>
Acked-by: Rob Herring <robh@kernel.org>
Cc: <devicetree@vger.kernel.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 .../devicetree/bindings/input/hid-over-i2c.txt     |  3 +-
 drivers/hid/i2c-hid/i2c-hid.c                      | 48 +++++++++++-----------
 include/linux/platform_data/i2c-hid.h              |  7 ++--
 3 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/input/hid-over-i2c.txt b/Documentation/devicetree/bindings/input/hid-over-i2c.txt
index 4d3da9d91de4..89e6ab89ba38 100644
--- a/Documentation/devicetree/bindings/input/hid-over-i2c.txt
+++ b/Documentation/devicetree/bindings/input/hid-over-i2c.txt
@@ -26,7 +26,8 @@ device-specific compatible properties, which should be used in addition to the
 
 - compatible:
   * "wacom,w9013" (Wacom W9013 digitizer). Supports:
-    - vdd-supply
+    - vdd-supply (3.3V)
+    - vddl-supply (1.8V)
     - post-power-on-delay-ms
 
 - vdd-supply: phandle of the regulator that provides the supply voltage.
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index c7d6738dc524..4f532d9238fb 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -1021,21 +1021,20 @@ static int i2c_hid_probe(struct i2c_client *client,
 	/* Parse platform agnostic common properties from ACPI / device tree */
 	i2c_hid_fwnode_probe(client, &ihid->pdata);
 
-	ihid->pdata.supply = devm_regulator_get(&client->dev, "vdd");
-	if (IS_ERR(ihid->pdata.supply)) {
-		ret = PTR_ERR(ihid->pdata.supply);
-		if (ret != -EPROBE_DEFER)
-			dev_err(&client->dev, "Failed to get regulator: %d\n",
-				ret);
-		goto err;
-	}
+	ihid->pdata.supplies[0].supply = "vdd";
+	ihid->pdata.supplies[1].supply = "vddl";
+
+	ret = devm_regulator_bulk_get(&client->dev,
+				      ARRAY_SIZE(ihid->pdata.supplies),
+				      ihid->pdata.supplies);
+	if (ret)
+		return ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(ihid->pdata.supplies),
+				    ihid->pdata.supplies);
+	if (ret < 0)
+		return ret;
 
-	ret = regulator_enable(ihid->pdata.supply);
-	if (ret < 0) {
-		dev_err(&client->dev, "Failed to enable regulator: %d\n",
-			ret);
-		goto err;
-	}
 	if (ihid->pdata.post_power_delay_ms)
 		msleep(ihid->pdata.post_power_delay_ms);
 
@@ -1122,9 +1121,8 @@ err_pm:
 	pm_runtime_disable(&client->dev);
 
 err_regulator:
-	regulator_disable(ihid->pdata.supply);
-
-err:
+	regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
+			       ihid->pdata.supplies);
 	i2c_hid_free_buffers(ihid);
 	return ret;
 }
@@ -1147,7 +1145,8 @@ static int i2c_hid_remove(struct i2c_client *client)
 	if (ihid->bufsize)
 		i2c_hid_free_buffers(ihid);
 
-	regulator_disable(ihid->pdata.supply);
+	regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
+			       ihid->pdata.supplies);
 
 	return 0;
 }
@@ -1198,9 +1197,8 @@ static int i2c_hid_suspend(struct device *dev)
 			hid_warn(hid, "Failed to enable irq wake: %d\n",
 				wake_status);
 	} else {
-		ret = regulator_disable(ihid->pdata.supply);
-		if (ret < 0)
-			hid_warn(hid, "Failed to disable supply: %d\n", ret);
+		regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
+				       ihid->pdata.supplies);
 	}
 
 	return 0;
@@ -1215,9 +1213,11 @@ static int i2c_hid_resume(struct device *dev)
 	int wake_status;
 
 	if (!device_may_wakeup(&client->dev)) {
-		ret = regulator_enable(ihid->pdata.supply);
-		if (ret < 0)
-			hid_warn(hid, "Failed to enable supply: %d\n", ret);
+		ret = regulator_bulk_enable(ARRAY_SIZE(ihid->pdata.supplies),
+					    ihid->pdata.supplies);
+		if (ret)
+			hid_warn(hid, "Failed to enable supplies: %d\n", ret);
+
 		if (ihid->pdata.post_power_delay_ms)
 			msleep(ihid->pdata.post_power_delay_ms);
 	} else if (ihid->irq_wake_enabled) {
diff --git a/include/linux/platform_data/i2c-hid.h b/include/linux/platform_data/i2c-hid.h
index 1fb088239d12..c628bb5e1061 100644
--- a/include/linux/platform_data/i2c-hid.h
+++ b/include/linux/platform_data/i2c-hid.h
@@ -12,14 +12,13 @@
 #ifndef __LINUX_I2C_HID_H
 #define __LINUX_I2C_HID_H
 
+#include <linux/regulator/consumer.h>
 #include <linux/types.h>
 
-struct regulator;
-
 /**
  * struct i2chid_platform_data - used by hid over i2c implementation.
  * @hid_descriptor_address: i2c register where the HID descriptor is stored.
- * @supply: regulator for powering on the device.
+ * @supplies: regulators for powering on the device.
  * @post_power_delay_ms: delay after powering on before device is usable.
  *
  * Note that it is the responsibility of the platform driver (or the acpi 5.0
@@ -35,7 +34,7 @@ struct regulator;
  */
 struct i2c_hid_platform_data {
 	u16 hid_descriptor_address;
-	struct regulator *supply;
+	struct regulator_bulk_data supplies[2];
 	int post_power_delay_ms;
 };
 
-- 
cgit v1.2.3


From 8f732850df1b2b4d8d719f7e606dfb3050e7ea11 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Thu, 31 May 2018 13:49:29 +0200
Subject: HID: core: allow concurrent registration of drivers

Detected on the Dell XPS 9365.

The laptop has 2 devices that benefit from the hid-generic auto-unbinding.
When those 2 devices are presented to the userspace, udev loads both wacom and
hid-multitouch. When this happens, the code in __hid_bus_reprobe_drivers() is
called concurrently and the second device gets reprobed twice.

An other bug in the power_supply subsystem prevent to remove the wacom driver
if it just finished its initialization, which basically kills the wacom node.

[jkosina@suse.cz: reformat changelog a bit]
Fixes c17a7476e4c4 ("HID: core: rewrite the hid-generic automatic unbind")
Cc: stable@vger.kernel.org # v4.17
Tested-by: Mario Limonciello <mario.limonciello@dell.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 5 ++++-
 include/linux/hid.h    | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 355dc7e49562..a460ec147aee 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1949,6 +1949,8 @@ static int hid_device_probe(struct device *dev)
 	}
 	hdev->io_started = false;
 
+	clear_bit(ffs(HID_STAT_REPROBED), &hdev->status);
+
 	if (!hdev->driver) {
 		id = hid_match_device(hdev, hdrv);
 		if (id == NULL) {
@@ -2212,7 +2214,8 @@ static int __hid_bus_reprobe_drivers(struct device *dev, void *data)
 	struct hid_device *hdev = to_hid_device(dev);
 
 	if (hdev->driver == hdrv &&
-	    !hdrv->match(hdev, hid_ignore_special_drivers))
+	    !hdrv->match(hdev, hid_ignore_special_drivers) &&
+	    !test_and_set_bit(ffs(HID_STAT_REPROBED), &hdev->status))
 		return device_reprobe(dev);
 
 	return 0;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 41a3d5775394..773bcb1d4044 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -511,6 +511,7 @@ struct hid_output_fifo {
 #define HID_STAT_ADDED		BIT(0)
 #define HID_STAT_PARSED		BIT(1)
 #define HID_STAT_DUP_DETECTED	BIT(2)
+#define HID_STAT_REPROBED	BIT(3)
 
 struct hid_input {
 	struct list_head list;
@@ -579,7 +580,7 @@ struct hid_device {							/* device report descriptor */
 	bool battery_avoid_query;
 #endif
 
-	unsigned int status;						/* see STAT flags above */
+	unsigned long status;						/* see STAT flags above */
 	unsigned claimed;						/* Claimed by hidinput, hiddev? */
 	unsigned quirks;						/* Various quirks the device can pull on us */
 	bool io_started;						/* If IO has started */
-- 
cgit v1.2.3


From 379cacc5e566f7197bdeb1ea3e99219d3e880c0a Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 8 Jun 2018 16:59:37 -0400
Subject: USB: Report wakeup events on root-hub ports

When a USB device attached to a root-hub port sends a wakeup request
to a sleeping system, we do not report the wakeup event to the PM
core.  This is because a system resume involves waking up all
suspended USB ports as quickly as possible; without the normal
USB_RESUME_TIMEOUT delay, the host controller driver doesn't set the
USB_PORT_STAT_C_SUSPEND flag and so usb_port_resume() doesn't realize
that a wakeup request was received.

However, some environments (such as Chrome OS) want to have all wakeup
events reported so they can be ascribed to the appropriate device.  To
accommodate these environments, this patch adds a new routine to the
hub driver and a corresponding new HCD method to be used when a root
hub resumes.  The HCD method returns a bitmap of ports that have
initiated a wakeup signal but not yet completed resuming.  The hub
driver can then report to the PM core that the child devices attached
to these ports initiated a wakeup event.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Suggested-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c  | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/hcd.h |  1 +
 2 files changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index fcae521df29b..fef5af7aab92 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3656,12 +3656,54 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg)
 	return 0;
 }
 
+/* Report wakeup requests from the ports of a resuming root hub */
+static void report_wakeup_requests(struct usb_hub *hub)
+{
+	struct usb_device	*hdev = hub->hdev;
+	struct usb_device	*udev;
+	struct usb_hcd		*hcd;
+	unsigned long		resuming_ports;
+	int			i;
+
+	if (hdev->parent)
+		return;		/* Not a root hub */
+
+	hcd = bus_to_hcd(hdev->bus);
+	if (hcd->driver->get_resuming_ports) {
+
+		/*
+		 * The get_resuming_ports() method returns a bitmap (origin 0)
+		 * of ports which have started wakeup signaling but have not
+		 * yet finished resuming.  During system resume we will
+		 * resume all the enabled ports, regardless of any wakeup
+		 * signals, which means the wakeup requests would be lost.
+		 * To prevent this, report them to the PM core here.
+		 */
+		resuming_ports = hcd->driver->get_resuming_ports(hcd);
+		for (i = 0; i < hdev->maxchild; ++i) {
+			if (test_bit(i, &resuming_ports)) {
+				udev = hub->ports[i]->child;
+				if (udev)
+					pm_wakeup_event(&udev->dev, 0);
+			}
+		}
+	}
+}
+
 static int hub_resume(struct usb_interface *intf)
 {
 	struct usb_hub *hub = usb_get_intfdata(intf);
 
 	dev_dbg(&intf->dev, "%s\n", __func__);
 	hub_activate(hub, HUB_RESUME);
+
+	/*
+	 * This should be called only for system resume, not runtime resume.
+	 * We can't tell the difference here, so some wakeup requests will be
+	 * reported at the wrong time or more than once.  This shouldn't
+	 * matter much, so long as they do get reported.
+	 */
+	report_wakeup_requests(hub);
 	return 0;
 }
 
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 34a6ded6f319..97e2ddec18b1 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -322,6 +322,7 @@ struct hc_driver {
 	int	(*bus_suspend)(struct usb_hcd *);
 	int	(*bus_resume)(struct usb_hcd *);
 	int	(*start_port_reset)(struct usb_hcd *, unsigned port_num);
+	unsigned long	(*get_resuming_ports)(struct usb_hcd *);
 
 		/* force handover of high-speed port to full-speed companion */
 	void	(*relinquish_port)(struct usb_hcd *, int);
-- 
cgit v1.2.3


From fd55a281adf6c9b9723d369203fdd92e5ea62a09 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 27 Apr 2018 10:06:03 -0700
Subject: srcu: Introduce srcu_read_{un,}lock_notrace()

Joel Fernandes is using SRCU to protect from-idle tracepoints, which
requires notrace variants of srcu_read_lock() and srcu_read_unlock()
in order to avoid problems with tracepoints in lockdep.  This commit
therefore adds srcu_read_lock_notrace() and srcu_read_unlock_notrace().

[1] http://lkml.kernel.org/r/20180427042656.190746-1-joelaf@google.com

Reported-by: Joel Fernandes <joelaf@google.com>
Intermittently-reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 91494d7e8e41..3e72a291c401 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -195,6 +195,16 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 	return retval;
 }
 
+/* Used by tracing, cannot be traced and cannot invoke lockdep. */
+static inline notrace int
+srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp)
+{
+	int retval;
+
+	retval = __srcu_read_lock(sp);
+	return retval;
+}
+
 /**
  * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
  * @sp: srcu_struct in which to unregister the old reader.
@@ -209,6 +219,13 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__srcu_read_unlock(sp, idx);
 }
 
+/* Used by tracing, cannot be traced and cannot call lockdep. */
+static inline notrace void
+srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp)
+{
+	__srcu_read_unlock(sp, idx);
+}
+
 /**
  * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
  *
-- 
cgit v1.2.3


From 90127d605f403d814f4986436871210bf8ceb335 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 9 May 2018 10:29:18 -0700
Subject: torture: Make online/offline messages appear only for verbose=2

Some bugs reproduce quickly only at high CPU-hotplug rates, so the
rcutorture TREE03 scenario now has only 200 milliseconds spacing between
CPU-hotplug operations.  At this rate, the torture-test pair of console
messages per operation becomes a bit voluminous.  This commit therefore
converts the torture-test set of "verbose" kernel-boot arguments from
bool to int, and prints the extra console messages only when verbose=2.
The default is still verbose=1.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/torture.h      |  2 +-
 kernel/locking/locktorture.c |  2 +-
 kernel/rcu/rcuperf.c         |  2 +-
 kernel/rcu/rcutorture.c      |  2 +-
 kernel/torture.c             | 12 ++++++------
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index 66272862070b..a55e80817dae 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -79,7 +79,7 @@ void stutter_wait(const char *title);
 int torture_stutter_init(int s);
 
 /* Initialization and cleanup. */
-bool torture_init_begin(char *ttype, bool v);
+bool torture_init_begin(char *ttype, int v);
 void torture_init_end(void);
 bool torture_cleanup_begin(void);
 void torture_cleanup_end(void);
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8402b3349dca..4a2e13870a9b 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -57,7 +57,7 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
 torture_param(int, stat_interval, 60,
 	     "Number of seconds between stats printk()s");
 torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
-torture_param(bool, verbose, true,
+torture_param(int, verbose, 1,
 	     "Enable verbose debugging printk()s");
 
 static char *torture_type = "spin_lock";
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index e232846516b3..fb8094848906 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -88,7 +88,7 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
 torture_param(int, nwriters, -1, "Number of RCU updater threads");
 torture_param(bool, shutdown, !IS_ENABLED(MODULE),
 	      "Shutdown at end of performance tests.");
-torture_param(bool, verbose, true, "Enable verbose debugging printk()s");
+torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
 
 static char *perf_type = "rcu";
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 42fcb7f05fac..a5540bd831c4 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -101,7 +101,7 @@ torture_param(int, test_boost_interval, 7,
 	     "Interval between boost tests, seconds.");
 torture_param(bool, test_no_idle_hz, true,
 	     "Test support for tickless idle CPUs");
-torture_param(bool, verbose, true,
+torture_param(int, verbose, 1,
 	     "Enable verbose debugging printk()s");
 
 static char *torture_type = "rcu";
diff --git a/kernel/torture.c b/kernel/torture.c
index 3de1efbecd6a..840fd33c1cda 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -53,7 +53,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
 
 static char *torture_type;
-static bool verbose;
+static int verbose;
 
 /* Mediate rmmod and system shutdown.  Concurrent rmmod & shutdown illegal! */
 #define FULLSTOP_DONTSTOP 0	/* Normal operation. */
@@ -98,7 +98,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
 	if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
 		return false;
 
-	if (verbose)
+	if (verbose > 1)
 		pr_alert("%s" TORTURE_FLAG
 			 "torture_onoff task: offlining %d\n",
 			 torture_type, cpu);
@@ -111,7 +111,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
 				 "torture_onoff task: offline %d failed: errno %d\n",
 				 torture_type, cpu, ret);
 	} else {
-		if (verbose)
+		if (verbose > 1)
 			pr_alert("%s" TORTURE_FLAG
 				 "torture_onoff task: offlined %d\n",
 				 torture_type, cpu);
@@ -147,7 +147,7 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
 	if (cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
 		return false;
 
-	if (verbose)
+	if (verbose > 1)
 		pr_alert("%s" TORTURE_FLAG
 			 "torture_onoff task: onlining %d\n",
 			 torture_type, cpu);
@@ -160,7 +160,7 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
 				 "torture_onoff task: online %d failed: errno %d\n",
 				 torture_type, cpu, ret);
 	} else {
-		if (verbose)
+		if (verbose > 1)
 			pr_alert("%s" TORTURE_FLAG
 				 "torture_onoff task: onlined %d\n",
 				 torture_type, cpu);
@@ -647,7 +647,7 @@ static void torture_stutter_cleanup(void)
  * The runnable parameter points to a flag that controls whether or not
  * the test is currently runnable.  If there is no such flag, pass in NULL.
  */
-bool torture_init_begin(char *ttype, bool v)
+bool torture_init_begin(char *ttype, int v)
 {
 	mutex_lock(&fullstop_mutex);
 	if (torture_type != NULL) {
-- 
cgit v1.2.3


From d4546c2509b1e9cd082e3682dcec98472e37ee5a Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Sun, 24 Jun 2018 14:13:49 +0900
Subject: net: Convert GRO SKB handling to list_head.

Manage pending per-NAPI GRO packets via list_head.

Return an SKB pointer from the GRO receive handlers.  When GRO receive
handlers return non-NULL, it means that this SKB needs to be completed
at this time and removed from the NAPI queue.

Several operations are greatly simplified by this transformation,
especially timing out the oldest SKB in the list when gro_count
exceeds MAX_GRO_SKBS, and napi_gro_flush() which walks the queue
in reverse order.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c        | 11 ++++----
 drivers/net/vxlan.c         | 11 ++++----
 include/linux/etherdevice.h |  3 +-
 include/linux/netdevice.h   | 32 ++++++++++-----------
 include/linux/skbuff.h      |  3 +-
 include/linux/udp.h         |  4 +--
 include/net/inet_common.h   |  2 +-
 include/net/tcp.h           |  2 +-
 include/net/udp.h           |  4 +--
 include/net/udp_tunnel.h    |  6 ++--
 net/8021q/vlan.c            | 13 +++++----
 net/core/dev.c              | 68 +++++++++++++++++++--------------------------
 net/core/skbuff.c           |  4 +--
 net/ethernet/eth.c          | 12 ++++----
 net/ipv4/af_inet.c          | 12 ++++----
 net/ipv4/esp4_offload.c     |  4 +--
 net/ipv4/fou.c              | 20 ++++++-------
 net/ipv4/gre_offload.c      |  8 +++---
 net/ipv4/tcp_offload.c      | 14 +++++-----
 net/ipv4/udp_offload.c      | 13 +++++----
 net/ipv6/esp6_offload.c     |  4 +--
 net/ipv6/ip6_offload.c      | 16 +++++------
 net/ipv6/tcpv6_offload.c    |  4 +--
 net/ipv6/udp_offload.c      |  4 +--
 24 files changed, 133 insertions(+), 141 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 750eaa53bf0c..3e94375b9b01 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -418,11 +418,12 @@ static int geneve_hlen(struct genevehdr *gh)
 	return sizeof(*gh) + gh->opt_len * 4;
 }
 
-static struct sk_buff **geneve_gro_receive(struct sock *sk,
-					   struct sk_buff **head,
-					   struct sk_buff *skb)
+static struct sk_buff *geneve_gro_receive(struct sock *sk,
+					  struct list_head *head,
+					  struct sk_buff *skb)
 {
-	struct sk_buff *p, **pp = NULL;
+	struct sk_buff *pp = NULL;
+	struct sk_buff *p;
 	struct genevehdr *gh, *gh2;
 	unsigned int hlen, gh_len, off_gnv;
 	const struct packet_offload *ptype;
@@ -449,7 +450,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk,
 			goto out;
 	}
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index aee0e60471f1..cc14e0cd5647 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -568,11 +568,12 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
 	return vh;
 }
 
-static struct sk_buff **vxlan_gro_receive(struct sock *sk,
-					  struct sk_buff **head,
-					  struct sk_buff *skb)
+static struct sk_buff *vxlan_gro_receive(struct sock *sk,
+					 struct list_head *head,
+					 struct sk_buff *skb)
 {
-	struct sk_buff *p, **pp = NULL;
+	struct sk_buff *pp = NULL;
+	struct sk_buff *p;
 	struct vxlanhdr *vh, *vh2;
 	unsigned int hlen, off_vx;
 	int flush = 1;
@@ -607,7 +608,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk,
 
 	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 79563840c295..572e11bb8696 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -59,8 +59,7 @@ struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
 					   unsigned int rxqs);
 #define devm_alloc_etherdev(dev, sizeof_priv) devm_alloc_etherdev_mqs(dev, sizeof_priv, 1, 1)
 
-struct sk_buff **eth_gro_receive(struct sk_buff **head,
-				 struct sk_buff *skb);
+struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb);
 int eth_gro_complete(struct sk_buff *skb, int nhoff);
 
 /* Reserved Ethernet Addresses per IEEE 802.1Q */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3ec9850c7936..f176d9873910 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -322,7 +322,7 @@ struct napi_struct {
 	int			poll_owner;
 #endif
 	struct net_device	*dev;
-	struct sk_buff		*gro_list;
+	struct list_head	gro_list;
 	struct sk_buff		*skb;
 	struct hrtimer		timer;
 	struct list_head	dev_list;
@@ -2255,10 +2255,10 @@ static inline int gro_recursion_inc_test(struct sk_buff *skb)
 	return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
 }
 
-typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *);
-static inline struct sk_buff **call_gro_receive(gro_receive_t cb,
-						struct sk_buff **head,
-						struct sk_buff *skb)
+typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
+static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
+					       struct list_head *head,
+					       struct sk_buff *skb)
 {
 	if (unlikely(gro_recursion_inc_test(skb))) {
 		NAPI_GRO_CB(skb)->flush |= 1;
@@ -2268,12 +2268,12 @@ static inline struct sk_buff **call_gro_receive(gro_receive_t cb,
 	return cb(head, skb);
 }
 
-typedef struct sk_buff **(*gro_receive_sk_t)(struct sock *, struct sk_buff **,
-					     struct sk_buff *);
-static inline struct sk_buff **call_gro_receive_sk(gro_receive_sk_t cb,
-						   struct sock *sk,
-						   struct sk_buff **head,
-						   struct sk_buff *skb)
+typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
+					    struct sk_buff *);
+static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
+						  struct sock *sk,
+						  struct list_head *head,
+						  struct sk_buff *skb)
 {
 	if (unlikely(gro_recursion_inc_test(skb))) {
 		NAPI_GRO_CB(skb)->flush |= 1;
@@ -2299,8 +2299,8 @@ struct packet_type {
 struct offload_callbacks {
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
 						netdev_features_t features);
-	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
-						 struct sk_buff *skb);
+	struct sk_buff		*(*gro_receive)(struct list_head *head,
+						struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb, int nhoff);
 };
 
@@ -2568,7 +2568,7 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
 struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
-int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
 
 static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
 {
@@ -2784,13 +2784,13 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
 }
 
 #ifdef CONFIG_XFRM_OFFLOAD
-static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
 {
 	if (PTR_ERR(pp) != -EINPROGRESS)
 		NAPI_GRO_CB(skb)->flush |= flush;
 }
 #else
-static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
 {
 	NAPI_GRO_CB(skb)->flush |= flush;
 }
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c86885954994..7ccc601b55d9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -677,7 +677,8 @@ struct sk_buff {
 				int			ip_defrag_offset;
 			};
 		};
-		struct rb_node	rbnode; /* used in netem & tcp stack */
+		struct rb_node		rbnode; /* used in netem & tcp stack */
+		struct list_head	list;
 	};
 	struct sock		*sk;
 
diff --git a/include/linux/udp.h b/include/linux/udp.h
index ca840345571b..320d49d85484 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -74,8 +74,8 @@ struct udp_sock {
 	void (*encap_destroy)(struct sock *sk);
 
 	/* GRO functions for UDP socket */
-	struct sk_buff **	(*gro_receive)(struct sock *sk,
-					       struct sk_buff **head,
+	struct sk_buff *	(*gro_receive)(struct sock *sk,
+					       struct list_head *head,
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sock *sk,
 						struct sk_buff *skb,
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 384b90c62c0b..3ca969cbd161 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -43,7 +43,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 int inet_recv_error(struct sock *sk, struct msghdr *msg, int len,
 		    int *addr_len);
 
-struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb);
 int inet_gro_complete(struct sk_buff *skb, int nhoff);
 struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 				 netdev_features_t features);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 822ee49ed0f9..402a88b0e8a8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1788,7 +1788,7 @@ void tcp_v4_destroy_sock(struct sock *sk);
 
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 				netdev_features_t features);
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
 int tcp_gro_complete(struct sk_buff *skb);
 
 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
diff --git a/include/net/udp.h b/include/net/udp.h
index b1ea8b0f5e6a..5723c6128ae4 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -170,8 +170,8 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
 typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
 				     __be16 dport);
 
-struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
-				 struct udphdr *uh, udp_lookup_t lookup);
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				struct udphdr *uh, udp_lookup_t lookup);
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index b95a6927c718..fe680ab6b15a 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -65,9 +65,9 @@ static inline int udp_sock_create(struct net *net,
 
 typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
 typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
-typedef struct sk_buff **(*udp_tunnel_gro_receive_t)(struct sock *sk,
-						     struct sk_buff **head,
-						     struct sk_buff *skb);
+typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
+						    struct list_head *head,
+						    struct sk_buff *skb);
 typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb,
 					 int nhoff);
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 73a65789271b..99141986efa0 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -647,13 +647,14 @@ out:
 	return err;
 }
 
-static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *vlan_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
-	struct sk_buff *p, **pp = NULL;
-	struct vlan_hdr *vhdr;
-	unsigned int hlen, off_vlan;
 	const struct packet_offload *ptype;
+	unsigned int hlen, off_vlan;
+	struct sk_buff *pp = NULL;
+	struct vlan_hdr *vhdr;
+	struct sk_buff *p;
 	__be16 type;
 	int flush = 1;
 
@@ -675,7 +676,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
 
 	flush = 0;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		struct vlan_hdr *vhdr2;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/core/dev.c b/net/core/dev.c
index a5aa1c7444e6..aa61b9344b46 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4881,36 +4881,25 @@ out:
  */
 void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
-	struct sk_buff *skb, *prev = NULL;
-
-	/* scan list and build reverse chain */
-	for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
-		skb->prev = prev;
-		prev = skb;
-	}
-
-	for (skb = prev; skb; skb = prev) {
-		skb->next = NULL;
+	struct sk_buff *skb, *p;
 
+	list_for_each_entry_safe_reverse(skb, p, &napi->gro_list, list) {
 		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 			return;
-
-		prev = skb->prev;
+		list_del_init(&skb->list);
 		napi_gro_complete(skb);
 		napi->gro_count--;
 	}
-
-	napi->gro_list = NULL;
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
 static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 {
-	struct sk_buff *p;
 	unsigned int maclen = skb->dev->hard_header_len;
 	u32 hash = skb_get_hash_raw(skb);
+	struct sk_buff *p;
 
-	for (p = napi->gro_list; p; p = p->next) {
+	list_for_each_entry(p, &napi->gro_list, list) {
 		unsigned long diffs;
 
 		NAPI_GRO_CB(p)->flush = 0;
@@ -4977,12 +4966,12 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
 
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-	struct sk_buff **pp = NULL;
+	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &offload_base;
-	int same_flow;
+	struct sk_buff *pp = NULL;
 	enum gro_result ret;
+	int same_flow;
 	int grow;
 
 	if (netif_elide_gro(skb->dev))
@@ -5039,11 +5028,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
 
 	if (pp) {
-		struct sk_buff *nskb = *pp;
-
-		*pp = nskb->next;
-		nskb->next = NULL;
-		napi_gro_complete(nskb);
+		list_del_init(&pp->list);
+		napi_gro_complete(pp);
 		napi->gro_count--;
 	}
 
@@ -5054,15 +5040,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		goto normal;
 
 	if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
-		struct sk_buff *nskb = napi->gro_list;
+		struct sk_buff *nskb;
 
-		/* locate the end of the list to select the 'oldest' flow */
-		while (nskb->next) {
-			pp = &nskb->next;
-			nskb = *pp;
-		}
-		*pp = NULL;
-		nskb->next = NULL;
+		nskb = list_last_entry(&napi->gro_list, struct sk_buff, list);
+		list_del(&nskb->list);
 		napi_gro_complete(nskb);
 	} else {
 		napi->gro_count++;
@@ -5071,8 +5052,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	NAPI_GRO_CB(skb)->age = jiffies;
 	NAPI_GRO_CB(skb)->last = skb;
 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-	skb->next = napi->gro_list;
-	napi->gro_list = skb;
+	list_add(&skb->list, &napi->gro_list);
 	ret = GRO_HELD;
 
 pull:
@@ -5478,7 +5458,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
-	if (n->gro_list) {
+	if (!list_empty(&n->gro_list)) {
 		unsigned long timeout = 0;
 
 		if (work_done)
@@ -5687,7 +5667,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (napi->gro_list && !napi_disable_pending(napi) &&
+	if (!list_empty(&napi->gro_list) && !napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
@@ -5701,7 +5681,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
 	napi->gro_count = 0;
-	napi->gro_list = NULL;
+	INIT_LIST_HEAD(&napi->gro_list);
 	napi->skb = NULL;
 	napi->poll = poll;
 	if (weight > NAPI_POLL_WEIGHT)
@@ -5734,6 +5714,14 @@ void napi_disable(struct napi_struct *n)
 }
 EXPORT_SYMBOL(napi_disable);
 
+static void gro_list_free(struct list_head *head)
+{
+	struct sk_buff *skb, *p;
+
+	list_for_each_entry_safe(skb, p, head, list)
+		kfree_skb(skb);
+}
+
 /* Must be called in process context */
 void netif_napi_del(struct napi_struct *napi)
 {
@@ -5743,8 +5731,8 @@ void netif_napi_del(struct napi_struct *napi)
 	list_del_init(&napi->dev_list);
 	napi_free_frags(napi);
 
-	kfree_skb_list(napi->gro_list);
-	napi->gro_list = NULL;
+	gro_list_free(&napi->gro_list);
+	INIT_LIST_HEAD(&napi->gro_list);
 	napi->gro_count = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
@@ -5787,7 +5775,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
-	if (n->gro_list) {
+	if (!list_empty(&n->gro_list)) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
 		 */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c642304f178c..b1f274f22d85 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3815,14 +3815,14 @@ err:
 }
 EXPORT_SYMBOL_GPL(skb_segment);
 
-int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 {
 	struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
 	unsigned int offset = skb_gro_offset(skb);
 	unsigned int headlen = skb_headlen(skb);
 	unsigned int len = skb_gro_len(skb);
-	struct sk_buff *lp, *p = *head;
 	unsigned int delta_truesize;
+	struct sk_buff *lp;
 
 	if (unlikely(p->len + len >= 65536))
 		return -E2BIG;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index ee28440f57c5..fd8faa0dfa61 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -427,13 +427,13 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
 }
 EXPORT_SYMBOL(sysfs_format_mac);
 
-struct sk_buff **eth_gro_receive(struct sk_buff **head,
-				 struct sk_buff *skb)
+struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
-	struct sk_buff *p, **pp = NULL;
-	struct ethhdr *eh, *eh2;
-	unsigned int hlen, off_eth;
 	const struct packet_offload *ptype;
+	unsigned int hlen, off_eth;
+	struct sk_buff *pp = NULL;
+	struct ethhdr *eh, *eh2;
+	struct sk_buff *p;
 	__be16 type;
 	int flush = 1;
 
@@ -448,7 +448,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head,
 
 	flush = 0;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 15e125558c76..06b218a2870f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1384,12 +1384,12 @@ out:
 }
 EXPORT_SYMBOL(inet_gso_segment);
 
-struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
-	struct sk_buff *p;
+	struct sk_buff *pp = NULL;
 	const struct iphdr *iph;
+	struct sk_buff *p;
 	unsigned int hlen;
 	unsigned int off;
 	unsigned int id;
@@ -1425,7 +1425,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
 	id >>= 16;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		struct iphdr *iph2;
 		u16 flush_id;
 
@@ -1505,8 +1505,8 @@ out:
 }
 EXPORT_SYMBOL(inet_gro_receive);
 
-static struct sk_buff **ipip_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *ipip_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	if (NAPI_GRO_CB(skb)->encap_mark) {
 		NAPI_GRO_CB(skb)->flush = 1;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 7cf755ef9efb..bbeecd13e534 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -28,8 +28,8 @@
 #include <linux/spinlock.h>
 #include <net/udp.h>
 
-static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *esp4_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1540db65241a..efdc9e1f741e 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -224,14 +224,14 @@ drop:
 	return 0;
 }
 
-static struct sk_buff **fou_gro_receive(struct sock *sk,
-					struct sk_buff **head,
-					struct sk_buff *skb)
+static struct sk_buff *fou_gro_receive(struct sock *sk,
+				       struct list_head *head,
+				       struct sk_buff *skb)
 {
-	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
 	u8 proto = fou_from_sock(sk)->protocol;
 	const struct net_offload **offloads;
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
 
 	/* We can clear the encap_mark for FOU as we are essentially doing
 	 * one of two possible things.  We are either adding an L4 tunnel
@@ -305,13 +305,13 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 	return guehdr;
 }
 
-static struct sk_buff **gue_gro_receive(struct sock *sk,
-					struct sk_buff **head,
-					struct sk_buff *skb)
+static struct sk_buff *gue_gro_receive(struct sock *sk,
+				       struct list_head *head,
+				       struct sk_buff *skb)
 {
 	const struct net_offload **offloads;
 	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct guehdr *guehdr;
 	size_t len, optlen, hdrlen, off;
@@ -397,7 +397,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
 
 	skb_gro_pull(skb, hdrlen);
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		const struct guehdr *guehdr2;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 1859c473b21a..b9673c21be45 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -108,10 +108,10 @@ out:
 	return segs;
 }
 
-static struct sk_buff **gre_gro_receive(struct sk_buff **head,
-					struct sk_buff *skb)
+static struct sk_buff *gre_gro_receive(struct list_head *head,
+				       struct sk_buff *skb)
 {
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	const struct gre_base_hdr *greh;
 	unsigned int hlen, grehlen;
@@ -182,7 +182,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
 					     null_compute_pseudo);
 	}
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		const struct gre_base_hdr *greh2;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 8cc7c3487330..f5aee641f825 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -180,9 +180,9 @@ out:
 	return segs;
 }
 
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct tcphdr *th;
 	struct tcphdr *th2;
@@ -220,7 +220,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	len = skb_gro_len(skb);
 	flags = tcp_flag_word(th);
 
-	for (; (p = *head); head = &p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
@@ -233,7 +233,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 		goto found;
 	}
-
+	p = NULL;
 	goto out_check_final;
 
 found:
@@ -263,7 +263,7 @@ found:
 	flush |= (len - 1) >= mss;
 	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
 
-	if (flush || skb_gro_receive(head, skb)) {
+	if (flush || skb_gro_receive(p, skb)) {
 		mss = 1;
 		goto out_check_final;
 	}
@@ -277,7 +277,7 @@ out_check_final:
 					TCP_FLAG_FIN));
 
 	if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
-		pp = head;
+		pp = p;
 
 out:
 	NAPI_GRO_CB(skb)->flush |= (flush != 0);
@@ -302,7 +302,7 @@ int tcp_gro_complete(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_gro_complete);
 
-static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
 	if (!NAPI_GRO_CB(skb)->flush &&
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 92dc9e5a7ff3..ac46c1c55c99 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -343,10 +343,11 @@ out:
 	return segs;
 }
 
-struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
-				 struct udphdr *uh, udp_lookup_t lookup)
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				struct udphdr *uh, udp_lookup_t lookup)
 {
-	struct sk_buff *p, **pp = NULL;
+	struct sk_buff *pp = NULL;
+	struct sk_buff *p;
 	struct udphdr *uh2;
 	unsigned int off = skb_gro_offset(skb);
 	int flush = 1;
@@ -371,7 +372,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
 unflush:
 	flush = 0;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
@@ -399,8 +400,8 @@ out:
 }
 EXPORT_SYMBOL(udp_gro_receive);
 
-static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *udp4_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 27f59b61f70f..ddfa533a84e5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -49,8 +49,8 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
 	return 0;
 }
 
-static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *esp6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 5b3f2f89ef41..37ff4805b20c 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -163,11 +163,11 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
 	return len;
 }
 
-static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *ipv6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct ipv6hdr *iph;
 	unsigned int nlen;
@@ -214,7 +214,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 	flush--;
 	nlen = skb_network_header_len(skb);
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		const struct ipv6hdr *iph2;
 		__be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
 
@@ -263,8 +263,8 @@ out:
 	return pp;
 }
 
-static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head,
-					       struct sk_buff *skb)
+static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head,
+					      struct sk_buff *skb)
 {
 	/* Common GRO receive for SIT and IP6IP6 */
 
@@ -278,8 +278,8 @@ static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head,
 	return ipv6_gro_receive(head, skb);
 }
 
-static struct sk_buff **ip4ip6_gro_receive(struct sk_buff **head,
-					   struct sk_buff *skb)
+static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
+					  struct sk_buff *skb)
 {
 	/* Common GRO receive for SIT and IP6IP6 */
 
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 278e49cd67d4..e72947c99454 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -15,8 +15,8 @@
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *tcp6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
 	if (!NAPI_GRO_CB(skb)->flush &&
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 03a2ff3fe1e6..95dee9ca8d22 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -114,8 +114,8 @@ out:
 	return segs;
 }
 
-static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *udp6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
-- 
cgit v1.2.3


From 07d78363dcffd9cb1bf6f06a6cac0e0847f3c1de Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Sun, 24 Jun 2018 14:14:02 +0900
Subject: net: Convert NAPI gro list into a small hash table.

Improve the performance of GRO receive by splitting flows into
multiple hash chains.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   3 +-
 net/core/dev.c            | 105 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 81 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f176d9873910..c6b377a15869 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -305,6 +305,7 @@ int __init netdev_boot_setup(char *str);
 /*
  * Structure for NAPI scheduling similar to tasklet but with weighting
  */
+#define GRO_HASH_BUCKETS	8
 struct napi_struct {
 	/* The poll_list must only be managed by the entity which
 	 * changes the state of the NAPI_STATE_SCHED bit.  This means
@@ -322,7 +323,7 @@ struct napi_struct {
 	int			poll_owner;
 #endif
 	struct net_device	*dev;
-	struct list_head	gro_list;
+	struct list_head	gro_hash[GRO_HASH_BUCKETS];
 	struct sk_buff		*skb;
 	struct hrtimer		timer;
 	struct list_head	dev_list;
diff --git a/net/core/dev.c b/net/core/dev.c
index aa61b9344b46..dffed642e686 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4875,15 +4875,12 @@ out:
 	return netif_receive_skb_internal(skb);
 }
 
-/* napi->gro_list contains packets ordered by age.
- * youngest packets at the head of it.
- * Complete skbs in reverse order to reduce latencies.
- */
-void napi_gro_flush(struct napi_struct *napi, bool flush_old)
+static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *head,
+				   bool flush_old)
 {
 	struct sk_buff *skb, *p;
 
-	list_for_each_entry_safe_reverse(skb, p, &napi->gro_list, list) {
+	list_for_each_entry_safe_reverse(skb, p, head, list) {
 		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 			return;
 		list_del_init(&skb->list);
@@ -4891,15 +4888,33 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 		napi->gro_count--;
 	}
 }
+
+/* napi->gro_hash contains packets ordered by age.
+ * youngest packets at the head of it.
+ * Complete skbs in reverse order to reduce latencies.
+ */
+void napi_gro_flush(struct napi_struct *napi, bool flush_old)
+{
+	int i;
+
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		struct list_head *head = &napi->gro_hash[i];
+
+		__napi_gro_flush_chain(napi, head, flush_old);
+	}
+}
 EXPORT_SYMBOL(napi_gro_flush);
 
-static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
+static struct list_head *gro_list_prepare(struct napi_struct *napi,
+					  struct sk_buff *skb)
 {
 	unsigned int maclen = skb->dev->hard_header_len;
 	u32 hash = skb_get_hash_raw(skb);
+	struct list_head *head;
 	struct sk_buff *p;
 
-	list_for_each_entry(p, &napi->gro_list, list) {
+	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)];
+	list_for_each_entry(p, head, list) {
 		unsigned long diffs;
 
 		NAPI_GRO_CB(p)->flush = 0;
@@ -4922,6 +4937,8 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 	}
+
+	return head;
 }
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
@@ -4964,11 +4981,45 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
 	}
 }
 
+static void gro_flush_oldest(struct napi_struct *napi)
+{
+	struct sk_buff *oldest = NULL;
+	unsigned long age = jiffies;
+	int i;
+
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		struct list_head *head = &napi->gro_hash[i];
+		struct sk_buff *skb;
+
+		if (list_empty(head))
+			continue;
+
+		skb = list_last_entry(head, struct sk_buff, list);
+		if (!oldest || time_before(NAPI_GRO_CB(skb)->age, age)) {
+			oldest = skb;
+			age = NAPI_GRO_CB(skb)->age;
+		}
+	}
+
+	/* We are called with napi->gro_count >= MAX_GRO_SKBS, so this is
+	 * impossible.
+	 */
+	if (WARN_ON_ONCE(!oldest))
+		return;
+
+	/* Do not adjust napi->gro_count, caller is adding a new SKB to
+	 * the chain.
+	 */
+	list_del(&oldest->list);
+	napi_gro_complete(oldest);
+}
+
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
+	struct list_head *gro_head;
 	struct sk_buff *pp = NULL;
 	enum gro_result ret;
 	int same_flow;
@@ -4977,7 +5028,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (netif_elide_gro(skb->dev))
 		goto normal;
 
-	gro_list_prepare(napi, skb);
+	gro_head = gro_list_prepare(napi, skb);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -5011,7 +5062,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 			NAPI_GRO_CB(skb)->csum_valid = 0;
 		}
 
-		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
+		pp = ptype->callbacks.gro_receive(gro_head, skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -5040,11 +5091,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		goto normal;
 
 	if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
-		struct sk_buff *nskb;
-
-		nskb = list_last_entry(&napi->gro_list, struct sk_buff, list);
-		list_del(&nskb->list);
-		napi_gro_complete(nskb);
+		gro_flush_oldest(napi);
 	} else {
 		napi->gro_count++;
 	}
@@ -5052,7 +5099,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	NAPI_GRO_CB(skb)->age = jiffies;
 	NAPI_GRO_CB(skb)->last = skb;
 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-	list_add(&skb->list, &napi->gro_list);
+	list_add(&skb->list, gro_head);
 	ret = GRO_HELD;
 
 pull:
@@ -5458,7 +5505,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
-	if (!list_empty(&n->gro_list)) {
+	if (n->gro_count) {
 		unsigned long timeout = 0;
 
 		if (work_done)
@@ -5667,7 +5714,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (!list_empty(&napi->gro_list) && !napi_disable_pending(napi) &&
+	if (napi->gro_count && !napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
@@ -5677,11 +5724,14 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
 {
+	int i;
+
 	INIT_LIST_HEAD(&napi->poll_list);
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
 	napi->gro_count = 0;
-	INIT_LIST_HEAD(&napi->gro_list);
+	for (i = 0; i < GRO_HASH_BUCKETS; i++)
+		INIT_LIST_HEAD(&napi->gro_hash[i]);
 	napi->skb = NULL;
 	napi->poll = poll;
 	if (weight > NAPI_POLL_WEIGHT)
@@ -5714,12 +5764,16 @@ void napi_disable(struct napi_struct *n)
 }
 EXPORT_SYMBOL(napi_disable);
 
-static void gro_list_free(struct list_head *head)
+static void flush_gro_hash(struct napi_struct *napi)
 {
-	struct sk_buff *skb, *p;
+	int i;
 
-	list_for_each_entry_safe(skb, p, head, list)
-		kfree_skb(skb);
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		struct sk_buff *skb, *n;
+
+		list_for_each_entry_safe(skb, n, &napi->gro_hash[i], list)
+			kfree_skb(skb);
+	}
 }
 
 /* Must be called in process context */
@@ -5731,8 +5785,7 @@ void netif_napi_del(struct napi_struct *napi)
 	list_del_init(&napi->dev_list);
 	napi_free_frags(napi);
 
-	gro_list_free(&napi->gro_list);
-	INIT_LIST_HEAD(&napi->gro_list);
+	flush_gro_hash(napi);
 	napi->gro_count = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
@@ -5775,7 +5828,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
-	if (!list_empty(&n->gro_list)) {
+	if (n->gro_count) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
 		 */
-- 
cgit v1.2.3


From fdb5c4531c1e0e50e609df83f736b6f3a02896e2 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Tue, 19 Jun 2018 00:04:24 +0100
Subject: bpf: fix attach type BPF_LIRC_MODE2 dependency wrt CONFIG_CGROUP_BPF

If the kernel is compiled with CONFIG_CGROUP_BPF not enabled, it is not
possible to attach, detach or query IR BPF programs to /dev/lircN devices,
making them impossible to use. For embedded devices, it should be possible
to use IR decoding without cgroups or CONFIG_CGROUP_BPF enabled.

This change requires some refactoring, since bpf_prog_{attach,detach,query}
functions are now always compiled, but their code paths for cgroups need
moving out. Rather than a #ifdef CONFIG_CGROUP_BPF in kernel/bpf/syscall.c,
moving them to kernel/bpf/cgroup.c and kernel/bpf/sockmap.c does not
require #ifdefs since that is already conditionally compiled.

Fixes: f4364dcfc86d ("media: rc: introduce BPF_PROG_LIRC_MODE2")
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/media/rc/bpf-lirc.c | 14 +------
 include/linux/bpf-cgroup.h  | 26 ++++++++++++
 include/linux/bpf.h         |  8 ++++
 include/linux/bpf_lirc.h    |  5 ++-
 kernel/bpf/cgroup.c         | 54 +++++++++++++++++++++++++
 kernel/bpf/sockmap.c        | 18 +++++++++
 kernel/bpf/syscall.c        | 99 ++++++++++-----------------------------------
 7 files changed, 132 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 40826bba06b6..fcfab6635f9c 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -207,29 +207,19 @@ void lirc_bpf_free(struct rc_dev *rcdev)
 	bpf_prog_array_free(rcdev->raw->progs);
 }
 
-int lirc_prog_attach(const union bpf_attr *attr)
+int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
-	struct bpf_prog *prog;
 	struct rc_dev *rcdev;
 	int ret;
 
 	if (attr->attach_flags)
 		return -EINVAL;
 
-	prog = bpf_prog_get_type(attr->attach_bpf_fd,
-				 BPF_PROG_TYPE_LIRC_MODE2);
-	if (IS_ERR(prog))
-		return PTR_ERR(prog);
-
 	rcdev = rc_dev_get_from_fd(attr->target_fd);
-	if (IS_ERR(rcdev)) {
-		bpf_prog_put(prog);
+	if (IS_ERR(rcdev))
 		return PTR_ERR(rcdev);
-	}
 
 	ret = lirc_bpf_attach(rcdev, prog);
-	if (ret)
-		bpf_prog_put(prog);
 
 	put_device(&rcdev->dev);
 
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 975fb4cf1bb7..79795c5fa7c3 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -188,12 +188,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 									      \
 	__ret;								      \
 })
+int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype, struct bpf_prog *prog);
+int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype);
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+			  union bpf_attr __user *uattr);
 #else
 
+struct bpf_prog;
 struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 
+static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+					 enum bpf_prog_type ptype,
+					 struct bpf_prog *prog)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+					 enum bpf_prog_type ptype)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
+					union bpf_attr __user *uattr)
+{
+	return -EINVAL;
+}
+
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7df32a3200f7..8827e797ff97 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -696,6 +696,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
 struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
 int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
+int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+			struct bpf_prog *prog);
 #else
 static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
 {
@@ -714,6 +716,12 @@ static inline int sock_map_prog(struct bpf_map *map,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+				      struct bpf_prog *prog)
+{
+	return -EINVAL;
+}
 #endif
 
 #if defined(CONFIG_XDP_SOCKETS)
diff --git a/include/linux/bpf_lirc.h b/include/linux/bpf_lirc.h
index 5f8a4283092d..9d9ff755ec29 100644
--- a/include/linux/bpf_lirc.h
+++ b/include/linux/bpf_lirc.h
@@ -5,11 +5,12 @@
 #include <uapi/linux/bpf.h>
 
 #ifdef CONFIG_BPF_LIRC_MODE2
-int lirc_prog_attach(const union bpf_attr *attr);
+int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int lirc_prog_detach(const union bpf_attr *attr);
 int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
 #else
-static inline int lirc_prog_attach(const union bpf_attr *attr)
+static inline int lirc_prog_attach(const union bpf_attr *attr,
+				   struct bpf_prog *prog)
 {
 	return -EINVAL;
 }
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f7c00bd6f8e4..3d83ee7df381 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -428,6 +428,60 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	return ret;
 }
 
+int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype, struct bpf_prog *prog)
+{
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+				attr->attach_flags);
+	cgroup_put(cgrp);
+	return ret;
+}
+
+int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
+{
+	struct bpf_prog *prog;
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	if (IS_ERR(prog))
+		prog = NULL;
+
+	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
+	if (prog)
+		bpf_prog_put(prog);
+
+	cgroup_put(cgrp);
+	return ret;
+}
+
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+			  union bpf_attr __user *uattr)
+{
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->query.target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	ret = cgroup_bpf_query(cgrp, attr, uattr);
+
+	cgroup_put(cgrp);
+	return ret;
+}
+
 /**
  * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
  * @sk: The socket sending or receiving traffic
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 52a91d816c0e..81d0c55a77aa 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1915,6 +1915,24 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
 	return 0;
 }
 
+int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+			struct bpf_prog *prog)
+{
+	int ufd = attr->target_fd;
+	struct bpf_map *map;
+	struct fd f;
+	int err;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = sock_map_prog(map, prog, attr->attach_type);
+	fdput(f);
+	return err;
+}
+
 static void *sock_map_lookup(struct bpf_map *map, void *key)
 {
 	return NULL;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35dc466641f2..d10ecd78105f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1483,8 +1483,6 @@ out_free_tp:
 	return err;
 }
 
-#ifdef CONFIG_CGROUP_BPF
-
 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 					     enum bpf_attach_type attach_type)
 {
@@ -1499,40 +1497,6 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
-static int sockmap_get_from_fd(const union bpf_attr *attr,
-			       int type, bool attach)
-{
-	struct bpf_prog *prog = NULL;
-	int ufd = attr->target_fd;
-	struct bpf_map *map;
-	struct fd f;
-	int err;
-
-	f = fdget(ufd);
-	map = __bpf_map_get(f);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
-
-	if (attach) {
-		prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
-		if (IS_ERR(prog)) {
-			fdput(f);
-			return PTR_ERR(prog);
-		}
-	}
-
-	err = sock_map_prog(map, prog, attr->attach_type);
-	if (err) {
-		fdput(f);
-		if (prog)
-			bpf_prog_put(prog);
-		return err;
-	}
-
-	fdput(f);
-	return 0;
-}
-
 #define BPF_F_ATTACH_MASK \
 	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
 
@@ -1540,7 +1504,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
 	struct bpf_prog *prog;
-	struct cgroup *cgrp;
 	int ret;
 
 	if (!capable(CAP_NET_ADMIN))
@@ -1577,12 +1540,15 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
 	case BPF_SK_MSG_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true);
+		ptype = BPF_PROG_TYPE_SK_MSG;
+		break;
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
+		ptype = BPF_PROG_TYPE_SK_SKB;
+		break;
 	case BPF_LIRC_MODE2:
-		return lirc_prog_attach(attr);
+		ptype = BPF_PROG_TYPE_LIRC_MODE2;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1596,18 +1562,20 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp)) {
-		bpf_prog_put(prog);
-		return PTR_ERR(cgrp);
+	switch (ptype) {
+	case BPF_PROG_TYPE_SK_SKB:
+	case BPF_PROG_TYPE_SK_MSG:
+		ret = sockmap_get_from_fd(attr, ptype, prog);
+		break;
+	case BPF_PROG_TYPE_LIRC_MODE2:
+		ret = lirc_prog_attach(attr, prog);
+		break;
+	default:
+		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
 	}
 
-	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
-				attr->attach_flags);
 	if (ret)
 		bpf_prog_put(prog);
-	cgroup_put(cgrp);
-
 	return ret;
 }
 
@@ -1616,9 +1584,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 static int bpf_prog_detach(const union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
-	struct bpf_prog *prog;
-	struct cgroup *cgrp;
-	int ret;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1651,29 +1616,17 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
 	case BPF_SK_MSG_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false);
+		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
+		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
 	case BPF_LIRC_MODE2:
 		return lirc_prog_detach(attr);
 	default:
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
-	if (IS_ERR(prog))
-		prog = NULL;
-
-	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
-	if (prog)
-		bpf_prog_put(prog);
-	cgroup_put(cgrp);
-	return ret;
+	return cgroup_bpf_prog_detach(attr, ptype);
 }
 
 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
@@ -1681,9 +1634,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 static int bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr)
 {
-	struct cgroup *cgrp;
-	int ret;
-
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 	if (CHECK_ATTR(BPF_PROG_QUERY))
@@ -1711,14 +1661,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	default:
 		return -EINVAL;
 	}
-	cgrp = cgroup_get_from_fd(attr->query.target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-	ret = cgroup_bpf_query(cgrp, attr, uattr);
-	cgroup_put(cgrp);
-	return ret;
+
+	return cgroup_bpf_prog_query(attr, uattr);
 }
-#endif /* CONFIG_CGROUP_BPF */
 
 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
 
@@ -2365,7 +2310,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_OBJ_GET:
 		err = bpf_obj_get(&attr);
 		break;
-#ifdef CONFIG_CGROUP_BPF
 	case BPF_PROG_ATTACH:
 		err = bpf_prog_attach(&attr);
 		break;
@@ -2375,7 +2319,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_PROG_QUERY:
 		err = bpf_prog_query(&attr, uattr);
 		break;
-#endif
 	case BPF_PROG_TEST_RUN:
 		err = bpf_prog_test_run(&attr, uattr);
 		break;
-- 
cgit v1.2.3


From a8f62d0c6fe533e07cd1acce7588278f9d6e7720 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 25 Jun 2018 20:37:08 +0800
Subject: ptp: support DPAA FMan 1588 timer in ptp_qoriq

This patch is to support DPAA (Data Path Acceleration Architecture)
1588 timer by adding "fsl,fman-ptp-timer" compatible, sharing
interrupt with FMan, adding FSL_DPAA_ETH dependency, and fixing
up register offset.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Acked-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/Kconfig           |   2 +-
 drivers/ptp/ptp_qoriq.c       | 104 +++++++++++++++++++++++++++---------------
 include/linux/fsl/ptp_qoriq.h |  38 ++++++++++++---
 3 files changed, 98 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index 474c988d2e95..d137c480db46 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -43,7 +43,7 @@ config PTP_1588_CLOCK_DTE
 
 config PTP_1588_CLOCK_QORIQ
 	tristate "Freescale QorIQ 1588 timer as PTP clock"
-	depends on GIANFAR
+	depends on GIANFAR || FSL_DPAA_ETH
 	depends on PTP_1588_CLOCK
 	default y
 	help
diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index e8652c148c52..a14c317b5a38 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -39,11 +39,12 @@
 /* Caller must hold qoriq_ptp->lock. */
 static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp)
 {
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 	u64 ns;
 	u32 lo, hi;
 
-	lo = qoriq_read(&qoriq_ptp->regs->tmr_cnt_l);
-	hi = qoriq_read(&qoriq_ptp->regs->tmr_cnt_h);
+	lo = qoriq_read(&regs->ctrl_regs->tmr_cnt_l);
+	hi = qoriq_read(&regs->ctrl_regs->tmr_cnt_h);
 	ns = ((u64) hi) << 32;
 	ns |= lo;
 	return ns;
@@ -52,16 +53,18 @@ static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp)
 /* Caller must hold qoriq_ptp->lock. */
 static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns)
 {
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 	u32 hi = ns >> 32;
 	u32 lo = ns & 0xffffffff;
 
-	qoriq_write(&qoriq_ptp->regs->tmr_cnt_l, lo);
-	qoriq_write(&qoriq_ptp->regs->tmr_cnt_h, hi);
+	qoriq_write(&regs->ctrl_regs->tmr_cnt_l, lo);
+	qoriq_write(&regs->ctrl_regs->tmr_cnt_h, hi);
 }
 
 /* Caller must hold qoriq_ptp->lock. */
 static void set_alarm(struct qoriq_ptp *qoriq_ptp)
 {
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 	u64 ns;
 	u32 lo, hi;
 
@@ -70,16 +73,18 @@ static void set_alarm(struct qoriq_ptp *qoriq_ptp)
 	ns -= qoriq_ptp->tclk_period;
 	hi = ns >> 32;
 	lo = ns & 0xffffffff;
-	qoriq_write(&qoriq_ptp->regs->tmr_alarm1_l, lo);
-	qoriq_write(&qoriq_ptp->regs->tmr_alarm1_h, hi);
+	qoriq_write(&regs->alarm_regs->tmr_alarm1_l, lo);
+	qoriq_write(&regs->alarm_regs->tmr_alarm1_h, hi);
 }
 
 /* Caller must hold qoriq_ptp->lock. */
 static void set_fipers(struct qoriq_ptp *qoriq_ptp)
 {
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+
 	set_alarm(qoriq_ptp);
-	qoriq_write(&qoriq_ptp->regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
-	qoriq_write(&qoriq_ptp->regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
+	qoriq_write(&regs->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
+	qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
 }
 
 /*
@@ -89,16 +94,17 @@ static void set_fipers(struct qoriq_ptp *qoriq_ptp)
 static irqreturn_t isr(int irq, void *priv)
 {
 	struct qoriq_ptp *qoriq_ptp = priv;
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 	struct ptp_clock_event event;
 	u64 ns;
 	u32 ack = 0, lo, hi, mask, val;
 
-	val = qoriq_read(&qoriq_ptp->regs->tmr_tevent);
+	val = qoriq_read(&regs->ctrl_regs->tmr_tevent);
 
 	if (val & ETS1) {
 		ack |= ETS1;
-		hi = qoriq_read(&qoriq_ptp->regs->tmr_etts1_h);
-		lo = qoriq_read(&qoriq_ptp->regs->tmr_etts1_l);
+		hi = qoriq_read(&regs->etts_regs->tmr_etts1_h);
+		lo = qoriq_read(&regs->etts_regs->tmr_etts1_l);
 		event.type = PTP_CLOCK_EXTTS;
 		event.index = 0;
 		event.timestamp = ((u64) hi) << 32;
@@ -108,8 +114,8 @@ static irqreturn_t isr(int irq, void *priv)
 
 	if (val & ETS2) {
 		ack |= ETS2;
-		hi = qoriq_read(&qoriq_ptp->regs->tmr_etts2_h);
-		lo = qoriq_read(&qoriq_ptp->regs->tmr_etts2_l);
+		hi = qoriq_read(&regs->etts_regs->tmr_etts2_h);
+		lo = qoriq_read(&regs->etts_regs->tmr_etts2_l);
 		event.type = PTP_CLOCK_EXTTS;
 		event.index = 1;
 		event.timestamp = ((u64) hi) << 32;
@@ -130,16 +136,16 @@ static irqreturn_t isr(int irq, void *priv)
 			hi = ns >> 32;
 			lo = ns & 0xffffffff;
 			spin_lock(&qoriq_ptp->lock);
-			qoriq_write(&qoriq_ptp->regs->tmr_alarm2_l, lo);
-			qoriq_write(&qoriq_ptp->regs->tmr_alarm2_h, hi);
+			qoriq_write(&regs->alarm_regs->tmr_alarm2_l, lo);
+			qoriq_write(&regs->alarm_regs->tmr_alarm2_h, hi);
 			spin_unlock(&qoriq_ptp->lock);
 			qoriq_ptp->alarm_value = ns;
 		} else {
-			qoriq_write(&qoriq_ptp->regs->tmr_tevent, ALM2);
+			qoriq_write(&regs->ctrl_regs->tmr_tevent, ALM2);
 			spin_lock(&qoriq_ptp->lock);
-			mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
+			mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
 			mask &= ~ALM2EN;
-			qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+			qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
 			spin_unlock(&qoriq_ptp->lock);
 			qoriq_ptp->alarm_value = 0;
 			qoriq_ptp->alarm_interval = 0;
@@ -153,7 +159,7 @@ static irqreturn_t isr(int irq, void *priv)
 	}
 
 	if (ack) {
-		qoriq_write(&qoriq_ptp->regs->tmr_tevent, ack);
+		qoriq_write(&regs->ctrl_regs->tmr_tevent, ack);
 		return IRQ_HANDLED;
 	} else
 		return IRQ_NONE;
@@ -169,6 +175,7 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 	u32 tmr_add;
 	int neg_adj = 0;
 	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 
 	if (scaled_ppm < 0) {
 		neg_adj = 1;
@@ -186,7 +193,7 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 
 	tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff;
 
-	qoriq_write(&qoriq_ptp->regs->tmr_add, tmr_add);
+	qoriq_write(&regs->ctrl_regs->tmr_add, tmr_add);
 
 	return 0;
 }
@@ -250,6 +257,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 			      struct ptp_clock_request *rq, int on)
 {
 	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 	unsigned long flags;
 	u32 bit, mask;
 
@@ -266,23 +274,23 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 			return -EINVAL;
 		}
 		spin_lock_irqsave(&qoriq_ptp->lock, flags);
-		mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
+		mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
 		if (on)
 			mask |= bit;
 		else
 			mask &= ~bit;
-		qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+		qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
 		spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 		return 0;
 
 	case PTP_CLK_REQ_PPS:
 		spin_lock_irqsave(&qoriq_ptp->lock, flags);
-		mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
+		mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
 		if (on)
 			mask |= PP1EN;
 		else
 			mask &= ~PP1EN;
-		qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+		qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
 		spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 		return 0;
 
@@ -313,10 +321,12 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 {
 	struct device_node *node = dev->dev.of_node;
 	struct qoriq_ptp *qoriq_ptp;
+	struct qoriq_ptp_registers *regs;
 	struct timespec64 now;
 	int err = -ENOMEM;
 	u32 tmr_ctrl;
 	unsigned long flags;
+	void __iomem *base;
 
 	qoriq_ptp = kzalloc(sizeof(*qoriq_ptp), GFP_KERNEL);
 	if (!qoriq_ptp)
@@ -351,7 +361,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 		pr_err("irq not in device tree\n");
 		goto no_node;
 	}
-	if (request_irq(qoriq_ptp->irq, isr, 0, DRIVER, qoriq_ptp)) {
+	if (request_irq(qoriq_ptp->irq, isr, IRQF_SHARED, DRIVER, qoriq_ptp)) {
 		pr_err("request_irq failed\n");
 		goto no_node;
 	}
@@ -368,12 +378,27 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 
 	spin_lock_init(&qoriq_ptp->lock);
 
-	qoriq_ptp->regs = ioremap(qoriq_ptp->rsrc->start,
-				resource_size(qoriq_ptp->rsrc));
-	if (!qoriq_ptp->regs) {
+	base = ioremap(qoriq_ptp->rsrc->start,
+		       resource_size(qoriq_ptp->rsrc));
+	if (!base) {
 		pr_err("ioremap ptp registers failed\n");
 		goto no_ioremap;
 	}
+
+	qoriq_ptp->base = base;
+
+	if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) {
+		qoriq_ptp->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET;
+		qoriq_ptp->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET;
+		qoriq_ptp->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET;
+		qoriq_ptp->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET;
+	} else {
+		qoriq_ptp->regs.ctrl_regs = base + CTRL_REGS_OFFSET;
+		qoriq_ptp->regs.alarm_regs = base + ALARM_REGS_OFFSET;
+		qoriq_ptp->regs.fiper_regs = base + FIPER_REGS_OFFSET;
+		qoriq_ptp->regs.etts_regs = base + ETTS_REGS_OFFSET;
+	}
+
 	ktime_get_real_ts64(&now);
 	ptp_qoriq_settime(&qoriq_ptp->caps, &now);
 
@@ -383,13 +408,14 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 
 	spin_lock_irqsave(&qoriq_ptp->lock, flags);
 
-	qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   tmr_ctrl);
-	qoriq_write(&qoriq_ptp->regs->tmr_add,    qoriq_ptp->tmr_add);
-	qoriq_write(&qoriq_ptp->regs->tmr_prsc,   qoriq_ptp->tmr_prsc);
-	qoriq_write(&qoriq_ptp->regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
-	qoriq_write(&qoriq_ptp->regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
+	regs = &qoriq_ptp->regs;
+	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl);
+	qoriq_write(&regs->ctrl_regs->tmr_add,    qoriq_ptp->tmr_add);
+	qoriq_write(&regs->ctrl_regs->tmr_prsc,   qoriq_ptp->tmr_prsc);
+	qoriq_write(&regs->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
+	qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
 	set_alarm(qoriq_ptp);
-	qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
+	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
 
 	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 
@@ -405,7 +431,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 	return 0;
 
 no_clock:
-	iounmap(qoriq_ptp->regs);
+	iounmap(qoriq_ptp->base);
 no_ioremap:
 	release_resource(qoriq_ptp->rsrc);
 no_resource:
@@ -419,12 +445,13 @@ no_memory:
 static int qoriq_ptp_remove(struct platform_device *dev)
 {
 	struct qoriq_ptp *qoriq_ptp = platform_get_drvdata(dev);
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 
-	qoriq_write(&qoriq_ptp->regs->tmr_temask, 0);
-	qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   0);
+	qoriq_write(&regs->ctrl_regs->tmr_temask, 0);
+	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   0);
 
 	ptp_clock_unregister(qoriq_ptp->clock);
-	iounmap(qoriq_ptp->regs);
+	iounmap(qoriq_ptp->base);
 	release_resource(qoriq_ptp->rsrc);
 	free_irq(qoriq_ptp->irq, qoriq_ptp);
 	kfree(qoriq_ptp);
@@ -434,6 +461,7 @@ static int qoriq_ptp_remove(struct platform_device *dev)
 
 static const struct of_device_id match_table[] = {
 	{ .compatible = "fsl,etsec-ptp" },
+	{ .compatible = "fsl,fman-ptp-timer" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, match_table);
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index b462d9ea8007..dc3dac40f069 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -11,9 +11,8 @@
 
 /*
  * qoriq ptp registers
- * Generated by regen.tcl on Thu May 13 01:38:57 PM CEST 2010
  */
-struct qoriq_ptp_registers {
+struct ctrl_regs {
 	u32 tmr_ctrl;     /* Timer control register */
 	u32 tmr_tevent;   /* Timestamp event register */
 	u32 tmr_temask;   /* Timer event mask register */
@@ -28,22 +27,47 @@ struct qoriq_ptp_registers {
 	u8  res1[4];
 	u32 tmroff_h;     /* Timer offset high */
 	u32 tmroff_l;     /* Timer offset low */
-	u8  res2[8];
+};
+
+struct alarm_regs {
 	u32 tmr_alarm1_h; /* Timer alarm 1 high register */
 	u32 tmr_alarm1_l; /* Timer alarm 1 high register */
 	u32 tmr_alarm2_h; /* Timer alarm 2 high register */
 	u32 tmr_alarm2_l; /* Timer alarm 2 high register */
-	u8  res3[48];
+};
+
+struct fiper_regs {
 	u32 tmr_fiper1;   /* Timer fixed period interval */
 	u32 tmr_fiper2;   /* Timer fixed period interval */
 	u32 tmr_fiper3;   /* Timer fixed period interval */
-	u8  res4[20];
+};
+
+struct etts_regs {
 	u32 tmr_etts1_h;  /* Timestamp of general purpose external trigger */
 	u32 tmr_etts1_l;  /* Timestamp of general purpose external trigger */
 	u32 tmr_etts2_h;  /* Timestamp of general purpose external trigger */
 	u32 tmr_etts2_l;  /* Timestamp of general purpose external trigger */
 };
 
+struct qoriq_ptp_registers {
+	struct ctrl_regs __iomem *ctrl_regs;
+	struct alarm_regs __iomem *alarm_regs;
+	struct fiper_regs __iomem *fiper_regs;
+	struct etts_regs __iomem *etts_regs;
+};
+
+/* Offset definitions for the four register groups */
+#define CTRL_REGS_OFFSET	0x0
+#define ALARM_REGS_OFFSET	0x40
+#define FIPER_REGS_OFFSET	0x80
+#define ETTS_REGS_OFFSET	0xa0
+
+#define FMAN_CTRL_REGS_OFFSET	0x80
+#define FMAN_ALARM_REGS_OFFSET	0xb8
+#define FMAN_FIPER_REGS_OFFSET	0xd0
+#define FMAN_ETTS_REGS_OFFSET	0xe0
+
+
 /* Bit definitions for the TMR_CTRL register */
 #define ALM1P                 (1<<31) /* Alarm1 output polarity */
 #define ALM2P                 (1<<30) /* Alarm2 output polarity */
@@ -105,10 +129,10 @@ struct qoriq_ptp_registers {
 #define DRIVER		"ptp_qoriq"
 #define DEFAULT_CKSEL	1
 #define N_EXT_TS	2
-#define REG_SIZE	sizeof(struct qoriq_ptp_registers)
 
 struct qoriq_ptp {
-	struct qoriq_ptp_registers __iomem *regs;
+	void __iomem *base;
+	struct qoriq_ptp_registers regs;
 	spinlock_t lock; /* protects regs */
 	struct ptp_clock *clock;
 	struct ptp_clock_info caps;
-- 
cgit v1.2.3


From 5560f70cad996e7d90d1c141bcbca0df214eefc9 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 22 Jun 2018 16:08:58 +0800
Subject: hwspinlock: Add one new API to support getting a specific hwlock by
 the name

The hardware spinlock binding already supplied the 'hwlock-names' property
to match and get a specific hwlock, but did not supply one API for users
to get a specific hwlock by the hwlock name. So this patch introduces one
API to support this requirement.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/hwspinlock/hwspinlock_core.c | 29 +++++++++++++++++++++++++++++
 include/linux/hwspinlock.h           |  7 +++++++
 2 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c
index d16e6a3d38e8..bea358604bb2 100644
--- a/drivers/hwspinlock/hwspinlock_core.c
+++ b/drivers/hwspinlock/hwspinlock_core.c
@@ -367,6 +367,35 @@ out:
 }
 EXPORT_SYMBOL_GPL(of_hwspin_lock_get_id);
 
+/**
+ * of_hwspin_lock_get_id_byname() - get lock id for an specified hwlock name
+ * @np: device node from which to request the specific hwlock
+ * @name: hwlock name
+ *
+ * This function provides a means for DT users of the hwspinlock module to
+ * get the global lock id of a specific hwspinlock using the specified name of
+ * the hwspinlock device, so that it can be requested using the normal
+ * hwspin_lock_request_specific() API.
+ *
+ * Returns the global lock id number on success, -EPROBE_DEFER if the hwspinlock
+ * device is not yet registered, -EINVAL on invalid args specifier value or an
+ * appropriate error as returned from the OF parsing of the DT client node.
+ */
+int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name)
+{
+	int index;
+
+	if (!name)
+		return -EINVAL;
+
+	index = of_property_match_string(np, "hwlock-names", name);
+	if (index < 0)
+		return index;
+
+	return of_hwspin_lock_get_id(np, index);
+}
+EXPORT_SYMBOL_GPL(of_hwspin_lock_get_id_byname);
+
 static int hwspin_lock_register_single(struct hwspinlock *hwlock, int id)
 {
 	struct hwspinlock *tmp;
diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index 57537e67b468..2b6f389a3133 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -66,6 +66,7 @@ int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int,
 							unsigned long *);
 int __hwspin_trylock(struct hwspinlock *, int, unsigned long *);
 void __hwspin_unlock(struct hwspinlock *, int, unsigned long *);
+int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name);
 
 #else /* !CONFIG_HWSPINLOCK */
 
@@ -125,6 +126,12 @@ static inline int hwspin_lock_get_id(struct hwspinlock *hwlock)
 	return 0;
 }
 
+static inline
+int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_HWSPINLOCK */
 
 /**
-- 
cgit v1.2.3


From 4f1acd758b08d93306d13010c74d38dd8d9f5768 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 22 Jun 2018 16:08:59 +0800
Subject: hwspinlock: Add devm_xxx() APIs to request/free hwlock

This patch introduces some devm_xxx() APIs to help to request or free
the hwlocks, which will help to simplify the cleanup code for drivers
requesting one hwlock, ensuring that the hwlock is automatically freed
whenever the device is unbound.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/hwspinlock/hwspinlock_core.c | 110 +++++++++++++++++++++++++++++++++++
 include/linux/hwspinlock.h           |  22 +++++++
 2 files changed, 132 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c
index bea358604bb2..d542b6fbf0f2 100644
--- a/drivers/hwspinlock/hwspinlock_core.c
+++ b/drivers/hwspinlock/hwspinlock_core.c
@@ -735,6 +735,116 @@ out:
 }
 EXPORT_SYMBOL_GPL(hwspin_lock_free);
 
+static int devm_hwspin_lock_match(struct device *dev, void *res, void *data)
+{
+	struct hwspinlock **hwlock = res;
+
+	if (WARN_ON(!hwlock || !*hwlock))
+		return 0;
+
+	return *hwlock == data;
+}
+
+static void devm_hwspin_lock_release(struct device *dev, void *res)
+{
+	hwspin_lock_free(*(struct hwspinlock **)res);
+}
+
+/**
+ * devm_hwspin_lock_free() - free a specific hwspinlock for a managed device
+ * @dev: the device to free the specific hwspinlock
+ * @hwlock: the specific hwspinlock to free
+ *
+ * This function mark @hwlock as free again.
+ * Should only be called with an @hwlock that was retrieved from
+ * an earlier call to hwspin_lock_request{_specific}.
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns 0 on success, or an appropriate error code on failure
+ */
+int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock)
+{
+	int ret;
+
+	ret = devres_release(dev, devm_hwspin_lock_release,
+			     devm_hwspin_lock_match, hwlock);
+	WARN_ON(ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_hwspin_lock_free);
+
+/**
+ * devm_hwspin_lock_request() - request an hwspinlock for a managed device
+ * @dev: the device to request an hwspinlock
+ *
+ * This function should be called by users of the hwspinlock device,
+ * in order to dynamically assign them an unused hwspinlock.
+ * Usually the user of this lock will then have to communicate the lock's id
+ * to the remote core before it can be used for synchronization (to get the
+ * id of a given hwlock, use hwspin_lock_get_id()).
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns the address of the assigned hwspinlock, or NULL on error
+ */
+struct hwspinlock *devm_hwspin_lock_request(struct device *dev)
+{
+	struct hwspinlock **ptr, *hwlock;
+
+	ptr = devres_alloc(devm_hwspin_lock_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	hwlock = hwspin_lock_request();
+	if (!IS_ERR(hwlock)) {
+		*ptr = hwlock;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return hwlock;
+}
+EXPORT_SYMBOL_GPL(devm_hwspin_lock_request);
+
+/**
+ * devm_hwspin_lock_request_specific() - request for a specific hwspinlock for
+ *					 a managed device
+ * @dev: the device to request the specific hwspinlock
+ * @id: index of the specific hwspinlock that is requested
+ *
+ * This function should be called by users of the hwspinlock module,
+ * in order to assign them a specific hwspinlock.
+ * Usually early board code will be calling this function in order to
+ * reserve specific hwspinlock ids for predefined purposes.
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns the address of the assigned hwspinlock, or NULL on error
+ */
+struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev,
+						     unsigned int id)
+{
+	struct hwspinlock **ptr, *hwlock;
+
+	ptr = devres_alloc(devm_hwspin_lock_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	hwlock = hwspin_lock_request_specific(id);
+	if (!IS_ERR(hwlock)) {
+		*ptr = hwlock;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return hwlock;
+}
+EXPORT_SYMBOL_GPL(devm_hwspin_lock_request_specific);
+
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Hardware spinlock interface");
 MODULE_AUTHOR("Ohad Ben-Cohen <ohad@wizery.com>");
diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index 2b6f389a3133..dfd05938a3cb 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -67,6 +67,10 @@ int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int,
 int __hwspin_trylock(struct hwspinlock *, int, unsigned long *);
 void __hwspin_unlock(struct hwspinlock *, int, unsigned long *);
 int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name);
+int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock);
+struct hwspinlock *devm_hwspin_lock_request(struct device *dev);
+struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev,
+						     unsigned int id);
 
 #else /* !CONFIG_HWSPINLOCK */
 
@@ -132,6 +136,24 @@ int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name)
 	return 0;
 }
 
+static inline
+int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock)
+{
+	return 0;
+}
+
+static inline struct hwspinlock *devm_hwspin_lock_request(struct device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline
+struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev,
+						     unsigned int id)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 #endif /* !CONFIG_HWSPINLOCK */
 
 /**
-- 
cgit v1.2.3


From b03799b0cb35dbc39e89602b1203863e2a6e06bf Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 25 Jun 2018 16:49:06 -0500
Subject: PCI: shpchp: Separate existence of SHPC and permission to use it

The shpchp driver registers for all PCI bridge devices.  Its probe method
should fail if either (1) the bridge doesn't have an SHPC or (2) the OS
isn't allowed to use it (the platform firmware may be operating the SHPC
itself).

Separate these two tests into:

  - A new shpc_capable() that looks for the SHPC hardware and is applicable
    on all systems (ACPI and non-ACPI), and

  - A simplified acpi_get_hp_hw_control_from_firmware() that we call only
    when we already know an SHPC exists and there may be ACPI methods to
    either request permission to use it (_OSC) or transfer control to the
    OS (OSHP).

acpi_get_hp_hw_control_from_firmware() is implemented when CONFIG_ACPI=y,
but does nothing if the current platform doesn't support ACPI.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/pci/hotplug/acpi_pcihp.c  | 36 +++++++++++++++++++-----------------
 drivers/pci/hotplug/shpchp_core.c | 21 +++++++++++++++++++++
 drivers/pci/pci-acpi.c            | 19 +------------------
 include/linux/pci.h               |  1 +
 4 files changed, 42 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 5bd6c1573295..6b7c1ed58e7e 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -73,20 +73,6 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev)
 	acpi_handle chandle, handle;
 	struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
 
-	/*
-	 * Per PCI firmware specification, we should run the ACPI _OSC
-	 * method to get control of hotplug hardware before using it. If
-	 * an _OSC is missing, we look for an OSHP to do the same thing.
-	 * To handle different BIOS behavior, we look for _OSC on a root
-	 * bridge preferentially (according to PCI fw spec). Later for
-	 * OSHP within the scope of the hotplug controller and its parents,
-	 * up to the host bridge under which this controller exists.
-	 */
-	if (shpchp_is_native(pdev))
-		return 0;
-
-	/* If _OSC exists, we should not evaluate OSHP */
-
 	/*
 	 * If there's no ACPI host bridge (i.e., ACPI support is compiled
 	 * into the kernel but the hardware platform doesn't support ACPI),
@@ -97,9 +83,25 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev)
 	if (!root)
 		return 0;
 
-	if (root->osc_support_set)
-		goto no_control;
+	/*
+	 * If _OSC exists, it determines whether we're allowed to manage
+	 * the SHPC.  We executed it while enumerating the host bridge.
+	 */
+	if (root->osc_support_set) {
+		if (host->native_shpc_hotplug)
+			return 0;
+		return -ENODEV;
+	}
 
+	/*
+	 * In the absence of _OSC, we're always allowed to manage the SHPC.
+	 * However, if an OSHP method is present, we must execute it so the
+	 * firmware can transfer control to the OS, e.g., direct interrupts
+	 * to the OS instead of to the firmware.
+	 *
+	 * N.B. The PCI Firmware Spec (r3.2, sec 4.8) does not endorse
+	 * searching up the ACPI hierarchy, so the loops below are suspect.
+	 */
 	handle = ACPI_HANDLE(&pdev->dev);
 	if (!handle) {
 		/*
@@ -128,7 +130,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev)
 		if (ACPI_FAILURE(status))
 			break;
 	}
-no_control:
+
 	pci_info(pdev, "Cannot get control of SHPC hotplug\n");
 	kfree(string.pointer);
 	return -ENODEV;
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index e91be287f292..8e3c6ce12f31 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -270,11 +270,30 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
+static bool shpc_capable(struct pci_dev *bridge)
+{
+	/*
+	 * It is assumed that AMD GOLAM chips support SHPC but they do not
+	 * have SHPC capability.
+	 */
+	if (bridge->vendor == PCI_VENDOR_ID_AMD &&
+	    bridge->device == PCI_DEVICE_ID_AMD_GOLAM_7450)
+		return true;
+
+	if (pci_find_capability(bridge, PCI_CAP_ID_SHPC))
+		return true;
+
+	return false;
+}
+
 static int shpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int rc;
 	struct controller *ctrl;
 
+	if (!shpc_capable(pdev))
+		return -ENODEV;
+
 	if (acpi_get_hp_hw_control_from_firmware(pdev))
 		return -ENODEV;
 
@@ -303,6 +322,7 @@ static int shpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		goto err_cleanup_slots;
 
+	pdev->shpc_managed = 1;
 	return 0;
 
 err_cleanup_slots:
@@ -319,6 +339,7 @@ static void shpc_remove(struct pci_dev *dev)
 {
 	struct controller *ctrl = pci_get_drvdata(dev);
 
+	dev->shpc_managed = 0;
 	shpchp_remove_ctrl_files(ctrl);
 	ctrl->hpc_ops->release_ctlr(ctrl);
 	kfree(ctrl);
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 65113b6eed14..5100fd2d5a75 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -403,24 +403,7 @@ bool pciehp_is_native(struct pci_dev *bridge)
  */
 bool shpchp_is_native(struct pci_dev *bridge)
 {
-	const struct pci_host_bridge *host;
-
-	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_SHPC))
-		return false;
-
-	/*
-	 * It is assumed that AMD GOLAM chips support SHPC but they do not
-	 * have SHPC capability.
-	 */
-	if (bridge->vendor == PCI_VENDOR_ID_AMD &&
-	    bridge->device == PCI_DEVICE_ID_AMD_GOLAM_7450)
-		return true;
-
-	if (!pci_find_capability(bridge, PCI_CAP_ID_SHPC))
-		return false;
-
-	host = pci_find_host_bridge(bridge->bus);
-	return host->native_shpc_hotplug;
+	return bridge->shpc_managed;
 }
 
 /**
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..f776a1cce120 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -388,6 +388,7 @@ struct pci_dev {
 	unsigned int	is_virtfn:1;
 	unsigned int	reset_fn:1;
 	unsigned int	is_hotplug_bridge:1;
+	unsigned int	shpc_managed:1;		/* SHPC owned by shpchp */
 	unsigned int	is_thunderbolt:1;	/* Thunderbolt controller */
 	unsigned int	__aer_firmware_first_valid:1;
 	unsigned int	__aer_firmware_first:1;
-- 
cgit v1.2.3


From c102780acdbc22e5f664cfaf760eaccffbd6c385 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 22 Jun 2018 16:09:00 +0800
Subject: hwspinlock: Add devm_xxx() APIs to register/unregister one hwlock
 controller

This patch introduces devm_hwspin_lock_register() and devm_hwspin_lock_unregister()
interfaces to help to register or unregister one hardware spinlock controller, that
will help to simplify the cleanup code for hwspinlock drivers.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/hwspinlock/hwspinlock_core.c | 82 ++++++++++++++++++++++++++++++++++++
 include/linux/hwspinlock.h           |  6 +++
 2 files changed, 88 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c
index d542b6fbf0f2..ba27465c9d9b 100644
--- a/drivers/hwspinlock/hwspinlock_core.c
+++ b/drivers/hwspinlock/hwspinlock_core.c
@@ -529,6 +529,88 @@ int hwspin_lock_unregister(struct hwspinlock_device *bank)
 }
 EXPORT_SYMBOL_GPL(hwspin_lock_unregister);
 
+static void devm_hwspin_lock_unreg(struct device *dev, void *res)
+{
+	hwspin_lock_unregister(*(struct hwspinlock_device **)res);
+}
+
+static int devm_hwspin_lock_device_match(struct device *dev, void *res,
+					 void *data)
+{
+	struct hwspinlock_device **bank = res;
+
+	if (WARN_ON(!bank || !*bank))
+		return 0;
+
+	return *bank == data;
+}
+
+/**
+ * devm_hwspin_lock_unregister() - unregister an hw spinlock device for
+ *				   a managed device
+ * @dev: the backing device
+ * @bank: the hwspinlock device, which usually provides numerous hw locks
+ *
+ * This function should be called from the underlying platform-specific
+ * implementation, to unregister an existing (and unused) hwspinlock.
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns 0 on success, or an appropriate error code on failure
+ */
+int devm_hwspin_lock_unregister(struct device *dev,
+				struct hwspinlock_device *bank)
+{
+	int ret;
+
+	ret = devres_release(dev, devm_hwspin_lock_unreg,
+			     devm_hwspin_lock_device_match, bank);
+	WARN_ON(ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_hwspin_lock_unregister);
+
+/**
+ * devm_hwspin_lock_register() - register a new hw spinlock device for
+ *				 a managed device
+ * @dev: the backing device
+ * @bank: the hwspinlock device, which usually provides numerous hw locks
+ * @ops: hwspinlock handlers for this device
+ * @base_id: id of the first hardware spinlock in this bank
+ * @num_locks: number of hwspinlocks provided by this device
+ *
+ * This function should be called from the underlying platform-specific
+ * implementation, to register a new hwspinlock device instance.
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns 0 on success, or an appropriate error code on failure
+ */
+int devm_hwspin_lock_register(struct device *dev,
+			      struct hwspinlock_device *bank,
+			      const struct hwspinlock_ops *ops,
+			      int base_id, int num_locks)
+{
+	struct hwspinlock_device **ptr;
+	int ret;
+
+	ptr = devres_alloc(devm_hwspin_lock_unreg, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	ret = hwspin_lock_register(bank, dev, ops, base_id, num_locks);
+	if (!ret) {
+		*ptr = bank;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_hwspin_lock_register);
+
 /**
  * __hwspin_lock_request() - tag an hwspinlock as used and power it up
  *
diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index dfd05938a3cb..94064cce6962 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -71,6 +71,12 @@ int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock);
 struct hwspinlock *devm_hwspin_lock_request(struct device *dev);
 struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev,
 						     unsigned int id);
+int devm_hwspin_lock_unregister(struct device *dev,
+				struct hwspinlock_device *bank);
+int devm_hwspin_lock_register(struct device *dev,
+			      struct hwspinlock_device *bank,
+			      const struct hwspinlock_ops *ops,
+			      int base_id, int num_locks);
 
 #else /* !CONFIG_HWSPINLOCK */
 
-- 
cgit v1.2.3


From 2ceda54c44530b8e46363b0b12201855da5fd631 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 22 Jun 2018 16:09:01 +0800
Subject: hwspinlock: Remove redundant config

The hardware core can not be built as a module, so remove the redundant
CONFIG_HWSPINLOCK_MODULE config.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/hwspinlock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index 94064cce6962..0afe693be5f4 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -52,7 +52,7 @@ struct hwspinlock_pdata {
 	int base_id;
 };
 
-#if defined(CONFIG_HWSPINLOCK) || defined(CONFIG_HWSPINLOCK_MODULE)
+#ifdef CONFIG_HWSPINLOCK
 
 int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev,
 		const struct hwspinlock_ops *ops, int base_id, int num_locks);
-- 
cgit v1.2.3


From 618fcff3742b4c62fea24bea1f01a2f002ed4b37 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 26 Jun 2018 07:11:55 -0500
Subject: remoteproc: Rename subdev functions to start/stop

"start" and "stop" are more suitable names for how these two operations
are used, and they fit better with the upcoming introduction of two
additional operations in the struct.

Tested-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
[elder@linaro.org: minor comment edits]
Signed-off-by Alex Elder <elder@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 30 +++++++++++++++---------------
 include/linux/remoteproc.h           | 14 +++++++-------
 2 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index a9609d971f7f..5dd58e6bea88 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -774,13 +774,13 @@ static int rproc_handle_resources(struct rproc *rproc,
 	return ret;
 }
 
-static int rproc_probe_subdevices(struct rproc *rproc)
+static int rproc_start_subdevices(struct rproc *rproc)
 {
 	struct rproc_subdev *subdev;
 	int ret;
 
 	list_for_each_entry(subdev, &rproc->subdevs, node) {
-		ret = subdev->probe(subdev);
+		ret = subdev->start(subdev);
 		if (ret)
 			goto unroll_registration;
 	}
@@ -789,17 +789,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
 
 unroll_registration:
 	list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
-		subdev->remove(subdev, true);
+		subdev->stop(subdev, true);
 
 	return ret;
 }
 
-static void rproc_remove_subdevices(struct rproc *rproc, bool crashed)
+static void rproc_stop_subdevices(struct rproc *rproc, bool crashed)
 {
 	struct rproc_subdev *subdev;
 
 	list_for_each_entry_reverse(subdev, &rproc->subdevs, node)
-		subdev->remove(subdev, crashed);
+		subdev->stop(subdev, crashed);
 }
 
 /**
@@ -901,8 +901,8 @@ static int rproc_start(struct rproc *rproc, const struct firmware *fw)
 		return ret;
 	}
 
-	/* probe any subdevices for the remote processor */
-	ret = rproc_probe_subdevices(rproc);
+	/* Start any subdevices for the remote processor */
+	ret = rproc_start_subdevices(rproc);
 	if (ret) {
 		dev_err(dev, "failed to probe subdevices for %s: %d\n",
 			rproc->name, ret);
@@ -1014,8 +1014,8 @@ static int rproc_stop(struct rproc *rproc, bool crashed)
 	struct device *dev = &rproc->dev;
 	int ret;
 
-	/* remove any subdevices for the remote processor */
-	rproc_remove_subdevices(rproc, crashed);
+	/* Stop any subdevices for the remote processor */
+	rproc_stop_subdevices(rproc, crashed);
 
 	/* the installed resource table is no longer accessible */
 	rproc->table_ptr = rproc->cached_table;
@@ -1657,16 +1657,16 @@ EXPORT_SYMBOL(rproc_del);
  * rproc_add_subdev() - add a subdevice to a remoteproc
  * @rproc: rproc handle to add the subdevice to
  * @subdev: subdev handle to register
- * @probe: function to call when the rproc boots
- * @remove: function to call when the rproc shuts down
+ * @start: function to call after the rproc is started
+ * @stop: function to call before the rproc is stopped
  */
 void rproc_add_subdev(struct rproc *rproc,
 		      struct rproc_subdev *subdev,
-		      int (*probe)(struct rproc_subdev *subdev),
-		      void (*remove)(struct rproc_subdev *subdev, bool crashed))
+		      int (*start)(struct rproc_subdev *subdev),
+		      void (*stop)(struct rproc_subdev *subdev, bool crashed))
 {
-	subdev->probe = probe;
-	subdev->remove = remove;
+	subdev->start = start;
+	subdev->stop = stop;
 
 	list_add_tail(&subdev->node, &rproc->subdevs);
 }
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index dfdaede9139e..bf55bf2a5ee1 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -477,15 +477,15 @@ struct rproc {
 /**
  * struct rproc_subdev - subdevice tied to a remoteproc
  * @node: list node related to the rproc subdevs list
- * @probe: probe function, called as the rproc is started
- * @remove: remove function, called as the rproc is being stopped, the @crashed
- *	    parameter indicates if this originates from the a recovery
+ * @start: start function, called after the rproc has been started
+ * @stop: stop function, called before the rproc is stopped; the @crashed
+ *	    parameter indicates if this originates from a recovery
  */
 struct rproc_subdev {
 	struct list_head node;
 
-	int (*probe)(struct rproc_subdev *subdev);
-	void (*remove)(struct rproc_subdev *subdev, bool crashed);
+	int (*start)(struct rproc_subdev *subdev);
+	void (*stop)(struct rproc_subdev *subdev, bool crashed);
 };
 
 /* we currently support only two vrings per rvdev */
@@ -568,8 +568,8 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 
 void rproc_add_subdev(struct rproc *rproc,
 		      struct rproc_subdev *subdev,
-		      int (*probe)(struct rproc_subdev *subdev),
-		      void (*remove)(struct rproc_subdev *subdev, bool crashed));
+		      int (*start)(struct rproc_subdev *subdev),
+		      void (*stop)(struct rproc_subdev *subdev, bool crashed));
 
 void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
-- 
cgit v1.2.3


From 4902676f04acc20fe12e49f5f4916f2659c2a7bc Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 26 Jun 2018 07:11:57 -0500
Subject: remoteproc: Make client initialize ops in rproc_subdev

In preparation of adding the additional prepare and unprepare operations
make the client responsible for filling out the function pointers of the
rproc_subdev. This makes the arguments to rproc_add_subdev() more
manageable, in particular when some of the functions are left out.

Tested-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
[elder@linaro.org: added comment about assigning function pointers]
Signed-off-by Alex Elder <elder@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/qcom_common.c     | 18 ++++++++++--------
 drivers/remoteproc/qcom_sysmon.c     |  5 ++++-
 drivers/remoteproc/remoteproc_core.c | 18 +++++++-----------
 include/linux/remoteproc.h           |  5 +----
 4 files changed, 22 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
index acfc99f82fb8..4ae87c5b8793 100644
--- a/drivers/remoteproc/qcom_common.c
+++ b/drivers/remoteproc/qcom_common.c
@@ -64,7 +64,10 @@ void qcom_add_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink)
 		return;
 
 	glink->dev = dev;
-	rproc_add_subdev(rproc, &glink->subdev, glink_subdev_probe, glink_subdev_remove);
+	glink->subdev.start = glink_subdev_probe;
+	glink->subdev.stop = glink_subdev_remove;
+
+	rproc_add_subdev(rproc, &glink->subdev);
 }
 EXPORT_SYMBOL_GPL(qcom_add_glink_subdev);
 
@@ -157,7 +160,10 @@ void qcom_add_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd)
 		return;
 
 	smd->dev = dev;
-	rproc_add_subdev(rproc, &smd->subdev, smd_subdev_probe, smd_subdev_remove);
+	smd->subdev.start = smd_subdev_probe;
+	smd->subdev.stop = smd_subdev_remove;
+
+	rproc_add_subdev(rproc, &smd->subdev);
 }
 EXPORT_SYMBOL_GPL(qcom_add_smd_subdev);
 
@@ -202,11 +208,6 @@ void qcom_unregister_ssr_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
 
-static int ssr_notify_start(struct rproc_subdev *subdev)
-{
-	return  0;
-}
-
 static void ssr_notify_stop(struct rproc_subdev *subdev, bool crashed)
 {
 	struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
@@ -227,8 +228,9 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
 			 const char *ssr_name)
 {
 	ssr->name = ssr_name;
+	ssr->subdev.stop = ssr_notify_stop;
 
-	rproc_add_subdev(rproc, &ssr->subdev, ssr_notify_start, ssr_notify_stop);
+	rproc_add_subdev(rproc, &ssr->subdev);
 }
 EXPORT_SYMBOL_GPL(qcom_add_ssr_subdev);
 
diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c
index f085545d7da5..e976a602b015 100644
--- a/drivers/remoteproc/qcom_sysmon.c
+++ b/drivers/remoteproc/qcom_sysmon.c
@@ -469,7 +469,10 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
 
 	qmi_add_lookup(&sysmon->qmi, 43, 0, 0);
 
-	rproc_add_subdev(rproc, &sysmon->subdev, sysmon_start, sysmon_stop);
+	sysmon->subdev.start = sysmon_start;
+	sysmon->subdev.stop = sysmon_stop;
+
+	rproc_add_subdev(rproc, &sysmon->subdev);
 
 	sysmon->nb.notifier_call = sysmon_notify;
 	blocking_notifier_chain_register(&sysmon_notifiers, &sysmon->nb);
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 981ae6dff145..ca39fad175f2 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -399,8 +399,10 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 
 	list_add_tail(&rvdev->node, &rproc->rvdevs);
 
-	rproc_add_subdev(rproc, &rvdev->subdev,
-			 rproc_vdev_do_probe, rproc_vdev_do_remove);
+	rvdev->subdev.start = rproc_vdev_do_probe;
+	rvdev->subdev.stop = rproc_vdev_do_remove;
+
+	rproc_add_subdev(rproc, &rvdev->subdev);
 
 	return 0;
 
@@ -1663,17 +1665,11 @@ EXPORT_SYMBOL(rproc_del);
  * rproc_add_subdev() - add a subdevice to a remoteproc
  * @rproc: rproc handle to add the subdevice to
  * @subdev: subdev handle to register
- * @start: function to call after the rproc is started
- * @stop: function to call before the rproc is stopped
+ *
+ * Caller is responsible for populating optional subdevice function pointers.
  */
-void rproc_add_subdev(struct rproc *rproc,
-		      struct rproc_subdev *subdev,
-		      int (*start)(struct rproc_subdev *subdev),
-		      void (*stop)(struct rproc_subdev *subdev, bool crashed))
+void rproc_add_subdev(struct rproc *rproc, struct rproc_subdev *subdev)
 {
-	subdev->start = start;
-	subdev->stop = stop;
-
 	list_add_tail(&subdev->node, &rproc->subdevs);
 }
 EXPORT_SYMBOL(rproc_add_subdev);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index bf55bf2a5ee1..8f1426330cca 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -566,10 +566,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 	return rvdev->rproc;
 }
 
-void rproc_add_subdev(struct rproc *rproc,
-		      struct rproc_subdev *subdev,
-		      int (*start)(struct rproc_subdev *subdev),
-		      void (*stop)(struct rproc_subdev *subdev, bool crashed));
+void rproc_add_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
 void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
-- 
cgit v1.2.3


From c455daa4af3c3ee6e841fc9ed42024d6b8d50d66 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 26 Jun 2018 07:11:59 -0500
Subject: remoteproc: Introduce prepare and unprepare for subdevices

On rare occasions a subdevice might need to prepare some hardware
resources before a remote processor is booted, and clean up some
state after it has been shut down.

One such example is the IP Accelerator found in various Qualcomm
platforms, which is accessed directly from both the modem remoteproc
and the application subsystem and requires an intricate lockstep
process when bringing the modem up and down.

Tested-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
[elder@linaro.org: minor description and comment edits]
Signed-off-by Alex Elder <elder@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 56 ++++++++++++++++++++++++++++++++++--
 include/linux/remoteproc.h           |  4 +++
 2 files changed, 57 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 2ede7ae6f5bc..283b258f5e0f 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -776,6 +776,30 @@ static int rproc_handle_resources(struct rproc *rproc,
 	return ret;
 }
 
+static int rproc_prepare_subdevices(struct rproc *rproc)
+{
+	struct rproc_subdev *subdev;
+	int ret;
+
+	list_for_each_entry(subdev, &rproc->subdevs, node) {
+		if (subdev->prepare) {
+			ret = subdev->prepare(subdev);
+			if (ret)
+				goto unroll_preparation;
+		}
+	}
+
+	return 0;
+
+unroll_preparation:
+	list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node) {
+		if (subdev->unprepare)
+			subdev->unprepare(subdev);
+	}
+
+	return ret;
+}
+
 static int rproc_start_subdevices(struct rproc *rproc)
 {
 	struct rproc_subdev *subdev;
@@ -810,6 +834,16 @@ static void rproc_stop_subdevices(struct rproc *rproc, bool crashed)
 	}
 }
 
+static void rproc_unprepare_subdevices(struct rproc *rproc)
+{
+	struct rproc_subdev *subdev;
+
+	list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
+		if (subdev->unprepare)
+			subdev->unprepare(subdev);
+	}
+}
+
 /**
  * rproc_coredump_cleanup() - clean up dump_segments list
  * @rproc: the remote processor handle
@@ -902,11 +936,18 @@ static int rproc_start(struct rproc *rproc, const struct firmware *fw)
 		rproc->table_ptr = loaded_table;
 	}
 
+	ret = rproc_prepare_subdevices(rproc);
+	if (ret) {
+		dev_err(dev, "failed to prepare subdevices for %s: %d\n",
+			rproc->name, ret);
+		return ret;
+	}
+
 	/* power up the remote processor */
 	ret = rproc->ops->start(rproc);
 	if (ret) {
 		dev_err(dev, "can't start rproc %s: %d\n", rproc->name, ret);
-		return ret;
+		goto unprepare_subdevices;
 	}
 
 	/* Start any subdevices for the remote processor */
@@ -914,8 +955,7 @@ static int rproc_start(struct rproc *rproc, const struct firmware *fw)
 	if (ret) {
 		dev_err(dev, "failed to probe subdevices for %s: %d\n",
 			rproc->name, ret);
-		rproc->ops->stop(rproc);
-		return ret;
+		goto stop_rproc;
 	}
 
 	rproc->state = RPROC_RUNNING;
@@ -923,6 +963,14 @@ static int rproc_start(struct rproc *rproc, const struct firmware *fw)
 	dev_info(dev, "remote processor %s is now up\n", rproc->name);
 
 	return 0;
+
+stop_rproc:
+	rproc->ops->stop(rproc);
+
+unprepare_subdevices:
+	rproc_unprepare_subdevices(rproc);
+
+	return ret;
 }
 
 /*
@@ -1035,6 +1083,8 @@ static int rproc_stop(struct rproc *rproc, bool crashed)
 		return ret;
 	}
 
+	rproc_unprepare_subdevices(rproc);
+
 	rproc->state = RPROC_OFFLINE;
 
 	dev_info(dev, "stopped remote processor %s\n", rproc->name);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 8f1426330cca..e3c5d856b6da 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -477,15 +477,19 @@ struct rproc {
 /**
  * struct rproc_subdev - subdevice tied to a remoteproc
  * @node: list node related to the rproc subdevs list
+ * @prepare: prepare function, called before the rproc is started
  * @start: start function, called after the rproc has been started
  * @stop: stop function, called before the rproc is stopped; the @crashed
  *	    parameter indicates if this originates from a recovery
+ * @unprepare: unprepare function, called after the rproc has been stopped
  */
 struct rproc_subdev {
 	struct list_head node;
 
+	int (*prepare)(struct rproc_subdev *subdev);
 	int (*start)(struct rproc_subdev *subdev);
 	void (*stop)(struct rproc_subdev *subdev, bool crashed);
+	void (*unprepare)(struct rproc_subdev *subdev);
 };
 
 /* we currently support only two vrings per rvdev */
-- 
cgit v1.2.3


From 0efc8562491b7d36f6bbc4fbc8f3348cb6641e9c Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 31 May 2018 11:16:18 +0300
Subject: net/mlx5: E-Switch, Avoid setup attempt if not being e-switch manager

In smartnic env, the host (PF) driver might not be an e-switch
manager, hence the FW will err on driver attempts to deal with
setting/unsetting the eswitch and as a result the overall setup
of sriov will fail.

Fix that by avoiding the operation if e-switch management is not
allowed for this driver instance. While here, move to use the
correct name for the esw manager capability name.

Fixes: 81848731ff40 ('net/mlx5: E-Switch, Add SR-IOV (FDB) support')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Guy Kushnir <guyk@mellanox.com>
Reviewed-by: Eli Cohen <eli@melloanox.com>
Tested-by: Eli Cohen <eli@melloanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/fw.c      | 5 +++--
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c   | 7 ++++++-
 include/linux/mlx5/eswitch.h                      | 2 ++
 include/linux/mlx5/mlx5_ifc.h                     | 2 +-
 7 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 378ad74518ec..60236f73373c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -839,7 +839,7 @@ static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep;
 
-	if (!MLX5_CAP_GEN(priv->mdev, eswitch_flow_table))
+	if (!MLX5_ESWITCH_MANAGER(priv->mdev))
 		return false;
 
 	rep = rpriv->rep;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index f63dfbcd29fe..103fd6a0cc65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1604,7 +1604,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	if (!ESW_ALLOWED(esw))
 		return 0;
 
-	if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) ||
+	if (!MLX5_ESWITCH_MANAGER(esw->dev) ||
 	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
 		esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 49a75d31185e..f1a86cea86a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -32,6 +32,7 @@
 
 #include <linux/mutex.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
 
 #include "mlx5_core.h"
 #include "fs_core.h"
@@ -2652,7 +2653,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 			goto err;
 	}
 
-	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+	if (MLX5_ESWITCH_MANAGER(dev)) {
 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
 			err = init_fdb_root_ns(steering);
 			if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index afd9f4fa22f4..41ad24f0de2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -32,6 +32,7 @@
 
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
+#include <linux/mlx5/eswitch.h>
 #include <linux/module.h>
 #include "mlx5_core.h"
 #include "../../mlxfw/mlxfw.h"
@@ -159,13 +160,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 	}
 
 	if (MLX5_CAP_GEN(dev, vport_group_manager) &&
-	    MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+	    MLX5_ESWITCH_MANAGER(dev)) {
 		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
 		if (err)
 			return err;
 	}
 
-	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+	if (MLX5_ESWITCH_MANAGER(dev)) {
 		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
 		if (err)
 			return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 2a8b529ce6dd..a0674962f02c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -88,6 +88,9 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 		return -EBUSY;
 	}
 
+	if (!MLX5_ESWITCH_MANAGER(dev))
+		goto enable_vfs_hca;
+
 	err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
 	if (err) {
 		mlx5_core_warn(dev,
@@ -95,6 +98,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 		return err;
 	}
 
+enable_vfs_hca:
 	for (vf = 0; vf < num_vfs; vf++) {
 		err = mlx5_core_enable_hca(dev, vf + 1);
 		if (err) {
@@ -140,7 +144,8 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
 	}
 
 out:
-	mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+	if (MLX5_ESWITCH_MANAGER(dev))
+		mlx5_eswitch_disable_sriov(dev->priv.eswitch);
 
 	if (mlx5_wait_for_vf_pages(dev))
 		mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index d3c9db492b30..fab5121ffb8f 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -8,6 +8,8 @@
 
 #include <linux/mlx5/driver.h>
 
+#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
+
 enum {
 	SRIOV_NONE,
 	SRIOV_LEGACY,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 27134c4fcb76..ac281f5ec9b8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -922,7 +922,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         vnic_env_queue_counters[0x1];
 	u8         ets[0x1];
 	u8         nic_flow_table[0x1];
-	u8         eswitch_flow_table[0x1];
+	u8         eswitch_manager[0x1];
 	u8         device_memory[0x1];
 	u8         mcam_reg[0x1];
 	u8         pcam_reg[0x1];
-- 
cgit v1.2.3


From 77bc8c28ddac90287bc42c129002eb703288d550 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 18 Jun 2018 17:32:30 +0200
Subject: ARM: mvebu: convert secondary CPU clock sync to hotplug state

The current call site in boot_secondary is causing sleep in invalid context
warnings, as this part of the code is running with interrrupts disabled and
some of the calls into the clock framework might sleep on a mutex.

Convert the secondary CPU clock sync to a hotplug state, which allows to
call it from a sleepable context.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
---
 arch/arm/mach-mvebu/platsmp.c | 49 ++++++++++++++++++++-----------------------
 include/linux/cpuhotplug.h    |  1 +
 2 files changed, 24 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-mvebu/platsmp.c b/arch/arm/mach-mvebu/platsmp.c
index 4ffbbd217e82..c130497dc6cc 100644
--- a/arch/arm/mach-mvebu/platsmp.c
+++ b/arch/arm/mach-mvebu/platsmp.c
@@ -35,6 +35,8 @@
 #define AXP_BOOTROM_BASE 0xfff00000
 #define AXP_BOOTROM_SIZE 0x100000
 
+static struct clk *boot_cpu_clk;
+
 static struct clk *get_cpu_clk(int cpu)
 {
 	struct clk *cpu_clk;
@@ -48,30 +50,6 @@ static struct clk *get_cpu_clk(int cpu)
 	return cpu_clk;
 }
 
-static void set_secondary_cpu_clock(unsigned int cpu)
-{
-	int thiscpu;
-	unsigned long rate;
-	struct clk *cpu_clk;
-
-	thiscpu = get_cpu();
-
-	cpu_clk = get_cpu_clk(thiscpu);
-	if (!cpu_clk)
-		goto out;
-	clk_prepare_enable(cpu_clk);
-	rate = clk_get_rate(cpu_clk);
-
-	cpu_clk = get_cpu_clk(cpu);
-	if (!cpu_clk)
-		goto out;
-	clk_set_rate(cpu_clk, rate);
-	clk_prepare_enable(cpu_clk);
-
-out:
-	put_cpu();
-}
-
 static int armada_xp_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
 	int ret, hw_cpu;
@@ -79,7 +57,6 @@ static int armada_xp_boot_secondary(unsigned int cpu, struct task_struct *idle)
 	pr_info("Booting CPU %d\n", cpu);
 
 	hw_cpu = cpu_logical_map(cpu);
-	set_secondary_cpu_clock(hw_cpu);
 	mvebu_pmsu_set_cpu_boot_addr(hw_cpu, armada_xp_secondary_startup);
 
 	/*
@@ -122,6 +99,19 @@ static void __init armada_xp_smp_init_cpus(void)
 		panic("Invalid number of CPUs in DT\n");
 }
 
+static int armada_xp_sync_secondary_clk(unsigned int cpu)
+{
+	struct clk *cpu_clk = get_cpu_clk(cpu);
+
+	if (!cpu_clk || !boot_cpu_clk)
+		return 0;
+
+	clk_prepare_enable(cpu_clk);
+	clk_set_rate(cpu_clk, clk_get_rate(boot_cpu_clk));
+
+	return 0;
+}
+
 static void __init armada_xp_smp_prepare_cpus(unsigned int max_cpus)
 {
 	struct device_node *node;
@@ -131,6 +121,14 @@ static void __init armada_xp_smp_prepare_cpus(unsigned int max_cpus)
 	flush_cache_all();
 	set_cpu_coherent();
 
+	boot_cpu_clk = get_cpu_clk(smp_processor_id());
+	if (boot_cpu_clk) {
+		clk_prepare_enable(boot_cpu_clk);
+		cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS,
+					  "arm/mvebu/sync_clocks:online",
+					  armada_xp_sync_secondary_clk, NULL);
+	}
+
 	/*
 	 * In order to boot the secondary CPUs we need to ensure
 	 * the bootROM is mapped at the correct address.
@@ -223,7 +221,6 @@ static int mv98dx3236_boot_secondary(unsigned int cpu, struct task_struct *idle)
 	int ret, hw_cpu;
 
 	hw_cpu = cpu_logical_map(cpu);
-	set_secondary_cpu_clock(hw_cpu);
 	mv98dx3236_resume_set_cpu_boot_addr(hw_cpu,
 					    armada_xp_secondary_startup);
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8796ba387152..fa54e5fbea38 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -143,6 +143,7 @@ enum cpuhp_state {
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_X86_VDSO_VMA_ONLINE,
 	CPUHP_AP_IRQ_AFFINITY_ONLINE,
+	CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS,
 	CPUHP_AP_PERF_ONLINE,
 	CPUHP_AP_PERF_X86_ONLINE,
 	CPUHP_AP_PERF_X86_UNCORE_ONLINE,
-- 
cgit v1.2.3


From 9b6e543450dc03635899ba56b1c39cc593694560 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 23 Jun 2018 17:54:47 +0300
Subject: fsnotify: use typedef fsnotify_connp_t for brevity

The object marks manipulation functions fsnotify_destroy_marks()
fsnotify_find_mark() and their helpers take an argument of type
struct fsnotify_mark_connector __rcu ** to dereference the connector
pointer. use a typedef to describe this type for brevity.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fsnotify.h             |  4 ++--
 fs/notify/mark.c                 | 20 +++++++++-----------
 include/linux/fsnotify_backend.h |  7 ++++---
 3 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 34515d2c4ba3..94cedf8264ba 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -19,8 +19,8 @@ extern struct srcu_struct fsnotify_mark_srcu;
 extern int fsnotify_compare_groups(struct fsnotify_group *a,
 				   struct fsnotify_group *b);
 
-/* Destroy all marks connected via given connector */
-extern void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp);
+/* Destroy all marks attached to an object via connector */
+extern void fsnotify_destroy_marks(fsnotify_connp_t *connp);
 /* run the list of all marks associated with inode and destroy them */
 static inline void fsnotify_clear_marks_by_inode(struct inode *inode)
 {
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 61f4c5fa34c7..7b595acd8ec9 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -436,10 +436,9 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
 	return -1;
 }
 
-static int fsnotify_attach_connector_to_object(
-				struct fsnotify_mark_connector __rcu **connp,
-				struct inode *inode,
-				struct vfsmount *mnt)
+static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
+					       struct inode *inode,
+					       struct vfsmount *mnt)
 {
 	struct fsnotify_mark_connector *conn;
 
@@ -476,7 +475,7 @@ static int fsnotify_attach_connector_to_object(
  * they are sure list cannot go away under them.
  */
 static struct fsnotify_mark_connector *fsnotify_grab_connector(
-				struct fsnotify_mark_connector __rcu **connp)
+						fsnotify_connp_t *connp)
 {
 	struct fsnotify_mark_connector *conn;
 	int idx;
@@ -508,7 +507,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
 {
 	struct fsnotify_mark *lmark, *last = NULL;
 	struct fsnotify_mark_connector *conn;
-	struct fsnotify_mark_connector __rcu **connp;
+	fsnotify_connp_t *connp;
 	int cmp;
 	int err = 0;
 
@@ -629,9 +628,8 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
  * Given a list of marks, find the mark associated with given group. If found
  * take a reference to that mark and return it, else return NULL.
  */
-struct fsnotify_mark *fsnotify_find_mark(
-				struct fsnotify_mark_connector __rcu **connp,
-				struct fsnotify_group *group)
+struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
+					 struct fsnotify_group *group)
 {
 	struct fsnotify_mark_connector *conn;
 	struct fsnotify_mark *mark;
@@ -697,8 +695,8 @@ clear:
 	}
 }
 
-/* Destroy all marks attached to inode / vfsmount */
-void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp)
+/* Destroy all marks attached to an object via connector */
+void fsnotify_destroy_marks(fsnotify_connp_t *connp)
 {
 	struct fsnotify_mark_connector *conn;
 	struct fsnotify_mark *mark, *old_mark = NULL;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b38964a7a521..e7cd7fc6e3cf 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -268,6 +268,8 @@ struct fsnotify_mark_connector {
 	struct hlist_head list;
 };
 
+typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
+
 /*
  * A mark is simply an object attached to an in core inode which allows an
  * fsnotify listener to indicate they are either no longer interested in events
@@ -394,9 +396,8 @@ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn);
 extern void fsnotify_init_mark(struct fsnotify_mark *mark,
 			       struct fsnotify_group *group);
 /* Find mark belonging to given group in the list of marks */
-extern struct fsnotify_mark *fsnotify_find_mark(
-				struct fsnotify_mark_connector __rcu **connp,
-				struct fsnotify_group *group);
+extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
+						struct fsnotify_group *group);
 /* attach the mark to the inode or vfsmount */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
 			     struct vfsmount *mnt, int allow_dups);
-- 
cgit v1.2.3


From b812a9f5896379b6cff2ac168ddb5b89037d8e78 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 23 Jun 2018 17:54:48 +0300
Subject: fsnotify: pass connp and object type to fsnotify_add_mark()

Instead of passing inode and vfsmount arguments to fsnotify_add_mark()
and its _locked variant, pass an abstract object pointer and the object
type.

The helpers fsnotify_obj_{inode,mount} are added to get the concrete
object pointer from abstract object pointer.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify_user.c | 19 +++++++++--------
 fs/notify/fsnotify.h               | 10 +++++++++
 fs/notify/mark.c                   | 42 +++++++++++++++-----------------------
 include/linux/fsnotify_backend.h   | 20 ++++++++++++------
 4 files changed, 52 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index ec4d8c59d0e3..81212b251189 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -615,8 +615,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
 }
 
 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
-						   struct inode *inode,
-						   struct vfsmount *mnt)
+						   fsnotify_connp_t *connp,
+						   unsigned int type)
 {
 	struct fsnotify_mark *mark;
 	int ret;
@@ -629,7 +629,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
 		return ERR_PTR(-ENOMEM);
 
 	fsnotify_init_mark(mark, group);
-	ret = fsnotify_add_mark_locked(mark, inode, mnt, 0);
+	ret = fsnotify_add_mark_locked(mark, connp, type, 0);
 	if (ret) {
 		fsnotify_put_mark(mark);
 		return ERR_PTR(ret);
@@ -643,14 +643,15 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 				      struct vfsmount *mnt, __u32 mask,
 				      unsigned int flags)
 {
+	fsnotify_connp_t *connp = &real_mount(mnt)->mnt_fsnotify_marks;
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
 
 	mutex_lock(&group->mark_mutex);
-	fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks,
-				      group);
+	fsn_mark = fsnotify_find_mark(connp, group);
 	if (!fsn_mark) {
-		fsn_mark = fanotify_add_new_mark(group, NULL, mnt);
+		fsn_mark = fanotify_add_new_mark(group, connp,
+						 FSNOTIFY_OBJ_TYPE_VFSMOUNT);
 		if (IS_ERR(fsn_mark)) {
 			mutex_unlock(&group->mark_mutex);
 			return PTR_ERR(fsn_mark);
@@ -669,6 +670,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 				   struct inode *inode, __u32 mask,
 				   unsigned int flags)
 {
+	fsnotify_connp_t *connp = &inode->i_fsnotify_marks;
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
 
@@ -685,9 +687,10 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 		return 0;
 
 	mutex_lock(&group->mark_mutex);
-	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
+	fsn_mark = fsnotify_find_mark(connp, group);
 	if (!fsn_mark) {
-		fsn_mark = fanotify_add_new_mark(group, inode, NULL);
+		fsn_mark = fanotify_add_new_mark(group, connp,
+						 FSNOTIFY_OBJ_TYPE_INODE);
 		if (IS_ERR(fsn_mark)) {
 			mutex_unlock(&group->mark_mutex);
 			return PTR_ERR(fsn_mark);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 94cedf8264ba..caeee042d1cc 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -9,6 +9,16 @@
 
 #include "../mount.h"
 
+static inline struct inode *fsnotify_obj_inode(fsnotify_connp_t *connp)
+{
+	return container_of(connp, struct inode, i_fsnotify_marks);
+}
+
+static inline struct mount *fsnotify_obj_mount(fsnotify_connp_t *connp)
+{
+	return container_of(connp, struct mount, mnt_fsnotify_marks);
+}
+
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 7b595acd8ec9..7abb73b5beba 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -437,9 +437,9 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
 }
 
 static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
-					       struct inode *inode,
-					       struct vfsmount *mnt)
+					       unsigned int type)
 {
+	struct inode *inode = NULL;
 	struct fsnotify_mark_connector *conn;
 
 	conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
@@ -447,13 +447,11 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 		return -ENOMEM;
 	spin_lock_init(&conn->lock);
 	INIT_HLIST_HEAD(&conn->list);
-	if (inode) {
-		conn->type = FSNOTIFY_OBJ_TYPE_INODE;
-		conn->inode = igrab(inode);
-	} else {
-		conn->type = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
-		conn->mnt = mnt;
-	}
+	conn->type = type;
+	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
+		inode = conn->inode = igrab(fsnotify_obj_inode(connp));
+	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
+		conn->mnt = &fsnotify_obj_mount(connp)->mnt;
 	/*
 	 * cmpxchg() provides the barrier so that readers of *connp can see
 	 * only initialized structure
@@ -502,27 +500,22 @@ out:
  * priority, highest number first, and then by the group's location in memory.
  */
 static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
-				  struct inode *inode, struct vfsmount *mnt,
+				  fsnotify_connp_t *connp, unsigned int type,
 				  int allow_dups)
 {
 	struct fsnotify_mark *lmark, *last = NULL;
 	struct fsnotify_mark_connector *conn;
-	fsnotify_connp_t *connp;
 	int cmp;
 	int err = 0;
 
-	if (WARN_ON(!inode && !mnt))
+	if (WARN_ON(!fsnotify_valid_obj_type(type)))
 		return -EINVAL;
-	if (inode)
-		connp = &inode->i_fsnotify_marks;
-	else
-		connp = &real_mount(mnt)->mnt_fsnotify_marks;
 restart:
 	spin_lock(&mark->lock);
 	conn = fsnotify_grab_connector(connp);
 	if (!conn) {
 		spin_unlock(&mark->lock);
-		err = fsnotify_attach_connector_to_object(connp, inode, mnt);
+		err = fsnotify_attach_connector_to_object(connp, type);
 		if (err)
 			return err;
 		goto restart;
@@ -568,14 +561,13 @@ out_err:
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group.
  */
-int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode,
-			     struct vfsmount *mnt, int allow_dups)
+int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+			     fsnotify_connp_t *connp, unsigned int type,
+			     int allow_dups)
 {
 	struct fsnotify_group *group = mark->group;
 	int ret = 0;
 
-	BUG_ON(inode && mnt);
-	BUG_ON(!inode && !mnt);
 	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 
 	/*
@@ -592,7 +584,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode,
 	fsnotify_get_mark(mark); /* for g_list */
 	spin_unlock(&mark->lock);
 
-	ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups);
+	ret = fsnotify_add_mark_list(mark, connp, type, allow_dups);
 	if (ret)
 		goto err;
 
@@ -612,14 +604,14 @@ err:
 	return ret;
 }
 
-int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
-		      struct vfsmount *mnt, int allow_dups)
+int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
+		      unsigned int type, int allow_dups)
 {
 	int ret;
 	struct fsnotify_group *group = mark->group;
 
 	mutex_lock(&group->mark_mutex);
-	ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups);
+	ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups);
 	mutex_unlock(&group->mark_mutex);
 	return ret;
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index e7cd7fc6e3cf..8efb8663453d 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -210,6 +210,11 @@ enum fsnotify_obj_type {
 #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL	(1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT)
 #define FSNOTIFY_OBJ_ALL_TYPES_MASK	((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1)
 
+static inline bool fsnotify_valid_obj_type(unsigned int type)
+{
+	return (type < FSNOTIFY_OBJ_TYPE_COUNT);
+}
+
 struct fsnotify_iter_info {
 	struct fsnotify_mark *marks[FSNOTIFY_OBJ_TYPE_COUNT];
 	unsigned int report_mask;
@@ -398,24 +403,27 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark,
 /* Find mark belonging to given group in the list of marks */
 extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
 						struct fsnotify_group *group);
-/* attach the mark to the inode or vfsmount */
-extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
-			     struct vfsmount *mnt, int allow_dups);
+/* attach the mark to the object */
+extern int fsnotify_add_mark(struct fsnotify_mark *mark,
+			     fsnotify_connp_t *connp, unsigned int type,
+			     int allow_dups);
 extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
-				    struct inode *inode, struct vfsmount *mnt,
+				    fsnotify_connp_t *connp, unsigned int type,
 				    int allow_dups);
 /* attach the mark to the inode */
 static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 					  struct inode *inode,
 					  int allow_dups)
 {
-	return fsnotify_add_mark(mark, inode, NULL, allow_dups);
+	return fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
+				 FSNOTIFY_OBJ_TYPE_INODE, allow_dups);
 }
 static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark,
 						 struct inode *inode,
 						 int allow_dups)
 {
-	return fsnotify_add_mark_locked(mark, inode, NULL, allow_dups);
+	return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks,
+					FSNOTIFY_OBJ_TYPE_INODE, allow_dups);
 }
 /* given a group and a mark, flag mark to be freed when all references are dropped */
 extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
-- 
cgit v1.2.3


From 36f10f55ff1d2867bfc48ed898a9cc0dc6b49dd2 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 23 Jun 2018 17:54:49 +0300
Subject: fsnotify: let connector point to an abstract object

Make the code to attach/detach a connector to object more generic
by letting the fsnotify connector point to an abstract fsnotify_connp_t.
Code that needs to dereference an inode or mount object now uses the
helpers fsnotify_conn_{inode,mount}.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fdinfo.c               |  8 ++++----
 fs/notify/fsnotify.h             | 10 ++++++----
 fs/notify/mark.c                 | 32 ++++++++++++++++----------------
 include/linux/fsnotify_backend.h | 15 ++++++++++-----
 kernel/audit_tree.c              | 17 +++++++++--------
 5 files changed, 45 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 10aac1942c9f..86fcf5814279 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -15,7 +15,7 @@
 #include <linux/exportfs.h>
 
 #include "inotify/inotify.h"
-#include "../fs/mount.h"
+#include "fsnotify.h"
 
 #if defined(CONFIG_PROC_FS)
 
@@ -81,7 +81,7 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 		return;
 
 	inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
-	inode = igrab(mark->connector->inode);
+	inode = igrab(fsnotify_conn_inode(mark->connector));
 	if (inode) {
 		/*
 		 * IN_ALL_EVENTS represents all of the mask bits
@@ -117,7 +117,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 		mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
 
 	if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) {
-		inode = igrab(mark->connector->inode);
+		inode = igrab(fsnotify_conn_inode(mark->connector));
 		if (!inode)
 			return;
 		seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
@@ -127,7 +127,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 		seq_putc(m, '\n');
 		iput(inode);
 	} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
-		struct mount *mnt = real_mount(mark->connector->mnt);
+		struct mount *mnt = fsnotify_conn_mount(mark->connector);
 
 		seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
 			   mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index caeee042d1cc..7902653dd577 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -9,14 +9,16 @@
 
 #include "../mount.h"
 
-static inline struct inode *fsnotify_obj_inode(fsnotify_connp_t *connp)
+static inline struct inode *fsnotify_conn_inode(
+				struct fsnotify_mark_connector *conn)
 {
-	return container_of(connp, struct inode, i_fsnotify_marks);
+	return container_of(conn->obj, struct inode, i_fsnotify_marks);
 }
 
-static inline struct mount *fsnotify_obj_mount(fsnotify_connp_t *connp)
+static inline struct mount *fsnotify_conn_mount(
+				struct fsnotify_mark_connector *conn)
 {
-	return container_of(connp, struct mount, mnt_fsnotify_marks);
+	return container_of(conn->obj, struct mount, mnt_fsnotify_marks);
 }
 
 /* destroy all events sitting in this groups notification queue */
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 7abb73b5beba..959bc73aaae7 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -120,14 +120,14 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 			new_mask |= mark->mask;
 	}
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
-		conn->inode->i_fsnotify_mask = new_mask;
+		fsnotify_conn_inode(conn)->i_fsnotify_mask = new_mask;
 	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
-		real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask;
+		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = new_mask;
 }
 
 /*
  * Calculate mask of events for a list of marks. The caller must make sure
- * connector and connector->inode cannot disappear under us.  Callers achieve
+ * connector and connector->obj cannot disappear under us.  Callers achieve
  * this by holding a mark->lock or mark->group->mark_mutex for a mark on this
  * list.
  */
@@ -140,7 +140,8 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 	__fsnotify_recalc_mask(conn);
 	spin_unlock(&conn->lock);
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
-		__fsnotify_update_child_dentry_flags(conn->inode);
+		__fsnotify_update_child_dentry_flags(
+					fsnotify_conn_inode(conn));
 }
 
 /* Free all connectors queued for freeing once SRCU period ends */
@@ -166,20 +167,20 @@ static struct inode *fsnotify_detach_connector_from_object(
 {
 	struct inode *inode = NULL;
 
+	if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
+		return NULL;
+
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
-		inode = conn->inode;
-		rcu_assign_pointer(inode->i_fsnotify_marks, NULL);
+		inode = fsnotify_conn_inode(conn);
 		inode->i_fsnotify_mask = 0;
-		conn->inode = NULL;
-		conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
-		rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks,
-				   NULL);
-		real_mount(conn->mnt)->mnt_fsnotify_mask = 0;
-		conn->mnt = NULL;
-		conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
+		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
 	}
 
+	rcu_assign_pointer(*(conn->obj), NULL);
+	conn->obj = NULL;
+	conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
+
 	return inode;
 }
 
@@ -448,10 +449,9 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 	spin_lock_init(&conn->lock);
 	INIT_HLIST_HEAD(&conn->list);
 	conn->type = type;
+	conn->obj = connp;
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
-		inode = conn->inode = igrab(fsnotify_obj_inode(connp));
-	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
-		conn->mnt = &fsnotify_obj_mount(connp)->mnt;
+		inode = igrab(fsnotify_conn_inode(conn));
 	/*
 	 * cmpxchg() provides the barrier so that readers of *connp can see
 	 * only initialized structure
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 8efb8663453d..381cfb0e67fa 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -255,6 +255,13 @@ FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
 #define fsnotify_foreach_obj_type(type) \
 	for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++)
 
+/*
+ * fsnotify_connp_t is what we embed in objects which connector can be attached
+ * to. fsnotify_connp_t * is how we refer from connector back to object.
+ */
+struct fsnotify_mark_connector;
+typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
+
 /*
  * Inode / vfsmount point to this structure which tracks all marks attached to
  * the inode / vfsmount. The reference to inode / vfsmount is held by this
@@ -264,17 +271,15 @@ FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
 struct fsnotify_mark_connector {
 	spinlock_t lock;
 	unsigned int type;	/* Type of object [lock] */
-	union {	/* Object pointer [lock] */
-		struct inode *inode;
-		struct vfsmount *mnt;
+	union {
+		/* Object pointer [lock] */
+		fsnotify_connp_t *obj;
 		/* Used listing heads to free after srcu period expires */
 		struct fsnotify_mark_connector *destroy_next;
 	};
 	struct hlist_head list;
 };
 
-typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
-
 /*
  * A mark is simply an object attached to an in core inode which allows an
  * fsnotify listener to indicate they are either no longer interested in events
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index c99ebaae5abc..02feef939560 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -168,7 +168,8 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
 /* Function to return search key in our hash from inode. */
 static unsigned long inode_to_key(const struct inode *inode)
 {
-	return (unsigned long)inode;
+	/* Use address pointed to by connector->obj as the key */
+	return (unsigned long)&inode->i_fsnotify_marks;
 }
 
 /*
@@ -183,7 +184,7 @@ static unsigned long chunk_to_key(struct audit_chunk *chunk)
 	 */
 	if (WARN_ON_ONCE(!chunk->mark.connector))
 		return 0;
-	return (unsigned long)chunk->mark.connector->inode;
+	return (unsigned long)chunk->mark.connector->obj;
 }
 
 static inline struct list_head *chunk_hash(unsigned long key)
@@ -258,7 +259,7 @@ static void untag_chunk(struct node *p)
 	spin_lock(&entry->lock);
 	/*
 	 * mark_mutex protects mark from getting detached and thus also from
-	 * mark->connector->inode getting NULL.
+	 * mark->connector->obj getting NULL.
 	 */
 	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
 		spin_unlock(&entry->lock);
@@ -288,8 +289,8 @@ static void untag_chunk(struct node *p)
 	if (!new)
 		goto Fallback;
 
-	if (fsnotify_add_inode_mark_locked(&new->mark, entry->connector->inode,
-					   1)) {
+	if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj,
+				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
 		fsnotify_put_mark(&new->mark);
 		goto Fallback;
 	}
@@ -423,7 +424,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_lock(&old_entry->lock);
 	/*
 	 * mark_mutex protects mark from getting detached and thus also from
-	 * mark->connector->inode getting NULL.
+	 * mark->connector->obj getting NULL.
 	 */
 	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
 		/* old_entry is being shot, lets just lie */
@@ -434,8 +435,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 		return -ENOENT;
 	}
 
-	if (fsnotify_add_inode_mark_locked(chunk_entry,
-			     old_entry->connector->inode, 1)) {
+	if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
+				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
 		spin_unlock(&old_entry->lock);
 		mutex_unlock(&old_entry->group->mark_mutex);
 		fsnotify_put_mark(chunk_entry);
-- 
cgit v1.2.3


From 3ac70bfcde812b1b97d8a88a832df59941fa293f Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 23 Jun 2018 17:54:50 +0300
Subject: fsnotify: add helper to get mask from connector

Use a helper to get the mask from the object (i.e. i_fsnotify_mask)
to generalize code of add/remove inode/vfsmount mark.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify_user.c | 16 ++++++++--------
 fs/notify/mark.c                   | 25 +++++++++++++++++++++----
 include/linux/fsnotify_backend.h   |  2 ++
 3 files changed, 31 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 81212b251189..3899ad177651 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -542,8 +542,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
 
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
 						 &destroy_mark);
-	if (removed & real_mount(mnt)->mnt_fsnotify_mask)
-		fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks);
+	if (removed & fsnotify_conn_mask(fsn_mark->connector))
+		fsnotify_recalc_mask(fsn_mark->connector);
 	if (destroy_mark)
 		fsnotify_detach_mark(fsn_mark);
 	mutex_unlock(&group->mark_mutex);
@@ -571,8 +571,8 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
 
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
 						 &destroy_mark);
-	if (removed & inode->i_fsnotify_mask)
-		fsnotify_recalc_mask(inode->i_fsnotify_marks);
+	if (removed & fsnotify_conn_mask(fsn_mark->connector))
+		fsnotify_recalc_mask(fsn_mark->connector);
 	if (destroy_mark)
 		fsnotify_detach_mark(fsn_mark);
 	mutex_unlock(&group->mark_mutex);
@@ -658,8 +658,8 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 		}
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-	if (added & ~real_mount(mnt)->mnt_fsnotify_mask)
-		fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks);
+	if (added & ~fsnotify_conn_mask(fsn_mark->connector))
+		fsnotify_recalc_mask(fsn_mark->connector);
 	mutex_unlock(&group->mark_mutex);
 
 	fsnotify_put_mark(fsn_mark);
@@ -697,8 +697,8 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 		}
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-	if (added & ~inode->i_fsnotify_mask)
-		fsnotify_recalc_mask(inode->i_fsnotify_marks);
+	if (added & ~fsnotify_conn_mask(fsn_mark->connector))
+		fsnotify_recalc_mask(fsn_mark->connector);
 	mutex_unlock(&group->mark_mutex);
 
 	fsnotify_put_mark(fsn_mark);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 959bc73aaae7..05506d60131c 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -109,6 +109,23 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
 	refcount_inc(&mark->refcnt);
 }
 
+static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
+{
+	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
+		return &fsnotify_conn_inode(conn)->i_fsnotify_mask;
+	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
+		return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
+	return NULL;
+}
+
+__u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
+{
+	if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
+		return 0;
+
+	return *fsnotify_conn_mask_p(conn);
+}
+
 static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
 	u32 new_mask = 0;
@@ -119,10 +136,10 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
 			new_mask |= mark->mask;
 	}
-	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
-		fsnotify_conn_inode(conn)->i_fsnotify_mask = new_mask;
-	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
-		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = new_mask;
+	if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
+		return;
+
+	*fsnotify_conn_mask_p(conn) = new_mask;
 }
 
 /*
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 381cfb0e67fa..2b9b6f1ff5e0 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -401,6 +401,8 @@ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group
 
 /* functions used to manipulate the marks attached to inodes */
 
+/* Get mask of events for a list of marks */
+extern __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn);
 /* Calculate mask of events for a list of marks */
 extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn);
 extern void fsnotify_init_mark(struct fsnotify_mark *mark,
-- 
cgit v1.2.3


From 22eceb8bf3e8f1f9b2f566062d06b25807725d7f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 19 Jun 2018 13:57:26 +0200
Subject: printk: Make CONSOLE_LOGLEVEL_QUIET configurable

The goal of passing the "quiet" option to the kernel is for the kernel
to be quiet unless something really is wrong.

Sofar passing quiet has been (mostly) equivalent to passing
loglevel=4 on the kernel commandline. Which means to show any messages
with a level of KERN_ERR or higher severity on the console.

In practice this often does not result in a quiet boot though, since
there are many false-positive or otherwise harmless error messages printed,
defeating the purpose of the quiet option. Esp. the ACPICA code is really
bad wrt this, but there are plenty of others too.

This commit makes CONSOLE_LOGLEVEL_QUIET configurable.

This for example will allow distros which want quiet to really mean quiet
to set CONSOLE_LOGLEVEL_QUIET so that only messages with a higher severity
then KERN_ERR (CRIT, ALERT, EMERG) get printed, avoiding an endless game
of whack-a-mole silencing harmless error messages.

Link: http://lkml.kernel.org/r/20180619115726.3098-1-hdegoede@redhat.com
To: Petr Mladek <pmladek@suse.com>
To: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/printk.h |  6 +++---
 lib/Kconfig.debug      | 11 +++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 6d7e800affd8..18602bb3eca8 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -50,15 +50,15 @@ static inline const char *printk_skip_headers(const char *buffer)
 /* We show everything that is MORE important than this.. */
 #define CONSOLE_LOGLEVEL_SILENT  0 /* Mum's the word */
 #define CONSOLE_LOGLEVEL_MIN	 1 /* Minimum loglevel we let people use */
-#define CONSOLE_LOGLEVEL_QUIET	 4 /* Shhh ..., when booted with "quiet" */
 #define CONSOLE_LOGLEVEL_DEBUG	10 /* issue debug messages */
 #define CONSOLE_LOGLEVEL_MOTORMOUTH 15	/* You can't shut this one up */
 
 /*
- * Default used to be hard-coded at 7, we're now allowing it to be set from
- * kernel config.
+ * Default used to be hard-coded at 7, quiet used to be hardcoded at 4,
+ * we're now allowing both to be set from kernel config.
  */
 #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT
+#define CONSOLE_LOGLEVEL_QUIET	 CONFIG_CONSOLE_LOGLEVEL_QUIET
 
 extern int console_printk[];
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 76555479ae36..a7ef03009e9e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -30,6 +30,17 @@ config CONSOLE_LOGLEVEL_DEFAULT
 	  usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT
 	  option.
 
+config CONSOLE_LOGLEVEL_QUIET
+	int "quiet console loglevel (1-15)"
+	range 1 15
+	default "4"
+	help
+	  loglevel to use when "quiet" is passed on the kernel commandline.
+
+	  When "quiet" is passed on the kernel commandline this loglevel
+	  will be used as the loglevel. IOW passing "quiet" will be the
+	  equivalent of passing "loglevel=<CONSOLE_LOGLEVEL_QUIET>"
+
 config MESSAGE_LOGLEVEL_DEFAULT
 	int "Default message log level (1-7)"
 	range 1 7
-- 
cgit v1.2.3


From e7d4a95da86e0b048702765bbdcdc968aaf312e7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 20 Jun 2018 08:58:28 +0200
Subject: bitfield: fix *_encode_bits()

There's a bug in *_encode_bits() in using ~field_multiplier() for
the check whether or not the constant value fits into the field,
this is wrong and clearly ~field_mask() was intended. This was
triggering for me for both constant and non-constant values.

Additionally, make this case actually into an compile error.
Declaring the extern function that will never exist with just a
warning is pointless as then later we'll just get a link error.

While at it, also fix the indentation in those lines I'm touching.

Finally, as suggested by Andy Shevchenko, add some tests and for
that introduce also u8 helpers. The tests don't compile without
the fix, showing that it's necessary.

Fixes: 00b0c9b82663 ("Add primitives for manipulating bitfields both in host- and fixed-endian.")
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/bitfield.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index cf2588d81148..147a7bb341dd 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -104,7 +104,7 @@
 		(typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask));	\
 	})
 
-extern void __compiletime_warning("value doesn't fit into mask")
+extern void __compiletime_error("value doesn't fit into mask")
 __field_overflow(void);
 extern void __compiletime_error("bad bitfield mask")
 __bad_mask(void);
@@ -121,8 +121,8 @@ static __always_inline u64 field_mask(u64 field)
 #define ____MAKE_OP(type,base,to,from)					\
 static __always_inline __##type type##_encode_bits(base v, base field)	\
 {									\
-        if (__builtin_constant_p(v) &&	(v & ~field_multiplier(field)))	\
-			    __field_overflow();				\
+	if (__builtin_constant_p(v) && (v & ~field_mask(field)))	\
+		__field_overflow();					\
 	return to((v & field_mask(field)) * field_multiplier(field));	\
 }									\
 static __always_inline __##type type##_replace_bits(__##type old,	\
-- 
cgit v1.2.3


From 37a3862e1238262e9866d5d66e5f5f9069cee3a1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 20 Jun 2018 08:58:29 +0200
Subject: bitfield: add u8 helpers

There's no reason why we shouldn't pack/unpack bits into/from
u8 values/registers/etc., so add u8 helpers.

Use the ____MAKE_OP() macro directly to avoid having nonsense
le8_encode_bits() and similar functions.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/bitfield.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index 147a7bb341dd..65a6981eef7b 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -143,6 +143,7 @@ static __always_inline base type##_get_bits(__##type v, base field)	\
 	____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu)	\
 	____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu)	\
 	____MAKE_OP(u##size,u##size,,)
+____MAKE_OP(u8,u8,,)
 __MAKE_OP(16)
 __MAKE_OP(32)
 __MAKE_OP(64)
-- 
cgit v1.2.3


From 4d97f7d53da7dc830dbf416a3d2a6778d267ae68 Mon Sep 17 00:00:00 2001
From: Henry Wilson <henry.wilson@acentic.com>
Date: Thu, 31 May 2018 09:43:03 +0000
Subject: inotify: Add flag IN_MASK_CREATE for inotify_add_watch()

The flag IN_MASK_CREATE is introduced as a flag for inotiy_add_watch()
which prevents inotify from modifying any existing watches when invoked.
If the pathname specified in the call has a watched inode associated
with it and IN_MASK_CREATE is specified, fail with an errno of EEXIST.

Use of IN_MASK_CREATE with IN_MASK_ADD is reserved for future use and
will return EINVAL.

RATIONALE

In the current implementation, there is no way to prevent
inotify_add_watch() from modifying existing watch descriptors. Even if
the caller keeps a record of all watch descriptors collected, this is
only sufficient to detect that an existing watch descriptor may have
been modified.

The assumption that a particular path will map to the same inode over
multiple calls to inotify_add_watch() cannot be made as files can be
renamed or deleted.  It is also not possible to assume that two distinct
paths do no map to the same inode, due to hard-links or a dereferenced
symbolic link. Further uses of inotify_add_watch() to revert the change
may cause other watch descriptors to be modified or created, merely
compunding the problem. There is currently no system call such as
inotify_modify_watch() to explicity modify a watch descriptor, which
would be able to revert unwanted changes. Thus the caller cannot
guarantee to be able to revert any changes to existing watch decriptors.

Additionally the caller cannot assume that the events that are
associated with a watch descriptor are within the set requested, as any
future calls to inotify_add_watch() may unintentionally modify a watch
descriptor's mask. Thus it cannot currently be guaranteed that a watch
descriptor will only generate events which have been requested. The
program must filter events which come through its watch descriptor to
within its expected range.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Henry Wilson <henry.wilson@acentic.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/inotify/inotify_user.c | 9 ++++++++-
 include/linux/inotify.h          | 2 +-
 include/uapi/linux/inotify.h     | 1 +
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1cf5b779d862..6f48d325c350 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -510,6 +510,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	__u32 old_mask, new_mask;
 	__u32 mask;
 	int add = (arg & IN_MASK_ADD);
+	int create = (arg & IN_MASK_CREATE);
 	int ret;
 
 	mask = inotify_arg_to_mask(arg);
@@ -517,6 +518,8 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
 	if (!fsn_mark)
 		return -ENOENT;
+	else if (create)
+		return -EEXIST;
 
 	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
@@ -718,6 +721,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 	if (unlikely(!f.file))
 		return -EBADF;
 
+	/* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */
+	if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE)))
+		return -EINVAL;
+
 	/* verify that this is indeed an inotify instance */
 	if (unlikely(f.file->f_op != &inotify_fops)) {
 		ret = -EINVAL;
@@ -806,7 +813,7 @@ static int __init inotify_user_setup(void)
 	BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
 	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
 
-	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
+	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22);
 
 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
 
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 44f9ffe72c87..6a24905f6e1e 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -18,6 +18,6 @@ extern struct ctl_table inotify_table[]; /* for sysctl */
 			  IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT | \
 			  IN_Q_OVERFLOW | IN_IGNORED | IN_ONLYDIR | \
 			  IN_DONT_FOLLOW | IN_EXCL_UNLINK | IN_MASK_ADD | \
-			  IN_ISDIR | IN_ONESHOT)
+			  IN_MASK_CREATE | IN_ISDIR | IN_ONESHOT)
 
 #endif	/* _LINUX_INOTIFY_H */
diff --git a/include/uapi/linux/inotify.h b/include/uapi/linux/inotify.h
index 4800bf2a531d..884b4846b630 100644
--- a/include/uapi/linux/inotify.h
+++ b/include/uapi/linux/inotify.h
@@ -53,6 +53,7 @@ struct inotify_event {
 #define IN_ONLYDIR		0x01000000	/* only watch the path if it is a directory */
 #define IN_DONT_FOLLOW		0x02000000	/* don't follow a sym link */
 #define IN_EXCL_UNLINK		0x04000000	/* exclude events on unlinked objects */
+#define IN_MASK_CREATE		0x10000000	/* only create watches */
 #define IN_MASK_ADD		0x20000000	/* add to the mask of an already existing watch */
 #define IN_ISDIR		0x40000000	/* event occurred against dir */
 #define IN_ONESHOT		0x80000000	/* only send event once */
-- 
cgit v1.2.3


From 5e85a04c8c0d271d7561a770b85741f186398868 Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Wed, 27 Jun 2018 07:45:22 +0800
Subject: usb: typec: add fwnode to tcpc

Add fwnode handle to get the fwnode so we can get typec configs
it contains.

Suggested-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Li Jun <jun.li@nxp.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/tcpci.c | 7 +++++++
 drivers/usb/typec/tcpm.c      | 1 +
 include/linux/usb/tcpm.h      | 2 ++
 3 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/staging/typec/tcpci.c b/drivers/staging/typec/tcpci.c
index dd2928824ef6..e59547a32605 100644
--- a/drivers/staging/typec/tcpci.c
+++ b/drivers/staging/typec/tcpci.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
+#include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/usb/pd.h>
 #include <linux/usb/tcpm.h>
@@ -474,6 +475,12 @@ static int tcpci_parse_config(struct tcpci *tcpci)
 
 	/* TODO: Populate struct tcpc_config from ACPI/device-tree */
 	tcpci->tcpc.config = &tcpci_tcpc_config;
+	tcpci->tcpc.fwnode = device_get_named_child_node(tcpci->dev,
+							 "connector");
+	if (!tcpci->tcpc.fwnode) {
+		dev_err(tcpci->dev, "Can't find connector node.\n");
+		return -EINVAL;
+	}
 
 	return 0;
 }
diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index 8a201dd53d36..d22b37bf4e1a 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -4576,6 +4576,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 	else
 		port->try_role = TYPEC_NO_PREFERRED_ROLE;
 
+	port->typec_caps.fwnode = tcpc->fwnode;
 	port->typec_caps.prefer_role = tcpc->config->default_role;
 	port->typec_caps.type = tcpc->config->type;
 	port->typec_caps.data = tcpc->config->data;
diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h
index b231b9314240..193920a2e05f 100644
--- a/include/linux/usb/tcpm.h
+++ b/include/linux/usb/tcpm.h
@@ -110,6 +110,7 @@ enum tcpc_mux_mode {
 /**
  * struct tcpc_dev - Port configuration and callback functions
  * @config:	Pointer to port configuration
+ * @fwnode:	Pointer to port fwnode
  * @get_vbus:	Called to read current VBUS state
  * @get_current_limit:
  *		Optional; called by the tcpm core when configured as a snk
@@ -138,6 +139,7 @@ enum tcpc_mux_mode {
  */
 struct tcpc_dev {
 	const struct tcpc_config *config;
+	struct fwnode_handle *fwnode;
 
 	int (*init)(struct tcpc_dev *dev);
 	int (*get_vbus)(struct tcpc_dev *dev);
-- 
cgit v1.2.3


From 9c90e02434b66f9bcfc5c9a91a808eee5b7aa21b Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Wed, 27 Jun 2018 07:45:23 +0800
Subject: usb: typec: add API to get typec basic port power and data config

This patch adds 3 APIs to get the typec port power and data type,
and preferred power role by its name string.

Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Li Jun <jun.li@nxp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c | 58 +++++++++++++++++++++++++++++++++++++++++++----
 include/linux/usb/typec.h |  3 +++
 2 files changed, 57 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 53df10df2f9d..633105917fa6 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -796,12 +796,18 @@ static const char * const typec_data_roles[] = {
 	[TYPEC_HOST]	= "host",
 };
 
-static const char * const typec_port_types[] = {
+static const char * const typec_port_power_roles[] = {
 	[TYPEC_PORT_SRC] = "source",
 	[TYPEC_PORT_SNK] = "sink",
 	[TYPEC_PORT_DRP] = "dual",
 };
 
+static const char * const typec_port_data_roles[] = {
+	[TYPEC_PORT_DFP] = "host",
+	[TYPEC_PORT_UFP] = "device",
+	[TYPEC_PORT_DRD] = "dual",
+};
+
 static const char * const typec_port_types_drp[] = {
 	[TYPEC_PORT_SRC] = "dual [source] sink",
 	[TYPEC_PORT_SNK] = "dual source [sink]",
@@ -932,7 +938,7 @@ static ssize_t power_role_store(struct device *dev,
 	mutex_lock(&port->port_type_lock);
 	if (port->port_type != TYPEC_PORT_DRP) {
 		dev_dbg(dev, "port type fixed at \"%s\"",
-			     typec_port_types[port->port_type]);
+			     typec_port_power_roles[port->port_type]);
 		ret = -EOPNOTSUPP;
 		goto unlock_and_ret;
 	}
@@ -973,7 +979,7 @@ port_type_store(struct device *dev, struct device_attribute *attr,
 		return -EOPNOTSUPP;
 	}
 
-	ret = sysfs_match_string(typec_port_types, buf);
+	ret = sysfs_match_string(typec_port_power_roles, buf);
 	if (ret < 0)
 		return ret;
 
@@ -1007,7 +1013,7 @@ port_type_show(struct device *dev, struct device_attribute *attr,
 		return sprintf(buf, "%s\n",
 			       typec_port_types_drp[port->port_type]);
 
-	return sprintf(buf, "[%s]\n", typec_port_types[port->cap->type]);
+	return sprintf(buf, "[%s]\n", typec_port_power_roles[port->cap->type]);
 }
 static DEVICE_ATTR_RW(port_type);
 
@@ -1252,6 +1258,50 @@ void typec_set_pwr_opmode(struct typec_port *port,
 }
 EXPORT_SYMBOL_GPL(typec_set_pwr_opmode);
 
+/**
+ * typec_find_port_power_role - Get the typec port power capability
+ * @name: port power capability string
+ *
+ * This routine is used to find the typec_port_type by its string name.
+ *
+ * Returns typec_port_type if success, otherwise negative error code.
+ */
+int typec_find_port_power_role(const char *name)
+{
+	return match_string(typec_port_power_roles,
+			    ARRAY_SIZE(typec_port_power_roles), name);
+}
+EXPORT_SYMBOL_GPL(typec_find_port_power_role);
+
+/**
+ * typec_find_power_role - Find the typec one specific power role
+ * @name: power role string
+ *
+ * This routine is used to find the typec_role by its string name.
+ *
+ * Returns typec_role if success, otherwise negative error code.
+ */
+int typec_find_power_role(const char *name)
+{
+	return match_string(typec_roles, ARRAY_SIZE(typec_roles), name);
+}
+EXPORT_SYMBOL_GPL(typec_find_power_role);
+
+/**
+ * typec_find_port_data_role - Get the typec port data capability
+ * @name: port data capability string
+ *
+ * This routine is used to find the typec_port_data by its string name.
+ *
+ * Returns typec_port_data if success, otherwise negative error code.
+ */
+int typec_find_port_data_role(const char *name)
+{
+	return match_string(typec_port_data_roles,
+			    ARRAY_SIZE(typec_port_data_roles), name);
+}
+EXPORT_SYMBOL_GPL(typec_find_port_data_role);
+
 /* ------------------------------------------ */
 /* API for Multiplexer/DeMultiplexer Switches */
 
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 672b39bb0adc..15f8d9a50b31 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -267,4 +267,7 @@ int typec_set_orientation(struct typec_port *port,
 			  enum typec_orientation orientation);
 int typec_set_mode(struct typec_port *port, int mode);
 
+int typec_find_port_power_role(const char *name);
+int typec_find_power_role(const char *name);
+int typec_find_port_data_role(const char *name);
 #endif /* __LINUX_USB_TYPEC_H */
-- 
cgit v1.2.3


From 82f5d7749fa4f3851f705c75b33babf68edb90b8 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:46 +0300
Subject: usb: pd: include kernel.h

This makes life a bit easier for the drivers that include
pd.h. All pd_header_*_le() inline functions defined in pd.h
call le16_to_cpu(), and all *_LE() macros in pd.h call
cpu_to_le16().

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/pd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h
index 09b570feb297..f2162e0fe531 100644
--- a/include/linux/usb/pd.h
+++ b/include/linux/usb/pd.h
@@ -15,6 +15,7 @@
 #ifndef __LINUX_USB_PD_H
 #define __LINUX_USB_PD_H
 
+#include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/usb/typec.h>
 
-- 
cgit v1.2.3


From aaf3f4e925dc2bdc4715142103660285632a245c Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:47 +0300
Subject: usb: typec: function for checking cable plug orientation

This adds function typec_get_orientation() that can be used
for checking the current cable plug orientation.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c | 12 ++++++++++++
 include/linux/usb/typec.h |  1 +
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 633105917fa6..3ef7b99b080f 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1329,6 +1329,18 @@ int typec_set_orientation(struct typec_port *port,
 }
 EXPORT_SYMBOL_GPL(typec_set_orientation);
 
+/**
+ * typec_get_orientation - Get USB Type-C cable plug orientation
+ * @port: USB Type-C Port
+ *
+ * Get current cable plug orientation for @port.
+ */
+enum typec_orientation typec_get_orientation(struct typec_port *port)
+{
+	return port->orientation;
+}
+EXPORT_SYMBOL_GPL(typec_get_orientation);
+
 /**
  * typec_set_mode - Set mode of operation for USB Type-C connector
  * @port: USB Type-C port for the connector
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 15f8d9a50b31..6c33b53d1162 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -265,6 +265,7 @@ void typec_set_pwr_opmode(struct typec_port *port, enum typec_pwr_opmode mode);
 
 int typec_set_orientation(struct typec_port *port,
 			  enum typec_orientation orientation);
+enum typec_orientation typec_get_orientation(struct typec_port *port);
 int typec_set_mode(struct typec_port *port, int mode);
 
 int typec_find_port_power_role(const char *name);
-- 
cgit v1.2.3


From 2b6a440351436d792b1960822da4b7d6e673f568 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 1 Jun 2018 10:22:52 +0200
Subject: gnss: add GNSS receiver subsystem

Add a new subsystem for GNSS (e.g. GPS) receivers.

While GNSS receivers are typically accessed using a UART interface they
often also support other I/O interfaces such as I2C, SPI and USB, while
yet other devices use iomem or even some form of remote-processor
messaging (rpmsg).

The new GNSS subsystem abstracts the underlying interface and provides a
new "gnss" class type, which exposes a character-device interface (e.g.
/dev/gnss0) to user space. This allows GNSS receivers to have a
representation in the Linux device model, something which is important
not least for power management purposes.

Note that the character-device interface provides raw access to whatever
protocol the receiver is (currently) using, such as NMEA 0183, UBX or
SiRF Binary. These protocols are expected to be continued to be handled
by user space for the time being, even if some hybrid solutions are also
conceivable (e.g. to have kernel drivers issue management commands).

This will still allow for better platform integration by allowing GNSS
devices and their resources (e.g. regulators and enable-gpios) to be
described by firmware and managed by kernel drivers rather than
platform-specific scripts and services.

While the current interface is kept minimal, it could be extended using
IOCTLs, sysfs or uevents as needs and proper abstraction levels are
identified and determined (e.g. for device and feature identification).

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS           |   6 +
 drivers/Kconfig       |   2 +
 drivers/Makefile      |   1 +
 drivers/gnss/Kconfig  |  11 ++
 drivers/gnss/Makefile |   7 +
 drivers/gnss/core.c   | 371 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/gnss.h  |  66 +++++++++
 7 files changed, 464 insertions(+)
 create mode 100644 drivers/gnss/Kconfig
 create mode 100644 drivers/gnss/Makefile
 create mode 100644 drivers/gnss/core.c
 create mode 100644 include/linux/gnss.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 6cfd16790add..436e6dcabf34 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6038,6 +6038,12 @@ F:	Documentation/isdn/README.gigaset
 F:	drivers/isdn/gigaset/
 F:	include/uapi/linux/gigaset_dev.h
 
+GNSS SUBSYSTEM
+M:	Johan Hovold <johan@kernel.org>
+S:	Maintained
+F:	drivers/gnss/
+F:	include/linux/gnss.h
+
 GO7007 MPEG CODEC
 M:	Hans Verkuil <hans.verkuil@cisco.com>
 L:	linux-media@vger.kernel.org
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 95b9ccc08165..ab4d43923c4d 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -9,6 +9,8 @@ source "drivers/bus/Kconfig"
 
 source "drivers/connector/Kconfig"
 
+source "drivers/gnss/Kconfig"
+
 source "drivers/mtd/Kconfig"
 
 source "drivers/of/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 24cd47014657..cc9a7c5f7d2c 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -185,3 +185,4 @@ obj-$(CONFIG_TEE)		+= tee/
 obj-$(CONFIG_MULTIPLEXER)	+= mux/
 obj-$(CONFIG_UNISYS_VISORBUS)	+= visorbus/
 obj-$(CONFIG_SIOX)		+= siox/
+obj-$(CONFIG_GNSS)		+= gnss/
diff --git a/drivers/gnss/Kconfig b/drivers/gnss/Kconfig
new file mode 100644
index 000000000000..103fcc70992e
--- /dev/null
+++ b/drivers/gnss/Kconfig
@@ -0,0 +1,11 @@
+#
+# GNSS receiver configuration
+#
+
+menuconfig GNSS
+	tristate "GNSS receiver support"
+	---help---
+	  Say Y here if you have a GNSS receiver (e.g. a GPS receiver).
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called gnss.
diff --git a/drivers/gnss/Makefile b/drivers/gnss/Makefile
new file mode 100644
index 000000000000..1f7a7baab1d9
--- /dev/null
+++ b/drivers/gnss/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the GNSS subsystem.
+#
+
+obj-$(CONFIG_GNSS)			+= gnss.o
+gnss-y := core.o
diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c
new file mode 100644
index 000000000000..307894ca2725
--- /dev/null
+++ b/drivers/gnss/core.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GNSS receiver core
+ *
+ * Copyright (C) 2018 Johan Hovold <johan@kernel.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cdev.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/gnss.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+
+#define GNSS_FLAG_HAS_WRITE_RAW		BIT(0)
+
+#define GNSS_MINORS	16
+
+static DEFINE_IDA(gnss_minors);
+static dev_t gnss_first;
+
+/* FIFO size must be a power of two */
+#define GNSS_READ_FIFO_SIZE	4096
+#define GNSS_WRITE_BUF_SIZE	1024
+
+#define to_gnss_device(d) container_of((d), struct gnss_device, dev)
+
+static int gnss_open(struct inode *inode, struct file *file)
+{
+	struct gnss_device *gdev;
+	int ret = 0;
+
+	gdev = container_of(inode->i_cdev, struct gnss_device, cdev);
+
+	get_device(&gdev->dev);
+
+	nonseekable_open(inode, file);
+	file->private_data = gdev;
+
+	down_write(&gdev->rwsem);
+	if (gdev->disconnected) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+
+	if (gdev->count++ == 0) {
+		ret = gdev->ops->open(gdev);
+		if (ret)
+			gdev->count--;
+	}
+unlock:
+	up_write(&gdev->rwsem);
+
+	if (ret)
+		put_device(&gdev->dev);
+
+	return ret;
+}
+
+static int gnss_release(struct inode *inode, struct file *file)
+{
+	struct gnss_device *gdev = file->private_data;
+
+	down_write(&gdev->rwsem);
+	if (gdev->disconnected)
+		goto unlock;
+
+	if (--gdev->count == 0) {
+		gdev->ops->close(gdev);
+		kfifo_reset(&gdev->read_fifo);
+	}
+unlock:
+	up_write(&gdev->rwsem);
+
+	put_device(&gdev->dev);
+
+	return 0;
+}
+
+static ssize_t gnss_read(struct file *file, char __user *buf,
+				size_t count, loff_t *pos)
+{
+	struct gnss_device *gdev = file->private_data;
+	unsigned int copied;
+	int ret;
+
+	mutex_lock(&gdev->read_mutex);
+	while (kfifo_is_empty(&gdev->read_fifo)) {
+		mutex_unlock(&gdev->read_mutex);
+
+		if (gdev->disconnected)
+			return 0;
+
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		ret = wait_event_interruptible(gdev->read_queue,
+				gdev->disconnected ||
+				!kfifo_is_empty(&gdev->read_fifo));
+		if (ret)
+			return -ERESTARTSYS;
+
+		mutex_lock(&gdev->read_mutex);
+	}
+
+	ret = kfifo_to_user(&gdev->read_fifo, buf, count, &copied);
+	if (ret == 0)
+		ret = copied;
+
+	mutex_unlock(&gdev->read_mutex);
+
+	return ret;
+}
+
+static ssize_t gnss_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *pos)
+{
+	struct gnss_device *gdev = file->private_data;
+	size_t written = 0;
+	int ret;
+
+	if (gdev->disconnected)
+		return -EIO;
+
+	if (!count)
+		return 0;
+
+	if (!(gdev->flags & GNSS_FLAG_HAS_WRITE_RAW))
+		return -EIO;
+
+	/* Ignoring O_NONBLOCK, write_raw() is synchronous. */
+
+	ret = mutex_lock_interruptible(&gdev->write_mutex);
+	if (ret)
+		return -ERESTARTSYS;
+
+	for (;;) {
+		size_t n = count - written;
+
+		if (n > GNSS_WRITE_BUF_SIZE)
+			n = GNSS_WRITE_BUF_SIZE;
+
+		if (copy_from_user(gdev->write_buf, buf, n)) {
+			ret = -EFAULT;
+			goto out_unlock;
+		}
+
+		/*
+		 * Assumes write_raw can always accept GNSS_WRITE_BUF_SIZE
+		 * bytes.
+		 *
+		 * FIXME: revisit
+		 */
+		down_read(&gdev->rwsem);
+		if (!gdev->disconnected)
+			ret = gdev->ops->write_raw(gdev, gdev->write_buf, n);
+		else
+			ret = -EIO;
+		up_read(&gdev->rwsem);
+
+		if (ret < 0)
+			break;
+
+		written += ret;
+		buf += ret;
+
+		if (written == count)
+			break;
+	}
+
+	if (written)
+		ret = written;
+out_unlock:
+	mutex_unlock(&gdev->write_mutex);
+
+	return ret;
+}
+
+static __poll_t gnss_poll(struct file *file, poll_table *wait)
+{
+	struct gnss_device *gdev = file->private_data;
+	__poll_t mask = 0;
+
+	poll_wait(file, &gdev->read_queue, wait);
+
+	if (!kfifo_is_empty(&gdev->read_fifo))
+		mask |= EPOLLIN | EPOLLRDNORM;
+	if (gdev->disconnected)
+		mask |= EPOLLHUP;
+
+	return mask;
+}
+
+static const struct file_operations gnss_fops = {
+	.owner		= THIS_MODULE,
+	.open		= gnss_open,
+	.release	= gnss_release,
+	.read		= gnss_read,
+	.write		= gnss_write,
+	.poll		= gnss_poll,
+	.llseek		= no_llseek,
+};
+
+static struct class *gnss_class;
+
+static void gnss_device_release(struct device *dev)
+{
+	struct gnss_device *gdev = to_gnss_device(dev);
+
+	kfree(gdev->write_buf);
+	kfifo_free(&gdev->read_fifo);
+	ida_simple_remove(&gnss_minors, gdev->id);
+	kfree(gdev);
+}
+
+struct gnss_device *gnss_allocate_device(struct device *parent)
+{
+	struct gnss_device *gdev;
+	struct device *dev;
+	int id;
+	int ret;
+
+	gdev = kzalloc(sizeof(*gdev), GFP_KERNEL);
+	if (!gdev)
+		return NULL;
+
+	id = ida_simple_get(&gnss_minors, 0, GNSS_MINORS, GFP_KERNEL);
+	if (id < 0) {
+		kfree(gdev);
+		return ERR_PTR(id);
+	}
+
+	gdev->id = id;
+
+	dev = &gdev->dev;
+	device_initialize(dev);
+	dev->devt = gnss_first + id;
+	dev->class = gnss_class;
+	dev->parent = parent;
+	dev->release = gnss_device_release;
+	dev_set_drvdata(dev, gdev);
+	dev_set_name(dev, "gnss%d", id);
+
+	init_rwsem(&gdev->rwsem);
+	mutex_init(&gdev->read_mutex);
+	mutex_init(&gdev->write_mutex);
+	init_waitqueue_head(&gdev->read_queue);
+
+	ret = kfifo_alloc(&gdev->read_fifo, GNSS_READ_FIFO_SIZE, GFP_KERNEL);
+	if (ret)
+		goto err_put_device;
+
+	gdev->write_buf = kzalloc(GNSS_WRITE_BUF_SIZE, GFP_KERNEL);
+	if (!gdev->write_buf)
+		goto err_put_device;
+
+	cdev_init(&gdev->cdev, &gnss_fops);
+	gdev->cdev.owner = THIS_MODULE;
+
+	return gdev;
+
+err_put_device:
+	put_device(dev);
+
+	return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL_GPL(gnss_allocate_device);
+
+void gnss_put_device(struct gnss_device *gdev)
+{
+	put_device(&gdev->dev);
+}
+EXPORT_SYMBOL_GPL(gnss_put_device);
+
+int gnss_register_device(struct gnss_device *gdev)
+{
+	int ret;
+
+	/* Set a flag which can be accessed without holding the rwsem. */
+	if (gdev->ops->write_raw != NULL)
+		gdev->flags |= GNSS_FLAG_HAS_WRITE_RAW;
+
+	ret = cdev_device_add(&gdev->cdev, &gdev->dev);
+	if (ret) {
+		dev_err(&gdev->dev, "failed to add device: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gnss_register_device);
+
+void gnss_deregister_device(struct gnss_device *gdev)
+{
+	down_write(&gdev->rwsem);
+	gdev->disconnected = true;
+	if (gdev->count) {
+		wake_up_interruptible(&gdev->read_queue);
+		gdev->ops->close(gdev);
+	}
+	up_write(&gdev->rwsem);
+
+	cdev_device_del(&gdev->cdev, &gdev->dev);
+}
+EXPORT_SYMBOL_GPL(gnss_deregister_device);
+
+/*
+ * Caller guarantees serialisation.
+ *
+ * Must not be called for a closed device.
+ */
+int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf,
+				size_t count)
+{
+	int ret;
+
+	ret = kfifo_in(&gdev->read_fifo, buf, count);
+
+	wake_up_interruptible(&gdev->read_queue);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gnss_insert_raw);
+
+static int __init gnss_module_init(void)
+{
+	int ret;
+
+	ret = alloc_chrdev_region(&gnss_first, 0, GNSS_MINORS, "gnss");
+	if (ret < 0) {
+		pr_err("failed to allocate device numbers: %d\n", ret);
+		return ret;
+	}
+
+	gnss_class = class_create(THIS_MODULE, "gnss");
+	if (IS_ERR(gnss_class)) {
+		ret = PTR_ERR(gnss_class);
+		pr_err("failed to create class: %d\n", ret);
+		goto err_unregister_chrdev;
+	}
+
+	pr_info("GNSS driver registered with major %d\n", MAJOR(gnss_first));
+
+	return 0;
+
+err_unregister_chrdev:
+	unregister_chrdev_region(gnss_first, GNSS_MINORS);
+
+	return ret;
+}
+module_init(gnss_module_init);
+
+static void __exit gnss_module_exit(void)
+{
+	class_destroy(gnss_class);
+	unregister_chrdev_region(gnss_first, GNSS_MINORS);
+	ida_destroy(&gnss_minors);
+}
+module_exit(gnss_module_exit);
+
+MODULE_AUTHOR("Johan Hovold <johan@kernel.org>");
+MODULE_DESCRIPTION("GNSS receiver core");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/gnss.h b/include/linux/gnss.h
new file mode 100644
index 000000000000..e26aeac1e0e2
--- /dev/null
+++ b/include/linux/gnss.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GNSS receiver support
+ *
+ * Copyright (C) 2018 Johan Hovold <johan@kernel.org>
+ */
+
+#ifndef _LINUX_GNSS_H
+#define _LINUX_GNSS_H
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/kfifo.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+struct gnss_device;
+
+struct gnss_operations {
+	int (*open)(struct gnss_device *gdev);
+	void (*close)(struct gnss_device *gdev);
+	int (*write_raw)(struct gnss_device *gdev, const unsigned char *buf,
+				size_t count);
+};
+
+struct gnss_device {
+	struct device dev;
+	struct cdev cdev;
+	int id;
+
+	unsigned long flags;
+
+	struct rw_semaphore rwsem;
+	const struct gnss_operations *ops;
+	unsigned int count;
+	unsigned int disconnected:1;
+
+	struct mutex read_mutex;
+	struct kfifo read_fifo;
+	wait_queue_head_t read_queue;
+
+	struct mutex write_mutex;
+	char *write_buf;
+};
+
+struct gnss_device *gnss_allocate_device(struct device *parent);
+void gnss_put_device(struct gnss_device *gdev);
+int gnss_register_device(struct gnss_device *gdev);
+void gnss_deregister_device(struct gnss_device *gdev);
+
+int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf,
+			size_t count);
+
+static inline void gnss_set_drvdata(struct gnss_device *gdev, void *data)
+{
+	dev_set_drvdata(&gdev->dev, data);
+}
+
+static inline void *gnss_get_drvdata(struct gnss_device *gdev)
+{
+	return dev_get_drvdata(&gdev->dev);
+}
+
+#endif /* _LINUX_GNSS_H */
-- 
cgit v1.2.3


From 10f146639fee5ffaf7cf0081c1af518f7d0c533c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 1 Jun 2018 10:22:59 +0200
Subject: gnss: add receiver type support

Add a "type" device attribute and a "GNSS_TYPE" uevent variable which
can be used to determine the type of a GNSS receiver. The currently
identified types reflect the protocol(s) supported by a receiver:

	"NMEA"	NMEA 0183
	"SiRF"	SiRF Binary
	"UBX"	UBX

Note that both SiRF and UBX type receivers typically support a subset of
NMEA 0183 with vendor extensions (e.g. to allow switching to the vendor
protocol).

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-class-gnss | 15 +++++++++
 MAINTAINERS                                |  1 +
 drivers/gnss/core.c                        | 49 ++++++++++++++++++++++++++++++
 drivers/gnss/sirf.c                        |  1 +
 drivers/gnss/ubx.c                         |  2 ++
 include/linux/gnss.h                       |  9 ++++++
 6 files changed, 77 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-class-gnss

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-gnss b/Documentation/ABI/testing/sysfs-class-gnss
new file mode 100644
index 000000000000..2467b6900eae
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-gnss
@@ -0,0 +1,15 @@
+What:		/sys/class/gnss/gnssN/type
+Date:		May 2018
+KernelVersion:	4.18
+Contact:	Johan Hovold <johan@kernel.org>
+Description:
+		The GNSS receiver type. The currently identified types reflect
+		the protocol(s) supported by the receiver:
+
+			"NMEA"		NMEA 0183
+			"SiRF"		SiRF Binary
+			"UBX"		UBX
+
+		Note that also non-"NMEA" type receivers typically support a
+		subset of NMEA 0183 with vendor extensions (e.g. to allow
+		switching to a vendor protocol).
diff --git a/MAINTAINERS b/MAINTAINERS
index f980c6186094..e01d220a6f05 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6041,6 +6041,7 @@ F:	include/uapi/linux/gigaset_dev.h
 GNSS SUBSYSTEM
 M:	Johan Hovold <johan@kernel.org>
 S:	Maintained
+F:	Documentation/ABI/testing/sysfs-class-gnss
 F:	Documentation/devicetree/bindings/gnss/
 F:	drivers/gnss/
 F:	include/linux/gnss.h
diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c
index 307894ca2725..f30ef8338b3a 100644
--- a/drivers/gnss/core.c
+++ b/drivers/gnss/core.c
@@ -330,6 +330,52 @@ int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf,
 }
 EXPORT_SYMBOL_GPL(gnss_insert_raw);
 
+static const char * const gnss_type_names[GNSS_TYPE_COUNT] = {
+	[GNSS_TYPE_NMEA]	= "NMEA",
+	[GNSS_TYPE_SIRF]	= "SiRF",
+	[GNSS_TYPE_UBX]		= "UBX",
+};
+
+static const char *gnss_type_name(struct gnss_device *gdev)
+{
+	const char *name = NULL;
+
+	if (gdev->type < GNSS_TYPE_COUNT)
+		name = gnss_type_names[gdev->type];
+
+	if (!name)
+		dev_WARN(&gdev->dev, "type name not defined\n");
+
+	return name;
+}
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+				char *buf)
+{
+	struct gnss_device *gdev = to_gnss_device(dev);
+
+	return sprintf(buf, "%s\n", gnss_type_name(gdev));
+}
+static DEVICE_ATTR_RO(type);
+
+static struct attribute *gnss_attrs[] = {
+	&dev_attr_type.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(gnss);
+
+static int gnss_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct gnss_device *gdev = to_gnss_device(dev);
+	int ret;
+
+	ret = add_uevent_var(env, "GNSS_TYPE=%s", gnss_type_name(gdev));
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int __init gnss_module_init(void)
 {
 	int ret;
@@ -347,6 +393,9 @@ static int __init gnss_module_init(void)
 		goto err_unregister_chrdev;
 	}
 
+	gnss_class->dev_groups = gnss_groups;
+	gnss_class->dev_uevent = gnss_uevent;
+
 	pr_info("GNSS driver registered with major %d\n", MAJOR(gnss_first));
 
 	return 0;
diff --git a/drivers/gnss/sirf.c b/drivers/gnss/sirf.c
index 5fb0f730db48..79cb98950013 100644
--- a/drivers/gnss/sirf.c
+++ b/drivers/gnss/sirf.c
@@ -267,6 +267,7 @@ static int sirf_probe(struct serdev_device *serdev)
 	if (!gdev)
 		return -ENOMEM;
 
+	gdev->type = GNSS_TYPE_SIRF;
 	gdev->ops = &sirf_gnss_ops;
 	gnss_set_drvdata(gdev, data);
 
diff --git a/drivers/gnss/ubx.c b/drivers/gnss/ubx.c
index ecddfb362a6f..902b6854b7db 100644
--- a/drivers/gnss/ubx.c
+++ b/drivers/gnss/ubx.c
@@ -77,6 +77,8 @@ static int ubx_probe(struct serdev_device *serdev)
 
 	gserial->ops = &ubx_gserial_ops;
 
+	gserial->gdev->type = GNSS_TYPE_UBX;
+
 	data = gnss_serial_get_drvdata(gserial);
 
 	data->vcc = devm_regulator_get(&serdev->dev, "vcc");
diff --git a/include/linux/gnss.h b/include/linux/gnss.h
index e26aeac1e0e2..43546977098c 100644
--- a/include/linux/gnss.h
+++ b/include/linux/gnss.h
@@ -18,6 +18,14 @@
 
 struct gnss_device;
 
+enum gnss_type {
+	GNSS_TYPE_NMEA = 0,
+	GNSS_TYPE_SIRF,
+	GNSS_TYPE_UBX,
+
+	GNSS_TYPE_COUNT
+};
+
 struct gnss_operations {
 	int (*open)(struct gnss_device *gdev);
 	void (*close)(struct gnss_device *gdev);
@@ -30,6 +38,7 @@ struct gnss_device {
 	struct cdev cdev;
 	int id;
 
+	enum gnss_type type;
 	unsigned long flags;
 
 	struct rw_semaphore rwsem;
-- 
cgit v1.2.3


From 5fd691afdf929061c391d897fa627822c3b2fd5a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 5 Jun 2018 16:53:34 +0200
Subject: atomic/tty: Fix up atomic abuse in ldsem

Mark found ldsem_cmpxchg() needed an (atomic_long_t *) cast to keep
working after making the atomic_long interface type safe.

Needing casts is bad form, which made me look at the code. There are no
ld_semaphore::count users outside of these functions so there is no
reason why it can not be an atomic_long_t in the first place, obviating
the need for this cast.

That also ensures the loads use atomic_long_read(), which implies (at
least) READ_ONCE() in order to guarantee single-copy-atomic loads.

When using atomic_long_try_cmpxchg() the ldsem_cmpxchg() wrapper gets
very thin (the only difference is not changing *old on success, which
most callers don't seem to care about).

So rework the whole thing to use atomic_long_t and its accessors
directly.

While there, fixup all the horrible comment styles.

Cc: Peter Hurley <peter@hurleysoftware.com>
Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_ldsem.c   | 82 ++++++++++++++++++++---------------------------
 include/linux/tty_ldisc.h |  4 +--
 2 files changed, 37 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c
index 37a91b3df980..0c98d88f795a 100644
--- a/drivers/tty/tty_ldsem.c
+++ b/drivers/tty/tty_ldsem.c
@@ -74,28 +74,6 @@ struct ldsem_waiter {
 	struct task_struct *task;
 };
 
-static inline long ldsem_atomic_update(long delta, struct ld_semaphore *sem)
-{
-	return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
-}
-
-/*
- * ldsem_cmpxchg() updates @*old with the last-known sem->count value.
- * Returns 1 if count was successfully changed; @*old will have @new value.
- * Returns 0 if count was not changed; @*old will have most recent sem->count
- */
-static inline int ldsem_cmpxchg(long *old, long new, struct ld_semaphore *sem)
-{
-	long tmp = atomic_long_cmpxchg(&sem->count, *old, new);
-	if (tmp == *old) {
-		*old = new;
-		return 1;
-	} else {
-		*old = tmp;
-		return 0;
-	}
-}
-
 /*
  * Initialize an ldsem:
  */
@@ -109,7 +87,7 @@ void __init_ldsem(struct ld_semaphore *sem, const char *name,
 	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
 	lockdep_init_map(&sem->dep_map, name, key, 0);
 #endif
-	sem->count = LDSEM_UNLOCKED;
+	atomic_long_set(&sem->count, LDSEM_UNLOCKED);
 	sem->wait_readers = 0;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->read_wait);
@@ -122,16 +100,17 @@ static void __ldsem_wake_readers(struct ld_semaphore *sem)
 	struct task_struct *tsk;
 	long adjust, count;
 
-	/* Try to grant read locks to all readers on the read wait list.
+	/*
+	 * Try to grant read locks to all readers on the read wait list.
 	 * Note the 'active part' of the count is incremented by
 	 * the number of readers before waking any processes up.
 	 */
 	adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS);
-	count = ldsem_atomic_update(adjust, sem);
+	count = atomic_long_add_return(adjust, &sem->count);
 	do {
 		if (count > 0)
 			break;
-		if (ldsem_cmpxchg(&count, count - adjust, sem))
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust))
 			return;
 	} while (1);
 
@@ -148,14 +127,15 @@ static void __ldsem_wake_readers(struct ld_semaphore *sem)
 
 static inline int writer_trylock(struct ld_semaphore *sem)
 {
-	/* only wake this writer if the active part of the count can be
+	/*
+	 * Only wake this writer if the active part of the count can be
 	 * transitioned from 0 -> 1
 	 */
-	long count = ldsem_atomic_update(LDSEM_ACTIVE_BIAS, sem);
+	long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count);
 	do {
 		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS)
 			return 1;
-		if (ldsem_cmpxchg(&count, count - LDSEM_ACTIVE_BIAS, sem))
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS))
 			return 0;
 	} while (1);
 }
@@ -205,12 +185,16 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout)
 	/* set up my own style of waitqueue */
 	raw_spin_lock_irq(&sem->wait_lock);
 
-	/* Try to reverse the lock attempt but if the count has changed
+	/*
+	 * Try to reverse the lock attempt but if the count has changed
 	 * so that reversing fails, check if there are are no waiters,
-	 * and early-out if not */
+	 * and early-out if not
+	 */
 	do {
-		if (ldsem_cmpxchg(&count, count + adjust, sem))
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) {
+			count += adjust;
 			break;
+		}
 		if (count > 0) {
 			raw_spin_unlock_irq(&sem->wait_lock);
 			return sem;
@@ -243,12 +227,14 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout)
 	__set_current_state(TASK_RUNNING);
 
 	if (!timeout) {
-		/* lock timed out but check if this task was just
+		/*
+		 * Lock timed out but check if this task was just
 		 * granted lock ownership - if so, pretend there
-		 * was no timeout; otherwise, cleanup lock wait */
+		 * was no timeout; otherwise, cleanup lock wait.
+		 */
 		raw_spin_lock_irq(&sem->wait_lock);
 		if (waiter.task) {
-			ldsem_atomic_update(-LDSEM_WAIT_BIAS, sem);
+			atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
 			list_del(&waiter.list);
 			raw_spin_unlock_irq(&sem->wait_lock);
 			put_task_struct(waiter.task);
@@ -273,11 +259,13 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout)
 	/* set up my own style of waitqueue */
 	raw_spin_lock_irq(&sem->wait_lock);
 
-	/* Try to reverse the lock attempt but if the count has changed
+	/*
+	 * Try to reverse the lock attempt but if the count has changed
 	 * so that reversing fails, check if the lock is now owned,
-	 * and early-out if so */
+	 * and early-out if so.
+	 */
 	do {
-		if (ldsem_cmpxchg(&count, count + adjust, sem))
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust))
 			break;
 		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) {
 			raw_spin_unlock_irq(&sem->wait_lock);
@@ -303,7 +291,7 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout)
 	}
 
 	if (!locked)
-		ldsem_atomic_update(-LDSEM_WAIT_BIAS, sem);
+		atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
 
@@ -324,7 +312,7 @@ static int __ldsem_down_read_nested(struct ld_semaphore *sem,
 
 	lockdep_acquire_read(sem, subclass, 0, _RET_IP_);
 
-	count = ldsem_atomic_update(LDSEM_READ_BIAS, sem);
+	count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count);
 	if (count <= 0) {
 		lock_stat(sem, contended);
 		if (!down_read_failed(sem, count, timeout)) {
@@ -343,7 +331,7 @@ static int __ldsem_down_write_nested(struct ld_semaphore *sem,
 
 	lockdep_acquire(sem, subclass, 0, _RET_IP_);
 
-	count = ldsem_atomic_update(LDSEM_WRITE_BIAS, sem);
+	count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count);
 	if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) {
 		lock_stat(sem, contended);
 		if (!down_write_failed(sem, count, timeout)) {
@@ -370,10 +358,10 @@ int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout)
  */
 int ldsem_down_read_trylock(struct ld_semaphore *sem)
 {
-	long count = sem->count;
+	long count = atomic_long_read(&sem->count);
 
 	while (count >= 0) {
-		if (ldsem_cmpxchg(&count, count + LDSEM_READ_BIAS, sem)) {
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) {
 			lockdep_acquire_read(sem, 0, 1, _RET_IP_);
 			lock_stat(sem, acquired);
 			return 1;
@@ -396,10 +384,10 @@ int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout)
  */
 int ldsem_down_write_trylock(struct ld_semaphore *sem)
 {
-	long count = sem->count;
+	long count = atomic_long_read(&sem->count);
 
 	while ((count & LDSEM_ACTIVE_MASK) == 0) {
-		if (ldsem_cmpxchg(&count, count + LDSEM_WRITE_BIAS, sem)) {
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) {
 			lockdep_acquire(sem, 0, 1, _RET_IP_);
 			lock_stat(sem, acquired);
 			return 1;
@@ -417,7 +405,7 @@ void ldsem_up_read(struct ld_semaphore *sem)
 
 	lockdep_release(sem, 1, _RET_IP_);
 
-	count = ldsem_atomic_update(-LDSEM_READ_BIAS, sem);
+	count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count);
 	if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0)
 		ldsem_wake(sem);
 }
@@ -431,7 +419,7 @@ void ldsem_up_write(struct ld_semaphore *sem)
 
 	lockdep_release(sem, 1, _RET_IP_);
 
-	count = ldsem_atomic_update(-LDSEM_WRITE_BIAS, sem);
+	count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count);
 	if (count < 0)
 		ldsem_wake(sem);
 }
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index 1ef64d4ad887..840894ca3fc0 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -119,13 +119,13 @@
 
 #include <linux/fs.h>
 #include <linux/wait.h>
-
+#include <linux/atomic.h>
 
 /*
  * the semaphore definition
  */
 struct ld_semaphore {
-	long			count;
+	atomic_long_t		count;
 	raw_spinlock_t		wait_lock;
 	unsigned int		wait_readers;
 	struct list_head	read_wait;
-- 
cgit v1.2.3


From d8ae7242718738ee1bf9bfdd632d2a4b150fdd26 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 26 Jun 2018 23:56:40 -0400
Subject: vt: preserve unicode values corresponding to screen characters

The vt code translates UTF-8 strings into glyph index values and stores
those glyph values directly in the screen buffer. Because there can only
be at most 512 glyphs, it is impossible to represent most unicode
characters, in which case a default glyph (often '?') is displayed
instead. The original unicode value is then lost.

This patch implements the basic screen buffer handling to preserve unicode
values alongside corresponding display glyphs.  It is not activated by
default, meaning that people not relying on that functionality won't get
the implied overhead.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Dave Mielke <Dave@mielke.cc>
Acked-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vt.c            | 220 ++++++++++++++++++++++++++++++++++++++---
 include/linux/console_struct.h |   2 +
 2 files changed, 211 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 1eb1a376a041..7b636638b36d 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -317,6 +317,171 @@ void schedule_console_callback(void)
 	schedule_work(&console_work);
 }
 
+/*
+ * Code to manage unicode-based screen buffers
+ */
+
+#ifdef NO_VC_UNI_SCREEN
+/* this disables and optimizes related code away at compile time */
+#define get_vc_uniscr(vc) NULL
+#else
+#define get_vc_uniscr(vc) vc->vc_uni_screen
+#endif
+
+typedef uint32_t char32_t;
+
+/*
+ * Our screen buffer is preceded by an array of line pointers so that
+ * scrolling only implies some pointer shuffling.
+ */
+struct uni_screen {
+	char32_t *lines[0];
+};
+
+static struct uni_screen *vc_uniscr_alloc(unsigned int cols, unsigned int rows)
+{
+	struct uni_screen *uniscr;
+	void *p;
+	unsigned int memsize, i;
+
+	/* allocate everything in one go */
+	memsize = cols * rows * sizeof(char32_t);
+	memsize += rows * sizeof(char32_t *);
+	p = kmalloc(memsize, GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	/* initial line pointers */
+	uniscr = p;
+	p = uniscr->lines + rows;
+	for (i = 0; i < rows; i++) {
+		uniscr->lines[i] = p;
+		p += cols * sizeof(char32_t);
+	}
+	return uniscr;
+}
+
+static void vc_uniscr_set(struct vc_data *vc, struct uni_screen *new_uniscr)
+{
+	kfree(vc->vc_uni_screen);
+	vc->vc_uni_screen = new_uniscr;
+}
+
+static void vc_uniscr_putc(struct vc_data *vc, char32_t uc)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr)
+		uniscr->lines[vc->vc_y][vc->vc_x] = uc;
+}
+
+static void vc_uniscr_insert(struct vc_data *vc, unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr) {
+		char32_t *ln = uniscr->lines[vc->vc_y];
+		unsigned int x = vc->vc_x, cols = vc->vc_cols;
+
+		memmove(&ln[x + nr], &ln[x], (cols - x - nr) * sizeof(*ln));
+		memset32(&ln[x], ' ', nr);
+	}
+}
+
+static void vc_uniscr_delete(struct vc_data *vc, unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr) {
+		char32_t *ln = uniscr->lines[vc->vc_y];
+		unsigned int x = vc->vc_x, cols = vc->vc_cols;
+
+		memcpy(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln));
+		memset32(&ln[cols - nr], ' ', nr);
+	}
+}
+
+static void vc_uniscr_clear_line(struct vc_data *vc, unsigned int x,
+				 unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr) {
+		char32_t *ln = uniscr->lines[vc->vc_y];
+
+		memset32(&ln[x], ' ', nr);
+	}
+}
+
+static void vc_uniscr_clear_lines(struct vc_data *vc, unsigned int y,
+				  unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr) {
+		unsigned int cols = vc->vc_cols;
+
+		while (nr--)
+			memset32(uniscr->lines[y++], ' ', cols);
+	}
+}
+
+static void vc_uniscr_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
+			     enum con_scroll dir, unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr) {
+		unsigned int s, d, rescue, clear;
+		char32_t *save[nr];
+
+		s = clear = t;
+		d = t + nr;
+		rescue = b - nr;
+		if (dir == SM_UP) {
+			swap(s, d);
+			swap(clear, rescue);
+		}
+		memcpy(save, uniscr->lines + rescue, nr * sizeof(*save));
+		memmove(uniscr->lines + d, uniscr->lines + s,
+			(b - t - nr) * sizeof(*uniscr->lines));
+		memcpy(uniscr->lines + clear, save, nr * sizeof(*save));
+		vc_uniscr_clear_lines(vc, clear, nr);
+	}
+}
+
+static void vc_uniscr_copy_area(struct uni_screen *dst,
+				unsigned int dst_cols,
+				unsigned int dst_rows,
+				struct uni_screen *src,
+				unsigned int src_cols,
+				unsigned int src_top_row,
+				unsigned int src_bot_row)
+{
+	unsigned int dst_row = 0;
+
+	if (!dst)
+		return;
+
+	while (src_top_row < src_bot_row) {
+		char32_t *src_line = src->lines[src_top_row];
+		char32_t *dst_line = dst->lines[dst_row];
+
+		memcpy(dst_line, src_line, src_cols * sizeof(char32_t));
+		if (dst_cols - src_cols)
+			memset32(dst_line + src_cols, ' ', dst_cols - src_cols);
+		src_top_row++;
+		dst_row++;
+	}
+	while (dst_row < dst_rows) {
+		char32_t *dst_line = dst->lines[dst_row];
+
+		memset32(dst_line, ' ', dst_cols);
+		dst_row++;
+	}
+}
+
+
 static void con_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
 		enum con_scroll dir, unsigned int nr)
 {
@@ -326,6 +491,7 @@ static void con_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
 		nr = b - t - 1;
 	if (b > vc->vc_rows || t >= b || nr < 1)
 		return;
+	vc_uniscr_scroll(vc, t, b, dir, nr);
 	if (con_is_visible(vc) && vc->vc_sw->con_scroll(vc, t, b, dir, nr))
 		return;
 
@@ -533,6 +699,7 @@ static void insert_char(struct vc_data *vc, unsigned int nr)
 {
 	unsigned short *p = (unsigned short *) vc->vc_pos;
 
+	vc_uniscr_insert(vc, nr);
 	scr_memmovew(p + nr, p, (vc->vc_cols - vc->vc_x - nr) * 2);
 	scr_memsetw(p, vc->vc_video_erase_char, nr * 2);
 	vc->vc_need_wrap = 0;
@@ -545,6 +712,7 @@ static void delete_char(struct vc_data *vc, unsigned int nr)
 {
 	unsigned short *p = (unsigned short *) vc->vc_pos;
 
+	vc_uniscr_delete(vc, nr);
 	scr_memcpyw(p, p + nr, (vc->vc_cols - vc->vc_x - nr) * 2);
 	scr_memsetw(p + vc->vc_cols - vc->vc_x - nr, vc->vc_video_erase_char,
 			nr * 2);
@@ -845,10 +1013,11 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
 {
 	unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0;
 	unsigned long end;
-	unsigned int old_rows, old_row_size;
+	unsigned int old_rows, old_row_size, first_copied_row;
 	unsigned int new_cols, new_rows, new_row_size, new_screen_size;
 	unsigned int user;
 	unsigned short *newscreen;
+	struct uni_screen *new_uniscr = NULL;
 
 	WARN_CONSOLE_UNLOCKED();
 
@@ -875,6 +1044,14 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
 	if (!newscreen)
 		return -ENOMEM;
 
+	if (get_vc_uniscr(vc)) {
+		new_uniscr = vc_uniscr_alloc(new_cols, new_rows);
+		if (!new_uniscr) {
+			kfree(newscreen);
+			return -ENOMEM;
+		}
+	}
+
 	if (vc == sel_cons)
 		clear_selection();
 
@@ -884,6 +1061,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
 	err = resize_screen(vc, new_cols, new_rows, user);
 	if (err) {
 		kfree(newscreen);
+		kfree(new_uniscr);
 		return err;
 	}
 
@@ -904,18 +1082,24 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
 			 * Cursor near the bottom, copy contents from the
 			 * bottom of buffer
 			 */
-			old_origin += (old_rows - new_rows) * old_row_size;
+			first_copied_row = (old_rows - new_rows);
 		} else {
 			/*
 			 * Cursor is in no man's land, copy 1/2 screenful
 			 * from the top and bottom of cursor position
 			 */
-			old_origin += (vc->vc_y - new_rows/2) * old_row_size;
+			first_copied_row = (vc->vc_y - new_rows/2);
 		}
-	}
-
+		old_origin += first_copied_row * old_row_size;
+	} else
+		first_copied_row = 0;
 	end = old_origin + old_row_size * min(old_rows, new_rows);
 
+	vc_uniscr_copy_area(new_uniscr, new_cols, new_rows,
+			    get_vc_uniscr(vc), rlth/2, first_copied_row,
+			    min(old_rows, new_rows));
+	vc_uniscr_set(vc, new_uniscr);
+
 	update_attr(vc);
 
 	while (old_origin < end) {
@@ -1013,6 +1197,7 @@ struct vc_data *vc_deallocate(unsigned int currcons)
 		vc->vc_sw->con_deinit(vc);
 		put_pid(vc->vt_pid);
 		module_put(vc->vc_sw->owner);
+		vc_uniscr_set(vc, NULL);
 		kfree(vc->vc_screenbuf);
 		vc_cons[currcons].d = NULL;
 	}
@@ -1171,15 +1356,22 @@ static void csi_J(struct vc_data *vc, int vpar)
 
 	switch (vpar) {
 		case 0:	/* erase from cursor to end of display */
+			vc_uniscr_clear_line(vc, vc->vc_x,
+					     vc->vc_cols - vc->vc_x);
+			vc_uniscr_clear_lines(vc, vc->vc_y + 1,
+					      vc->vc_rows - vc->vc_y - 1);
 			count = (vc->vc_scr_end - vc->vc_pos) >> 1;
 			start = (unsigned short *)vc->vc_pos;
 			break;
 		case 1:	/* erase from start to cursor */
+			vc_uniscr_clear_line(vc, 0, vc->vc_x + 1);
+			vc_uniscr_clear_lines(vc, 0, vc->vc_y);
 			count = ((vc->vc_pos - vc->vc_origin) >> 1) + 1;
 			start = (unsigned short *)vc->vc_origin;
 			break;
 		case 2: /* erase whole display */
 		case 3: /* (and scrollback buffer later) */
+			vc_uniscr_clear_lines(vc, 0, vc->vc_rows);
 			count = vc->vc_cols * vc->vc_rows;
 			start = (unsigned short *)vc->vc_origin;
 			break;
@@ -1200,25 +1392,27 @@ static void csi_J(struct vc_data *vc, int vpar)
 static void csi_K(struct vc_data *vc, int vpar)
 {
 	unsigned int count;
-	unsigned short * start;
+	unsigned short *start = (unsigned short *)vc->vc_pos;
+	int offset;
 
 	switch (vpar) {
 		case 0:	/* erase from cursor to end of line */
+			offset = 0;
 			count = vc->vc_cols - vc->vc_x;
-			start = (unsigned short *)vc->vc_pos;
 			break;
 		case 1:	/* erase from start of line to cursor */
-			start = (unsigned short *)(vc->vc_pos - (vc->vc_x << 1));
+			offset = -vc->vc_x;
 			count = vc->vc_x + 1;
 			break;
 		case 2: /* erase whole line */
-			start = (unsigned short *)(vc->vc_pos - (vc->vc_x << 1));
+			offset = -vc->vc_x;
 			count = vc->vc_cols;
 			break;
 		default:
 			return;
 	}
-	scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
+	vc_uniscr_clear_line(vc, vc->vc_x + offset, count);
+	scr_memsetw(start + offset, vc->vc_video_erase_char, 2 * count);
 	vc->vc_need_wrap = 0;
 	if (con_should_update(vc))
 		do_update_region(vc, (unsigned long) start, count);
@@ -1232,6 +1426,7 @@ static void csi_X(struct vc_data *vc, int vpar) /* erase the following vpar posi
 		vpar++;
 	count = (vpar > vc->vc_cols - vc->vc_x) ? (vc->vc_cols - vc->vc_x) : vpar;
 
+	vc_uniscr_clear_line(vc, vc->vc_x, count);
 	scr_memsetw((unsigned short *)vc->vc_pos, vc->vc_video_erase_char, 2 * count);
 	if (con_should_update(vc))
 		vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1, count);
@@ -2188,7 +2383,7 @@ static void con_flush(struct vc_data *vc, unsigned long draw_from,
 /* acquires console_lock */
 static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count)
 {
-	int c, tc, ok, n = 0, draw_x = -1;
+	int c, next_c, tc, ok, n = 0, draw_x = -1;
 	unsigned int currcons;
 	unsigned long draw_from = 0, draw_to = 0;
 	struct vc_data *vc;
@@ -2382,6 +2577,7 @@ rescan_last_byte:
 				con_flush(vc, draw_from, draw_to, &draw_x);
 			}
 
+			next_c = c;
 			while (1) {
 				if (vc->vc_need_wrap || vc->vc_decim)
 					con_flush(vc, draw_from, draw_to,
@@ -2392,6 +2588,7 @@ rescan_last_byte:
 				}
 				if (vc->vc_decim)
 					insert_char(vc, 1);
+				vc_uniscr_putc(vc, next_c);
 				scr_writew(himask ?
 					     ((vc_attr << 8) & ~himask) + ((tc & 0x100) ? himask : 0) + (tc & 0xff) :
 					     (vc_attr << 8) + tc,
@@ -2412,6 +2609,7 @@ rescan_last_byte:
 
 				tc = conv_uni_to_pc(vc, ' '); /* A space is printed in the second column */
 				if (tc < 0) tc = ' ';
+				next_c = ' ';
 			}
 			notify_write(vc, c);
 
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index c0ec478ea5bf..2c8d3239899b 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -19,6 +19,7 @@
 
 struct vt_struct;
 struct uni_pagedir;
+struct uni_screen;
 
 #define NPAR 16
 
@@ -140,6 +141,7 @@ struct vc_data {
 	struct vc_data **vc_display_fg;		/* [!] Ptr to var holding fg console for this display */
 	struct uni_pagedir *vc_uni_pagedir;
 	struct uni_pagedir **vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */
+	struct uni_screen *vc_uni_screen;	/* unicode screen content */
 	bool vc_panic_force_write; /* when oops/panic this VC can accept forced output/blanking */
 	/* additional information is in vt_kern.h */
 };
-- 
cgit v1.2.3


From d21b0be246bf3bbf569e6e239f56abb529c7154e Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 26 Jun 2018 23:56:41 -0400
Subject: vt: introduce unicode mode for /dev/vcs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that the core vt code knows how to preserve unicode values for each
displayed character, it is then possible to let user space access it via
/dev/vcs*.

Unicode characters are presented as 32 bit values in native endianity
via the /dev/vcsu* devices, mimicking the simple /dev/vcs* devices.
Unicode with attributes (similarly to /dev/vcsa*) is not supported at
the moment.

Data is available only as long as the console is in UTF-8 mode. ENODATA
is returned otherwise.

This was tested with the latest development version (to become
version 5.7) of BRLTTY. Amongst other things, this allows ⠋⠕⠗ ⠞⠓⠊⠎
⠃⠗⠁⠊⠇⠇⠑⠀⠞⠑⠭⠞⠀to appear directly on braille displays regardless of the
console font being used.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Dave Mielke <Dave@mielke.cc>
Acked-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vc_screen.c | 89 ++++++++++++++++++++++++++++++++++++++--------
 drivers/tty/vt/vt.c        | 61 +++++++++++++++++++++++++++++++
 include/linux/selection.h  |  5 +++
 3 files changed, 141 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
index e4a66e1fd05f..9c44252e52a3 100644
--- a/drivers/tty/vt/vc_screen.c
+++ b/drivers/tty/vt/vc_screen.c
@@ -10,6 +10,12 @@
  *	Attribute/character pair is in native endianity.
  *            [minor: N+128]
  *
+ * /dev/vcsuN: similar to /dev/vcsaN but using 4-byte unicode values
+ *	instead of 1-byte screen glyph values.
+ *            [minor: N+64]
+ *
+ * /dev/vcsuaN: same idea as /dev/vcsaN for unicode (not yet implemented).
+ *
  * This replaces screendump and part of selection, so that the system
  * administrator can control access using file system permissions.
  *
@@ -51,6 +57,26 @@
 
 #define CON_BUF_SIZE (CONFIG_BASE_SMALL ? 256 : PAGE_SIZE)
 
+/*
+ * Our minor space:
+ *
+ *   0 ... 63	glyph mode without attributes
+ *  64 ... 127	unicode mode without attributes
+ * 128 ... 191	glyph mode with attributes
+ * 192 ... 255	unused (reserved for unicode with attributes)
+ *
+ * This relies on MAX_NR_CONSOLES being  <= 63, meaning 63 actual consoles
+ * with minors 0, 64, 128 and 192 being proxies for the foreground console.
+ */
+#if MAX_NR_CONSOLES > 63
+#warning "/dev/vcs* devices may not accommodate more than 63 consoles"
+#endif
+
+#define console(inode)		(iminor(inode) & 63)
+#define use_unicode(inode)	(iminor(inode) & 64)
+#define use_attributes(inode)	(iminor(inode) & 128)
+
+
 struct vcs_poll_data {
 	struct notifier_block notifier;
 	unsigned int cons_num;
@@ -102,7 +128,7 @@ vcs_poll_data_get(struct file *file)
 	poll = kzalloc(sizeof(*poll), GFP_KERNEL);
 	if (!poll)
 		return NULL;
-	poll->cons_num = iminor(file_inode(file)) & 127;
+	poll->cons_num = console(file_inode(file));
 	init_waitqueue_head(&poll->waitq);
 	poll->notifier.notifier_call = vcs_notifier;
 	if (register_vt_notifier(&poll->notifier) != 0) {
@@ -140,7 +166,7 @@ vcs_poll_data_get(struct file *file)
 static struct vc_data*
 vcs_vc(struct inode *inode, int *viewed)
 {
-	unsigned int currcons = iminor(inode) & 127;
+	unsigned int currcons = console(inode);
 
 	WARN_CONSOLE_UNLOCKED();
 
@@ -164,7 +190,6 @@ static int
 vcs_size(struct inode *inode)
 {
 	int size;
-	int minor = iminor(inode);
 	struct vc_data *vc;
 
 	WARN_CONSOLE_UNLOCKED();
@@ -175,8 +200,12 @@ vcs_size(struct inode *inode)
 
 	size = vc->vc_rows * vc->vc_cols;
 
-	if (minor & 128)
+	if (use_attributes(inode)) {
+		if (use_unicode(inode))
+			return -EOPNOTSUPP;
 		size = 2*size + HEADER_SIZE;
+	} else if (use_unicode(inode))
+		size *= 4;
 	return size;
 }
 
@@ -197,12 +226,10 @@ static ssize_t
 vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct inode *inode = file_inode(file);
-	unsigned int currcons = iminor(inode);
 	struct vc_data *vc;
 	struct vcs_poll_data *poll;
-	long pos;
-	long attr, read;
-	int col, maxcol, viewed;
+	long pos, read;
+	int attr, uni_mode, row, col, maxcol, viewed;
 	unsigned short *org = NULL;
 	ssize_t ret;
 	char *con_buf;
@@ -218,7 +245,8 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	 */
 	console_lock();
 
-	attr = (currcons & 128);
+	uni_mode = use_unicode(inode);
+	attr = use_attributes(inode);
 	ret = -ENXIO;
 	vc = vcs_vc(inode, &viewed);
 	if (!vc)
@@ -227,6 +255,10 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	ret = -EINVAL;
 	if (pos < 0)
 		goto unlock_out;
+	/* we enforce 32-bit alignment for pos and count in unicode mode */
+	if (uni_mode && (pos | count) & 3)
+		goto unlock_out;
+
 	poll = file->private_data;
 	if (count && poll)
 		poll->seen_last_update = true;
@@ -266,7 +298,27 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 		con_buf_start = con_buf0 = con_buf;
 		orig_count = this_round;
 		maxcol = vc->vc_cols;
-		if (!attr) {
+		if (uni_mode) {
+			unsigned int nr;
+
+			ret = vc_uniscr_check(vc);
+			if (ret)
+				break;
+			p /= 4;
+			row = p / vc->vc_cols;
+			col = p % maxcol;
+			nr = maxcol - col;
+			do {
+				if (nr > this_round/4)
+					nr = this_round/4;
+				vc_uniscr_copy_line(vc, con_buf0, row, col, nr);
+				con_buf0 += nr * 4;
+				this_round -= nr * 4;
+				row++;
+				col = 0;
+				nr = maxcol;
+			} while (this_round);
+		} else if (!attr) {
 			org = screen_pos(vc, p, viewed);
 			col = p % maxcol;
 			p += maxcol - col;
@@ -375,7 +427,6 @@ static ssize_t
 vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
 	struct inode *inode = file_inode(file);
-	unsigned int currcons = iminor(inode);
 	struct vc_data *vc;
 	long pos;
 	long attr, size, written;
@@ -396,7 +447,7 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 	 */
 	console_lock();
 
-	attr = (currcons & 128);
+	attr = use_attributes(inode);
 	ret = -ENXIO;
 	vc = vcs_vc(inode, &viewed);
 	if (!vc)
@@ -593,9 +644,15 @@ vcs_fasync(int fd, struct file *file, int on)
 static int
 vcs_open(struct inode *inode, struct file *filp)
 {
-	unsigned int currcons = iminor(inode) & 127;
+	unsigned int currcons = console(inode);
+	bool attr = use_attributes(inode);
+	bool uni_mode = use_unicode(inode);
 	int ret = 0;
-	
+
+	/* we currently don't support attributes in unicode mode */
+	if (attr && uni_mode)
+		return -EOPNOTSUPP;
+
 	console_lock();
 	if(currcons && !vc_cons_allocated(currcons-1))
 		ret = -ENXIO;
@@ -628,6 +685,8 @@ void vcs_make_sysfs(int index)
 {
 	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, index + 1), NULL,
 		      "vcs%u", index + 1);
+	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, index + 65), NULL,
+		      "vcsu%u", index + 1);
 	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, index + 129), NULL,
 		      "vcsa%u", index + 1);
 }
@@ -635,6 +694,7 @@ void vcs_make_sysfs(int index)
 void vcs_remove_sysfs(int index)
 {
 	device_destroy(vc_class, MKDEV(VCS_MAJOR, index + 1));
+	device_destroy(vc_class, MKDEV(VCS_MAJOR, index + 65));
 	device_destroy(vc_class, MKDEV(VCS_MAJOR, index + 129));
 }
 
@@ -647,6 +707,7 @@ int __init vcs_init(void)
 	vc_class = class_create(THIS_MODULE, "vc");
 
 	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, 0), NULL, "vcs");
+	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, 64), NULL, "vcsu");
 	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, 128), NULL, "vcsa");
 	for (i = 0; i < MIN_NR_CONSOLES; i++)
 		vcs_make_sysfs(i);
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 7b636638b36d..062ce6be7957 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -481,6 +481,67 @@ static void vc_uniscr_copy_area(struct uni_screen *dst,
 	}
 }
 
+/*
+ * Called from vcs_read() to make sure unicode screen retrieval is possible.
+ * This will initialize the unicode screen buffer if not already done.
+ * This returns 0 if OK, or a negative error code otherwise.
+ * In particular, -ENODATA is returned if the console is not in UTF-8 mode.
+ */
+int vc_uniscr_check(struct vc_data *vc)
+{
+	struct uni_screen *uniscr;
+	unsigned short *p;
+	int x, y, mask;
+
+	if (__is_defined(NO_VC_UNI_SCREEN))
+		return -EOPNOTSUPP;
+
+	WARN_CONSOLE_UNLOCKED();
+
+	if (!vc->vc_utf)
+		return -ENODATA;
+
+	if (vc->vc_uni_screen)
+		return 0;
+
+	uniscr = vc_uniscr_alloc(vc->vc_cols, vc->vc_rows);
+	if (!uniscr)
+		return -ENOMEM;
+
+	/*
+	 * Let's populate it initially with (imperfect) reverse translation.
+	 * This is the next best thing we can do short of having it enabled
+	 * from the start even when no users rely on this functionality. True
+	 * unicode content will be available after a complete screen refresh.
+	 */
+	p = (unsigned short *)vc->vc_origin;
+	mask = vc->vc_hi_font_mask | 0xff;
+	for (y = 0; y < vc->vc_rows; y++) {
+		char32_t *line = uniscr->lines[y];
+		for (x = 0; x < vc->vc_cols; x++) {
+			u16 glyph = scr_readw(p++) & mask;
+			line[x] = inverse_translate(vc, glyph, true);
+		}
+	}
+
+	vc->vc_uni_screen = uniscr;
+	return 0;
+}
+
+/*
+ * Called from vcs_read() to get the unicode data from the screen.
+ * This must be preceded by a successful call to vc_uniscr_check() once
+ * the console lock has been taken.
+ */
+void vc_uniscr_copy_line(struct vc_data *vc, void *dest,
+			 unsigned int row, unsigned int col, unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	BUG_ON(!uniscr);
+	memcpy(dest, &uniscr->lines[row][col], nr * sizeof(char32_t));
+}
+
 
 static void con_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
 		enum con_scroll dir, unsigned int nr)
diff --git a/include/linux/selection.h b/include/linux/selection.h
index 5b278ce99d8d..2b34df9f1e26 100644
--- a/include/linux/selection.h
+++ b/include/linux/selection.h
@@ -42,4 +42,9 @@ extern u16 vcs_scr_readw(struct vc_data *vc, const u16 *org);
 extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org);
 extern void vcs_scr_updated(struct vc_data *vc);
 
+extern int vc_uniscr_check(struct vc_data *vc);
+extern void vc_uniscr_copy_line(struct vc_data *vc, void *dest,
+				unsigned int row, unsigned int col,
+				unsigned int nr);
+
 #endif
-- 
cgit v1.2.3


From 708d0bff9121506db08adb73845a3c70312fadf3 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 26 Jun 2018 23:56:42 -0400
Subject: vt: unicode fallback for scrollback

There is currently no provision for scrollback content in the core code,
leaving that to backend video drivers where this can be highly optimized.
There is currently no common method for those drivers to tell the core
what part of the scrollback is actually displayed and what size the
scrollback buffer is either. Because of that, the unicode screen buffer
has no provision for any scrollback.

At least we can provide backtranslated glyph values when the scrollback
is active which should be plenty good enough for now.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Dave Mielke <Dave@mielke.cc>
Acked-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vc_screen.c |  3 ++-
 drivers/tty/vt/vt.c        | 31 +++++++++++++++++++++++++++++--
 include/linux/selection.h  |  2 +-
 3 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
index 9c44252e52a3..2384ea85ffaf 100644
--- a/drivers/tty/vt/vc_screen.c
+++ b/drivers/tty/vt/vc_screen.c
@@ -311,7 +311,8 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 			do {
 				if (nr > this_round/4)
 					nr = this_round/4;
-				vc_uniscr_copy_line(vc, con_buf0, row, col, nr);
+				vc_uniscr_copy_line(vc, con_buf0, viewed,
+						    row, col, nr);
 				con_buf0 += nr * 4;
 				this_round -= nr * 4;
 				row++;
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 062ce6be7957..2d14bb195d98 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -533,13 +533,40 @@ int vc_uniscr_check(struct vc_data *vc)
  * This must be preceded by a successful call to vc_uniscr_check() once
  * the console lock has been taken.
  */
-void vc_uniscr_copy_line(struct vc_data *vc, void *dest,
+void vc_uniscr_copy_line(struct vc_data *vc, void *dest, int viewed,
 			 unsigned int row, unsigned int col, unsigned int nr)
 {
 	struct uni_screen *uniscr = get_vc_uniscr(vc);
+	int offset = row * vc->vc_size_row + col * 2;
+	unsigned long pos;
 
 	BUG_ON(!uniscr);
-	memcpy(dest, &uniscr->lines[row][col], nr * sizeof(char32_t));
+
+	pos = (unsigned long)screenpos(vc, offset, viewed);
+	if (pos >= vc->vc_origin && pos < vc->vc_scr_end) {
+		/*
+		 * Desired position falls in the main screen buffer.
+		 * However the actual row/col might be different if
+		 * scrollback is active.
+		 */
+		row = (pos - vc->vc_origin) / vc->vc_size_row;
+		col = ((pos - vc->vc_origin) % vc->vc_size_row) / 2;
+		memcpy(dest, &uniscr->lines[row][col], nr * sizeof(char32_t));
+	} else {
+		/*
+		 * Scrollback is active. For now let's simply backtranslate
+		 * the screen glyphs until the unicode screen buffer does
+		 * synchronize with console display drivers for a scrollback
+		 * buffer of its own.
+		 */
+		u16 *p = (u16 *)pos;
+		int mask = vc->vc_hi_font_mask | 0xff;
+		char32_t *uni_buf = dest;
+		while (nr--) {
+			u16 glyph = scr_readw(p++) & mask;
+			*uni_buf++ = inverse_translate(vc, glyph, true);
+		}
+	}
 }
 
 
diff --git a/include/linux/selection.h b/include/linux/selection.h
index 2b34df9f1e26..067d2e99c79f 100644
--- a/include/linux/selection.h
+++ b/include/linux/selection.h
@@ -43,7 +43,7 @@ extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org);
 extern void vcs_scr_updated(struct vc_data *vc);
 
 extern int vc_uniscr_check(struct vc_data *vc);
-extern void vc_uniscr_copy_line(struct vc_data *vc, void *dest,
+extern void vc_uniscr_copy_line(struct vc_data *vc, void *dest, int viewed,
 				unsigned int row, unsigned int col,
 				unsigned int nr);
 
-- 
cgit v1.2.3


From 4b09791ba059cc5a5ec7d69049f5d05da65b6418 Mon Sep 17 00:00:00 2001
From: Ondrej Mosnáček <omosnace@redhat.com>
Date: Tue, 26 Jun 2018 13:04:42 +0200
Subject: cred: conditionally declare groups-related functions

The groups-related functions declared in include/linux/cred.h are
defined in kernel/groups.c, which is compiled only when
CONFIG_MULTIUSER=y. Move all these function declarations under #ifdef
CONFIG_MULTIUSER to help avoid accidental usage in contexts where
CONFIG_MULTIUSER might be disabled.

This patch also adds a fallback for groups_search(). Currently this
function is only called from kernel/groups.c itself and
security/keys/permissions.c, where the call is (by coincidence)
optimized away in case CONFIG_MULTIUSER=n. However, the audit subsystem
(which does not depend on CONFIG_MULTIUSER) calls this function in
-next, so the fallback will be needed to avoid compilation errors or
ugly workarounds.

See also:
https://lkml.org/lkml/2018/6/20/670
https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git/commit/?h=next&id=af85d1772e31fed34165a1b3decef340cf4080c0

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/cred.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 631286535d0f..7eed6101c791 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -65,6 +65,12 @@ extern void groups_free(struct group_info *);
 
 extern int in_group_p(kgid_t);
 extern int in_egroup_p(kgid_t);
+extern int groups_search(const struct group_info *, kgid_t);
+
+extern int set_current_groups(struct group_info *);
+extern void set_groups(struct cred *, struct group_info *);
+extern bool may_setgroups(void);
+extern void groups_sort(struct group_info *);
 #else
 static inline void groups_free(struct group_info *group_info)
 {
@@ -78,12 +84,11 @@ static inline int in_egroup_p(kgid_t grp)
 {
         return 1;
 }
+static inline int groups_search(const struct group_info *group_info, kgid_t grp)
+{
+	return 1;
+}
 #endif
-extern int set_current_groups(struct group_info *);
-extern void set_groups(struct cred *, struct group_info *);
-extern int groups_search(const struct group_info *, kgid_t);
-extern bool may_setgroups(void);
-extern void groups_sort(struct group_info *);
 
 /*
  * The security context of a task
-- 
cgit v1.2.3


From 0894aa28aece680a81e28906e2f328084a3d55c3 Mon Sep 17 00:00:00 2001
From: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
Date: Wed, 13 Jun 2018 13:04:48 +0530
Subject: dmaengine: xilinx_dma: Enable VDMA S2MM vertical flip support

Vertical flip state is exported in xilinx_vdma_config and depending
on IP configuration(c_enable_vert_flip) vertical flip state is
programmed in hardware.

Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Acked-by: Kedareswara rao Appana <appanad@xilinx.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dma.c | 22 ++++++++++++++++++++++
 include/linux/dma/xilinx_dma.h  |  2 ++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 27b523530c4a..c12442312595 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -115,6 +115,9 @@
 #define XILINX_VDMA_REG_START_ADDRESS(n)	(0x000c + 4 * (n))
 #define XILINX_VDMA_REG_START_ADDRESS_64(n)	(0x000c + 8 * (n))
 
+#define XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP	0x00ec
+#define XILINX_VDMA_ENABLE_VERTICAL_FLIP	BIT(0)
+
 /* HW specific definitions */
 #define XILINX_DMA_MAX_CHANS_PER_DEVICE	0x20
 
@@ -340,6 +343,7 @@ struct xilinx_dma_tx_descriptor {
  * @start_transfer: Differentiate b/w DMA IP's transfer
  * @stop_transfer: Differentiate b/w DMA IP's quiesce
  * @tdest: TDEST value for mcdma
+ * @has_vflip: S2MM vertical flip
  */
 struct xilinx_dma_chan {
 	struct xilinx_dma_device *xdev;
@@ -376,6 +380,7 @@ struct xilinx_dma_chan {
 	void (*start_transfer)(struct xilinx_dma_chan *chan);
 	int (*stop_transfer)(struct xilinx_dma_chan *chan);
 	u16 tdest;
+	bool has_vflip;
 };
 
 /**
@@ -1092,6 +1097,14 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 				desc->async_tx.phys);
 
 	/* Configure the hardware using info in the config structure */
+	if (chan->has_vflip) {
+		reg = dma_read(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP);
+		reg &= ~XILINX_VDMA_ENABLE_VERTICAL_FLIP;
+		reg |= config->vflip_en;
+		dma_write(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP,
+			  reg);
+	}
+
 	reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
 
 	if (config->frm_cnt_en)
@@ -2105,6 +2118,8 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
 	}
 
 	chan->config.frm_cnt_en = cfg->frm_cnt_en;
+	chan->config.vflip_en = cfg->vflip_en;
+
 	if (cfg->park)
 		chan->config.park_frm = cfg->park_frm;
 	else
@@ -2428,6 +2443,13 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 		chan->direction = DMA_DEV_TO_MEM;
 		chan->id = chan_id;
 		chan->tdest = chan_id - xdev->nr_channels;
+		chan->has_vflip = of_property_read_bool(node,
+					"xlnx,enable-vert-flip");
+		if (chan->has_vflip) {
+			chan->config.vflip_en = dma_read(chan,
+				XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP) &
+				XILINX_VDMA_ENABLE_VERTICAL_FLIP;
+		}
 
 		chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
 		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h
index 34b98f276ed0..5b6e61e4b3aa 100644
--- a/include/linux/dma/xilinx_dma.h
+++ b/include/linux/dma/xilinx_dma.h
@@ -27,6 +27,7 @@
  * @delay: Delay counter
  * @reset: Reset Channel
  * @ext_fsync: External Frame Sync source
+ * @vflip_en:  Vertical Flip enable
  */
 struct xilinx_vdma_config {
 	int frm_dly;
@@ -39,6 +40,7 @@ struct xilinx_vdma_config {
 	int delay;
 	int reset;
 	int ext_fsync;
+	bool vflip_en;
 };
 
 int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
-- 
cgit v1.2.3


From 85782e037f8aba8922dadb24a1523ca0b82ab8bc Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 28 Jun 2018 23:34:59 +0200
Subject: bpf: undo prog rejection on read-only lock failure

Partially undo commit 9facc336876f ("bpf: reject any prog that failed
read-only lock") since it caused a regression, that is, syzkaller was
able to manage to cause a panic via fault injection deep in set_memory_ro()
path by letting an allocation fail: In x86's __change_page_attr_set_clr()
it was able to change the attributes of the primary mapping but not in
the alias mapping via cpa_process_alias(), so the second, inner call
to the __change_page_attr() via __change_page_attr_set_clr() had to split
a larger page and failed in the alloc_pages() with the artifically triggered
allocation error which is then propagated down to the call site.

Thus, for set_memory_ro() this means that it returned with an error, but
from debugging a probe_kernel_write() revealed EFAULT on that memory since
the primary mapping succeeded to get changed. Therefore the subsequent
hdr->locked = 0 reset triggered the panic as it was performed on read-only
memory, so call-site assumptions were infact wrong to assume that it would
either succeed /or/ not succeed at all since there's no such rollback in
set_memory_*() calls from partial change of mappings, in other words, we're
left in a state that is "half done". A later undo via set_memory_rw() is
succeeding though due to matching permissions on that part (aka due to the
try_preserve_large_page() succeeding). While reproducing locally with
explicitly triggering this error, the initial splitting only happens on
rare occasions and in real world it would additionally need oom conditions,
but that said, it could partially fail. Therefore, it is definitely wrong
to bail out on set_memory_ro() error and reject the program with the
set_memory_*() semantics we have today. Shouldn't have gone the extra mile
since no other user in tree today infact checks for any set_memory_*()
errors, e.g. neither module_enable_ro() / module_disable_ro() for module
RO/NX handling which is mostly default these days nor kprobes core with
alloc_insn_page() / free_insn_page() as examples that could be invoked long
after bootup and original 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit
against spraying attacks") did neither when it got first introduced to BPF
so "improving" with bailing out was clearly not right when set_memory_*()
cannot handle it today.

Kees suggested that if set_memory_*() can fail, we should annotate it with
__must_check, and all callers need to deal with it gracefully given those
set_memory_*() markings aren't "advisory", but they're expected to actually
do what they say. This might be an option worth to move forward in future
but would at the same time require that set_memory_*() calls from supporting
archs are guaranteed to be "atomic" in that they provide rollback if part
of the range fails, once that happened, the transition from RW -> RO could
be made more robust that way, while subsequent RO -> RW transition /must/
continue guaranteeing to always succeed the undo part.

Reported-by: syzbot+a4eb8c7766952a1ca872@syzkaller.appspotmail.com
Reported-by: syzbot+d866d1925855328eac3b@syzkaller.appspotmail.com
Fixes: 9facc336876f ("bpf: reject any prog that failed read-only lock")
Cc: Laura Abbott <labbott@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 56 ++++++++------------------------------------------
 kernel/bpf/core.c      | 30 +--------------------------
 2 files changed, 9 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 20f2659dd829..300baad62c88 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -470,9 +470,7 @@ struct sock_fprog_kern {
 };
 
 struct bpf_binary_header {
-	u16 pages;
-	u16 locked:1;
-
+	u32 pages;
 	/* Some arches need word alignment for their instructions */
 	u8 image[] __aligned(4);
 };
@@ -481,7 +479,7 @@ struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	u16			jited:1,	/* Is our filter JIT'ed? */
 				jit_requested:1,/* archs need to JIT the prog */
-				locked:1,	/* Program image locked? */
+				undo_set_mem:1,	/* Passed set_memory_ro() checkpoint */
 				gpl_compatible:1, /* Is filter GPL compatible? */
 				cb_access:1,	/* Is control block accessed? */
 				dst_needed:1,	/* Do we need dst entry? */
@@ -677,46 +675,24 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	fp->locked = 1;
-	if (set_memory_ro((unsigned long)fp, fp->pages))
-		fp->locked = 0;
-#endif
+	fp->undo_set_mem = 1;
+	set_memory_ro((unsigned long)fp, fp->pages);
 }
 
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	if (fp->locked) {
-		WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
-		/* In case set_memory_rw() fails, we want to be the first
-		 * to crash here instead of some random place later on.
-		 */
-		fp->locked = 0;
-	}
-#endif
+	if (fp->undo_set_mem)
+		set_memory_rw((unsigned long)fp, fp->pages);
 }
 
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	hdr->locked = 1;
-	if (set_memory_ro((unsigned long)hdr, hdr->pages))
-		hdr->locked = 0;
-#endif
+	set_memory_ro((unsigned long)hdr, hdr->pages);
 }
 
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	if (hdr->locked) {
-		WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
-		/* In case set_memory_rw() fails, we want to be the first
-		 * to crash here instead of some random place later on.
-		 */
-		hdr->locked = 0;
-	}
-#endif
+	set_memory_rw((unsigned long)hdr, hdr->pages);
 }
 
 static inline struct bpf_binary_header *
@@ -728,22 +704,6 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
 	return (void *)addr;
 }
 
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
-{
-	if (!fp->locked)
-		return -ENOLCK;
-	if (fp->jited) {
-		const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
-
-		if (!hdr->locked)
-			return -ENOLCK;
-	}
-
-	return 0;
-}
-#endif
-
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index a9e6c04d0f4a..1e5625d46414 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -598,8 +598,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	bpf_fill_ill_insns(hdr, size);
 
 	hdr->pages = size / PAGE_SIZE;
-	hdr->locked = 0;
-
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1450,22 +1448,6 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
 	return 0;
 }
 
-static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
-{
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	int i, err;
-
-	for (i = 0; i < fp->aux->func_cnt; i++) {
-		err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
-		if (err)
-			return err;
-	}
-
-	return bpf_prog_check_pages_ro_single(fp);
-#endif
-	return 0;
-}
-
 static void bpf_prog_select_func(struct bpf_prog *fp)
 {
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
@@ -1524,17 +1506,7 @@ finalize:
 	 * all eBPF JITs might immediately support all features.
 	 */
 	*err = bpf_check_tail_call(fp);
-	if (*err)
-		return fp;
-
-	/* Checkpoint: at this point onwards any cBPF -> eBPF or
-	 * native eBPF program is read-only. If we failed to change
-	 * the page attributes (e.g. allocation failure from
-	 * splitting large pages), then reject the whole program
-	 * in order to guarantee not ending up with any W+X pages
-	 * from BPF side in kernel.
-	 */
-	*err = bpf_prog_check_pages_ro_locked(fp);
+
 	return fp;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
-- 
cgit v1.2.3


From cfdfc14e7fb8ae77290e9d5afaeecc0a234a3846 Mon Sep 17 00:00:00 2001
From: Doug Meyer <dmeyer@gigaio.com>
Date: Wed, 23 May 2018 13:18:05 -0700
Subject: switchtec: Use generic PCI Vendor ID and Class Code

Move the Microsemi Switchtec PCI Vendor ID (same as
PCI_VENDOR_ID_PMC_Sierra) to pci_ids.h.   Also, replace Microsemi class
constants with the standard PCI definitions.

Signed-off-by: Doug Meyer <dmeyer@gigaio.com>
[bhelgaas: restore SPDX (I assume it was removed by mistake), remove
device ID definitions]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
---
 drivers/ntb/hw/mscc/ntb_hw_switchtec.c |  3 ++-
 drivers/pci/switch/switchtec.c         | 14 +++++++-------
 include/linux/pci_ids.h                |  1 +
 include/linux/switchtec.h              |  4 ----
 4 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
index f624ae27eabe..5ee5f40b4dfc 100644
--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -19,6 +19,7 @@
 #include <linux/kthread.h>
 #include <linux/interrupt.h>
 #include <linux/ntb.h>
+#include <linux/pci.h>
 
 MODULE_DESCRIPTION("Microsemi Switchtec(tm) NTB Driver");
 MODULE_VERSION("0.1");
@@ -1487,7 +1488,7 @@ static int switchtec_ntb_add(struct device *dev,
 
 	stdev->sndev = NULL;
 
-	if (stdev->pdev->class != MICROSEMI_NTB_CLASSCODE)
+	if (stdev->pdev->class != (PCI_CLASS_BRIDGE_OTHER << 8))
 		return -ENODEV;
 
 	sndev = kzalloc_node(sizeof(*sndev), GFP_KERNEL, dev_to_node(dev));
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 47cd0c037433..9940cc70f38b 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -641,7 +641,7 @@ static int ioctl_event_summary(struct switchtec_dev *stdev,
 
 	for (i = 0; i < SWITCHTEC_MAX_PFF_CSR; i++) {
 		reg = ioread16(&stdev->mmio_pff_csr[i].vendor_id);
-		if (reg != MICROSEMI_VENDOR_ID)
+		if (reg != PCI_VENDOR_ID_MICROSEMI)
 			break;
 
 		reg = ioread32(&stdev->mmio_pff_csr[i].pff_event_summary);
@@ -1203,7 +1203,7 @@ static void init_pff(struct switchtec_dev *stdev)
 
 	for (i = 0; i < SWITCHTEC_MAX_PFF_CSR; i++) {
 		reg = ioread16(&stdev->mmio_pff_csr[i].vendor_id);
-		if (reg != MICROSEMI_VENDOR_ID)
+		if (reg != PCI_VENDOR_ID_MICROSEMI)
 			break;
 	}
 
@@ -1267,7 +1267,7 @@ static int switchtec_pci_probe(struct pci_dev *pdev,
 	struct switchtec_dev *stdev;
 	int rc;
 
-	if (pdev->class == MICROSEMI_NTB_CLASSCODE)
+	if (pdev->class == (PCI_CLASS_BRIDGE_OTHER << 8))
 		request_module_nowait("ntb_hw_switchtec");
 
 	stdev = stdev_create(pdev);
@@ -1321,19 +1321,19 @@ static void switchtec_pci_remove(struct pci_dev *pdev)
 
 #define SWITCHTEC_PCI_DEVICE(device_id) \
 	{ \
-		.vendor     = MICROSEMI_VENDOR_ID, \
+		.vendor     = PCI_VENDOR_ID_MICROSEMI, \
 		.device     = device_id, \
 		.subvendor  = PCI_ANY_ID, \
 		.subdevice  = PCI_ANY_ID, \
-		.class      = MICROSEMI_MGMT_CLASSCODE, \
+		.class      = (PCI_CLASS_MEMORY_OTHER << 8), \
 		.class_mask = 0xFFFFFFFF, \
 	}, \
 	{ \
-		.vendor     = MICROSEMI_VENDOR_ID, \
+		.vendor     = PCI_VENDOR_ID_MICROSEMI, \
 		.device     = device_id, \
 		.subvendor  = PCI_ANY_ID, \
 		.subdevice  = PCI_ANY_ID, \
-		.class      = MICROSEMI_NTB_CLASSCODE, \
+		.class      = (PCI_CLASS_BRIDGE_OTHER << 8), \
 		.class_mask = 0xFFFFFFFF, \
 	}
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 29502238e510..80aec5b9a6c1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1668,6 +1668,7 @@
 #define PCI_DEVICE_ID_COMPEX_ENET100VG4	0x0112
 
 #define PCI_VENDOR_ID_PMC_Sierra	0x11f8
+#define PCI_VENDOR_ID_MICROSEMI		0x11f8
 
 #define PCI_VENDOR_ID_RP		0x11fe
 #define PCI_DEVICE_ID_RP32INTF		0x0001
diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h
index ec93e93371fa..ab400af6f0ce 100644
--- a/include/linux/switchtec.h
+++ b/include/linux/switchtec.h
@@ -19,10 +19,6 @@
 #include <linux/pci.h>
 #include <linux/cdev.h>
 
-#define MICROSEMI_VENDOR_ID         0x11f8
-#define MICROSEMI_NTB_CLASSCODE     0x068000
-#define MICROSEMI_MGMT_CLASSCODE    0x058000
-
 #define SWITCHTEC_MRPC_PAYLOAD_SIZE 1024
 #define SWITCHTEC_MAX_PFF_CSR 48
 
-- 
cgit v1.2.3


From 783e84961b1d7a75045383586b8c49e02b7704cd Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 29 Jun 2018 21:17:26 -0500
Subject: PCI: Make pci_get_rom_size() static

pci_get_rom_size() is called only from pci_map_rom(), so it can be static.
Make it static and remove the declaration from include/linux/pci.h.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/rom.c   | 3 ++-
 include/linux/pci.h | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index 3a33f5ce314a..137bf0cee897 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -80,7 +80,8 @@ EXPORT_SYMBOL_GPL(pci_disable_rom);
  * The PCI window size could be much larger than the
  * actual image size.
  */
-size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size)
+static size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom,
+			       size_t size)
 {
 	void __iomem *image;
 	int last_image;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..f40d3e05ccd1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1122,7 +1122,6 @@ int pci_enable_rom(struct pci_dev *pdev);
 void pci_disable_rom(struct pci_dev *pdev);
 void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size);
 void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom);
-size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size);
 void __iomem __must_check *pci_platform_rom(struct pci_dev *pdev, size_t *size);
 
 /* Power management related routines */
-- 
cgit v1.2.3


From 7ce3f912ae0a79e5d738a3ae1f158b281973e849 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sat, 30 Jun 2018 11:24:24 -0400
Subject: PCI: Enable PASID only if entire path supports End-End TLP prefixes

A PCIe endpoint carries the process address space identifier (PASID) in
the TLP prefix as part of the memory read/write transaction. The address
information in the TLP is relevant only for a given PASID context.

An IOMMU takes PASID value and the address information from the
TLP to look up the physical address in the system.

PASID is an End-End TLP Prefix (PCIe r4.0, sec 6.20).  Sec 2.2.10.2 says

  It is an error to receive a TLP with an End-End TLP Prefix by a
  Receiver that does not support End-End TLP Prefixes. A TLP in
  violation of this rule is handled as a Malformed TLP. This is a
  reported error associated with the Receiving Port (see Section 6.2).

Prevent error condition by proactively requiring End-End TLP prefix to be
supported on the entire data path between the endpoint and the root port
before enabling PASID.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/ats.c             |  3 +++
 drivers/pci/probe.c           | 24 ++++++++++++++++++++++++
 include/linux/pci.h           |  1 +
 include/uapi/linux/pci_regs.h |  1 +
 4 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 4923a2a8e14b..5b78f3b1b918 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -273,6 +273,9 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	if (WARN_ON(pdev->pasid_enabled))
 		return -EBUSY;
 
+	if (!pdev->eetlp_prefix_path)
+		return -EINVAL;
+
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac876e32de4b..4c35c2909d57 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2042,6 +2042,29 @@ static void pci_configure_ltr(struct pci_dev *dev)
 #endif
 }
 
+static void pci_configure_eetlp_prefix(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_PASID
+	struct pci_dev *bridge;
+	u32 cap;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
+	if (!(cap & PCI_EXP_DEVCAP2_EE_PREFIX))
+		return;
+
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
+		dev->eetlp_prefix_path = 1;
+	else {
+		bridge = pci_upstream_bridge(dev);
+		if (bridge && bridge->eetlp_prefix_path)
+			dev->eetlp_prefix_path = 1;
+	}
+#endif
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
 	struct hotplug_params hpp;
@@ -2051,6 +2074,7 @@ static void pci_configure_device(struct pci_dev *dev)
 	pci_configure_extended_tags(dev, NULL);
 	pci_configure_relaxed_ordering(dev);
 	pci_configure_ltr(dev);
+	pci_configure_eetlp_prefix(dev);
 
 	memset(&hpp, 0, sizeof(hpp));
 	ret = pci_get_hp_params(dev, &hpp);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..6ba818449095 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -350,6 +350,7 @@ struct pci_dev {
 	unsigned int	ltr_path:1;	/* Latency Tolerance Reporting
 					   supported from root to here */
 #endif
+	unsigned int	eetlp_prefix_path:1;	/* End-to-End TLP Prefix */
 
 	pci_channel_state_t error_state;	/* Current connectivity state */
 	struct device	dev;			/* Generic device interface */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 4da87e2ef8a8..04d7480db714 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -636,6 +636,7 @@
 #define  PCI_EXP_DEVCAP2_OBFF_MASK	0x000c0000 /* OBFF support mechanism */
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
 #define  PCI_EXP_DEVCAP2_OBFF_WAKE	0x00080000 /* Re-use WAKE# for OBFF */
+#define PCI_EXP_DEVCAP2_EE_PREFIX	0x00200000 /* End-End TLP Prefix */
 #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_COMP_TMOUT_DIS	0x0010	/* Completion Timeout Disable */
-- 
cgit v1.2.3


From 80d19669ecd34423e85ca04f2210b0e42a47cb16 Mon Sep 17 00:00:00 2001
From: Amritha Nambiar <amritha.nambiar@intel.com>
Date: Fri, 29 Jun 2018 21:26:41 -0700
Subject: net: Refactor XPS for CPUs and Rx queues

Refactor XPS code to support Tx queue selection based on
CPU(s) map or Rx queue(s) map.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cpumask.h   |  11 ++-
 include/linux/netdevice.h |  98 ++++++++++++++++++++-
 net/core/dev.c            | 211 ++++++++++++++++++++++++++++++----------------
 net/core/net-sysfs.c      |   4 +-
 4 files changed, 244 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index bf53d893ad02..57f20a0a7794 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_active(cpu)		((cpu) == 0)
 #endif
 
-/* verify cpu argument to cpumask_* operators */
-static inline unsigned int cpumask_check(unsigned int cpu)
+static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
-	WARN_ON_ONCE(cpu >= nr_cpumask_bits);
+	WARN_ON_ONCE(cpu >= bits);
 #endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+}
+
+/* verify cpu argument to cpumask_* operators */
+static inline unsigned int cpumask_check(unsigned int cpu)
+{
+	cpu_max_bits_warn(cpu, nr_cpumask_bits);
 	return cpu;
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c6b377a15869..8bf8d6149f79 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -731,10 +731,15 @@ struct xps_map {
  */
 struct xps_dev_maps {
 	struct rcu_head rcu;
-	struct xps_map __rcu *cpu_map[0];
+	struct xps_map __rcu *attr_map[0]; /* Either CPUs map or RXQs map */
 };
-#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +		\
+
+#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +	\
 	(nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
+
+#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
+	(_rxqs * (_tcs) * sizeof(struct xps_map *)))
+
 #endif /* CONFIG_XPS */
 
 #define TC_MAX_QUEUE	16
@@ -1910,7 +1915,8 @@ struct net_device {
 	int			watchdog_timeo;
 
 #ifdef CONFIG_XPS
-	struct xps_dev_maps __rcu *xps_maps;
+	struct xps_dev_maps __rcu *xps_cpus_map;
+	struct xps_dev_maps __rcu *xps_rxqs_map;
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc __rcu	*miniq_egress;
@@ -3259,6 +3265,92 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 #ifdef CONFIG_XPS
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			u16 index);
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+			  u16 index, bool is_rxqs_map);
+
+/**
+ *	netif_attr_test_mask - Test a CPU or Rx queue set in a mask
+ *	@j: CPU/Rx queue index
+ *	@mask: bitmask of all cpus/rx queues
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues.
+ */
+static inline bool netif_attr_test_mask(unsigned long j,
+					const unsigned long *mask,
+					unsigned int nr_bits)
+{
+	cpu_max_bits_warn(j, nr_bits);
+	return test_bit(j, mask);
+}
+
+/**
+ *	netif_attr_test_online - Test for online CPU/Rx queue
+ *	@j: CPU/Rx queue index
+ *	@online_mask: bitmask for CPUs/Rx queues that are online
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Returns true if a CPU/Rx queue is online.
+ */
+static inline bool netif_attr_test_online(unsigned long j,
+					  const unsigned long *online_mask,
+					  unsigned int nr_bits)
+{
+	cpu_max_bits_warn(j, nr_bits);
+
+	if (online_mask)
+		return test_bit(j, online_mask);
+
+	return (j < nr_bits);
+}
+
+/**
+ *	netif_attrmask_next - get the next CPU/Rx queue in a cpu/Rx queues mask
+ *	@n: CPU/Rx queue index
+ *	@srcp: the cpumask/Rx queue mask pointer
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Returns >= nr_bits if no further CPUs/Rx queues set.
+ */
+static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
+					       unsigned int nr_bits)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpu_max_bits_warn(n, nr_bits);
+
+	if (srcp)
+		return find_next_bit(srcp, nr_bits, n + 1);
+
+	return n + 1;
+}
+
+/**
+ *	netif_attrmask_next_and - get the next CPU/Rx queue in *src1p & *src2p
+ *	@n: CPU/Rx queue index
+ *	@src1p: the first CPUs/Rx queues mask pointer
+ *	@src2p: the second CPUs/Rx queues mask pointer
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Returns >= nr_bits if no further CPUs/Rx queues set in both.
+ */
+static inline int netif_attrmask_next_and(int n, const unsigned long *src1p,
+					  const unsigned long *src2p,
+					  unsigned int nr_bits)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpu_max_bits_warn(n, nr_bits);
+
+	if (src1p && src2p)
+		return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
+	else if (src1p)
+		return find_next_bit(src1p, nr_bits, n + 1);
+	else if (src2p)
+		return find_next_bit(src2p, nr_bits, n + 1);
+
+	return n + 1;
+}
 #else
 static inline int netif_set_xps_queue(struct net_device *dev,
 				      const struct cpumask *mask,
diff --git a/net/core/dev.c b/net/core/dev.c
index dffed642e686..71059558dc39 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 	int pos;
 
 	if (dev_maps)
-		map = xmap_dereference(dev_maps->cpu_map[tci]);
+		map = xmap_dereference(dev_maps->attr_map[tci]);
 	if (!map)
 		return false;
 
@@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 			break;
 		}
 
-		RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+		RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
 		kfree_rcu(map, rcu);
 		return false;
 	}
@@ -2135,31 +2135,58 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 	return active;
 }
 
+static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
+			   struct xps_dev_maps *dev_maps, unsigned int nr_ids,
+			   u16 offset, u16 count, bool is_rxqs_map)
+{
+	bool active = false;
+	int i, j;
+
+	for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
+	     j < nr_ids;)
+		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
+					       count);
+	if (!active) {
+		if (is_rxqs_map) {
+			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+		} else {
+			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+
+			for (i = offset + (count - 1); count--; i--)
+				netdev_queue_numa_node_write(
+					netdev_get_tx_queue(dev, i),
+							NUMA_NO_NODE);
+		}
+		kfree_rcu(dev_maps, rcu);
+	}
+}
+
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 				   u16 count)
 {
+	const unsigned long *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps;
-	int cpu, i;
-	bool active = false;
+	unsigned int nr_ids;
 
 	mutex_lock(&xps_map_mutex);
-	dev_maps = xmap_dereference(dev->xps_maps);
 
-	if (!dev_maps)
-		goto out_no_maps;
-
-	for_each_possible_cpu(cpu)
-		active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
-					       offset, count);
+	dev_maps = xmap_dereference(dev->xps_rxqs_map);
+	if (dev_maps) {
+		nr_ids = dev->num_rx_queues;
+		clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset,
+			       count, true);
 
-	if (!active) {
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
-		kfree_rcu(dev_maps, rcu);
 	}
 
-	for (i = offset + (count - 1); count--; i--)
-		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
-					     NUMA_NO_NODE);
+	dev_maps = xmap_dereference(dev->xps_cpus_map);
+	if (!dev_maps)
+		goto out_no_maps;
+
+	if (num_possible_cpus() > 1)
+		possible_mask = cpumask_bits(cpu_possible_mask);
+	nr_ids = nr_cpu_ids;
+	clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
+		       false);
 
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
@@ -2170,8 +2197,8 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
 	netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
 }
 
-static struct xps_map *expand_xps_map(struct xps_map *map,
-				      int cpu, u16 index)
+static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
+				      u16 index, bool is_rxqs_map)
 {
 	struct xps_map *new_map;
 	int alloc_len = XPS_MIN_MAP_ALLOC;
@@ -2183,7 +2210,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 		return map;
 	}
 
-	/* Need to add queue to this CPU's existing map */
+	/* Need to add tx-queue to this CPU's/rx-queue's existing map */
 	if (map) {
 		if (pos < map->alloc_len)
 			return map;
@@ -2191,9 +2218,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 		alloc_len = map->alloc_len * 2;
 	}
 
-	/* Need to allocate new map to store queue on this CPU's map */
-	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
-			       cpu_to_node(cpu));
+	/* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
+	 *  map
+	 */
+	if (is_rxqs_map)
+		new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
+	else
+		new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+				       cpu_to_node(attr_index));
 	if (!new_map)
 		return NULL;
 
@@ -2205,14 +2237,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 	return new_map;
 }
 
-int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
-			u16 index)
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+			  u16 index, bool is_rxqs_map)
 {
+	const unsigned long *online_mask = NULL, *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
-	int i, cpu, tci, numa_node_id = -2;
+	int i, j, tci, numa_node_id = -2;
 	int maps_sz, num_tc = 1, tc = 0;
 	struct xps_map *map, *new_map;
 	bool active = false;
+	unsigned int nr_ids;
 
 	if (dev->num_tc) {
 		num_tc = dev->num_tc;
@@ -2221,16 +2255,27 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -EINVAL;
 	}
 
-	maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
-	if (maps_sz < L1_CACHE_BYTES)
-		maps_sz = L1_CACHE_BYTES;
-
 	mutex_lock(&xps_map_mutex);
+	if (is_rxqs_map) {
+		maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
+		dev_maps = xmap_dereference(dev->xps_rxqs_map);
+		nr_ids = dev->num_rx_queues;
+	} else {
+		maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
+		if (num_possible_cpus() > 1) {
+			online_mask = cpumask_bits(cpu_online_mask);
+			possible_mask = cpumask_bits(cpu_possible_mask);
+		}
+		dev_maps = xmap_dereference(dev->xps_cpus_map);
+		nr_ids = nr_cpu_ids;
+	}
 
-	dev_maps = xmap_dereference(dev->xps_maps);
+	if (maps_sz < L1_CACHE_BYTES)
+		maps_sz = L1_CACHE_BYTES;
 
 	/* allocate memory for queue storage */
-	for_each_cpu_and(cpu, cpu_online_mask, mask) {
+	for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
+	     j < nr_ids;) {
 		if (!new_dev_maps)
 			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 		if (!new_dev_maps) {
@@ -2238,73 +2283,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -ENOMEM;
 		}
 
-		tci = cpu * num_tc + tc;
-		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
+		tci = j * num_tc + tc;
+		map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
 				 NULL;
 
-		map = expand_xps_map(map, cpu, index);
+		map = expand_xps_map(map, j, index, is_rxqs_map);
 		if (!map)
 			goto error;
 
-		RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+		RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 	}
 
 	if (!new_dev_maps)
 		goto out_no_new_maps;
 
-	for_each_possible_cpu(cpu) {
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
 		/* copy maps belonging to foreign traffic classes */
-		for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+		for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* We need to explicitly update tci as prevous loop
 		 * could break out early if dev_maps is NULL.
 		 */
-		tci = cpu * num_tc + tc;
+		tci = j * num_tc + tc;
 
-		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
-			/* add queue to CPU maps */
+		if (netif_attr_test_mask(j, mask, nr_ids) &&
+		    netif_attr_test_online(j, online_mask, nr_ids)) {
+			/* add tx-queue to CPU/rx-queue maps */
 			int pos = 0;
 
-			map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+			map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			while ((pos < map->len) && (map->queues[pos] != index))
 				pos++;
 
 			if (pos == map->len)
 				map->queues[map->len++] = index;
 #ifdef CONFIG_NUMA
-			if (numa_node_id == -2)
-				numa_node_id = cpu_to_node(cpu);
-			else if (numa_node_id != cpu_to_node(cpu))
-				numa_node_id = -1;
+			if (!is_rxqs_map) {
+				if (numa_node_id == -2)
+					numa_node_id = cpu_to_node(j);
+				else if (numa_node_id != cpu_to_node(j))
+					numa_node_id = -1;
+			}
 #endif
 		} else if (dev_maps) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* copy maps belonging to foreign traffic classes */
 		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 	}
 
-	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	if (is_rxqs_map)
+		rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
+	else
+		rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
 
 	/* Cleanup old maps */
 	if (!dev_maps)
 		goto out_no_old_maps;
 
-	for_each_possible_cpu(cpu) {
-		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = num_tc, tci = j * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
 			if (map && map != new_map)
 				kfree_rcu(map, rcu);
 		}
@@ -2317,19 +2370,23 @@ out_no_old_maps:
 	active = true;
 
 out_no_new_maps:
-	/* update Tx queue numa node */
-	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
-				     (numa_node_id >= 0) ? numa_node_id :
-				     NUMA_NO_NODE);
+	if (!is_rxqs_map) {
+		/* update Tx queue numa node */
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+					     (numa_node_id >= 0) ?
+					     numa_node_id : NUMA_NO_NODE);
+	}
 
 	if (!dev_maps)
 		goto out_no_maps;
 
-	/* removes queue from unused CPUs */
-	for_each_possible_cpu(cpu) {
-		for (i = tc, tci = cpu * num_tc; i--; tci++)
+	/* removes tx-queue from unused CPUs/rx-queues */
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = tc, tci = j * num_tc; i--; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
-		if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+		if (!netif_attr_test_mask(j, mask, nr_ids) ||
+		    !netif_attr_test_online(j, online_mask, nr_ids))
 			active |= remove_xps_queue(dev_maps, tci, index);
 		for (i = num_tc - tc, tci++; --i; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
@@ -2337,7 +2394,10 @@ out_no_new_maps:
 
 	/* free map if not active */
 	if (!active) {
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		if (is_rxqs_map)
+			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+		else
+			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
 		kfree_rcu(dev_maps, rcu);
 	}
 
@@ -2347,11 +2407,12 @@ out_no_maps:
 	return 0;
 error:
 	/* remove any maps that we added */
-	for_each_possible_cpu(cpu) {
-		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = num_tc, tci = j * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			map = dev_maps ?
-			      xmap_dereference(dev_maps->cpu_map[tci]) :
+			      xmap_dereference(dev_maps->attr_map[tci]) :
 			      NULL;
 			if (new_map && new_map != map)
 				kfree(new_map);
@@ -2363,6 +2424,12 @@ error:
 	kfree(new_dev_maps);
 	return -ENOMEM;
 }
+
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+			u16 index)
+{
+	return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+}
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
@@ -3384,7 +3451,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	int queue_index = -1;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
+	dev_maps = rcu_dereference(dev->xps_cpus_map);
 	if (dev_maps) {
 		unsigned int tci = skb->sender_cpu - 1;
 
@@ -3393,7 +3460,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 			tci += netdev_get_prio_tc_map(dev, skb->priority);
 		}
 
-		map = rcu_dereference(dev_maps->cpu_map[tci]);
+		map = rcu_dereference(dev_maps->attr_map[tci]);
 		if (map) {
 			if (map->len == 1)
 				queue_index = map->queues[0];
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bb7e80f4ced3..b39987c81d53 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 		return -ENOMEM;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
+	dev_maps = rcu_dereference(dev->xps_cpus_map);
 	if (dev_maps) {
 		for_each_possible_cpu(cpu) {
 			int i, tci = cpu * num_tc + tc;
 			struct xps_map *map;
 
-			map = rcu_dereference(dev_maps->cpu_map[tci]);
+			map = rcu_dereference(dev_maps->attr_map[tci]);
 			if (!map)
 				continue;
 
-- 
cgit v1.2.3


From f308d7353d1f5c3ddedc784a367edc72fc51bbaa Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sun, 24 Jun 2018 23:27:22 +0200
Subject: mtd: rawnand: add Reed-Solomon error correction algorithm

Add Reed-Solomon (RS) to the enumeration of ECC algorithms.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/devicetree/bindings/mtd/nand.txt | 2 +-
 drivers/mtd/nand/raw/nand_base.c               | 1 +
 include/linux/mtd/rawnand.h                    | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
index 8bb11d809429..eaef8c657aa5 100644
--- a/Documentation/devicetree/bindings/mtd/nand.txt
+++ b/Documentation/devicetree/bindings/mtd/nand.txt
@@ -25,7 +25,7 @@ Optional NAND chip properties:
 		  Deprecated values:
 		  "soft_bch": use "soft" and nand-ecc-algo instead
 - nand-ecc-algo: string, algorithm of NAND ECC.
-		 Supported values are: "hamming", "bch".
+		 Valid values are: "hamming", "bch", "rs".
 - nand-bus-width : 8 or 16 bus width if not present 8
 - nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
 
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index b01d15ec4c56..d0af5347f89d 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5777,6 +5777,7 @@ static int of_get_nand_ecc_mode(struct device_node *np)
 static const char * const nand_ecc_algos[] = {
 	[NAND_ECC_HAMMING]	= "hamming",
 	[NAND_ECC_BCH]		= "bch",
+	[NAND_ECC_RS]		= "rs",
 };
 
 static int of_get_nand_ecc_algo(struct device_node *np)
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 3e8ec3b8a39c..2d9cb7acbc3d 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -121,6 +121,7 @@ enum nand_ecc_algo {
 	NAND_ECC_UNKNOWN,
 	NAND_ECC_HAMMING,
 	NAND_ECC_BCH,
+	NAND_ECC_RS,
 };
 
 /*
-- 
cgit v1.2.3


From f922bd798bb94e0adcce26ddd811245443173268 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sun, 24 Jun 2018 23:27:23 +0200
Subject: mtd: rawnand: add an option to specify NAND chip as a boot device

Allow to define a NAND chip as a boot device. This can be helpful
for the selection of the ECC algorithm and strength in case the boot
ROM supports only a subset of controller provided options.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/devicetree/bindings/mtd/nand.txt | 4 ++++
 drivers/mtd/nand/raw/nand_base.c               | 3 +++
 include/linux/mtd/rawnand.h                    | 6 ++++++
 3 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
index eaef8c657aa5..e949c778e983 100644
--- a/Documentation/devicetree/bindings/mtd/nand.txt
+++ b/Documentation/devicetree/bindings/mtd/nand.txt
@@ -43,6 +43,10 @@ Optional NAND chip properties:
 		     This is particularly useful when only the in-band area is
 		     used by the upper layers, and you want to make your NAND
 		     as reliable as possible.
+- nand-is-boot-medium: Whether the NAND chip is a boot medium. Drivers might use
+		       this information to select ECC algorithms supported by
+		       the boot ROM or similar restrictions.
+
 - nand-rb: shall contain the native Ready/Busy ids.
 
 The ECC strength and ECC step size properties define the correction capability
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index d0af5347f89d..1c633f80c3e1 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5859,6 +5859,9 @@ static int nand_dt_init(struct nand_chip *chip)
 	if (of_get_nand_bus_width(dn) == 16)
 		chip->options |= NAND_BUSWIDTH_16;
 
+	if (of_property_read_bool(dn, "nand-is-boot-medium"))
+		chip->options |= NAND_IS_BOOT_MEDIUM;
+
 	if (of_get_nand_on_flash_bbt(dn))
 		chip->bbt_options |= NAND_BBT_USE_FLASH;
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 2d9cb7acbc3d..80aeeca03f36 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -219,6 +219,12 @@ enum nand_ecc_algo {
  */
 #define NAND_WAIT_TCCS		0x00200000
 
+/*
+ * Whether the NAND chip is a boot medium. Drivers might use this information
+ * to select ECC algorithms supported by the boot ROM or similar restrictions.
+ */
+#define NAND_IS_BOOT_MEDIUM	0x00400000
+
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
 #define NAND_CONTROLLER_ALLOC	0x80000000
-- 
cgit v1.2.3


From 00ce4e039ad5bded462931606c3063ff691964b7 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Mon, 25 Jun 2018 10:44:44 +1200
Subject: mtd: rawnand: add manufacturer fixup for ONFI parameter page

This is called after the ONFI parameter page checksum is verified
and allows us to override the contents of the parameter page.

Suggested-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 4 ++++
 include/linux/mtd/rawnand.h      | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 1c633f80c3e1..f362c09a71d7 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5168,6 +5168,10 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 		}
 	}
 
+	if (chip->manufacturer.desc && chip->manufacturer.desc->ops &&
+	    chip->manufacturer.desc->ops->fixup_onfi_param_page)
+		chip->manufacturer.desc->ops->fixup_onfi_param_page(chip, p);
+
 	/* Check version */
 	val = le16_to_cpu(p->revision);
 	if (val & (1 << 5))
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 80aeeca03f36..dc4538b58b84 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -785,11 +785,15 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
  *	  implementation) if any.
  * @cleanup: the ->init() function may have allocated resources, ->cleanup()
  *	     is here to let vendor specific code release those resources.
+ * @fixup_onfi_param_page: apply vendor specific fixups to the ONFI parameter
+ *			   page. This is called after the checksum is verified.
  */
 struct nand_manufacturer_ops {
 	void (*detect)(struct nand_chip *chip);
 	int (*init)(struct nand_chip *chip);
 	void (*cleanup)(struct nand_chip *chip);
+	void (*fixup_onfi_param_page)(struct nand_chip *chip,
+				      struct nand_onfi_params *p);
 };
 
 /**
-- 
cgit v1.2.3


From 872b71ff084ab125c68073d9e69acfd7095f2015 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Mon, 25 Jun 2018 10:44:45 +1200
Subject: mtd: rawnand: add defines for ONFI version bits

Add defines for the ONFI version bits and use them in
nand_flash_detect_onfi().

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 10 +++++-----
 include/linux/mtd/rawnand.h      | 11 +++++++++++
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index f362c09a71d7..dbf6e80e9ab5 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5174,15 +5174,15 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 
 	/* Check version */
 	val = le16_to_cpu(p->revision);
-	if (val & (1 << 5))
+	if (val & ONFI_VERSION_2_3)
 		chip->parameters.onfi.version = 23;
-	else if (val & (1 << 4))
+	else if (val & ONFI_VERSION_2_2)
 		chip->parameters.onfi.version = 22;
-	else if (val & (1 << 3))
+	else if (val & ONFI_VERSION_2_1)
 		chip->parameters.onfi.version = 21;
-	else if (val & (1 << 2))
+	else if (val & ONFI_VERSION_2_0)
 		chip->parameters.onfi.version = 20;
-	else if (val & (1 << 1))
+	else if (val & ONFI_VERSION_1_0)
 		chip->parameters.onfi.version = 10;
 
 	if (!chip->parameters.onfi.version) {
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index dc4538b58b84..fbac4741da52 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -237,6 +237,17 @@ enum nand_ecc_algo {
 /* Keep gcc happy */
 struct nand_chip;
 
+/* ONFI version bits */
+#define ONFI_VERSION_1_0		BIT(1)
+#define ONFI_VERSION_2_0		BIT(2)
+#define ONFI_VERSION_2_1		BIT(3)
+#define ONFI_VERSION_2_2		BIT(4)
+#define ONFI_VERSION_2_3		BIT(5)
+#define ONFI_VERSION_3_0		BIT(6)
+#define ONFI_VERSION_3_1		BIT(7)
+#define ONFI_VERSION_3_2		BIT(8)
+#define ONFI_VERSION_4_0		BIT(9)
+
 /* ONFI features */
 #define ONFI_FEATURE_16_BIT_BUS		(1 << 0)
 #define ONFI_FEATURE_EXT_PARAM_PAGE	(1 << 7)
-- 
cgit v1.2.3


From 78c9c4dfbf8c04883941445a195276bb4bb92c76 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 26 Jun 2018 15:21:32 +0200
Subject: posix-timers: Sanitize overrun handling

The posix timer overrun handling is broken because the forwarding functions
can return a huge number of overruns which does not fit in an int. As a
consequence timer_getoverrun(2) and siginfo::si_overrun can turn into
random number generators.

The k_clock::timer_forward() callbacks return a 64 bit value now. Make
k_itimer::ti_overrun[_last] 64bit as well, so the kernel internal
accounting is correct. 3Remove the temporary (int) casts.

Add a helper function which clamps the overrun value returned to user space
via timer_getoverrun(2) or siginfo::si_overrun limited to a positive value
between 0 and INT_MAX. INT_MAX is an indicator for user space that the
overrun value has been clamped.

Reported-by: Team OWL337 <icytxw@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Link: https://lkml.kernel.org/r/20180626132705.018623573@linutronix.de
---
 include/linux/posix-timers.h   |  4 ++--
 kernel/time/posix-cpu-timers.c |  2 +-
 kernel/time/posix-timers.c     | 31 ++++++++++++++++++++-----------
 3 files changed, 23 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index c85704fcdbd2..ee7e987ea1b4 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -95,8 +95,8 @@ struct k_itimer {
 	clockid_t		it_clock;
 	timer_t			it_id;
 	int			it_active;
-	int			it_overrun;
-	int			it_overrun_last;
+	s64			it_overrun;
+	s64			it_overrun_last;
 	int			it_requeue_pending;
 	int			it_sigev_notify;
 	ktime_t			it_interval;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 5a6251ac6f7a..562cc3891b57 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -85,7 +85,7 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now)
 			continue;
 
 		timer->it.cpu.expires += incr;
-		timer->it_overrun += 1 << i;
+		timer->it_overrun += 1LL << i;
 		delta -= incr;
 	}
 }
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index db1d65963a57..3ac7295306dc 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -283,6 +283,17 @@ static __init int init_posix_timers(void)
 }
 __initcall(init_posix_timers);
 
+/*
+ * The siginfo si_overrun field and the return value of timer_getoverrun(2)
+ * are of type int. Clamp the overrun value to INT_MAX
+ */
+static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval)
+{
+	s64 sum = timr->it_overrun_last + (s64)baseval;
+
+	return sum > (s64)INT_MAX ? INT_MAX : (int)sum;
+}
+
 static void common_hrtimer_rearm(struct k_itimer *timr)
 {
 	struct hrtimer *timer = &timr->it.real.timer;
@@ -290,9 +301,8 @@ static void common_hrtimer_rearm(struct k_itimer *timr)
 	if (!timr->it_interval)
 		return;
 
-	timr->it_overrun += (unsigned int) hrtimer_forward(timer,
-						timer->base->get_time(),
-						timr->it_interval);
+	timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
+					    timr->it_interval);
 	hrtimer_restart(timer);
 }
 
@@ -321,10 +331,10 @@ void posixtimer_rearm(struct siginfo *info)
 
 		timr->it_active = 1;
 		timr->it_overrun_last = timr->it_overrun;
-		timr->it_overrun = -1;
+		timr->it_overrun = -1LL;
 		++timr->it_requeue_pending;
 
-		info->si_overrun += timr->it_overrun_last;
+		info->si_overrun = timer_overrun_to_int(timr, info->si_overrun);
 	}
 
 	unlock_timer(timr, flags);
@@ -418,9 +428,8 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 					now = ktime_add(now, kj);
 			}
 #endif
-			timr->it_overrun += (unsigned int)
-				hrtimer_forward(timer, now,
-						timr->it_interval);
+			timr->it_overrun += hrtimer_forward(timer, now,
+							    timr->it_interval);
 			ret = HRTIMER_RESTART;
 			++timr->it_requeue_pending;
 			timr->it_active = 1;
@@ -524,7 +533,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event,
 	new_timer->it_id = (timer_t) new_timer_id;
 	new_timer->it_clock = which_clock;
 	new_timer->kclock = kc;
-	new_timer->it_overrun = -1;
+	new_timer->it_overrun = -1LL;
 
 	if (event) {
 		rcu_read_lock();
@@ -702,7 +711,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
 	 * expiry time forward by intervals, so expiry is > now.
 	 */
 	if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none))
-		timr->it_overrun += (int)kc->timer_forward(timr, now);
+		timr->it_overrun += kc->timer_forward(timr, now);
 
 	remaining = kc->timer_remaining(timr, now);
 	/* Return 0 only, when the timer is expired and not pending */
@@ -791,7 +800,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
 	if (!timr)
 		return -EINVAL;
 
-	overrun = timr->it_overrun_last;
+	overrun = timer_overrun_to_int(timr, 0);
 	unlock_timer(timr, flags);
 
 	return overrun;
-- 
cgit v1.2.3


From 05739375f1c0a1048fea8b9c4cb54d9e4a891938 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Fri, 29 Jun 2018 14:59:40 +0200
Subject: ASoC: pxa-ssp: remove .set_pll() and .set_clkdiv() callbacks

The .set_pll() and .set_clkdiv() callbacks are considered legacy and should
not be used anymore. In order to support PXA boards on DT platforms, remove
them and let the code figure out the correct dividers and PLL base
frequencies itself.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h |   8 +++
 sound/soc/pxa/pxa-ssp.c    | 146 ++++++++++++++++++++++-----------------------
 2 files changed, 81 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 03a7ca46735b..13b4244d44c1 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -171,6 +171,14 @@
 #define SSACD_SCDB		(1 << 3)	/* SSPSYSCLK Divider Bypass */
 #define SSACD_ACPS(x)		((x) << 4)	/* Audio clock PLL select */
 #define SSACD_ACDS(x)		((x) << 0)	/* Audio clock divider select */
+#define SSACD_ACDS_1		(0)
+#define SSACD_ACDS_2		(1)
+#define SSACD_ACDS_4		(2)
+#define SSACD_ACDS_8		(3)
+#define SSACD_ACDS_16		(4)
+#define SSACD_ACDS_32		(5)
+#define SSACD_SCDB_4X		(0)
+#define SSACD_SCDB_1X		(1)
 #define SSACD_SCDX8		(1 << 7)	/* SYSCLK division ratio select */
 
 /* LPSS SSP */
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 01d54697ede4..f8339bb01251 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -41,6 +41,7 @@
  */
 struct ssp_priv {
 	struct ssp_device *ssp;
+	unsigned long ssp_clk;
 	unsigned int sysclk;
 	unsigned int dai_fmt;
 	unsigned int configured_dai_fmt;
@@ -192,21 +193,6 @@ static void pxa_ssp_set_scr(struct ssp_device *ssp, u32 div)
 	pxa_ssp_write_reg(ssp, SSCR0, sscr0);
 }
 
-/**
- * pxa_ssp_get_clkdiv - get SSP clock divider
- */
-static u32 pxa_ssp_get_scr(struct ssp_device *ssp)
-{
-	u32 sscr0 = pxa_ssp_read_reg(ssp, SSCR0);
-	u32 div;
-
-	if (ssp->type == PXA25x_SSP)
-		div = ((sscr0 >> 8) & 0xff) * 2 + 2;
-	else
-		div = ((sscr0 >> 8) & 0xfff) + 1;
-	return div;
-}
-
 /*
  * Set the SSP ports SYSCLK.
  */
@@ -262,67 +248,18 @@ static int pxa_ssp_set_dai_sysclk(struct snd_soc_dai *cpu_dai,
 	return 0;
 }
 
-/*
- * Set the SSP clock dividers.
- */
-static int pxa_ssp_set_dai_clkdiv(struct snd_soc_dai *cpu_dai,
-	int div_id, int div)
-{
-	struct ssp_priv *priv = snd_soc_dai_get_drvdata(cpu_dai);
-	struct ssp_device *ssp = priv->ssp;
-	int val;
-
-	switch (div_id) {
-	case PXA_SSP_AUDIO_DIV_ACDS:
-		val = (pxa_ssp_read_reg(ssp, SSACD) & ~0x7) | SSACD_ACDS(div);
-		pxa_ssp_write_reg(ssp, SSACD, val);
-		break;
-	case PXA_SSP_AUDIO_DIV_SCDB:
-		val = pxa_ssp_read_reg(ssp, SSACD);
-		val &= ~SSACD_SCDB;
-		if (ssp->type == PXA3xx_SSP)
-			val &= ~SSACD_SCDX8;
-		switch (div) {
-		case PXA_SSP_CLK_SCDB_1:
-			val |= SSACD_SCDB;
-			break;
-		case PXA_SSP_CLK_SCDB_4:
-			break;
-		case PXA_SSP_CLK_SCDB_8:
-			if (ssp->type == PXA3xx_SSP)
-				val |= SSACD_SCDX8;
-			else
-				return -EINVAL;
-			break;
-		default:
-			return -EINVAL;
-		}
-		pxa_ssp_write_reg(ssp, SSACD, val);
-		break;
-	case PXA_SSP_DIV_SCR:
-		pxa_ssp_set_scr(ssp, div);
-		break;
-	default:
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
 /*
  * Configure the PLL frequency pxa27x and (afaik - pxa320 only)
  */
-static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id,
-	int source, unsigned int freq_in, unsigned int freq_out)
+static int pxa_ssp_set_pll(struct ssp_priv *priv, unsigned int freq)
 {
-	struct ssp_priv *priv = snd_soc_dai_get_drvdata(cpu_dai);
 	struct ssp_device *ssp = priv->ssp;
 	u32 ssacd = pxa_ssp_read_reg(ssp, SSACD) & ~0x70;
 
 	if (ssp->type == PXA3xx_SSP)
 		pxa_ssp_write_reg(ssp, SSACDD, 0);
 
-	switch (freq_out) {
+	switch (freq) {
 	case 5622000:
 		break;
 	case 11345000:
@@ -353,7 +290,7 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id,
 			u64 tmp = 19968;
 
 			tmp *= 1000000;
-			do_div(tmp, freq_out);
+			do_div(tmp, freq);
 			val = tmp;
 
 			val = (val << 16) | 64;
@@ -363,7 +300,7 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id,
 
 			dev_dbg(&ssp->pdev->dev,
 				"Using SSACDD %x to supply %uHz\n",
-				val, freq_out);
+				val, freq);
 			break;
 		}
 
@@ -568,6 +505,24 @@ static int pxa_ssp_configure_dai_fmt(struct ssp_priv *priv)
 	return 0;
 }
 
+struct pxa_ssp_clock_mode {
+	int rate;
+	int pll;
+	u8 acds;
+	u8 scdb;
+};
+
+static const struct pxa_ssp_clock_mode pxa_ssp_clock_modes[] = {
+	{ .rate =  8000, .pll = 32842000, .acds = SSACD_ACDS_32, .scdb = SSACD_SCDB_4X },
+	{ .rate = 11025, .pll =  5622000, .acds = SSACD_ACDS_4,  .scdb = SSACD_SCDB_4X },
+	{ .rate = 16000, .pll = 32842000, .acds = SSACD_ACDS_16, .scdb = SSACD_SCDB_4X },
+	{ .rate = 22050, .pll =  5622000, .acds = SSACD_ACDS_2,  .scdb = SSACD_SCDB_4X },
+	{ .rate = 44100, .pll = 11345000, .acds = SSACD_ACDS_2,  .scdb = SSACD_SCDB_4X },
+	{ .rate = 48000, .pll = 12235000, .acds = SSACD_ACDS_2,  .scdb = SSACD_SCDB_4X },
+	{ .rate = 96000, .pll = 12235000, .acds = SSACD_ACDS_4,  .scdb = SSACD_SCDB_1X },
+	{}
+};
+
 /*
  * Set the SSP audio DMA parameters and sample size.
  * Can be called multiple times by oss emulation.
@@ -579,11 +534,12 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
 	struct ssp_priv *priv = snd_soc_dai_get_drvdata(cpu_dai);
 	struct ssp_device *ssp = priv->ssp;
 	int chn = params_channels(params);
-	u32 sscr0;
-	u32 sspsp;
+	u32 sscr0, sspsp;
 	int width = snd_pcm_format_physical_width(params_format(params));
 	int ttsa = pxa_ssp_read_reg(ssp, SSTSA) & 0xf;
 	struct snd_dmaengine_dai_dma_data *dma_data;
+	int rate = params_rate(params);
+	int bclk = rate * chn * (width / 8);
 	int ret;
 
 	dma_data = snd_soc_dai_get_dma_data(cpu_dai, substream);
@@ -623,11 +579,57 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
 	}
 	pxa_ssp_write_reg(ssp, SSCR0, sscr0);
 
+	if (sscr0 & SSCR0_ACS) {
+		ret = pxa_ssp_set_pll(priv, bclk);
+
+		/*
+		 * If we were able to generate the bclk directly,
+		 * all is fine. Otherwise, look up the closest rate
+		 * from the table and also set the dividers.
+		 */
+
+		if (ret < 0) {
+			const struct pxa_ssp_clock_mode *m;
+			int ssacd, acds;
+
+			for (m = pxa_ssp_clock_modes; m->rate; m++) {
+				if (m->rate == rate)
+					break;
+			}
+
+			if (!m->rate)
+				return -EINVAL;
+
+			acds = m->acds;
+
+			/* The values in the table are for 16 bits */
+			if (width == 32)
+				acds--;
+
+			ret = pxa_ssp_set_pll(priv, bclk);
+			if (ret < 0)
+				return ret;
+
+			ssacd = pxa_ssp_read_reg(ssp, SSACD);
+			ssacd &= ~(SSACD_ACDS(7) | SSACD_SCDB_1X);
+			ssacd |= SSACD_ACDS(m->acds);
+			ssacd |= m->scdb;
+			pxa_ssp_write_reg(ssp, SSACD, ssacd);
+		}
+	} else if (sscr0 & SSCR0_ECS) {
+		/*
+		 * For setups with external clocking, the PLL and its diviers
+		 * are not active. Instead, the SCR bits in SSCR0 can be used
+		 * to divide the clock.
+		 */
+		pxa_ssp_set_scr(ssp, bclk / rate);
+	}
+
 	switch (priv->dai_fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
 	case SND_SOC_DAIFMT_I2S:
 	       sspsp = pxa_ssp_read_reg(ssp, SSPSP);
 
-		if ((pxa_ssp_get_scr(ssp) == 4) && (width == 16)) {
+		if (((priv->sysclk / bclk) == 64) && (width == 16)) {
 			/* This is a special case where the bitclk is 64fs
 			 * and we're not dealing with 2*32 bits of audio
 			 * samples.
@@ -812,8 +814,6 @@ static const struct snd_soc_dai_ops pxa_ssp_dai_ops = {
 	.trigger	= pxa_ssp_trigger,
 	.hw_params	= pxa_ssp_hw_params,
 	.set_sysclk	= pxa_ssp_set_dai_sysclk,
-	.set_clkdiv	= pxa_ssp_set_dai_clkdiv,
-	.set_pll	= pxa_ssp_set_dai_pll,
 	.set_fmt	= pxa_ssp_set_dai_fmt,
 	.set_tdm_slot	= pxa_ssp_set_dai_tdm_slot,
 	.set_tristate	= pxa_ssp_set_dai_tristate,
-- 
cgit v1.2.3


From 88763a5cf80ca59a7c3bea32681ce8f697d9995f Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Tue, 26 Jun 2018 12:53:29 +0200
Subject: powercap / idle_inject: Add an idle injection framework

Initially, the cpu_cooling device for ARM was changed by adding a new
policy inserting idle cycles. The intel_powerclamp driver does a
similar action.

Instead of implementing idle injections privately in the cpu_cooling
device, move the idle injection code in a dedicated framework and give
the opportunity to other frameworks to make use of it.

The framework relies on the smpboot kthreads which handles via its
main loop the common code for hotplugging and [un]parking.

This code was previously tested with the cpu cooling device and went
through several iterations. It results now in split code and API
exported in the header file. It was tested with the cpu cooling device
with success.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Rewrite of all comments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/Kconfig       |  10 ++
 drivers/powercap/Makefile      |   1 +
 drivers/powercap/idle_inject.c | 356 +++++++++++++++++++++++++++++++++++++++++
 include/linux/idle_inject.h    |  29 ++++
 4 files changed, 396 insertions(+)
 create mode 100644 drivers/powercap/idle_inject.c
 create mode 100644 include/linux/idle_inject.h

(limited to 'include/linux')

diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 85727ef6ce8e..6ac27e5908f5 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -29,4 +29,14 @@ config INTEL_RAPL
 	  controller, CPU core (Power Plance 0), graphics uncore (Power Plane
 	  1), etc.
 
+config IDLE_INJECT
+	bool "Idle injection framework"
+	depends on CPU_IDLE
+	default n
+	help
+	  This enables support for the idle injection framework. It
+	  provides a way to force idle periods on a set of specified
+	  CPUs for power capping. Idle period can be injected
+	  synchronously on a set of specified CPUs or alternatively
+	  on a per CPU basis.
 endif
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
index 0a21ef31372b..1b328854b36e 100644
--- a/drivers/powercap/Makefile
+++ b/drivers/powercap/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_POWERCAP)	+= powercap_sys.o
 obj-$(CONFIG_INTEL_RAPL) += intel_rapl.o
+obj-$(CONFIG_IDLE_INJECT) += idle_inject.o
diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
new file mode 100644
index 000000000000..24ff2a068978
--- /dev/null
+++ b/drivers/powercap/idle_inject.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The idle injection framework provides a way to force CPUs to enter idle
+ * states for a specified fraction of time over a specified period.
+ *
+ * It relies on the smpboot kthreads feature providing common code for CPU
+ * hotplug and thread [un]parking.
+ *
+ * All of the kthreads used for idle injection are created at init time.
+ *
+ * Next, the users of the the idle injection framework provide a cpumask via
+ * its register function. The kthreads will be synchronized with respect to
+ * this cpumask.
+ *
+ * The idle + run duration is specified via separate helpers and that allows
+ * idle injection to be started.
+ *
+ * The idle injection kthreads will call play_idle() with the idle duration
+ * specified as per the above.
+ *
+ * After all of them have been woken up, a timer is set to start the next idle
+ * injection cycle.
+ *
+ * The timer interrupt handler will wake up the idle injection kthreads for
+ * all of the CPUs in the cpumask provided by the user.
+ *
+ * Idle injection is stopped synchronously and no leftover idle injection
+ * kthread activity after its completion is guaranteed.
+ *
+ * It is up to the user of this framework to provide a lock for higher-level
+ * synchronization to prevent race conditions like starting idle injection
+ * while unregistering from the framework.
+ */
+#define pr_fmt(fmt) "ii_dev: " fmt
+
+#include <linux/cpu.h>
+#include <linux/hrtimer.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smpboot.h>
+
+#include <uapi/linux/sched/types.h>
+
+/**
+ * struct idle_inject_thread - task on/off switch structure
+ * @tsk: task injecting the idle cycles
+ * @should_run: whether or not to run the task (for the smpboot kthread API)
+ */
+struct idle_inject_thread {
+	struct task_struct *tsk;
+	int should_run;
+};
+
+/**
+ * struct idle_inject_device - idle injection data
+ * @timer: idle injection period timer
+ * @idle_duration_ms: duration of CPU idle time to inject
+ * @run_duration_ms: duration of CPU run time to allow
+ * @cpumask: mask of CPUs affected by idle injection
+ */
+struct idle_inject_device {
+	struct hrtimer timer;
+	unsigned int idle_duration_ms;
+	unsigned int run_duration_ms;
+	unsigned long int cpumask[0];
+};
+
+static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread);
+static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device);
+
+/**
+ * idle_inject_wakeup - Wake up idle injection threads
+ * @ii_dev: target idle injection device
+ *
+ * Every idle injection task associated with the given idle injection device
+ * and running on an online CPU will be woken up.
+ */
+static void idle_inject_wakeup(struct idle_inject_device *ii_dev)
+{
+	struct idle_inject_thread *iit;
+	unsigned int cpu;
+
+	for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) {
+		iit = per_cpu_ptr(&idle_inject_thread, cpu);
+		iit->should_run = 1;
+		wake_up_process(iit->tsk);
+	}
+}
+
+/**
+ * idle_inject_timer_fn - idle injection timer function
+ * @timer: idle injection hrtimer
+ *
+ * This function is called when the idle injection timer expires.  It wakes up
+ * idle injection tasks associated with the timer and they, in turn, invoke
+ * play_idle() to inject a specified amount of CPU idle time.
+ *
+ * Return: HRTIMER_RESTART.
+ */
+static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
+{
+	unsigned int duration_ms;
+	struct idle_inject_device *ii_dev =
+		container_of(timer, struct idle_inject_device, timer);
+
+	duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+	duration_ms += READ_ONCE(ii_dev->idle_duration_ms);
+
+	idle_inject_wakeup(ii_dev);
+
+	hrtimer_forward_now(timer, ms_to_ktime(duration_ms));
+
+	return HRTIMER_RESTART;
+}
+
+/**
+ * idle_inject_fn - idle injection work function
+ * @cpu: the CPU owning the task
+ *
+ * This function calls play_idle() to inject a specified amount of CPU idle
+ * time.
+ */
+static void idle_inject_fn(unsigned int cpu)
+{
+	struct idle_inject_device *ii_dev;
+	struct idle_inject_thread *iit;
+
+	ii_dev = per_cpu(idle_inject_device, cpu);
+	iit = per_cpu_ptr(&idle_inject_thread, cpu);
+
+	/*
+	 * Let the smpboot main loop know that the task should not run again.
+	 */
+	iit->should_run = 0;
+
+	play_idle(READ_ONCE(ii_dev->idle_duration_ms));
+}
+
+/**
+ * idle_inject_set_duration - idle and run duration update helper
+ * @run_duration_ms: CPU run time to allow in milliseconds
+ * @idle_duration_ms: CPU idle time to inject in milliseconds
+ */
+void idle_inject_set_duration(struct idle_inject_device *ii_dev,
+			      unsigned int run_duration_ms,
+			      unsigned int idle_duration_ms)
+{
+	if (run_duration_ms && idle_duration_ms) {
+		WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms);
+		WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms);
+	}
+}
+
+/**
+ * idle_inject_get_duration - idle and run duration retrieval helper
+ * @run_duration_ms: memory location to store the current CPU run time
+ * @idle_duration_ms: memory location to store the current CPU idle time
+ */
+void idle_inject_get_duration(struct idle_inject_device *ii_dev,
+			      unsigned int *run_duration_ms,
+			      unsigned int *idle_duration_ms)
+{
+	*run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+	*idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+}
+
+/**
+ * idle_inject_start - start idle injections
+ * @ii_dev: idle injection control device structure
+ *
+ * The function starts idle injection by first waking up all of the idle
+ * injection kthreads associated with @ii_dev to let them inject CPU idle time
+ * sets up a timer to start the next idle injection period.
+ *
+ * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success.
+ */
+int idle_inject_start(struct idle_inject_device *ii_dev)
+{
+	unsigned int idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+	unsigned int run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+
+	if (!idle_duration_ms || !run_duration_ms)
+		return -EINVAL;
+
+	pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n",
+		 cpumask_pr_args(to_cpumask(ii_dev->cpumask)));
+
+	idle_inject_wakeup(ii_dev);
+
+	hrtimer_start(&ii_dev->timer,
+		      ms_to_ktime(idle_duration_ms + run_duration_ms),
+		      HRTIMER_MODE_REL);
+
+	return 0;
+}
+
+/**
+ * idle_inject_stop - stops idle injections
+ * @ii_dev: idle injection control device structure
+ *
+ * The function stops idle injection and waits for the threads to finish work.
+ * If CPU idle time is being injected when this function runs, then it will
+ * wait until the end of the cycle.
+ *
+ * When it returns, there is no more idle injection kthread activity.  The
+ * kthreads are scheduled out and the periodic timer is off.
+ */
+void idle_inject_stop(struct idle_inject_device *ii_dev)
+{
+	struct idle_inject_thread *iit;
+	unsigned int cpu;
+
+	pr_debug("Stopping idle injection on CPUs '%*pbl'\n",
+		 cpumask_pr_args(to_cpumask(ii_dev->cpumask)));
+
+	hrtimer_cancel(&ii_dev->timer);
+
+	/*
+	 * Stopping idle injection requires all of the idle injection kthreads
+	 * associated with the given cpumask to be parked and stay that way, so
+	 * prevent CPUs from going online at this point.  Any CPUs going online
+	 * after the loop below will be covered by clearing the should_run flag
+	 * that will cause the smpboot main loop to schedule them out.
+	 */
+	cpu_hotplug_disable();
+
+	/*
+	 * Iterate over all (online + offline) CPUs here in case one of them
+	 * goes offline with the should_run flag set so as to prevent its idle
+	 * injection kthread from running when the CPU goes online again after
+	 * the ii_dev has been freed.
+	 */
+	for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
+		iit = per_cpu_ptr(&idle_inject_thread, cpu);
+		iit->should_run = 0;
+
+		wait_task_inactive(iit->tsk, 0);
+	}
+
+	cpu_hotplug_enable();
+}
+
+/**
+ * idle_inject_setup - prepare the current task for idle injection
+ * @cpu: not used
+ *
+ * Called once, this function is in charge of setting the current task's
+ * scheduler parameters to make it an RT task.
+ */
+static void idle_inject_setup(unsigned int cpu)
+{
+	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
+
+	sched_setscheduler(current, SCHED_FIFO, &param);
+}
+
+/**
+ * idle_inject_should_run - function helper for the smpboot API
+ * @cpu: CPU the kthread is running on
+ *
+ * Return: whether or not the thread can run.
+ */
+static int idle_inject_should_run(unsigned int cpu)
+{
+	struct idle_inject_thread *iit =
+		per_cpu_ptr(&idle_inject_thread, cpu);
+
+	return iit->should_run;
+}
+
+/**
+ * idle_inject_register - initialize idle injection on a set of CPUs
+ * @cpumask: CPUs to be affected by idle injection
+ *
+ * This function creates an idle injection control device structure for the
+ * given set of CPUs and initializes the timer associated with it.  It does not
+ * start any injection cycles.
+ *
+ * Return: NULL if memory allocation fails, idle injection control device
+ * pointer on success.
+ */
+struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
+{
+	struct idle_inject_device *ii_dev;
+	int cpu, cpu_rb;
+
+	ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL);
+	if (!ii_dev)
+		return NULL;
+
+	cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask);
+	hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ii_dev->timer.function = idle_inject_timer_fn;
+
+	for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
+
+		if (per_cpu(idle_inject_device, cpu)) {
+			pr_err("cpu%d is already registered\n", cpu);
+			goto out_rollback;
+		}
+
+		per_cpu(idle_inject_device, cpu) = ii_dev;
+	}
+
+	return ii_dev;
+
+out_rollback:
+	for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) {
+		if (cpu == cpu_rb)
+			break;
+		per_cpu(idle_inject_device, cpu_rb) = NULL;
+	}
+
+	kfree(ii_dev);
+
+	return NULL;
+}
+
+/**
+ * idle_inject_unregister - unregister idle injection control device
+ * @ii_dev: idle injection control device to unregister
+ *
+ * The function stops idle injection for the given control device,
+ * unregisters its kthreads and frees memory allocated when that device was
+ * created.
+ */
+void idle_inject_unregister(struct idle_inject_device *ii_dev)
+{
+	unsigned int cpu;
+
+	idle_inject_stop(ii_dev);
+
+	for_each_cpu(cpu, to_cpumask(ii_dev->cpumask))
+		per_cpu(idle_inject_device, cpu) = NULL;
+
+	kfree(ii_dev);
+}
+
+static struct smp_hotplug_thread idle_inject_threads = {
+	.store = &idle_inject_thread.tsk,
+	.setup = idle_inject_setup,
+	.thread_fn = idle_inject_fn,
+	.thread_comm = "idle_inject/%u",
+	.thread_should_run = idle_inject_should_run,
+};
+
+static int __init idle_inject_init(void)
+{
+	return smpboot_register_percpu_thread(&idle_inject_threads);
+}
+early_initcall(idle_inject_init);
diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h
new file mode 100644
index 000000000000..bdc0293fb6cb
--- /dev/null
+++ b/include/linux/idle_inject.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Linaro Ltd
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ */
+#ifndef __IDLE_INJECT_H__
+#define __IDLE_INJECT_H__
+
+/* private idle injection device structure */
+struct idle_inject_device;
+
+struct idle_inject_device *idle_inject_register(struct cpumask *cpumask);
+
+void idle_inject_unregister(struct idle_inject_device *ii_dev);
+
+int idle_inject_start(struct idle_inject_device *ii_dev);
+
+void idle_inject_stop(struct idle_inject_device *ii_dev);
+
+void idle_inject_set_duration(struct idle_inject_device *ii_dev,
+				 unsigned int run_duration_ms,
+				 unsigned int idle_duration_ms);
+
+void idle_inject_get_duration(struct idle_inject_device *ii_dev,
+				 unsigned int *run_duration_ms,
+				 unsigned int *idle_duration_ms);
+#endif /* __IDLE_INJECT_H__ */
-- 
cgit v1.2.3


From 603d4cf8fe095b1ee78f423d514427be507fb513 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Sat, 30 Jun 2018 17:38:55 +0200
Subject: net: fix use-after-free in GRO with ESP

Since the addition of GRO for ESP, gro_receive can consume the skb and
return -EINPROGRESS. In that case, the lower layer GRO handler cannot
touch the skb anymore.

Commit 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") converted
some of the gro_receive handlers that can lead to ESP's gro_receive so
that they wouldn't access the skb when -EINPROGRESS is returned, but
missed other spots, mainly in tunneling protocols.

This patch finishes the conversion to using skb_gro_flush_final(), and
adds a new helper, skb_gro_flush_final_remcsum(), used in VXLAN and
GUE.

Fixes: 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c      |  2 +-
 drivers/net/vxlan.c       |  4 +---
 include/linux/netdevice.h | 20 ++++++++++++++++++++
 net/8021q/vlan.c          |  2 +-
 net/ipv4/fou.c            |  4 +---
 net/ipv4/gre_offload.c    |  2 +-
 net/ipv4/udp_offload.c    |  2 +-
 7 files changed, 26 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 750eaa53bf0c..ada33c2d9ac2 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -476,7 +476,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 
 	return pp;
 }
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index aee0e60471f1..f6bb1d54d4bd 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -623,9 +623,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk,
 	flush = 0;
 
 out:
-	skb_gro_remcsum_cleanup(skb, &grc);
-	skb->remcsum_offload = 0;
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 
 	return pp;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3ec9850c7936..3d0cc0b5cec2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2789,11 +2789,31 @@ static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp,
 	if (PTR_ERR(pp) != -EINPROGRESS)
 		NAPI_GRO_CB(skb)->flush |= flush;
 }
+static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
+					       struct sk_buff **pp,
+					       int flush,
+					       struct gro_remcsum *grc)
+{
+	if (PTR_ERR(pp) != -EINPROGRESS) {
+		NAPI_GRO_CB(skb)->flush |= flush;
+		skb_gro_remcsum_cleanup(skb, grc);
+		skb->remcsum_offload = 0;
+	}
+}
 #else
 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
 {
 	NAPI_GRO_CB(skb)->flush |= flush;
 }
+static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
+					       struct sk_buff **pp,
+					       int flush,
+					       struct gro_remcsum *grc)
+{
+	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_remcsum_cleanup(skb, grc);
+	skb->remcsum_offload = 0;
+}
 #endif
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 73a65789271b..8ccee3d01822 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -693,7 +693,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 
 	return pp;
 }
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1540db65241a..c9ec1603666b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -448,9 +448,7 @@ next_proto:
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
-	skb_gro_remcsum_cleanup(skb, &grc);
-	skb->remcsum_offload = 0;
+	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 
 	return pp;
 }
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 1859c473b21a..6a7d980105f6 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -223,7 +223,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 
 	return pp;
 }
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 92dc9e5a7ff3..69c54540d5b4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -394,7 +394,7 @@ unflush:
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 	return pp;
 }
 EXPORT_SYMBOL(udp_gro_receive);
-- 
cgit v1.2.3


From a7ca13826e478f9b201eb2f9f20de0b978a82ad9 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 29 Jun 2018 14:11:19 +1000
Subject: gpio: aspeed: Add interfaces for co-processor to grab GPIOs

On the Aspeed chip, the GPIOs can be under control of the ARM
chip or of the ColdFire coprocessor. (There's a third command
source, the LPC bus, which we don't use or support yet).

The control of which master is allowed to modify a given
GPIO is per-bank (8 GPIOs).

Unfortunately, systems already exist for which we want to
use GPIOs of both sources in the same bank.

This provides an API exported by the gpio-aspeed driver
that an aspeed coprocessor driver can use to "grab" some
GPIOs for use by the coprocessor, and allow the coprocessor
driver to provide callbacks for arbitrating access.

Once at least one GPIO of a given bank has been "grabbed"
by the coprocessor, the entire bank is marked as being
under coprocessor control. It's command source is switched
to the coprocessor.

If the ARM then tries to write to a GPIO in such a marked bank,
the provided callbacks are used to request access from the
coprocessor driver, which is responsible to doing whatever
is necessary to "pause" the coprocessor or prevent it from
trying to use the GPIOs while the ARM is doing its accesses.

During that time, the command source for the bank is temporarily
switched back to the ARM.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Reviewed-by: Andrew Jeffery <andrew@aj.id.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-aspeed.c  | 251 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/gpio/aspeed.h |  15 +++
 2 files changed, 246 insertions(+), 20 deletions(-)
 create mode 100644 include/linux/gpio/aspeed.h

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index b3968f66b1d2..1e00f4045f9d 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -12,6 +12,7 @@
 #include <asm/div64.h>
 #include <linux/clk.h>
 #include <linux/gpio/driver.h>
+#include <linux/gpio/aspeed.h>
 #include <linux/hashtable.h>
 #include <linux/init.h>
 #include <linux/io.h>
@@ -22,6 +23,15 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 
+/*
+ * These two headers aren't meant to be used by GPIO drivers. We need
+ * them in order to access gpio_chip_hwgpio() which we need to implement
+ * the aspeed specific API which allows the coprocessor to request
+ * access to some GPIOs and to arbitrate between coprocessor and ARM.
+ */
+#include <linux/gpio/consumer.h>
+#include "gpiolib.h"
+
 struct aspeed_bank_props {
 	unsigned int bank;
 	u32 input;
@@ -56,6 +66,7 @@ struct aspeed_gpio {
 	struct clk *clk;
 
 	u32 *dcache;
+	u8 *cf_copro_bankmap;
 };
 
 struct aspeed_gpio_bank {
@@ -83,6 +94,9 @@ struct aspeed_gpio_bank {
 
 static const int debounce_timers[4] = { 0x00, 0x50, 0x54, 0x58 };
 
+static const struct aspeed_gpio_copro_ops *copro_ops;
+static void *copro_data;
+
 static const struct aspeed_gpio_bank aspeed_gpio_banks[] = {
 	{
 		.val_regs = 0x0000,
@@ -323,6 +337,50 @@ static void aspeed_gpio_change_cmd_source(struct aspeed_gpio *gpio,
 	iowrite32(reg, c0);
 }
 
+static bool aspeed_gpio_copro_request(struct aspeed_gpio *gpio,
+				      unsigned int offset)
+{
+	const struct aspeed_gpio_bank *bank = to_bank(offset);
+
+	if (!copro_ops || !gpio->cf_copro_bankmap)
+		return false;
+	if (!gpio->cf_copro_bankmap[offset >> 3])
+		return false;
+	if (!copro_ops->request_access)
+		return false;
+
+	/* Pause the coprocessor */
+	copro_ops->request_access(copro_data);
+
+	/* Change command source back to ARM */
+	aspeed_gpio_change_cmd_source(gpio, bank, offset >> 3, GPIO_CMDSRC_ARM);
+
+	/* Update cache */
+	gpio->dcache[GPIO_BANK(offset)] = ioread32(bank_reg(gpio, bank, reg_rdata));
+
+	return true;
+}
+
+static void aspeed_gpio_copro_release(struct aspeed_gpio *gpio,
+				      unsigned int offset)
+{
+	const struct aspeed_gpio_bank *bank = to_bank(offset);
+
+	if (!copro_ops || !gpio->cf_copro_bankmap)
+		return;
+	if (!gpio->cf_copro_bankmap[offset >> 3])
+		return;
+	if (!copro_ops->release_access)
+		return;
+
+	/* Change command source back to ColdFire */
+	aspeed_gpio_change_cmd_source(gpio, bank, offset >> 3,
+				      GPIO_CMDSRC_COLDFIRE);
+
+	/* Restart the coprocessor */
+	copro_ops->release_access(copro_data);
+}
+
 static int aspeed_gpio_get(struct gpio_chip *gc, unsigned int offset)
 {
 	struct aspeed_gpio *gpio = gpiochip_get_data(gc);
@@ -356,11 +414,15 @@ static void aspeed_gpio_set(struct gpio_chip *gc, unsigned int offset,
 {
 	struct aspeed_gpio *gpio = gpiochip_get_data(gc);
 	unsigned long flags;
+	bool copro;
 
 	spin_lock_irqsave(&gpio->lock, flags);
+	copro = aspeed_gpio_copro_request(gpio, offset);
 
 	__aspeed_gpio_set(gc, offset, val);
 
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 }
 
@@ -368,7 +430,9 @@ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset)
 {
 	struct aspeed_gpio *gpio = gpiochip_get_data(gc);
 	const struct aspeed_gpio_bank *bank = to_bank(offset);
+	void __iomem *addr = bank_reg(gpio, bank, reg_dir);
 	unsigned long flags;
+	bool copro;
 	u32 reg;
 
 	if (!have_input(gpio, offset))
@@ -376,8 +440,13 @@ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset)
 
 	spin_lock_irqsave(&gpio->lock, flags);
 
-	reg = ioread32(bank_reg(gpio, bank, reg_dir));
-	iowrite32(reg & ~GPIO_BIT(offset), bank_reg(gpio, bank, reg_dir));
+	reg = ioread32(addr);
+	reg &= ~GPIO_BIT(offset);
+
+	copro = aspeed_gpio_copro_request(gpio, offset);
+	iowrite32(reg, addr);
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 
 	spin_unlock_irqrestore(&gpio->lock, flags);
 
@@ -389,7 +458,9 @@ static int aspeed_gpio_dir_out(struct gpio_chip *gc,
 {
 	struct aspeed_gpio *gpio = gpiochip_get_data(gc);
 	const struct aspeed_gpio_bank *bank = to_bank(offset);
+	void __iomem *addr = bank_reg(gpio, bank, reg_dir);
 	unsigned long flags;
+	bool copro;
 	u32 reg;
 
 	if (!have_output(gpio, offset))
@@ -397,10 +468,15 @@ static int aspeed_gpio_dir_out(struct gpio_chip *gc,
 
 	spin_lock_irqsave(&gpio->lock, flags);
 
+	reg = ioread32(addr);
+	reg |= GPIO_BIT(offset);
+
+	copro = aspeed_gpio_copro_request(gpio, offset);
 	__aspeed_gpio_set(gc, offset, val);
-	reg = ioread32(bank_reg(gpio, bank, reg_dir));
-	iowrite32(reg | GPIO_BIT(offset), bank_reg(gpio, bank, reg_dir));
+	iowrite32(reg, addr);
 
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 
 	return 0;
@@ -430,24 +506,23 @@ static int aspeed_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
 }
 
 static inline int irqd_to_aspeed_gpio_data(struct irq_data *d,
-		struct aspeed_gpio **gpio,
-		const struct aspeed_gpio_bank **bank,
-		u32 *bit)
+					   struct aspeed_gpio **gpio,
+					   const struct aspeed_gpio_bank **bank,
+					   u32 *bit, int *offset)
 {
-	int offset;
 	struct aspeed_gpio *internal;
 
-	offset = irqd_to_hwirq(d);
+	*offset = irqd_to_hwirq(d);
 
 	internal = irq_data_get_irq_chip_data(d);
 
 	/* This might be a bit of a questionable place to check */
-	if (!have_irq(internal, offset))
+	if (!have_irq(internal, *offset))
 		return -ENOTSUPP;
 
 	*gpio = internal;
-	*bank = to_bank(offset);
-	*bit = GPIO_BIT(offset);
+	*bank = to_bank(*offset);
+	*bit = GPIO_BIT(*offset);
 
 	return 0;
 }
@@ -458,17 +533,23 @@ static void aspeed_gpio_irq_ack(struct irq_data *d)
 	struct aspeed_gpio *gpio;
 	unsigned long flags;
 	void __iomem *status_addr;
+	int rc, offset;
+	bool copro;
 	u32 bit;
-	int rc;
 
-	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit);
+	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit, &offset);
 	if (rc)
 		return;
 
 	status_addr = bank_reg(gpio, bank, reg_irq_status);
 
 	spin_lock_irqsave(&gpio->lock, flags);
+	copro = aspeed_gpio_copro_request(gpio, offset);
+
 	iowrite32(bit, status_addr);
+
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 }
 
@@ -479,15 +560,17 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set)
 	unsigned long flags;
 	u32 reg, bit;
 	void __iomem *addr;
-	int rc;
+	int rc, offset;
+	bool copro;
 
-	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit);
+	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit, &offset);
 	if (rc)
 		return;
 
 	addr = bank_reg(gpio, bank, reg_irq_enable);
 
 	spin_lock_irqsave(&gpio->lock, flags);
+	copro = aspeed_gpio_copro_request(gpio, offset);
 
 	reg = ioread32(addr);
 	if (set)
@@ -496,6 +579,8 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set)
 		reg &= ~bit;
 	iowrite32(reg, addr);
 
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 }
 
@@ -520,9 +605,10 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type)
 	struct aspeed_gpio *gpio;
 	unsigned long flags;
 	void __iomem *addr;
-	int rc;
+	int rc, offset;
+	bool copro;
 
-	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit);
+	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit, &offset);
 	if (rc)
 		return -EINVAL;
 
@@ -548,6 +634,7 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type)
 	}
 
 	spin_lock_irqsave(&gpio->lock, flags);
+	copro = aspeed_gpio_copro_request(gpio, offset);
 
 	addr = bank_reg(gpio, bank, reg_irq_type0);
 	reg = ioread32(addr);
@@ -564,6 +651,8 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type)
 	reg = (reg & ~bit) | type2;
 	iowrite32(reg, addr);
 
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 
 	irq_set_handler_locked(d, handler);
@@ -658,11 +747,14 @@ static int aspeed_gpio_reset_tolerance(struct gpio_chip *chip,
 	struct aspeed_gpio *gpio = gpiochip_get_data(chip);
 	unsigned long flags;
 	void __iomem *treg;
+	bool copro;
 	u32 val;
 
 	treg = bank_reg(gpio, to_bank(offset), reg_tolerance);
 
 	spin_lock_irqsave(&gpio->lock, flags);
+	copro = aspeed_gpio_copro_request(gpio, offset);
+
 	val = readl(treg);
 
 	if (enable)
@@ -671,6 +763,9 @@ static int aspeed_gpio_reset_tolerance(struct gpio_chip *chip,
 		val &= ~GPIO_BIT(offset);
 
 	writel(val, treg);
+
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 
 	return 0;
@@ -766,6 +861,9 @@ static void configure_timer(struct aspeed_gpio *gpio, unsigned int offset,
 	void __iomem *addr;
 	u32 val;
 
+	/* Note: Debounce timer isn't under control of the command
+	 * source registers, so no need to sync with the coprocessor
+	 */
 	addr = bank_reg(gpio, bank, reg_debounce_sel1);
 	val = ioread32(addr);
 	iowrite32((val & ~mask) | GPIO_SET_DEBOUNCE1(timer, offset), addr);
@@ -912,6 +1010,111 @@ static int aspeed_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
 	return -ENOTSUPP;
 }
 
+/**
+ * aspeed_gpio_copro_set_ops - Sets the callbacks used for handhsaking with
+ *                             the coprocessor for shared GPIO banks
+ * @ops: The callbacks
+ * @data: Pointer passed back to the callbacks
+ */
+int aspeed_gpio_copro_set_ops(const struct aspeed_gpio_copro_ops *ops, void *data)
+{
+	copro_data = data;
+	copro_ops = ops;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(aspeed_gpio_copro_set_ops);
+
+/**
+ * aspeed_gpio_copro_grab_gpio - Mark a GPIO used by the coprocessor. The entire
+ *                               bank gets marked and any access from the ARM will
+ *                               result in handshaking via callbacks.
+ * @desc: The GPIO to be marked
+ * @vreg_offset: If non-NULL, returns the value register offset in the GPIO space
+ * @dreg_offset: If non-NULL, returns the data latch register offset in the GPIO space
+ * @bit: If non-NULL, returns the bit number of the GPIO in the registers
+ */
+int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc,
+				u16 *vreg_offset, u16 *dreg_offset, u8 *bit)
+{
+	struct gpio_chip *chip = gpiod_to_chip(desc);
+	struct aspeed_gpio *gpio = gpiochip_get_data(chip);
+	int rc = 0, bindex, offset = gpio_chip_hwgpio(desc);
+	const struct aspeed_gpio_bank *bank = to_bank(offset);
+	unsigned long flags;
+
+	if (!gpio->cf_copro_bankmap)
+		gpio->cf_copro_bankmap = kzalloc(gpio->config->nr_gpios >> 3, GFP_KERNEL);
+	if (!gpio->cf_copro_bankmap)
+		return -ENOMEM;
+	if (offset < 0 || offset > gpio->config->nr_gpios)
+		return -EINVAL;
+	bindex = offset >> 3;
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	/* Sanity check, this shouldn't happen */
+	if (gpio->cf_copro_bankmap[bindex] == 0xff) {
+		rc = -EIO;
+		goto bail;
+	}
+	gpio->cf_copro_bankmap[bindex]++;
+
+	/* Switch command source */
+	if (gpio->cf_copro_bankmap[bindex] == 1)
+		aspeed_gpio_change_cmd_source(gpio, bank, bindex,
+					      GPIO_CMDSRC_COLDFIRE);
+
+	if (vreg_offset)
+		*vreg_offset = bank->val_regs;
+	if (dreg_offset)
+		*dreg_offset = bank->rdata_reg;
+	if (bit)
+		*bit = GPIO_OFFSET(offset);
+ bail:
+	spin_unlock_irqrestore(&gpio->lock, flags);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(aspeed_gpio_copro_grab_gpio);
+
+/**
+ * aspeed_gpio_copro_release_gpio - Unmark a GPIO used by the coprocessor.
+ * @desc: The GPIO to be marked
+ */
+int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc)
+{
+	struct gpio_chip *chip = gpiod_to_chip(desc);
+	struct aspeed_gpio *gpio = gpiochip_get_data(chip);
+	int rc = 0, bindex, offset = gpio_chip_hwgpio(desc);
+	const struct aspeed_gpio_bank *bank = to_bank(offset);
+	unsigned long flags;
+
+	if (!gpio->cf_copro_bankmap)
+		return -ENXIO;
+
+	if (offset < 0 || offset > gpio->config->nr_gpios)
+		return -EINVAL;
+	bindex = offset >> 3;
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	/* Sanity check, this shouldn't happen */
+	if (gpio->cf_copro_bankmap[bindex] == 0) {
+		rc = -EIO;
+		goto bail;
+	}
+	gpio->cf_copro_bankmap[bindex]--;
+
+	/* Switch command source */
+	if (gpio->cf_copro_bankmap[bindex] == 0)
+		aspeed_gpio_change_cmd_source(gpio, bank, bindex,
+					      GPIO_CMDSRC_ARM);
+ bail:
+	spin_unlock_irqrestore(&gpio->lock, flags);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(aspeed_gpio_copro_release_gpio);
+
 /*
  * Any banks not specified in a struct aspeed_bank_props array are assumed to
  * have the properties:
@@ -1002,10 +1205,18 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev)
 	if (!gpio->dcache)
 		return -ENOMEM;
 
-	/* Populate it with initial values read from the HW */
+	/*
+	 * Populate it with initial values read from the HW and switch
+	 * all command sources to the ARM by default
+	 */
 	for (i = 0; i < banks; i++) {
-		void __iomem *addr = bank_reg(gpio, &aspeed_gpio_banks[i], reg_rdata);
+		const struct aspeed_gpio_bank *bank = &aspeed_gpio_banks[i];
+		void __iomem *addr = bank_reg(gpio, bank, reg_rdata);
 		gpio->dcache[i] = ioread32(addr);
+		aspeed_gpio_change_cmd_source(gpio, bank, 0, GPIO_CMDSRC_ARM);
+		aspeed_gpio_change_cmd_source(gpio, bank, 1, GPIO_CMDSRC_ARM);
+		aspeed_gpio_change_cmd_source(gpio, bank, 2, GPIO_CMDSRC_ARM);
+		aspeed_gpio_change_cmd_source(gpio, bank, 3, GPIO_CMDSRC_ARM);
 	}
 
 	rc = devm_gpiochip_add_data(&pdev->dev, &gpio->chip, gpio);
diff --git a/include/linux/gpio/aspeed.h b/include/linux/gpio/aspeed.h
new file mode 100644
index 000000000000..1bfb3cdc86d0
--- /dev/null
+++ b/include/linux/gpio/aspeed.h
@@ -0,0 +1,15 @@
+#ifndef __GPIO_ASPEED_H
+#define __GPIO_ASPEED_H
+
+struct aspeed_gpio_copro_ops {
+	int (*request_access)(void *data);
+	int (*release_access)(void *data);
+};
+
+int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc,
+				u16 *vreg_offset, u16 *dreg_offset, u8 *bit);
+int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc);
+int aspeed_gpio_copro_set_ops(const struct aspeed_gpio_copro_ops *ops, void *data);
+
+
+#endif /* __GPIO_ASPEED_H */
-- 
cgit v1.2.3


From 240630e61870e62e39a97225048f9945848fa5f5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 1 Jul 2018 12:15:46 +0200
Subject: ahci: Disable LPM on Lenovo 50 series laptops with a too old BIOS

There have been several reports of LPM related hard freezes about once
a day on multiple Lenovo 50 series models. Strange enough these reports
where not disk model specific as LPM issues usually are and some users
with the exact same disk + laptop where seeing them while other users
where not seeing these issues.

It turns out that enabling LPM triggers a firmware bug somewhere, which
has been fixed in later BIOS versions.

This commit adds a new ahci_broken_lpm() function and a new ATA_FLAG_NO_LPM
for dealing with this.

The ahci_broken_lpm() function contains DMI match info for the 4 models
which are known to be affected by this and the DMI BIOS date field for
known good BIOS versions. If the BIOS date is older then the one in the
table LPM will be disabled and a warning will be printed.

Note the BIOS dates are for known good versions, some older versions may
work too, but we don't know for sure, the table is using dates from BIOS
versions for which users have confirmed that upgrading to that version
makes the problem go away.

Unfortunately I've been unable to get hold of the reporter who reported
that BIOS version 2.35 fixed the problems on the W541 for him. I've been
able to verify the DMI_SYS_VENDOR and DMI_PRODUCT_VERSION from an older
dmidecode, but I don't know the exact BIOS date as reported in the DMI.
Lenovo keeps a changelog with dates in their release notes, but the
dates there are the release dates not the build dates which are in DMI.
So I've chosen to set the date to which we compare to one day past the
release date of the 2.34 BIOS. I plan to fix this with a follow up
commit once I've the necessary info.

Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci.c        | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ata/libata-core.c |  3 +++
 include/linux/libata.h    |  1 +
 3 files changed, 63 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 738fb22978dd..fdeb3b4d0f4a 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1280,6 +1280,59 @@ static bool ahci_broken_suspend(struct pci_dev *pdev)
 	return strcmp(buf, dmi->driver_data) < 0;
 }
 
+static bool ahci_broken_lpm(struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		/* Various Lenovo 50 series have LPM issues with older BIOSen */
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X250"),
+			},
+			.driver_data = "20180406", /* 1.31 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L450"),
+			},
+			.driver_data = "20180420", /* 1.28 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T450s"),
+			},
+			.driver_data = "20180315", /* 1.33 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W541"),
+			},
+			/*
+			 * Note date based on release notes, 2.35 has been
+			 * reported to be good, but I've been unable to get
+			 * a hold of the reporter to get the DMI BIOS date.
+			 * TODO: fix this.
+			 */
+			.driver_data = "20180310", /* 2.35 */
+		},
+		{ }	/* terminate list */
+	};
+	const struct dmi_system_id *dmi = dmi_first_match(sysids);
+	int year, month, date;
+	char buf[9];
+
+	if (!dmi)
+		return false;
+
+	dmi_get_date(DMI_BIOS_DATE, &year, &month, &date);
+	snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date);
+
+	return strcmp(buf, dmi->driver_data) < 0;
+}
+
 static bool ahci_broken_online(struct pci_dev *pdev)
 {
 #define ENCODE_BUSDEVFN(bus, slot, func)			\
@@ -1694,6 +1747,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			"quirky BIOS, skipping spindown on poweroff\n");
 	}
 
+	if (ahci_broken_lpm(pdev)) {
+		pi.flags |= ATA_FLAG_NO_LPM;
+		dev_warn(&pdev->dev,
+			 "BIOS update required for Link Power Management support\n");
+	}
+
 	if (ahci_broken_suspend(pdev)) {
 		hpriv->flags |= AHCI_HFLAG_NO_SUSPEND;
 		dev_warn(&pdev->dev,
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 27d15ed7fa3d..cc71c63df381 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2493,6 +2493,9 @@ int ata_dev_configure(struct ata_device *dev)
 	    (id[ATA_ID_SATA_CAPABILITY] & 0xe) == 0x2)
 		dev->horkage |= ATA_HORKAGE_NOLPM;
 
+	if (ap->flags & ATA_FLAG_NO_LPM)
+		dev->horkage |= ATA_HORKAGE_NOLPM;
+
 	if (dev->horkage & ATA_HORKAGE_NOLPM) {
 		ata_dev_warn(dev, "LPM support broken, forcing max_power\n");
 		dev->link->ap->target_lpm_policy = ATA_LPM_MAX_POWER;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index a2257e380789..32f247cb5e9e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -210,6 +210,7 @@ enum {
 	ATA_FLAG_SLAVE_POSS	= (1 << 0), /* host supports slave dev */
 					    /* (doesn't imply presence) */
 	ATA_FLAG_SATA		= (1 << 1),
+	ATA_FLAG_NO_LPM		= (1 << 2), /* host not happy with LPM */
 	ATA_FLAG_NO_LOG_PAGE	= (1 << 5), /* do not issue log page read */
 	ATA_FLAG_NO_ATAPI	= (1 << 6), /* No ATAPI support */
 	ATA_FLAG_PIO_DMA	= (1 << 7), /* PIO cmds via DMA */
-- 
cgit v1.2.3


From 93dd2112c7b2fa5512cc4aff2c449420487fcb68 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:48 +0300
Subject: usb: typec: mux: Get the mux identifier from function parameter

In order for the muxes to be usable with alternate modes,
the alternate mode devices will need also to be able to get
a handle to the muxes on top of the port devices. To make
that possible, the muxes need to be possible to request with
an identifier.

This will change the API so that the mux identifier is given
as a function parameter to typec_mux_get(), and the hard-coded
"typec-mux" is replaced with that value.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c     | 2 +-
 drivers/usb/typec/mux.c       | 6 +++---
 include/linux/usb/typec_mux.h | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 3ef7b99b080f..784e928303d7 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1407,7 +1407,7 @@ struct typec_port *typec_register_port(struct device *parent,
 		goto err_switch;
 	}
 
-	port->mux = typec_mux_get(cap->fwnode ? &port->dev : parent);
+	port->mux = typec_mux_get(parent, "typec-mux");
 	if (IS_ERR(port->mux)) {
 		ret = PTR_ERR(port->mux);
 		goto err_mux;
diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c
index 9d8330e9c431..ddaac63ecf12 100644
--- a/drivers/usb/typec/mux.c
+++ b/drivers/usb/typec/mux.c
@@ -123,19 +123,19 @@ static void *typec_mux_match(struct device_connection *con, int ep, void *data)
 /**
  * typec_mux_get - Find USB Type-C Multiplexer
  * @dev: The caller device
+ * @name: Mux identifier
  *
  * Finds a mux linked to the caller. This function is primarily meant for the
  * Type-C drivers. Returns a reference to the mux on success, NULL if no
  * matching connection was found, or ERR_PTR(-EPROBE_DEFER) when a connection
  * was found but the mux has not been enumerated yet.
  */
-struct typec_mux *typec_mux_get(struct device *dev)
+struct typec_mux *typec_mux_get(struct device *dev, const char *name)
 {
 	struct typec_mux *mux;
 
 	mutex_lock(&mux_lock);
-	mux = device_connection_find_match(dev, "typec-mux", NULL,
-					   typec_mux_match);
+	mux = device_connection_find_match(dev, name, NULL, typec_mux_match);
 	if (!IS_ERR_OR_NULL(mux))
 		get_device(mux->dev);
 	mutex_unlock(&mux_lock);
diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h
index 12c1b057834b..79293f630ee1 100644
--- a/include/linux/usb/typec_mux.h
+++ b/include/linux/usb/typec_mux.h
@@ -47,7 +47,7 @@ void typec_switch_put(struct typec_switch *sw);
 int typec_switch_register(struct typec_switch *sw);
 void typec_switch_unregister(struct typec_switch *sw);
 
-struct typec_mux *typec_mux_get(struct device *dev);
+struct typec_mux *typec_mux_get(struct device *dev, const char *name);
 void typec_mux_put(struct typec_mux *mux);
 int typec_mux_register(struct typec_mux *mux);
 void typec_mux_unregister(struct typec_mux *mux);
-- 
cgit v1.2.3


From 4ab8c18d4d67321cc7b660559de17511d4fc0237 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:49 +0300
Subject: usb: typec: Register a device for every mode

Before a device was created for every discovered SVID, but
this will create a device for every discovered mode of every
SVID. The idea is to make it easier to create mode specific
drivers once a bus for the alternate mode is added.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c | 172 +++++++++++++++-------------------------------
 drivers/usb/typec/tcpm.c  |  45 ++++++------
 include/linux/usb/typec.h |  37 +++-------
 3 files changed, 83 insertions(+), 171 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 784e928303d7..96dc9c4f73f0 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -13,31 +13,20 @@
 #include <linux/usb/typec.h>
 #include <linux/usb/typec_mux.h>
 
-struct typec_mode {
-	int				index;
+struct typec_altmode {
+	struct device			dev;
+	u16				svid;
+	u8				mode;
+
 	u32				vdo;
 	char				*desc;
 	enum typec_port_type		roles;
-
-	struct typec_altmode		*alt_mode;
-
 	unsigned int			active:1;
 
+	struct attribute		*attrs[5];
 	char				group_name[6];
 	struct attribute_group		group;
-	struct attribute		*attrs[5];
-	struct device_attribute		vdo_attr;
-	struct device_attribute		desc_attr;
-	struct device_attribute		active_attr;
-	struct device_attribute		roles_attr;
-};
-
-struct typec_altmode {
-	struct device			dev;
-	u16				svid;
-	int				n_modes;
-	struct typec_mode		modes[ALTMODE_MAX_MODES];
-	const struct attribute_group	*mode_groups[ALTMODE_MAX_MODES];
+	const struct attribute_group	*groups[2];
 };
 
 struct typec_plug {
@@ -177,23 +166,20 @@ static void typec_report_identity(struct device *dev)
 /**
  * typec_altmode_update_active - Report Enter/Exit mode
  * @alt: Handle to the alternate mode
- * @mode: Mode index
  * @active: True when the mode has been entered
  *
  * If a partner or cable plug executes Enter/Exit Mode command successfully, the
  * drivers use this routine to report the updated state of the mode.
  */
-void typec_altmode_update_active(struct typec_altmode *alt, int mode,
-				 bool active)
+void typec_altmode_update_active(struct typec_altmode *alt, bool active)
 {
-	struct typec_mode *m = &alt->modes[mode];
 	char dir[6];
 
-	if (m->active == active)
+	if (alt->active == active)
 		return;
 
-	m->active = active;
-	snprintf(dir, sizeof(dir), "mode%d", mode);
+	alt->active = active;
+	snprintf(dir, sizeof(dir), "mode%d", alt->mode);
 	sysfs_notify(&alt->dev.kobj, dir, "active");
 	kobject_uevent(&alt->dev.kobj, KOBJ_CHANGE);
 }
@@ -220,42 +206,36 @@ struct typec_port *typec_altmode2port(struct typec_altmode *alt)
 EXPORT_SYMBOL_GPL(typec_altmode2port);
 
 static ssize_t
-typec_altmode_vdo_show(struct device *dev, struct device_attribute *attr,
-		       char *buf)
+vdo_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_mode *mode = container_of(attr, struct typec_mode,
-					       vdo_attr);
+	struct typec_altmode *alt = to_altmode(dev);
 
-	return sprintf(buf, "0x%08x\n", mode->vdo);
+	return sprintf(buf, "0x%08x\n", alt->vdo);
 }
+static DEVICE_ATTR_RO(vdo);
 
 static ssize_t
-typec_altmode_desc_show(struct device *dev, struct device_attribute *attr,
-			char *buf)
+description_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_mode *mode = container_of(attr, struct typec_mode,
-					       desc_attr);
+	struct typec_altmode *alt = to_altmode(dev);
 
-	return sprintf(buf, "%s\n", mode->desc ? mode->desc : "");
+	return sprintf(buf, "%s\n", alt->desc ? alt->desc : "");
 }
+static DEVICE_ATTR_RO(description);
 
 static ssize_t
-typec_altmode_active_show(struct device *dev, struct device_attribute *attr,
-			  char *buf)
+active_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_mode *mode = container_of(attr, struct typec_mode,
-					       active_attr);
+	struct typec_altmode *alt = to_altmode(dev);
 
-	return sprintf(buf, "%s\n", mode->active ? "yes" : "no");
+	return sprintf(buf, "%s\n", alt->active ? "yes" : "no");
 }
 
-static ssize_t
-typec_altmode_active_store(struct device *dev, struct device_attribute *attr,
-			   const char *buf, size_t size)
+static ssize_t active_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t size)
 {
-	struct typec_mode *mode = container_of(attr, struct typec_mode,
-					       active_attr);
-	struct typec_port *port = typec_altmode2port(mode->alt_mode);
+	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_port *port = typec_altmode2port(alt);
 	bool activate;
 	int ret;
 
@@ -266,22 +246,22 @@ typec_altmode_active_store(struct device *dev, struct device_attribute *attr,
 	if (ret)
 		return ret;
 
-	ret = port->cap->activate_mode(port->cap, mode->index, activate);
+	ret = port->cap->activate_mode(port->cap, alt->mode, activate);
 	if (ret)
 		return ret;
 
 	return size;
 }
+static DEVICE_ATTR_RW(active);
 
 static ssize_t
-typec_altmode_roles_show(struct device *dev, struct device_attribute *attr,
-			 char *buf)
+supported_roles_show(struct device *dev, struct device_attribute *attr,
+		     char *buf)
 {
-	struct typec_mode *mode = container_of(attr, struct typec_mode,
-					       roles_attr);
+	struct typec_altmode *alt = to_altmode(dev);
 	ssize_t ret;
 
-	switch (mode->roles) {
+	switch (alt->roles) {
 	case TYPEC_PORT_SRC:
 		ret = sprintf(buf, "source\n");
 		break;
@@ -295,61 +275,13 @@ typec_altmode_roles_show(struct device *dev, struct device_attribute *attr,
 	}
 	return ret;
 }
+static DEVICE_ATTR_RO(supported_roles);
 
-static void typec_init_modes(struct typec_altmode *alt,
-			     const struct typec_mode_desc *desc, bool is_port)
+static void typec_altmode_release(struct device *dev)
 {
-	int i;
-
-	for (i = 0; i < alt->n_modes; i++, desc++) {
-		struct typec_mode *mode = &alt->modes[i];
-
-		/* Not considering the human readable description critical */
-		mode->desc = kstrdup(desc->desc, GFP_KERNEL);
-		if (desc->desc && !mode->desc)
-			dev_err(&alt->dev, "failed to copy mode%d desc\n", i);
-
-		mode->alt_mode = alt;
-		mode->vdo = desc->vdo;
-		mode->roles = desc->roles;
-		mode->index = desc->index;
-		sprintf(mode->group_name, "mode%d", desc->index);
-
-		sysfs_attr_init(&mode->vdo_attr.attr);
-		mode->vdo_attr.attr.name = "vdo";
-		mode->vdo_attr.attr.mode = 0444;
-		mode->vdo_attr.show = typec_altmode_vdo_show;
-
-		sysfs_attr_init(&mode->desc_attr.attr);
-		mode->desc_attr.attr.name = "description";
-		mode->desc_attr.attr.mode = 0444;
-		mode->desc_attr.show = typec_altmode_desc_show;
-
-		sysfs_attr_init(&mode->active_attr.attr);
-		mode->active_attr.attr.name = "active";
-		mode->active_attr.attr.mode = 0644;
-		mode->active_attr.show = typec_altmode_active_show;
-		mode->active_attr.store = typec_altmode_active_store;
-
-		mode->attrs[0] = &mode->vdo_attr.attr;
-		mode->attrs[1] = &mode->desc_attr.attr;
-		mode->attrs[2] = &mode->active_attr.attr;
-
-		/* With ports, list the roles that the mode is supported with */
-		if (is_port) {
-			sysfs_attr_init(&mode->roles_attr.attr);
-			mode->roles_attr.attr.name = "supported_roles";
-			mode->roles_attr.attr.mode = 0444;
-			mode->roles_attr.show = typec_altmode_roles_show;
-
-			mode->attrs[3] = &mode->roles_attr.attr;
-		}
-
-		mode->group.attrs = mode->attrs;
-		mode->group.name = mode->group_name;
+	struct typec_altmode *alt = to_altmode(dev);
 
-		alt->mode_groups[i] = &mode->group;
-	}
+	kfree(alt);
 }
 
 static ssize_t svid_show(struct device *dev, struct device_attribute *attr,
@@ -367,16 +299,6 @@ static struct attribute *typec_altmode_attrs[] = {
 };
 ATTRIBUTE_GROUPS(typec_altmode);
 
-static void typec_altmode_release(struct device *dev)
-{
-	struct typec_altmode *alt = to_altmode(dev);
-	int i;
-
-	for (i = 0; i < alt->n_modes; i++)
-		kfree(alt->modes[i].desc);
-	kfree(alt);
-}
-
 static const struct device_type typec_altmode_dev_type = {
 	.name = "typec_alternate_mode",
 	.groups = typec_altmode_groups,
@@ -395,13 +317,27 @@ typec_register_altmode(struct device *parent,
 		return ERR_PTR(-ENOMEM);
 
 	alt->svid = desc->svid;
-	alt->n_modes = desc->n_modes;
-	typec_init_modes(alt, desc->modes, is_typec_port(parent));
+	alt->mode = desc->mode;
+	alt->vdo = desc->vdo;
+	alt->roles = desc->roles;
+
+	alt->attrs[0] = &dev_attr_vdo.attr;
+	alt->attrs[1] = &dev_attr_description.attr;
+	alt->attrs[2] = &dev_attr_active.attr;
+
+	if (is_typec_port(parent))
+		alt->attrs[3] = &dev_attr_supported_roles.attr;
+
+	sprintf(alt->group_name, "mode%d", desc->mode);
+	alt->group.name = alt->group_name;
+	alt->group.attrs = alt->attrs;
+	alt->groups[0] = &alt->group;
 
 	alt->dev.parent = parent;
-	alt->dev.groups = alt->mode_groups;
+	alt->dev.groups = alt->groups;
 	alt->dev.type = &typec_altmode_dev_type;
-	dev_set_name(&alt->dev, "svid-%04x", alt->svid);
+	dev_set_name(&alt->dev, "%s-%04x:%u", dev_name(parent),
+		     alt->svid, alt->mode);
 
 	ret = device_register(&alt->dev);
 	if (ret) {
diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index a18af298d96d..6b57e7132e64 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -310,8 +310,8 @@ struct tcpm_port {
 
 	/* Alternate mode data */
 	struct pd_mode_data mode_data;
-	struct typec_altmode *partner_altmode[SVID_DISCOVERY_MAX];
-	struct typec_altmode *port_altmode[SVID_DISCOVERY_MAX];
+	struct typec_altmode *partner_altmode[SVID_DISCOVERY_MAX * 6];
+	struct typec_altmode *port_altmode[SVID_DISCOVERY_MAX * 6];
 
 	/* Deadline in jiffies to exit src_try_wait state */
 	unsigned long max_wait;
@@ -995,7 +995,6 @@ static void svdm_consume_modes(struct tcpm_port *port, const __le32 *payload,
 {
 	struct pd_mode_data *pmdata = &port->mode_data;
 	struct typec_altmode_desc *paltmode;
-	struct typec_mode_desc *pmode;
 	int i;
 
 	if (pmdata->altmodes >= ARRAY_SIZE(port->partner_altmode)) {
@@ -1003,32 +1002,28 @@ static void svdm_consume_modes(struct tcpm_port *port, const __le32 *payload,
 		return;
 	}
 
-	paltmode = &pmdata->altmode_desc[pmdata->altmodes];
-	memset(paltmode, 0, sizeof(*paltmode));
+	for (i = 1; i < cnt; i++) {
+		paltmode = &pmdata->altmode_desc[pmdata->altmodes];
+		memset(paltmode, 0, sizeof(*paltmode));
 
-	paltmode->svid = pmdata->svids[pmdata->svid_index];
+		paltmode->svid = pmdata->svids[pmdata->svid_index];
+		paltmode->mode = i;
+		paltmode->vdo = le32_to_cpu(payload[i]);
 
-	tcpm_log(port, " Alternate mode %d: SVID 0x%04x",
-		 pmdata->altmodes, paltmode->svid);
+		tcpm_log(port, " Alternate mode %d: SVID 0x%04x, VDO %d: 0x%08x",
+			 pmdata->altmodes, paltmode->svid,
+			 paltmode->mode, paltmode->vdo);
 
-	for (i = 1; i < cnt && paltmode->n_modes < ALTMODE_MAX_MODES; i++) {
-		pmode = &paltmode->modes[paltmode->n_modes];
-		memset(pmode, 0, sizeof(*pmode));
-		pmode->vdo = le32_to_cpu(payload[i]);
-		pmode->index = i - 1;
-		paltmode->n_modes++;
-		tcpm_log(port, "  VDO %d: 0x%08x",
-			 pmode->index, pmode->vdo);
-	}
-	port->partner_altmode[pmdata->altmodes] =
-		typec_partner_register_altmode(port->partner, paltmode);
-	if (!port->partner_altmode[pmdata->altmodes]) {
-		tcpm_log(port,
-			 "Failed to register alternate modes for SVID 0x%04x",
-			 paltmode->svid);
-		return;
+		port->partner_altmode[pmdata->altmodes] =
+			typec_partner_register_altmode(port->partner, paltmode);
+		if (!port->partner_altmode[pmdata->altmodes]) {
+			tcpm_log(port,
+				 "Failed to register modes for SVID 0x%04x",
+				 paltmode->svid);
+			return;
+		}
+		pmdata->altmodes++;
 	}
-	pmdata->altmodes++;
 }
 
 #define supports_modal(port)	PD_IDH_MODAL_SUPP((port)->partner_ident.id_header)
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 6c33b53d1162..2dcb1683075f 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -93,41 +93,23 @@ int typec_partner_set_identity(struct typec_partner *partner);
 int typec_cable_set_identity(struct typec_cable *cable);
 
 /*
- * struct typec_mode_desc - Individual Mode of an Alternate Mode
- * @index: Index of the Mode within the SVID
+ * struct typec_altmode_desc - USB Type-C Alternate Mode Descriptor
+ * @svid: Standard or Vendor ID
+ * @mode: Index of the Mode
  * @vdo: VDO returned by Discover Modes USB PD command
- * @desc: Optional human readable description of the mode
  * @roles: Only for ports. DRP if the mode is available in both roles
  *
- * Description of a mode of an Alternate Mode which a connector, cable plug or
- * partner supports. Every mode will have it's own sysfs group. The details are
- * the VDO returned by discover modes command, description for the mode and
- * active flag telling has the mode being entered or not.
+ * Description of an Alternate Mode which a connector, cable plug or partner
+ * supports.
  */
-struct typec_mode_desc {
-	int			index;
+struct typec_altmode_desc {
+	u16			svid;
+	u8			mode;
 	u32			vdo;
-	char			*desc;
 	/* Only used with ports */
 	enum typec_port_type	roles;
 };
 
-/*
- * struct typec_altmode_desc - USB Type-C Alternate Mode Descriptor
- * @svid: Standard or Vendor ID
- * @n_modes: Number of modes
- * @modes: Array of modes supported by the Alternate Mode
- *
- * Representation of an Alternate Mode that has SVID assigned by USB-IF. The
- * array of modes will list the modes of a particular SVID that are supported by
- * a connector, partner of a cable plug.
- */
-struct typec_altmode_desc {
-	u16			svid;
-	int			n_modes;
-	struct typec_mode_desc	modes[ALTMODE_MAX_MODES];
-};
-
 struct typec_altmode
 *typec_partner_register_altmode(struct typec_partner *partner,
 				const struct typec_altmode_desc *desc);
@@ -141,8 +123,7 @@ void typec_unregister_altmode(struct typec_altmode *altmode);
 
 struct typec_port *typec_altmode2port(struct typec_altmode *alt);
 
-void typec_altmode_update_active(struct typec_altmode *alt, int mode,
-				 bool active);
+void typec_altmode_update_active(struct typec_altmode *alt, bool active);
 
 enum typec_plug_index {
 	TYPEC_PLUG_SOP_P,
-- 
cgit v1.2.3


From 8a37d87d72f0c69f837229c04d2fcd7117ea57e7 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:50 +0300
Subject: usb: typec: Bus type for alternate modes

Introducing a simple bus for the alternate modes. Bus allows
binding drivers to the discovered alternate modes the
partners support.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/obsolete/sysfs-class-typec |  48 ++++
 Documentation/ABI/testing/sysfs-bus-typec    |  51 ++++
 Documentation/ABI/testing/sysfs-class-typec  |  62 +----
 Documentation/driver-api/usb/typec_bus.rst   | 136 +++++++++
 MAINTAINERS                                  |  11 +-
 drivers/usb/typec/Makefile                   |   2 +-
 drivers/usb/typec/bus.c                      | 401 +++++++++++++++++++++++++++
 drivers/usb/typec/bus.h                      |  38 +++
 drivers/usb/typec/class.c                    | 366 +++++++++++++++++++-----
 include/linux/mod_devicetable.h              |  15 +
 include/linux/usb/typec.h                    |  14 +-
 include/linux/usb/typec_altmode.h            | 160 +++++++++++
 scripts/mod/devicetable-offsets.c            |   4 +
 scripts/mod/file2alias.c                     |  13 +
 14 files changed, 1174 insertions(+), 147 deletions(-)
 create mode 100644 Documentation/ABI/obsolete/sysfs-class-typec
 create mode 100644 Documentation/ABI/testing/sysfs-bus-typec
 create mode 100644 Documentation/driver-api/usb/typec_bus.rst
 create mode 100644 drivers/usb/typec/bus.c
 create mode 100644 drivers/usb/typec/bus.h
 create mode 100644 include/linux/usb/typec_altmode.h

(limited to 'include/linux')

diff --git a/Documentation/ABI/obsolete/sysfs-class-typec b/Documentation/ABI/obsolete/sysfs-class-typec
new file mode 100644
index 000000000000..32623514ee87
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-class-typec
@@ -0,0 +1,48 @@
+These files are deprecated and will be removed. The same files are available
+under /sys/bus/typec (see Documentation/ABI/testing/sysfs-bus-typec).
+
+What:		/sys/class/typec/<port|partner|cable>/<dev>/svid
+Date:		April 2017
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		The SVID (Standard or Vendor ID) assigned by USB-IF for this
+		alternate mode.
+
+What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/
+Date:		April 2017
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Every supported mode will have its own directory. The name of
+		a mode will be "mode<index>" (for example mode1), where <index>
+		is the actual index to the mode VDO returned by Discover Modes
+		USB power delivery command.
+
+What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/description
+Date:		April 2017
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows description of the mode. The description is optional for
+		the drivers, just like with the Billboard Devices.
+
+What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/vdo
+Date:		April 2017
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows the VDO in hexadecimal returned by Discover Modes command
+		for this mode.
+
+What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/active
+Date:		April 2017
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows if the mode is active or not. The attribute can be used
+		for entering/exiting the mode with partners and cable plugs, and
+		with the port alternate modes it can be used for disabling
+		support for specific alternate modes. Entering/exiting modes is
+		supported as synchronous operation so write(2) to the attribute
+		does not return until the enter/exit mode operation has
+		finished. The attribute is notified when the mode is
+		entered/exited so poll(2) on the attribute wakes up.
+		Entering/exiting a mode will also generate uevent KOBJ_CHANGE.
+
+		Valid values: yes, no
diff --git a/Documentation/ABI/testing/sysfs-bus-typec b/Documentation/ABI/testing/sysfs-bus-typec
new file mode 100644
index 000000000000..205d9c91e2e1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-typec
@@ -0,0 +1,51 @@
+What:		/sys/bus/typec/devices/.../active
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows if the mode is active or not. The attribute can be used
+		for entering/exiting the mode. Entering/exiting modes is
+		supported as synchronous operation so write(2) to the attribute
+		does not return until the enter/exit mode operation has
+		finished. The attribute is notified when the mode is
+		entered/exited so poll(2) on the attribute wakes up.
+		Entering/exiting a mode will also generate uevent KOBJ_CHANGE.
+
+		Valid values are boolean.
+
+What:		/sys/bus/typec/devices/.../description
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows description of the mode. The description is optional for
+		the drivers, just like with the Billboard Devices.
+
+What:		/sys/bus/typec/devices/.../mode
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		The index number of the mode returned by Discover Modes USB
+		Power Delivery command. Depending on the alternate mode, the
+		mode index may be significant.
+
+		With some alternate modes (SVIDs), the mode index is assigned
+		for specific functionality in the specification for that
+		alternate mode.
+
+		With other alternate modes, the mode index values are not
+		assigned, and can not be therefore used for identification. When
+		the mode index is not assigned, identifying the alternate mode
+		must be done with either mode VDO or the description.
+
+What:		/sys/bus/typec/devices/.../svid
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		The Standard or Vendor ID (SVID) assigned by USB-IF for this
+		alternate mode.
+
+What:		/sys/bus/typec/devices/.../vdo
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows the VDO in hexadecimal returned by Discover Modes command
+		for this mode.
diff --git a/Documentation/ABI/testing/sysfs-class-typec b/Documentation/ABI/testing/sysfs-class-typec
index 5be552e255e9..d7647b258c3c 100644
--- a/Documentation/ABI/testing/sysfs-class-typec
+++ b/Documentation/ABI/testing/sysfs-class-typec
@@ -222,70 +222,12 @@ Description:
 		available. The value can be polled.
 
 
-Alternate Mode devices.
+USB Type-C port alternate mode devices.
 
-The alternate modes will have Standard or Vendor ID (SVID) assigned by USB-IF.
-The ports, partners and cable plugs can have alternate modes. A supported SVID
-will consist of a set of modes. Every SVID a port/partner/plug supports will
-have a device created for it, and every supported mode for a supported SVID will
-have its own directory under that device. Below <dev> refers to the device for
-the alternate mode.
-
-What:		/sys/class/typec/<port|partner|cable>/<dev>/svid
-Date:		April 2017
-Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
-		The SVID (Standard or Vendor ID) assigned by USB-IF for this
-		alternate mode.
-
-What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/
-Date:		April 2017
-Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
-		Every supported mode will have its own directory. The name of
-		a mode will be "mode<index>" (for example mode1), where <index>
-		is the actual index to the mode VDO returned by Discover Modes
-		USB power delivery command.
-
-What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/description
-Date:		April 2017
-Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
-		Shows description of the mode. The description is optional for
-		the drivers, just like with the Billboard Devices.
-
-What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/vdo
-Date:		April 2017
-Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
-		Shows the VDO in hexadecimal returned by Discover Modes command
-		for this mode.
-
-What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/active
-Date:		April 2017
-Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
-		Shows if the mode is active or not. The attribute can be used
-		for entering/exiting the mode with partners and cable plugs, and
-		with the port alternate modes it can be used for disabling
-		support for specific alternate modes. Entering/exiting modes is
-		supported as synchronous operation so write(2) to the attribute
-		does not return until the enter/exit mode operation has
-		finished. The attribute is notified when the mode is
-		entered/exited so poll(2) on the attribute wakes up.
-		Entering/exiting a mode will also generate uevent KOBJ_CHANGE.
-
-		Valid values: yes, no
-
-What:		/sys/class/typec/<port>/<dev>/mode<index>/supported_roles
+What:		/sys/class/typec/<port>/<alt mode>/supported_roles
 Date:		April 2017
 Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
 Description:
 		Space separated list of the supported roles.
 
-		This attribute is available for the devices describing the
-		alternate modes a port supports, and it will not be exposed with
-		the devices presenting the alternate modes the partners or cable
-		plugs support.
-
 		Valid values: source, sink
diff --git a/Documentation/driver-api/usb/typec_bus.rst b/Documentation/driver-api/usb/typec_bus.rst
new file mode 100644
index 000000000000..d5eec1715b5b
--- /dev/null
+++ b/Documentation/driver-api/usb/typec_bus.rst
@@ -0,0 +1,136 @@
+
+API for USB Type-C Alternate Mode drivers
+=========================================
+
+Introduction
+------------
+
+Alternate modes require communication with the partner using Vendor Defined
+Messages (VDM) as defined in USB Type-C and USB Power Delivery Specifications.
+The communication is SVID (Standard or Vendor ID) specific, i.e. specific for
+every alternate mode, so every alternate mode will need a custom driver.
+
+USB Type-C bus allows binding a driver to the discovered partner alternate
+modes by using the SVID and the mode number.
+
+USB Type-C Connector Class provides a device for every alternate mode a port
+supports, and separate device for every alternate mode the partner supports.
+The drivers for the alternate modes are bound to the partner alternate mode
+devices, and the port alternate mode devices must be handled by the port
+drivers.
+
+When a new partner alternate mode device is registered, it is linked to the
+alternate mode device of the port that the partner is attached to, that has
+matching SVID and mode. Communication between the port driver and alternate mode
+driver will happen using the same API.
+
+The port alternate mode devices are used as a proxy between the partner and the
+alternate mode drivers, so the port drivers are only expected to pass the SVID
+specific commands from the alternate mode drivers to the partner, and from the
+partners to the alternate mode drivers. No direct SVID specific communication is
+needed from the port drivers, but the port drivers need to provide the operation
+callbacks for the port alternate mode devices, just like the alternate mode
+drivers need to provide them for the partner alternate mode devices.
+
+Usage:
+------
+
+General
+~~~~~~~
+
+By default, the alternate mode drivers are responsible for entering the mode.
+It is also possible to leave the decision about entering the mode to the user
+space (See Documentation/ABI/testing/sysfs-class-typec). Port drivers should not
+enter any modes on their own.
+
+``->vdm`` is the most important callback in the operation callbacks vector. It
+will be used to deliver all the SVID specific commands from the partner to the
+alternate mode driver, and vice versa in case of port drivers. The drivers send
+the SVID specific commands to each other using :c:func:`typec_altmode_vmd()`.
+
+If the communication with the partner using the SVID specific commands results
+in need to reconfigure the pins on the connector, the alternate mode driver
+needs to notify the bus using :c:func:`typec_altmode_notify()`. The driver
+passes the negotiated SVID specific pin configuration value to the function as
+parameter. The bus driver will then configure the mux behind the connector using
+that value as the state value for the mux, and also call blocking notification
+chain to notify the external drivers about the state of the connector that need
+to know it.
+
+NOTE: The SVID specific pin configuration values must always start from
+``TYPEC_STATE_MODAL``. USB Type-C specification defines two default states for
+the connector: ``TYPEC_STATE_USB`` and ``TYPEC_STATE_SAFE``. These values are
+reserved by the bus as the first possible values for the state. When the
+alternate mode is entered, the bus will put the connector into
+``TYPEC_STATE_SAFE`` before sending Enter or Exit Mode command as defined in USB
+Type-C Specification, and also put the connector back to ``TYPEC_STATE_USB``
+after the mode has been exited.
+
+An example of working definitions for SVID specific pin configurations would
+look like this:
+
+enum {
+	ALTMODEX_CONF_A = TYPEC_STATE_MODAL,
+	ALTMODEX_CONF_B,
+	...
+};
+
+Helper macro ``TYPEC_MODAL_STATE()`` can also be used:
+
+#define ALTMODEX_CONF_A = TYPEC_MODAL_STATE(0);
+#define ALTMODEX_CONF_B = TYPEC_MODAL_STATE(1);
+
+Notification chain
+~~~~~~~~~~~~~~~~~~
+
+The drivers for the components that the alternate modes are designed for need to
+get details regarding the results of the negotiation with the partner, and the
+pin configuration of the connector. In case of DisplayPort alternate mode for
+example, the GPU drivers will need to know those details. In case of
+Thunderbolt alternate mode, the thunderbolt drivers will need to know them, and
+so on.
+
+The notification chain is designed for this purpose. The drivers can register
+notifiers with :c:func:`typec_altmode_register_notifier()`.
+
+Cable plug alternate modes
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The alternate mode drivers are not bound to cable plug alternate mode devices,
+only to the partner alternate mode devices. If the alternate mode supports, or
+requires, a cable that responds to SOP Prime, and optionally SOP Double Prime
+messages, the driver for that alternate mode must request handle to the cable
+plug alternate modes using :c:func:`typec_altmode_get_plug()`, and take over
+their control.
+
+Driver API
+----------
+
+Alternate mode driver registering/unregistering
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/usb/typec/bus.c
+   :functions: typec_altmode_register_driver typec_altmode_unregister_driver
+
+Alternate mode driver operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/usb/typec/bus.c
+   :functions: typec_altmode_enter typec_altmode_exit typec_altmode_attention typec_altmode_vdm typec_altmode_notify
+
+API for the port drivers
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/usb/typec/bus.c
+   :functions: typec_match_altmode
+
+Cable Plug operations
+~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/usb/typec/bus.c
+   :functions: typec_altmode_get_plug typec_altmode_put_plug
+
+Notifications
+~~~~~~~~~~~~~
+.. kernel-doc:: drivers/usb/typec/class.c
+   :functions: typec_altmode_register_notifier typec_altmode_unregister_notifier
diff --git a/MAINTAINERS b/MAINTAINERS
index 07d1576fc766..f35f39f2072e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14955,7 +14955,7 @@ L:	linux-usb@vger.kernel.org
 S:	Maintained
 F:	drivers/usb/typec/mux/pi3usb30532.c
 
-USB TYPEC SUBSYSTEM
+USB TYPEC CLASS
 M:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
 L:	linux-usb@vger.kernel.org
 S:	Maintained
@@ -14964,6 +14964,15 @@ F:	Documentation/driver-api/usb/typec.rst
 F:	drivers/usb/typec/
 F:	include/linux/usb/typec.h
 
+USB TYPEC BUS FOR ALTERNATE MODES
+M:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+L:	linux-usb@vger.kernel.org
+S:	Maintained
+F:	Documentation/ABI/testing/sysfs-bus-typec
+F:	Documentation/driver-api/usb/typec_bus.rst
+F:	drivers/usb/typec/altmodes/
+F:	include/linux/usb/typec_altmode.h
+
 USB UHCI DRIVER
 M:	Alan Stern <stern@rowland.harvard.edu>
 L:	linux-usb@vger.kernel.org
diff --git a/drivers/usb/typec/Makefile b/drivers/usb/typec/Makefile
index 46f86ee134a2..335ee06748fc 100644
--- a/drivers/usb/typec/Makefile
+++ b/drivers/usb/typec/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_TYPEC)		+= typec.o
-typec-y				:= class.o mux.o
+typec-y				:= class.o mux.o bus.o
 obj-$(CONFIG_TYPEC_TCPM)	+= tcpm.o
 obj-y				+= fusb302/
 obj-$(CONFIG_TYPEC_WCOVE)	+= typec_wcove.o
diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c
new file mode 100644
index 000000000000..999d7904172a
--- /dev/null
+++ b/drivers/usb/typec/bus.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * Bus for USB Type-C Alternate Modes
+ *
+ * Copyright (C) 2018 Intel Corporation
+ * Author: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+ */
+
+#include <linux/usb/pd_vdo.h>
+
+#include "bus.h"
+
+static inline int typec_altmode_set_mux(struct altmode *alt, u8 state)
+{
+	return alt->mux ? alt->mux->set(alt->mux, state) : 0;
+}
+
+static int typec_altmode_set_state(struct typec_altmode *adev, int state)
+{
+	bool is_port = is_typec_port(adev->dev.parent);
+	struct altmode *port_altmode;
+	int ret;
+
+	port_altmode = is_port ? to_altmode(adev) : to_altmode(adev)->partner;
+
+	ret = typec_altmode_set_mux(port_altmode, state);
+	if (ret)
+		return ret;
+
+	blocking_notifier_call_chain(&port_altmode->nh, state, NULL);
+
+	return 0;
+}
+
+/* -------------------------------------------------------------------------- */
+/* Common API */
+
+/**
+ * typec_altmode_notify - Communication between the OS and alternate mode driver
+ * @adev: Handle to the alternate mode
+ * @conf: Alternate mode specific configuration value
+ * @data: Alternate mode specific data
+ *
+ * The primary purpose for this function is to allow the alternate mode drivers
+ * to tell which pin configuration has been negotiated with the partner. That
+ * information will then be used for example to configure the muxes.
+ * Communication to the other direction is also possible, and low level device
+ * drivers can also send notifications to the alternate mode drivers. The actual
+ * communication will be specific for every SVID.
+ */
+int typec_altmode_notify(struct typec_altmode *adev,
+			 unsigned long conf, void *data)
+{
+	bool is_port = is_typec_port(adev->dev.parent);
+	struct altmode *altmode;
+	struct altmode *partner;
+	int ret;
+
+	if (!adev)
+		return 0;
+
+	altmode = to_altmode(adev);
+
+	if (!altmode->partner)
+		return -ENODEV;
+
+	partner = altmode->partner;
+
+	ret = typec_altmode_set_mux(is_port ? altmode : partner, (u8)conf);
+	if (ret)
+		return ret;
+
+	blocking_notifier_call_chain(is_port ? &altmode->nh : &partner->nh,
+				     conf, data);
+
+	if (partner->adev.ops && partner->adev.ops->notify)
+		return partner->adev.ops->notify(&partner->adev, conf, data);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(typec_altmode_notify);
+
+/**
+ * typec_altmode_enter - Enter Mode
+ * @adev: The alternate mode
+ *
+ * The alternate mode drivers use this function to enter mode. The port drivers
+ * use this to inform the alternate mode drivers that the partner has initiated
+ * Enter Mode command.
+ */
+int typec_altmode_enter(struct typec_altmode *adev)
+{
+	struct altmode *partner = to_altmode(adev)->partner;
+	struct typec_altmode *pdev = &partner->adev;
+	int ret;
+
+	if (!adev || adev->active)
+		return 0;
+
+	if (!pdev->ops || !pdev->ops->enter)
+		return -EOPNOTSUPP;
+
+	/* Moving to USB Safe State */
+	ret = typec_altmode_set_state(adev, TYPEC_STATE_SAFE);
+	if (ret)
+		return ret;
+
+	/* Enter Mode */
+	return pdev->ops->enter(pdev);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_enter);
+
+/**
+ * typec_altmode_exit - Exit Mode
+ * @adev: The alternate mode
+ *
+ * The partner of @adev has initiated Exit Mode command.
+ */
+int typec_altmode_exit(struct typec_altmode *adev)
+{
+	struct altmode *partner = to_altmode(adev)->partner;
+	struct typec_altmode *pdev = &partner->adev;
+	int ret;
+
+	if (!adev || !adev->active)
+		return 0;
+
+	if (!pdev->ops || !pdev->ops->enter)
+		return -EOPNOTSUPP;
+
+	/* Moving to USB Safe State */
+	ret = typec_altmode_set_state(adev, TYPEC_STATE_SAFE);
+	if (ret)
+		return ret;
+
+	/* Exit Mode command */
+	return pdev->ops->exit(pdev);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_exit);
+
+/**
+ * typec_altmode_attention - Attention command
+ * @adev: The alternate mode
+ * @vdo: VDO for the Attention command
+ *
+ * Notifies the partner of @adev about Attention command.
+ */
+void typec_altmode_attention(struct typec_altmode *adev, u32 vdo)
+{
+	struct typec_altmode *pdev = &to_altmode(adev)->partner->adev;
+
+	if (pdev->ops && pdev->ops->attention)
+		pdev->ops->attention(pdev, vdo);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_attention);
+
+/**
+ * typec_altmode_vdm - Send Vendor Defined Messages (VDM) to the partner
+ * @adev: Alternate mode handle
+ * @header: VDM Header
+ * @vdo: Array of Vendor Defined Data Objects
+ * @count: Number of Data Objects
+ *
+ * The alternate mode drivers use this function for SVID specific communication
+ * with the partner. The port drivers use it to deliver the Structured VDMs
+ * received from the partners to the alternate mode drivers.
+ */
+int typec_altmode_vdm(struct typec_altmode *adev,
+		      const u32 header, const u32 *vdo, int count)
+{
+	struct typec_altmode *pdev;
+	struct altmode *altmode;
+
+	if (!adev)
+		return 0;
+
+	altmode = to_altmode(adev);
+
+	if (!altmode->partner)
+		return -ENODEV;
+
+	pdev = &altmode->partner->adev;
+
+	if (!pdev->ops || !pdev->ops->vdm)
+		return -EOPNOTSUPP;
+
+	return pdev->ops->vdm(pdev, header, vdo, count);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_vdm);
+
+const struct typec_altmode *
+typec_altmode_get_partner(struct typec_altmode *adev)
+{
+	return &to_altmode(adev)->partner->adev;
+}
+EXPORT_SYMBOL_GPL(typec_altmode_get_partner);
+
+/* -------------------------------------------------------------------------- */
+/* API for the alternate mode drivers */
+
+/**
+ * typec_altmode_get_plug - Find cable plug alternate mode
+ * @adev: Handle to partner alternate mode
+ * @index: Cable plug index
+ *
+ * Increment reference count for cable plug alternate mode device. Returns
+ * handle to the cable plug alternate mode, or NULL if none is found.
+ */
+struct typec_altmode *typec_altmode_get_plug(struct typec_altmode *adev,
+					     enum typec_plug_index index)
+{
+	struct altmode *port = to_altmode(adev)->partner;
+
+	if (port->plug[index]) {
+		get_device(&port->plug[index]->adev.dev);
+		return &port->plug[index]->adev;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(typec_altmode_get_plug);
+
+/**
+ * typec_altmode_put_plug - Decrement cable plug alternate mode reference count
+ * @plug: Handle to the cable plug alternate mode
+ */
+void typec_altmode_put_plug(struct typec_altmode *plug)
+{
+	if (plug)
+		put_device(&plug->dev);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_put_plug);
+
+int __typec_altmode_register_driver(struct typec_altmode_driver *drv,
+				    struct module *module)
+{
+	if (!drv->probe)
+		return -EINVAL;
+
+	drv->driver.owner = module;
+	drv->driver.bus = &typec_bus;
+
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(__typec_altmode_register_driver);
+
+void typec_altmode_unregister_driver(struct typec_altmode_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_unregister_driver);
+
+/* -------------------------------------------------------------------------- */
+/* API for the port drivers */
+
+/**
+ * typec_match_altmode - Match SVID to an array of alternate modes
+ * @altmodes: Array of alternate modes
+ * @n: Number of elements in the array, or -1 for NULL termiated arrays
+ * @svid: Standard or Vendor ID to match with
+ *
+ * Return pointer to an alternate mode with SVID mathing @svid, or NULL when no
+ * match is found.
+ */
+struct typec_altmode *typec_match_altmode(struct typec_altmode **altmodes,
+					  size_t n, u16 svid, u8 mode)
+{
+	int i;
+
+	for (i = 0; i < n; i++) {
+		if (!altmodes[i])
+			break;
+		if (altmodes[i]->svid == svid && altmodes[i]->mode == mode)
+			return altmodes[i];
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(typec_match_altmode);
+
+/* -------------------------------------------------------------------------- */
+
+static ssize_t
+description_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct typec_altmode *alt = to_typec_altmode(dev);
+
+	return sprintf(buf, "%s\n", alt->desc ? alt->desc : "");
+}
+static DEVICE_ATTR_RO(description);
+
+static struct attribute *typec_attrs[] = {
+	&dev_attr_description.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(typec);
+
+static int typec_match(struct device *dev, struct device_driver *driver)
+{
+	struct typec_altmode_driver *drv = to_altmode_driver(driver);
+	struct typec_altmode *altmode = to_typec_altmode(dev);
+	const struct typec_device_id *id;
+
+	for (id = drv->id_table; id->svid; id++)
+		if (id->svid == altmode->svid &&
+		    (id->mode == TYPEC_ANY_MODE || id->mode == altmode->mode))
+			return 1;
+	return 0;
+}
+
+static int typec_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct typec_altmode *altmode = to_typec_altmode(dev);
+
+	if (add_uevent_var(env, "SVID=%04X", altmode->svid))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MODE=%u", altmode->mode))
+		return -ENOMEM;
+
+	return add_uevent_var(env, "MODALIAS=typec:id%04Xm%02X",
+			      altmode->svid, altmode->mode);
+}
+
+static int typec_altmode_create_links(struct altmode *alt)
+{
+	struct device *port_dev = &alt->partner->adev.dev;
+	struct device *dev = &alt->adev.dev;
+	int err;
+
+	err = sysfs_create_link(&dev->kobj, &port_dev->kobj, "port");
+	if (err)
+		return err;
+
+	err = sysfs_create_link(&port_dev->kobj, &dev->kobj, "partner");
+	if (err)
+		sysfs_remove_link(&dev->kobj, "port");
+
+	return err;
+}
+
+static void typec_altmode_remove_links(struct altmode *alt)
+{
+	sysfs_remove_link(&alt->partner->adev.dev.kobj, "partner");
+	sysfs_remove_link(&alt->adev.dev.kobj, "port");
+}
+
+static int typec_probe(struct device *dev)
+{
+	struct typec_altmode_driver *drv = to_altmode_driver(dev->driver);
+	struct typec_altmode *adev = to_typec_altmode(dev);
+	struct altmode *altmode = to_altmode(adev);
+	int ret;
+
+	/* Fail if the port does not support the alternate mode */
+	if (!altmode->partner)
+		return -ENODEV;
+
+	ret = typec_altmode_create_links(altmode);
+	if (ret) {
+		dev_warn(dev, "failed to create symlinks\n");
+		return ret;
+	}
+
+	ret = drv->probe(adev);
+	if (ret)
+		typec_altmode_remove_links(altmode);
+
+	return ret;
+}
+
+static int typec_remove(struct device *dev)
+{
+	struct typec_altmode_driver *drv = to_altmode_driver(dev->driver);
+	struct typec_altmode *adev = to_typec_altmode(dev);
+	struct altmode *altmode = to_altmode(adev);
+
+	typec_altmode_remove_links(altmode);
+
+	if (drv->remove)
+		drv->remove(to_typec_altmode(dev));
+
+	if (adev->active) {
+		WARN_ON(typec_altmode_set_state(adev, TYPEC_STATE_SAFE));
+		typec_altmode_update_active(adev, false);
+	}
+
+	adev->desc = NULL;
+	adev->ops = NULL;
+
+	return 0;
+}
+
+struct bus_type typec_bus = {
+	.name = "typec",
+	.dev_groups = typec_groups,
+	.match = typec_match,
+	.uevent = typec_uevent,
+	.probe = typec_probe,
+	.remove = typec_remove,
+};
diff --git a/drivers/usb/typec/bus.h b/drivers/usb/typec/bus.h
new file mode 100644
index 000000000000..62aaf8b56bde
--- /dev/null
+++ b/drivers/usb/typec/bus.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __USB_TYPEC_ALTMODE_H__
+#define __USB_TYPEC_ALTMODE_H__
+
+#include <linux/usb/typec_altmode.h>
+#include <linux/usb/typec_mux.h>
+
+struct bus_type;
+
+struct altmode {
+	unsigned int			id;
+	struct typec_altmode		adev;
+	struct typec_mux		*mux;
+
+	enum typec_port_data		roles;
+
+	struct attribute		*attrs[5];
+	char				group_name[6];
+	struct attribute_group		group;
+	const struct attribute_group	*groups[2];
+
+	struct altmode			*partner;
+	struct altmode			*plug[2];
+
+	struct blocking_notifier_head	nh;
+};
+
+#define to_altmode(d) container_of(d, struct altmode, adev)
+
+extern struct bus_type typec_bus;
+extern const struct device_type typec_altmode_dev_type;
+extern const struct device_type typec_port_dev_type;
+
+#define is_typec_altmode(_dev_) (_dev_->type == &typec_altmode_dev_type)
+#define is_typec_port(_dev_) (_dev_->type == &typec_port_dev_type)
+
+#endif /* __USB_TYPEC_ALTMODE_H__ */
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 96dc9c4f73f0..c202975f8097 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -10,28 +10,13 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-#include <linux/usb/typec.h>
-#include <linux/usb/typec_mux.h>
 
-struct typec_altmode {
-	struct device			dev;
-	u16				svid;
-	u8				mode;
-
-	u32				vdo;
-	char				*desc;
-	enum typec_port_type		roles;
-	unsigned int			active:1;
-
-	struct attribute		*attrs[5];
-	char				group_name[6];
-	struct attribute_group		group;
-	const struct attribute_group	*groups[2];
-};
+#include "bus.h"
 
 struct typec_plug {
 	struct device			dev;
 	enum typec_plug_index		index;
+	struct ida			mode_ids;
 };
 
 struct typec_cable {
@@ -46,11 +31,13 @@ struct typec_partner {
 	unsigned int			usb_pd:1;
 	struct usb_pd_identity		*identity;
 	enum typec_accessory		accessory;
+	struct ida			mode_ids;
 };
 
 struct typec_port {
 	unsigned int			id;
 	struct device			dev;
+	struct ida			mode_ids;
 
 	int				prefer_role;
 	enum typec_data_role		data_role;
@@ -71,17 +58,14 @@ struct typec_port {
 #define to_typec_plug(_dev_) container_of(_dev_, struct typec_plug, dev)
 #define to_typec_cable(_dev_) container_of(_dev_, struct typec_cable, dev)
 #define to_typec_partner(_dev_) container_of(_dev_, struct typec_partner, dev)
-#define to_altmode(_dev_) container_of(_dev_, struct typec_altmode, dev)
 
 static const struct device_type typec_partner_dev_type;
 static const struct device_type typec_cable_dev_type;
 static const struct device_type typec_plug_dev_type;
-static const struct device_type typec_port_dev_type;
 
 #define is_typec_partner(_dev_) (_dev_->type == &typec_partner_dev_type)
 #define is_typec_cable(_dev_) (_dev_->type == &typec_cable_dev_type)
 #define is_typec_plug(_dev_) (_dev_->type == &typec_plug_dev_type)
-#define is_typec_port(_dev_) (_dev_->type == &typec_port_dev_type)
 
 static DEFINE_IDA(typec_index_ida);
 static struct class *typec_class;
@@ -163,25 +147,148 @@ static void typec_report_identity(struct device *dev)
 /* ------------------------------------------------------------------------- */
 /* Alternate Modes */
 
+static int altmode_match(struct device *dev, void *data)
+{
+	struct typec_altmode *adev = to_typec_altmode(dev);
+	struct typec_device_id *id = data;
+
+	if (!is_typec_altmode(dev))
+		return 0;
+
+	return ((adev->svid == id->svid) && (adev->mode == id->mode));
+}
+
+static void typec_altmode_set_partner(struct altmode *altmode)
+{
+	struct typec_altmode *adev = &altmode->adev;
+	struct typec_device_id id = { adev->svid, adev->mode, };
+	struct typec_port *port = typec_altmode2port(adev);
+	struct altmode *partner;
+	struct device *dev;
+
+	dev = device_find_child(&port->dev, &id, altmode_match);
+	if (!dev)
+		return;
+
+	/* Bind the port alt mode to the partner/plug alt mode. */
+	partner = to_altmode(to_typec_altmode(dev));
+	altmode->partner = partner;
+
+	/* Bind the partner/plug alt mode to the port alt mode. */
+	if (is_typec_plug(adev->dev.parent)) {
+		struct typec_plug *plug = to_typec_plug(adev->dev.parent);
+
+		partner->plug[plug->index] = altmode;
+	} else {
+		partner->partner = altmode;
+	}
+}
+
+static void typec_altmode_put_partner(struct altmode *altmode)
+{
+	struct altmode *partner = altmode->partner;
+	struct typec_altmode *adev;
+
+	if (!partner)
+		return;
+
+	adev = &partner->adev;
+
+	if (is_typec_plug(adev->dev.parent)) {
+		struct typec_plug *plug = to_typec_plug(adev->dev.parent);
+
+		partner->plug[plug->index] = NULL;
+	} else {
+		partner->partner = NULL;
+	}
+	put_device(&adev->dev);
+}
+
+static int __typec_port_match(struct device *dev, const void *name)
+{
+	return !strcmp((const char *)name, dev_name(dev));
+}
+
+static void *typec_port_match(struct device_connection *con, int ep, void *data)
+{
+	return class_find_device(typec_class, NULL, con->endpoint[ep],
+				 __typec_port_match);
+}
+
+struct typec_altmode *
+typec_altmode_register_notifier(struct device *dev, u16 svid, u8 mode,
+				struct notifier_block *nb)
+{
+	struct typec_device_id id = { svid, mode, };
+	struct device *altmode_dev;
+	struct device *port_dev;
+	struct altmode *altmode;
+	int ret;
+
+	/* Find the port linked to the caller */
+	port_dev = device_connection_find_match(dev, NULL, NULL,
+						typec_port_match);
+	if (IS_ERR_OR_NULL(port_dev))
+		return port_dev ? ERR_CAST(port_dev) : ERR_PTR(-ENODEV);
+
+	/* Find the altmode with matching svid */
+	altmode_dev = device_find_child(port_dev, &id, altmode_match);
+
+	put_device(port_dev);
+
+	if (!altmode_dev)
+		return ERR_PTR(-ENODEV);
+
+	altmode = to_altmode(to_typec_altmode(altmode_dev));
+
+	/* Register notifier */
+	ret = blocking_notifier_chain_register(&altmode->nh, nb);
+	if (ret) {
+		put_device(altmode_dev);
+		return ERR_PTR(ret);
+	}
+
+	return &altmode->adev;
+}
+EXPORT_SYMBOL_GPL(typec_altmode_register_notifier);
+
+void typec_altmode_unregister_notifier(struct typec_altmode *adev,
+				       struct notifier_block *nb)
+{
+	struct altmode *altmode = to_altmode(adev);
+
+	blocking_notifier_chain_unregister(&altmode->nh, nb);
+	put_device(&adev->dev);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_unregister_notifier);
+
 /**
  * typec_altmode_update_active - Report Enter/Exit mode
- * @alt: Handle to the alternate mode
+ * @adev: Handle to the alternate mode
  * @active: True when the mode has been entered
  *
  * If a partner or cable plug executes Enter/Exit Mode command successfully, the
  * drivers use this routine to report the updated state of the mode.
  */
-void typec_altmode_update_active(struct typec_altmode *alt, bool active)
+void typec_altmode_update_active(struct typec_altmode *adev, bool active)
 {
 	char dir[6];
 
-	if (alt->active == active)
+	if (adev->active == active)
 		return;
 
-	alt->active = active;
-	snprintf(dir, sizeof(dir), "mode%d", alt->mode);
-	sysfs_notify(&alt->dev.kobj, dir, "active");
-	kobject_uevent(&alt->dev.kobj, KOBJ_CHANGE);
+	if (!is_typec_port(adev->dev.parent)) {
+		if (!active)
+			module_put(adev->dev.driver->owner);
+		else
+			WARN_ON(!try_module_get(adev->dev.driver->owner));
+	}
+
+	adev->active = active;
+	snprintf(dir, sizeof(dir), "mode%d", adev->mode);
+	sysfs_notify(&adev->dev.kobj, dir, "active");
+	sysfs_notify(&adev->dev.kobj, NULL, "active");
+	kobject_uevent(&adev->dev.kobj, KOBJ_CHANGE);
 }
 EXPORT_SYMBOL_GPL(typec_altmode_update_active);
 
@@ -208,7 +315,7 @@ EXPORT_SYMBOL_GPL(typec_altmode2port);
 static ssize_t
 vdo_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_altmode *alt = to_typec_altmode(dev);
 
 	return sprintf(buf, "0x%08x\n", alt->vdo);
 }
@@ -217,7 +324,7 @@ static DEVICE_ATTR_RO(vdo);
 static ssize_t
 description_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_altmode *alt = to_typec_altmode(dev);
 
 	return sprintf(buf, "%s\n", alt->desc ? alt->desc : "");
 }
@@ -226,7 +333,7 @@ static DEVICE_ATTR_RO(description);
 static ssize_t
 active_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_altmode *alt = to_typec_altmode(dev);
 
 	return sprintf(buf, "%s\n", alt->active ? "yes" : "no");
 }
@@ -234,21 +341,37 @@ active_show(struct device *dev, struct device_attribute *attr, char *buf)
 static ssize_t active_store(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t size)
 {
-	struct typec_altmode *alt = to_altmode(dev);
-	struct typec_port *port = typec_altmode2port(alt);
-	bool activate;
+	struct typec_altmode *adev = to_typec_altmode(dev);
+	struct altmode *altmode = to_altmode(adev);
+	bool enter;
 	int ret;
 
-	if (!port->cap->activate_mode)
-		return -EOPNOTSUPP;
-
-	ret = kstrtobool(buf, &activate);
+	ret = kstrtobool(buf, &enter);
 	if (ret)
 		return ret;
 
-	ret = port->cap->activate_mode(port->cap, alt->mode, activate);
-	if (ret)
-		return ret;
+	if (adev->active == enter)
+		return size;
+
+	if (is_typec_port(adev->dev.parent)) {
+		typec_altmode_update_active(adev, enter);
+
+		/* Make sure that the partner exits the mode before disabling */
+		if (altmode->partner && !enter && altmode->partner->adev.active)
+			typec_altmode_exit(&altmode->partner->adev);
+	} else if (altmode->partner) {
+		if (enter && !altmode->partner->adev.active) {
+			dev_warn(dev, "port has the mode disabled\n");
+			return -EPERM;
+		}
+	}
+
+	/* Note: If there is no driver, the mode will not be entered */
+	if (adev->ops && adev->ops->activate) {
+		ret = adev->ops->activate(adev, enter);
+		if (ret)
+			return ret;
+	}
 
 	return size;
 }
@@ -258,7 +381,7 @@ static ssize_t
 supported_roles_show(struct device *dev, struct device_attribute *attr,
 		     char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct altmode *alt = to_altmode(to_typec_altmode(dev));
 	ssize_t ret;
 
 	switch (alt->roles) {
@@ -277,29 +400,72 @@ supported_roles_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(supported_roles);
 
-static void typec_altmode_release(struct device *dev)
+static ssize_t
+mode_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_altmode *adev = to_typec_altmode(dev);
 
-	kfree(alt);
+	return sprintf(buf, "%u\n", adev->mode);
 }
+static DEVICE_ATTR_RO(mode);
 
-static ssize_t svid_show(struct device *dev, struct device_attribute *attr,
-			 char *buf)
+static ssize_t
+svid_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_altmode *adev = to_typec_altmode(dev);
 
-	return sprintf(buf, "%04x\n", alt->svid);
+	return sprintf(buf, "%04x\n", adev->svid);
 }
 static DEVICE_ATTR_RO(svid);
 
 static struct attribute *typec_altmode_attrs[] = {
+	&dev_attr_active.attr,
+	&dev_attr_mode.attr,
 	&dev_attr_svid.attr,
+	&dev_attr_vdo.attr,
 	NULL
 };
 ATTRIBUTE_GROUPS(typec_altmode);
 
-static const struct device_type typec_altmode_dev_type = {
+static int altmode_id_get(struct device *dev)
+{
+	struct ida *ids;
+
+	if (is_typec_partner(dev))
+		ids = &to_typec_partner(dev)->mode_ids;
+	else if (is_typec_plug(dev))
+		ids = &to_typec_plug(dev)->mode_ids;
+	else
+		ids = &to_typec_port(dev)->mode_ids;
+
+	return ida_simple_get(ids, 0, 0, GFP_KERNEL);
+}
+
+static void altmode_id_remove(struct device *dev, int id)
+{
+	struct ida *ids;
+
+	if (is_typec_partner(dev))
+		ids = &to_typec_partner(dev)->mode_ids;
+	else if (is_typec_plug(dev))
+		ids = &to_typec_plug(dev)->mode_ids;
+	else
+		ids = &to_typec_port(dev)->mode_ids;
+
+	ida_simple_remove(ids, id);
+}
+
+static void typec_altmode_release(struct device *dev)
+{
+	struct altmode *alt = to_altmode(to_typec_altmode(dev));
+
+	typec_altmode_put_partner(alt);
+
+	altmode_id_remove(alt->adev.dev.parent, alt->id);
+	kfree(alt);
+}
+
+const struct device_type typec_altmode_dev_type = {
 	.name = "typec_alternate_mode",
 	.groups = typec_altmode_groups,
 	.release = typec_altmode_release,
@@ -309,58 +475,74 @@ static struct typec_altmode *
 typec_register_altmode(struct device *parent,
 		       const struct typec_altmode_desc *desc)
 {
-	struct typec_altmode *alt;
+	unsigned int id = altmode_id_get(parent);
+	bool is_port = is_typec_port(parent);
+	struct altmode *alt;
 	int ret;
 
 	alt = kzalloc(sizeof(*alt), GFP_KERNEL);
 	if (!alt)
 		return ERR_PTR(-ENOMEM);
 
-	alt->svid = desc->svid;
-	alt->mode = desc->mode;
-	alt->vdo = desc->vdo;
+	alt->adev.svid = desc->svid;
+	alt->adev.mode = desc->mode;
+	alt->adev.vdo = desc->vdo;
 	alt->roles = desc->roles;
+	alt->id = id;
 
 	alt->attrs[0] = &dev_attr_vdo.attr;
 	alt->attrs[1] = &dev_attr_description.attr;
 	alt->attrs[2] = &dev_attr_active.attr;
 
-	if (is_typec_port(parent))
+	if (is_port) {
 		alt->attrs[3] = &dev_attr_supported_roles.attr;
+		alt->adev.active = true; /* Enabled by default */
+	}
 
 	sprintf(alt->group_name, "mode%d", desc->mode);
 	alt->group.name = alt->group_name;
 	alt->group.attrs = alt->attrs;
 	alt->groups[0] = &alt->group;
 
-	alt->dev.parent = parent;
-	alt->dev.groups = alt->groups;
-	alt->dev.type = &typec_altmode_dev_type;
-	dev_set_name(&alt->dev, "%s-%04x:%u", dev_name(parent),
-		     alt->svid, alt->mode);
+	alt->adev.dev.parent = parent;
+	alt->adev.dev.groups = alt->groups;
+	alt->adev.dev.type = &typec_altmode_dev_type;
+	dev_set_name(&alt->adev.dev, "%s.%u", dev_name(parent), id);
+
+	/* Link partners and plugs with the ports */
+	if (is_port)
+		BLOCKING_INIT_NOTIFIER_HEAD(&alt->nh);
+	else
+		typec_altmode_set_partner(alt);
+
+	/* The partners are bind to drivers */
+	if (is_typec_partner(parent))
+		alt->adev.dev.bus = &typec_bus;
 
-	ret = device_register(&alt->dev);
+	ret = device_register(&alt->adev.dev);
 	if (ret) {
 		dev_err(parent, "failed to register alternate mode (%d)\n",
 			ret);
-		put_device(&alt->dev);
+		put_device(&alt->adev.dev);
 		return ERR_PTR(ret);
 	}
 
-	return alt;
+	return &alt->adev;
 }
 
 /**
  * typec_unregister_altmode - Unregister Alternate Mode
- * @alt: The alternate mode to be unregistered
+ * @adev: The alternate mode to be unregistered
  *
  * Unregister device created with typec_partner_register_altmode(),
  * typec_plug_register_altmode() or typec_port_register_altmode().
  */
-void typec_unregister_altmode(struct typec_altmode *alt)
+void typec_unregister_altmode(struct typec_altmode *adev)
 {
-	if (!IS_ERR_OR_NULL(alt))
-		device_unregister(&alt->dev);
+	if (IS_ERR_OR_NULL(adev))
+		return;
+	typec_mux_put(to_altmode(adev)->mux);
+	device_unregister(&adev->dev);
 }
 EXPORT_SYMBOL_GPL(typec_unregister_altmode);
 
@@ -398,6 +580,7 @@ static void typec_partner_release(struct device *dev)
 {
 	struct typec_partner *partner = to_typec_partner(dev);
 
+	ida_destroy(&partner->mode_ids);
 	kfree(partner);
 }
 
@@ -463,6 +646,7 @@ struct typec_partner *typec_register_partner(struct typec_port *port,
 	if (!partner)
 		return ERR_PTR(-ENOMEM);
 
+	ida_init(&partner->mode_ids);
 	partner->usb_pd = desc->usb_pd;
 	partner->accessory = desc->accessory;
 
@@ -511,6 +695,7 @@ static void typec_plug_release(struct device *dev)
 {
 	struct typec_plug *plug = to_typec_plug(dev);
 
+	ida_destroy(&plug->mode_ids);
 	kfree(plug);
 }
 
@@ -563,6 +748,7 @@ struct typec_plug *typec_register_plug(struct typec_cable *cable,
 
 	sprintf(name, "plug%d", desc->index);
 
+	ida_init(&plug->mode_ids);
 	plug->index = desc->index;
 	plug->dev.class = typec_class;
 	plug->dev.parent = &cable->dev;
@@ -1083,12 +1269,13 @@ static void typec_release(struct device *dev)
 	struct typec_port *port = to_typec_port(dev);
 
 	ida_simple_remove(&typec_index_ida, port->id);
+	ida_destroy(&port->mode_ids);
 	typec_switch_put(port->sw);
 	typec_mux_put(port->mux);
 	kfree(port);
 }
 
-static const struct device_type typec_port_dev_type = {
+const struct device_type typec_port_dev_type = {
 	.name = "typec_port",
 	.groups = typec_groups,
 	.uevent = typec_uevent,
@@ -1279,11 +1466,11 @@ EXPORT_SYMBOL_GPL(typec_get_orientation);
 
 /**
  * typec_set_mode - Set mode of operation for USB Type-C connector
- * @port: USB Type-C port for the connector
- * @mode: Operation mode for the connector
+ * @port: USB Type-C connector
+ * @mode: Accessory Mode, USB Operation or Safe State
  *
- * Set mode @mode for @port. This function will configure the muxes needed to
- * enter @mode.
+ * Configure @port for Accessory Mode @mode. This function will configure the
+ * muxes needed for @mode.
  */
 int typec_set_mode(struct typec_port *port, int mode)
 {
@@ -1297,6 +1484,7 @@ EXPORT_SYMBOL_GPL(typec_set_mode);
  * typec_port_register_altmode - Register USB Type-C Port Alternate Mode
  * @port: USB Type-C Port that supports the alternate mode
  * @desc: Description of the alternate mode
+ * @drvdata: Private pointer to driver specific info
  *
  * This routine is used to register an alternate mode that @port is capable of
  * supporting.
@@ -1307,7 +1495,23 @@ struct typec_altmode *
 typec_port_register_altmode(struct typec_port *port,
 			    const struct typec_altmode_desc *desc)
 {
-	return typec_register_altmode(&port->dev, desc);
+	struct typec_altmode *adev;
+	struct typec_mux *mux;
+	char id[10];
+
+	sprintf(id, "id%04xm%02x", desc->svid, desc->mode);
+
+	mux = typec_mux_get(port->dev.parent, id);
+	if (IS_ERR(mux))
+		return ERR_CAST(mux);
+
+	adev = typec_register_altmode(&port->dev, desc);
+	if (IS_ERR(adev))
+		typec_mux_put(mux);
+	else
+		to_altmode(adev)->mux = mux;
+
+	return adev;
 }
 EXPORT_SYMBOL_GPL(typec_port_register_altmode);
 
@@ -1381,10 +1585,12 @@ struct typec_port *typec_register_port(struct device *parent,
 		break;
 	}
 
+	ida_init(&port->mode_ids);
+	mutex_init(&port->port_type_lock);
+
 	port->id = id;
 	port->cap = cap;
 	port->port_type = cap->type;
-	mutex_init(&port->port_type_lock);
 	port->prefer_role = cap->prefer_role;
 
 	port->dev.class = typec_class;
@@ -1428,8 +1634,19 @@ EXPORT_SYMBOL_GPL(typec_unregister_port);
 
 static int __init typec_init(void)
 {
+	int ret;
+
+	ret = bus_register(&typec_bus);
+	if (ret)
+		return ret;
+
 	typec_class = class_create(THIS_MODULE, "typec");
-	return PTR_ERR_OR_ZERO(typec_class);
+	if (IS_ERR(typec_class)) {
+		bus_unregister(&typec_bus);
+		return PTR_ERR(typec_class);
+	}
+
+	return 0;
 }
 subsys_initcall(typec_init);
 
@@ -1437,6 +1654,7 @@ static void __exit typec_exit(void)
 {
 	class_destroy(typec_class);
 	ida_destroy(&typec_index_ida);
+	bus_unregister(&typec_bus);
 }
 module_exit(typec_exit);
 
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 96a71a648eed..1298a7daa57d 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -746,4 +746,19 @@ struct tb_service_id {
 #define TBSVC_MATCH_PROTOCOL_VERSION	0x0004
 #define TBSVC_MATCH_PROTOCOL_REVISION	0x0008
 
+/* USB Type-C Alternate Modes */
+
+#define TYPEC_ANY_MODE	0x7
+
+/**
+ * struct typec_device_id - USB Type-C alternate mode identifiers
+ * @svid: Standard or Vendor ID
+ * @mode: Mode index
+ */
+struct typec_device_id {
+	__u16 svid;
+	__u8 mode;
+	kernel_ulong_t driver_data;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 2dcb1683075f..7df4ecabc78a 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -5,21 +5,18 @@
 
 #include <linux/types.h>
 
-/* XXX: Once we have a header for USB Power Delivery, this belongs there */
-#define ALTMODE_MAX_MODES	6
-
 /* USB Type-C Specification releases */
 #define USB_TYPEC_REV_1_0	0x100 /* 1.0 */
 #define USB_TYPEC_REV_1_1	0x110 /* 1.1 */
 #define USB_TYPEC_REV_1_2	0x120 /* 1.2 */
 
-struct typec_altmode;
 struct typec_partner;
 struct typec_cable;
 struct typec_plug;
 struct typec_port;
 
 struct fwnode_handle;
+struct device;
 
 enum typec_port_type {
 	TYPEC_PORT_SRC,
@@ -107,7 +104,7 @@ struct typec_altmode_desc {
 	u8			mode;
 	u32			vdo;
 	/* Only used with ports */
-	enum typec_port_type	roles;
+	enum typec_port_data	roles;
 };
 
 struct typec_altmode
@@ -186,7 +183,6 @@ struct typec_partner_desc {
  * @dr_set: Set Data Role
  * @pr_set: Set Power Role
  * @vconn_set: Set VCONN Role
- * @activate_mode: Enter/exit given Alternate Mode
  * @port_type_set: Set port type
  *
  * Static capabilities of a single USB Type-C port.
@@ -212,12 +208,8 @@ struct typec_capability {
 				  enum typec_role);
 	int		(*vconn_set)(const struct typec_capability *,
 				     enum typec_role);
-
-	int		(*activate_mode)(const struct typec_capability *,
-					 int mode, int activate);
 	int		(*port_type_set)(const struct typec_capability *,
-					enum typec_port_type);
-
+					 enum typec_port_type);
 };
 
 /* Specific to try_role(). Indicates the user want's to clear the preference. */
diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h
new file mode 100644
index 000000000000..9a88c74a1d0d
--- /dev/null
+++ b/include/linux/usb/typec_altmode.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __USB_TYPEC_ALTMODE_H
+#define __USB_TYPEC_ALTMODE_H
+
+#include <linux/mod_devicetable.h>
+#include <linux/usb/typec.h>
+#include <linux/device.h>
+
+#define MODE_DISCOVERY_MAX	6
+
+struct typec_altmode_ops;
+
+/**
+ * struct typec_altmode - USB Type-C alternate mode device
+ * @dev: Driver model's view of this device
+ * @svid: Standard or Vendor ID (SVID) of the alternate mode
+ * @mode: Index of the Mode
+ * @vdo: VDO returned by Discover Modes USB PD command
+ * @active: Tells has the mode been entered or not
+ * @desc: Optional human readable description of the mode
+ * @ops: Operations vector from the driver
+ */
+struct typec_altmode {
+	struct device			dev;
+	u16				svid;
+	int				mode;
+	u32				vdo;
+	unsigned int			active:1;
+
+	char				*desc;
+	const struct typec_altmode_ops	*ops;
+};
+
+#define to_typec_altmode(d) container_of(d, struct typec_altmode, dev)
+
+static inline void typec_altmode_set_drvdata(struct typec_altmode *altmode,
+					     void *data)
+{
+	dev_set_drvdata(&altmode->dev, data);
+}
+
+static inline void *typec_altmode_get_drvdata(struct typec_altmode *altmode)
+{
+	return dev_get_drvdata(&altmode->dev);
+}
+
+/**
+ * struct typec_altmode_ops - Alternate mode specific operations vector
+ * @enter: Operations to be executed with Enter Mode Command
+ * @exit: Operations to be executed with Exit Mode Command
+ * @attention: Callback for Attention Command
+ * @vdm: Callback for SVID specific commands
+ * @notify: Communication channel for platform and the alternate mode
+ * @activate: User callback for Enter/Exit Mode
+ */
+struct typec_altmode_ops {
+	int (*enter)(struct typec_altmode *altmode);
+	int (*exit)(struct typec_altmode *altmode);
+	void (*attention)(struct typec_altmode *altmode, u32 vdo);
+	int (*vdm)(struct typec_altmode *altmode, const u32 hdr,
+		   const u32 *vdo, int cnt);
+	int (*notify)(struct typec_altmode *altmode, unsigned long conf,
+		      void *data);
+	int (*activate)(struct typec_altmode *altmode, int activate);
+};
+
+int typec_altmode_enter(struct typec_altmode *altmode);
+int typec_altmode_exit(struct typec_altmode *altmode);
+void typec_altmode_attention(struct typec_altmode *altmode, u32 vdo);
+int typec_altmode_vdm(struct typec_altmode *altmode,
+		      const u32 header, const u32 *vdo, int count);
+int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf,
+			 void *data);
+const struct typec_altmode *
+typec_altmode_get_partner(struct typec_altmode *altmode);
+
+/*
+ * These are the connector states (USB, Safe and Alt Mode) defined in USB Type-C
+ * Specification. SVID specific connector states are expected to follow and
+ * start from the value TYPEC_STATE_MODAL.
+ */
+enum {
+	TYPEC_STATE_SAFE,	/* USB Safe State */
+	TYPEC_STATE_USB,	/* USB Operation */
+	TYPEC_STATE_MODAL,	/* Alternate Modes */
+};
+
+/*
+ * For the muxes there is no difference between Accessory Modes and Alternate
+ * Modes, so the Accessory Modes are supplied with specific modal state values
+ * here. Unlike with Alternate Modes, where the mux will be linked with the
+ * alternate mode device, the mux for Accessory Modes will be linked with the
+ * port device instead.
+ *
+ * Port drivers can use TYPEC_MODE_AUDIO and TYPEC_MODE_DEBUG as the mode
+ * value for typec_set_mode() when accessory modes are supported.
+ */
+enum {
+	TYPEC_MODE_AUDIO = TYPEC_STATE_MODAL,	/* Audio Accessory */
+	TYPEC_MODE_DEBUG,			/* Debug Accessory */
+};
+
+#define TYPEC_MODAL_STATE(_state_)	((_state_) + TYPEC_STATE_MODAL)
+
+struct typec_altmode *typec_altmode_get_plug(struct typec_altmode *altmode,
+					     enum typec_plug_index index);
+void typec_altmode_put_plug(struct typec_altmode *plug);
+
+struct typec_altmode *typec_match_altmode(struct typec_altmode **altmodes,
+					  size_t n, u16 svid, u8 mode);
+
+struct typec_altmode *
+typec_altmode_register_notifier(struct device *dev, u16 svid, u8 mode,
+				struct notifier_block *nb);
+
+void typec_altmode_unregister_notifier(struct typec_altmode *adev,
+				       struct notifier_block *nb);
+
+/**
+ * typec_altmode_get_orientation - Get cable plug orientation
+ * altmode: Handle to the alternate mode
+ */
+static inline enum typec_orientation
+typec_altmode_get_orientation(struct typec_altmode *altmode)
+{
+	return typec_get_orientation(typec_altmode2port(altmode));
+}
+
+/**
+ * struct typec_altmode_driver - USB Type-C alternate mode device driver
+ * @id_table: Null terminated array of SVIDs
+ * @probe: Callback for device binding
+ * @remove: Callback for device unbinding
+ * @driver: Device driver model driver
+ *
+ * These drivers will be bind to the partner alternate mode devices. They will
+ * handle all SVID specific communication.
+ */
+struct typec_altmode_driver {
+	const struct typec_device_id *id_table;
+	int (*probe)(struct typec_altmode *altmode);
+	void (*remove)(struct typec_altmode *altmode);
+	struct device_driver driver;
+};
+
+#define to_altmode_driver(d) container_of(d, struct typec_altmode_driver, \
+					  driver)
+
+#define typec_altmode_register_driver(drv) \
+		__typec_altmode_register_driver(drv, THIS_MODULE)
+int __typec_altmode_register_driver(struct typec_altmode_driver *drv,
+				    struct module *module);
+void typec_altmode_unregister_driver(struct typec_altmode_driver *drv);
+
+#define module_typec_altmode_driver(__typec_altmode_driver) \
+	module_driver(__typec_altmode_driver, typec_altmode_register_driver, \
+		      typec_altmode_unregister_driver)
+
+#endif /* __USB_TYPEC_ALTMODE_H */
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index 6667f7b491d6..293004499b4d 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -221,5 +221,9 @@ int main(void)
 	DEVID_FIELD(tb_service_id, protocol_version);
 	DEVID_FIELD(tb_service_id, protocol_revision);
 
+	DEVID(typec_device_id);
+	DEVID_FIELD(typec_device_id, svid);
+	DEVID_FIELD(typec_device_id, mode);
+
 	return 0;
 }
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 52fd54a8fe39..7be43697ff84 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -1352,6 +1352,19 @@ static int do_tbsvc_entry(const char *filename, void *symval, char *alias)
 }
 ADD_TO_DEVTABLE("tbsvc", tb_service_id, do_tbsvc_entry);
 
+/* Looks like: typec:idNmN */
+static int do_typec_entry(const char *filename, void *symval, char *alias)
+{
+	DEF_FIELD(symval, typec_device_id, svid);
+	DEF_FIELD(symval, typec_device_id, mode);
+
+	sprintf(alias, "typec:id%04X", svid);
+	ADD(alias, "m", mode != TYPEC_ANY_MODE, mode);
+
+	return 1;
+}
+ADD_TO_DEVTABLE("typec", typec_device_id, do_typec_entry);
+
 /* Does namelen bytes of name exactly match the symbol? */
 static bool sym_is(const char *name, unsigned namelen, const char *symbol)
 {
-- 
cgit v1.2.3


From 0e3bb7d6894d9b6e67d6382bb03a46a1dc989588 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:51 +0300
Subject: usb: typec: Add driver for DisplayPort alternate mode

DisplayPort USB Type-C Alt Mode allows DisplayPort displays
and adapters to be attached to the USB Type-C ports on the
system.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-driver-typec-displayport     |  49 ++
 drivers/usb/typec/Kconfig                          |   2 +
 drivers/usb/typec/Makefile                         |   1 +
 drivers/usb/typec/altmodes/Kconfig                 |  14 +
 drivers/usb/typec/altmodes/Makefile                |   2 +
 drivers/usb/typec/altmodes/displayport.c           | 578 +++++++++++++++++++++
 include/linux/usb/typec_dp.h                       |  95 ++++
 7 files changed, 741 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-driver-typec-displayport
 create mode 100644 drivers/usb/typec/altmodes/Kconfig
 create mode 100644 drivers/usb/typec/altmodes/Makefile
 create mode 100644 drivers/usb/typec/altmodes/displayport.c
 create mode 100644 include/linux/usb/typec_dp.h

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-driver-typec-displayport b/Documentation/ABI/testing/sysfs-driver-typec-displayport
new file mode 100644
index 000000000000..231471ad0d4b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-typec-displayport
@@ -0,0 +1,49 @@
+What:		/sys/bus/typec/devices/.../displayport/configuration
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows the current DisplayPort configuration for the connector.
+		Valid values are USB, source and sink. Source means DisplayPort
+		source, and sink means DisplayPort sink.
+
+		All supported configurations are listed as space separated list
+		with the active one wrapped in square brackets.
+
+		Source example:
+
+			USB [source] sink
+
+		The configuration can be changed by writing to the file
+
+		Note. USB configuration does not equal to Exit Mode. It is
+		separate configuration defined in VESA DisplayPort Alt Mode on
+		USB Type-C Standard. Functionally it equals to the situation
+		where the mode has been exited (to exit the mode, see
+		Documentation/ABI/testing/sysfs-bus-typec, and use file
+		/sys/bus/typec/devices/.../active).
+
+What:		/sys/bus/typec/devices/.../displayport/pin_assignment
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		VESA DisplayPort Alt Mode on USB Type-C Standard defines six
+		different pin assignments for USB Type-C connector that are
+		labeled A, B, C, D, E, and F. The supported pin assignments are
+		listed as space separated list with the active one wrapped in
+		square brackets.
+
+		Example:
+
+			C [D]
+
+		Pin assignment can be changed by writing to the file. It is
+		possible to set pin assignment before configuration has been
+		set, but the assignment will not be active before the
+		connector is actually configured.
+
+		Note. As of VESA DisplayPort Alt Mode on USB Type-C Standard
+		version 1.0b, pin assignments A, B, and F are deprecated. Only
+		pin assignment D can now carry simultaneously one channel of
+		USB SuperSpeed protocol. From user perspective pin assignments C
+		and E are equal, where all channels on the connector are used
+		for carrying DisplayPort protocol (allowing higher resolutions).
diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig
index ee808903983f..00878c386dd0 100644
--- a/drivers/usb/typec/Kconfig
+++ b/drivers/usb/typec/Kconfig
@@ -104,4 +104,6 @@ config TYPEC_TPS6598X
 
 source "drivers/usb/typec/mux/Kconfig"
 
+source "drivers/usb/typec/altmodes/Kconfig"
+
 endif # TYPEC
diff --git a/drivers/usb/typec/Makefile b/drivers/usb/typec/Makefile
index 335ee06748fc..45b0aef428a8 100644
--- a/drivers/usb/typec/Makefile
+++ b/drivers/usb/typec/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_TYPEC)		+= typec.o
 typec-y				:= class.o mux.o bus.o
+obj-$(CONFIG_TYPEC)		+= altmodes/
 obj-$(CONFIG_TYPEC_TCPM)	+= tcpm.o
 obj-y				+= fusb302/
 obj-$(CONFIG_TYPEC_WCOVE)	+= typec_wcove.o
diff --git a/drivers/usb/typec/altmodes/Kconfig b/drivers/usb/typec/altmodes/Kconfig
new file mode 100644
index 000000000000..efef2a64bc51
--- /dev/null
+++ b/drivers/usb/typec/altmodes/Kconfig
@@ -0,0 +1,14 @@
+
+menu "USB Type-C Alternate Mode drivers"
+
+config TYPEC_DP_ALTMODE
+	tristate "DisplayPort Alternate Mode driver"
+	help
+	  DisplayPort USB Type-C Alternate Mode allows DisplayPort
+	  displays and adapters to be attached to the USB Type-C
+	  connectors on the system.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called typec_displayport.
+
+endmenu
diff --git a/drivers/usb/typec/altmodes/Makefile b/drivers/usb/typec/altmodes/Makefile
new file mode 100644
index 000000000000..5caf094ef71a
--- /dev/null
+++ b/drivers/usb/typec/altmodes/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_TYPEC_DP_ALTMODE)		+= typec_displayport.o
+typec_displayport-y			:= displayport.o
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
new file mode 100644
index 000000000000..ef12b15bd484
--- /dev/null
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * USB Typec-C DisplayPort Alternate Mode driver
+ *
+ * Copyright (C) 2018 Intel Corporation
+ * Author: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+ *
+ * DisplayPort is trademark of VESA (www.vesa.org)
+ */
+
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/usb/pd_vdo.h>
+#include <linux/usb/typec_dp.h>
+
+#define DP_HEADER(cmd)			(VDO(USB_TYPEC_DP_SID, 1, cmd) | \
+					 VDO_OPOS(USB_TYPEC_DP_MODE))
+
+enum {
+	DP_CONF_USB,
+	DP_CONF_DFP_D,
+	DP_CONF_UFP_D,
+	DP_CONF_DUAL_D,
+};
+
+/* Helper for setting/getting the pin assignement value to the configuration */
+#define DP_CONF_SET_PIN_ASSIGN(_a_)	((_a_) << 8)
+#define DP_CONF_GET_PIN_ASSIGN(_conf_)	(((_conf_) & GENMASK(15, 8)) >> 8)
+
+/* Pin assignments that use USB3.1 Gen2 signaling to carry DP protocol */
+#define DP_PIN_ASSIGN_GEN2_BR_MASK	(BIT(DP_PIN_ASSIGN_A) | \
+					 BIT(DP_PIN_ASSIGN_B))
+
+/* Pin assignments that use DP v1.3 signaling to carry DP protocol */
+#define DP_PIN_ASSIGN_DP_BR_MASK	(BIT(DP_PIN_ASSIGN_C) | \
+					 BIT(DP_PIN_ASSIGN_D) | \
+					 BIT(DP_PIN_ASSIGN_E) | \
+					 BIT(DP_PIN_ASSIGN_F))
+
+/* DP only pin assignments */
+#define DP_PIN_ASSIGN_DP_ONLY_MASK	(BIT(DP_PIN_ASSIGN_A) | \
+					 BIT(DP_PIN_ASSIGN_C) | \
+					 BIT(DP_PIN_ASSIGN_E))
+
+/* Pin assignments where one channel is for USB */
+#define DP_PIN_ASSIGN_MULTI_FUNC_MASK	(BIT(DP_PIN_ASSIGN_B) | \
+					 BIT(DP_PIN_ASSIGN_D) | \
+					 BIT(DP_PIN_ASSIGN_F))
+
+enum dp_state {
+	DP_STATE_IDLE,
+	DP_STATE_ENTER,
+	DP_STATE_UPDATE,
+	DP_STATE_CONFIGURE,
+	DP_STATE_EXIT,
+};
+
+struct dp_altmode {
+	struct typec_displayport_data data;
+
+	enum dp_state state;
+
+	struct mutex lock; /* device lock */
+	struct work_struct work;
+	struct typec_altmode *alt;
+	const struct typec_altmode *port;
+};
+
+static int dp_altmode_notify(struct dp_altmode *dp)
+{
+	u8 state = get_count_order(DP_CONF_GET_PIN_ASSIGN(dp->data.conf));
+
+	return typec_altmode_notify(dp->alt, TYPEC_MODAL_STATE(state),
+				   &dp->data);
+}
+
+static int dp_altmode_configure(struct dp_altmode *dp, u8 con)
+{
+	u32 conf = DP_CONF_SIGNALING_DP; /* Only DP signaling supported */
+	u8 pin_assign = 0;
+
+	switch (con) {
+	case DP_STATUS_CON_DISABLED:
+		return 0;
+	case DP_STATUS_CON_DFP_D:
+		conf |= DP_CONF_UFP_U_AS_DFP_D;
+		pin_assign = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo) &
+			     DP_CAP_DFP_D_PIN_ASSIGN(dp->port->vdo);
+		break;
+	case DP_STATUS_CON_UFP_D:
+	case DP_STATUS_CON_BOTH: /* NOTE: First acting as DP source */
+		conf |= DP_CONF_UFP_U_AS_UFP_D;
+		pin_assign = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo) &
+			     DP_CAP_UFP_D_PIN_ASSIGN(dp->port->vdo);
+		break;
+	default:
+		break;
+	}
+
+	/* Determining the initial pin assignment. */
+	if (!DP_CONF_GET_PIN_ASSIGN(dp->data.conf)) {
+		/* Is USB together with DP preferred */
+		if (dp->data.status & DP_STATUS_PREFER_MULTI_FUNC &&
+		    pin_assign & DP_PIN_ASSIGN_MULTI_FUNC_MASK)
+			pin_assign &= DP_PIN_ASSIGN_MULTI_FUNC_MASK;
+		else
+			pin_assign &= DP_PIN_ASSIGN_DP_ONLY_MASK;
+
+		if (!pin_assign)
+			return -EINVAL;
+
+		conf |= DP_CONF_SET_PIN_ASSIGN(pin_assign);
+	}
+
+	dp->data.conf = conf;
+
+	return 0;
+}
+
+static int dp_altmode_status_update(struct dp_altmode *dp)
+{
+	bool configured = !!DP_CONF_GET_PIN_ASSIGN(dp->data.conf);
+	u8 con = DP_STATUS_CONNECTION(dp->data.status);
+	int ret = 0;
+
+	if (configured && (dp->data.status & DP_STATUS_SWITCH_TO_USB)) {
+		dp->data.conf = 0;
+		dp->state = DP_STATE_CONFIGURE;
+	} else if (dp->data.status & DP_STATUS_EXIT_DP_MODE) {
+		dp->state = DP_STATE_EXIT;
+	} else if (!(con & DP_CONF_CURRENTLY(dp->data.conf))) {
+		ret = dp_altmode_configure(dp, con);
+		if (!ret)
+			dp->state = DP_STATE_CONFIGURE;
+	}
+
+	return ret;
+}
+
+static int dp_altmode_configured(struct dp_altmode *dp)
+{
+	int ret;
+
+	sysfs_notify(&dp->alt->dev.kobj, "displayport", "configuration");
+
+	if (!dp->data.conf)
+		return typec_altmode_notify(dp->alt, TYPEC_STATE_USB,
+					    &dp->data);
+
+	ret = dp_altmode_notify(dp);
+	if (ret)
+		return ret;
+
+	sysfs_notify(&dp->alt->dev.kobj, "displayport", "pin_assignment");
+
+	return 0;
+}
+
+static int dp_altmode_configure_vdm(struct dp_altmode *dp, u32 conf)
+{
+	u32 header = DP_HEADER(DP_CMD_CONFIGURE);
+	int ret;
+
+	ret = typec_altmode_notify(dp->alt, TYPEC_STATE_SAFE, &dp->data);
+	if (ret) {
+		dev_err(&dp->alt->dev,
+			"unable to put to connector to safe mode\n");
+		return ret;
+	}
+
+	ret = typec_altmode_vdm(dp->alt, header, &conf, 2);
+	if (ret) {
+		if (DP_CONF_GET_PIN_ASSIGN(dp->data.conf))
+			dp_altmode_notify(dp);
+		else
+			typec_altmode_notify(dp->alt, TYPEC_STATE_USB,
+					     &dp->data);
+	}
+
+	return ret;
+}
+
+static void dp_altmode_work(struct work_struct *work)
+{
+	struct dp_altmode *dp = container_of(work, struct dp_altmode, work);
+	u32 header;
+	u32 vdo;
+	int ret;
+
+	mutex_lock(&dp->lock);
+
+	switch (dp->state) {
+	case DP_STATE_ENTER:
+		ret = typec_altmode_enter(dp->alt);
+		if (ret)
+			dev_err(&dp->alt->dev, "failed to enter mode\n");
+		break;
+	case DP_STATE_UPDATE:
+		header = DP_HEADER(DP_CMD_STATUS_UPDATE);
+		vdo = 1;
+		ret = typec_altmode_vdm(dp->alt, header, &vdo, 2);
+		if (ret)
+			dev_err(&dp->alt->dev,
+				"unable to send Status Update command (%d)\n",
+				ret);
+		break;
+	case DP_STATE_CONFIGURE:
+		ret = dp_altmode_configure_vdm(dp, dp->data.conf);
+		if (ret)
+			dev_err(&dp->alt->dev,
+				"unable to send Configure command (%d)\n", ret);
+		break;
+	case DP_STATE_EXIT:
+		if (typec_altmode_exit(dp->alt))
+			dev_err(&dp->alt->dev, "Exit Mode Failed!\n");
+		break;
+	default:
+		break;
+	}
+
+	dp->state = DP_STATE_IDLE;
+
+	mutex_unlock(&dp->lock);
+}
+
+static void dp_altmode_attention(struct typec_altmode *alt, const u32 vdo)
+{
+	struct dp_altmode *dp = typec_altmode_get_drvdata(alt);
+	u8 old_state;
+
+	mutex_lock(&dp->lock);
+
+	old_state = dp->state;
+	dp->data.status = vdo;
+
+	if (old_state != DP_STATE_IDLE)
+		dev_warn(&alt->dev, "ATTENTION while processing state %d\n",
+			 old_state);
+
+	if (dp_altmode_status_update(dp))
+		dev_warn(&alt->dev, "%s: status update failed\n", __func__);
+
+	if (dp_altmode_notify(dp))
+		dev_err(&alt->dev, "%s: notification failed\n", __func__);
+
+	if (old_state == DP_STATE_IDLE && dp->state != DP_STATE_IDLE)
+		schedule_work(&dp->work);
+
+	mutex_unlock(&dp->lock);
+}
+
+static int dp_altmode_vdm(struct typec_altmode *alt,
+			  const u32 hdr, const u32 *vdo, int count)
+{
+	struct dp_altmode *dp = typec_altmode_get_drvdata(alt);
+	int cmd_type = PD_VDO_CMDT(hdr);
+	int cmd = PD_VDO_CMD(hdr);
+	int ret = 0;
+
+	mutex_lock(&dp->lock);
+
+	if (dp->state != DP_STATE_IDLE) {
+		ret = -EBUSY;
+		goto err_unlock;
+	}
+
+	switch (cmd_type) {
+	case CMDT_RSP_ACK:
+		switch (cmd) {
+		case CMD_ENTER_MODE:
+			dp->state = DP_STATE_UPDATE;
+			break;
+		case CMD_EXIT_MODE:
+			dp->data.status = 0;
+			dp->data.conf = 0;
+			break;
+		case DP_CMD_STATUS_UPDATE:
+			dp->data.status = *vdo;
+			ret = dp_altmode_status_update(dp);
+			break;
+		case DP_CMD_CONFIGURE:
+			ret = dp_altmode_configured(dp);
+			break;
+		default:
+			break;
+		}
+		break;
+	case CMDT_RSP_NAK:
+		switch (cmd) {
+		case DP_CMD_CONFIGURE:
+			dp->data.conf = 0;
+			ret = dp_altmode_configured(dp);
+			break;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (dp->state != DP_STATE_IDLE)
+		schedule_work(&dp->work);
+
+err_unlock:
+	mutex_unlock(&dp->lock);
+	return ret;
+}
+
+static int dp_altmode_activate(struct typec_altmode *alt, int activate)
+{
+	return activate ? typec_altmode_enter(alt) : typec_altmode_exit(alt);
+}
+
+static const struct typec_altmode_ops dp_altmode_ops = {
+	.attention = dp_altmode_attention,
+	.vdm = dp_altmode_vdm,
+	.activate = dp_altmode_activate,
+};
+
+static const char * const configurations[] = {
+	[DP_CONF_USB]	= "USB",
+	[DP_CONF_DFP_D]	= "source",
+	[DP_CONF_UFP_D]	= "sink",
+};
+
+static ssize_t
+configuration_store(struct device *dev, struct device_attribute *attr,
+		    const char *buf, size_t size)
+{
+	struct dp_altmode *dp = dev_get_drvdata(dev);
+	u32 conf;
+	u32 cap;
+	int con;
+	int ret;
+
+	con = sysfs_match_string(configurations, buf);
+	if (con < 0)
+		return con;
+
+	mutex_lock(&dp->lock);
+
+	if (dp->state != DP_STATE_IDLE) {
+		ret = -EBUSY;
+		goto err_unlock;
+	}
+
+	cap = DP_CAP_CAPABILITY(dp->alt->vdo);
+
+	if ((con == DP_CONF_DFP_D && !(cap & DP_CAP_DFP_D)) ||
+	    (con == DP_CONF_UFP_D && !(cap & DP_CAP_UFP_D)))
+		return -EINVAL;
+
+	conf = dp->data.conf & ~DP_CONF_DUAL_D;
+	conf |= con;
+
+	if (dp->alt->active) {
+		ret = dp_altmode_configure_vdm(dp, conf);
+		if (ret)
+			goto err_unlock;
+	}
+
+	dp->data.conf = conf;
+
+err_unlock:
+	mutex_unlock(&dp->lock);
+
+	return ret ? ret : size;
+}
+
+static ssize_t configuration_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct dp_altmode *dp = dev_get_drvdata(dev);
+	int len;
+	u8 cap;
+	u8 cur;
+	int i;
+
+	mutex_lock(&dp->lock);
+
+	cap = DP_CAP_CAPABILITY(dp->alt->vdo);
+	cur = DP_CONF_CURRENTLY(dp->data.conf);
+
+	len = sprintf(buf, "%s ", cur ? "USB" : "[USB]");
+
+	for (i = 1; i < ARRAY_SIZE(configurations); i++) {
+		if (i == cur)
+			len += sprintf(buf + len, "[%s] ", configurations[i]);
+		else if ((i == DP_CONF_DFP_D && cap & DP_CAP_DFP_D) ||
+			 (i == DP_CONF_UFP_D && cap & DP_CAP_UFP_D))
+			len += sprintf(buf + len, "%s ", configurations[i]);
+	}
+
+	mutex_unlock(&dp->lock);
+
+	buf[len - 1] = '\n';
+	return len;
+}
+static DEVICE_ATTR_RW(configuration);
+
+static const char * const pin_assignments[] = {
+	[DP_PIN_ASSIGN_A] = "A",
+	[DP_PIN_ASSIGN_B] = "B",
+	[DP_PIN_ASSIGN_C] = "C",
+	[DP_PIN_ASSIGN_D] = "D",
+	[DP_PIN_ASSIGN_E] = "E",
+	[DP_PIN_ASSIGN_F] = "F",
+};
+
+static ssize_t
+pin_assignment_store(struct device *dev, struct device_attribute *attr,
+		     const char *buf, size_t size)
+{
+	struct dp_altmode *dp = dev_get_drvdata(dev);
+	u8 assignments;
+	u32 conf;
+	int ret;
+
+	ret = sysfs_match_string(pin_assignments, buf);
+	if (ret < 0)
+		return ret;
+
+	conf = DP_CONF_SET_PIN_ASSIGN(BIT(ret));
+	ret = 0;
+
+	mutex_lock(&dp->lock);
+
+	if (conf & dp->data.conf)
+		goto out_unlock;
+
+	if (dp->state != DP_STATE_IDLE) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D)
+		assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo);
+	else
+		assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo);
+
+	if (!(DP_CONF_GET_PIN_ASSIGN(conf) & assignments)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	conf |= dp->data.conf & ~DP_CONF_PIN_ASSIGNEMENT_MASK;
+
+	/* Only send Configure command if a configuration has been set */
+	if (dp->alt->active && DP_CONF_CURRENTLY(dp->data.conf)) {
+		ret = dp_altmode_configure_vdm(dp, conf);
+		if (ret)
+			goto out_unlock;
+	}
+
+	dp->data.conf = conf;
+
+out_unlock:
+	mutex_unlock(&dp->lock);
+
+	return ret ? ret : size;
+}
+
+static ssize_t pin_assignment_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct dp_altmode *dp = dev_get_drvdata(dev);
+	u8 assignments;
+	int len = 0;
+	u8 cur;
+	int i;
+
+	mutex_lock(&dp->lock);
+
+	cur = get_count_order(DP_CONF_GET_PIN_ASSIGN(dp->data.conf));
+
+	if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D)
+		assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo);
+	else
+		assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo);
+
+	for (i = 0; assignments; assignments >>= 1, i++) {
+		if (assignments & 1) {
+			if (i == cur)
+				len += sprintf(buf + len, "[%s] ",
+					       pin_assignments[i]);
+			else
+				len += sprintf(buf + len, "%s ",
+					       pin_assignments[i]);
+		}
+	}
+
+	mutex_unlock(&dp->lock);
+
+	buf[len - 1] = '\n';
+	return len;
+}
+static DEVICE_ATTR_RW(pin_assignment);
+
+static struct attribute *dp_altmode_attrs[] = {
+	&dev_attr_configuration.attr,
+	&dev_attr_pin_assignment.attr,
+	NULL
+};
+
+static const struct attribute_group dp_altmode_group = {
+	.name = "displayport",
+	.attrs = dp_altmode_attrs,
+};
+
+static int dp_altmode_probe(struct typec_altmode *alt)
+{
+	const struct typec_altmode *port = typec_altmode_get_partner(alt);
+	struct dp_altmode *dp;
+	int ret;
+
+	/* FIXME: Port can only be DFP_U. */
+
+	/* Make sure we have compatiple pin configurations */
+	if (!(DP_CAP_DFP_D_PIN_ASSIGN(port->vdo) &
+	      DP_CAP_UFP_D_PIN_ASSIGN(alt->vdo)) &&
+	    !(DP_CAP_UFP_D_PIN_ASSIGN(port->vdo) &
+	      DP_CAP_DFP_D_PIN_ASSIGN(alt->vdo)))
+		return -ENODEV;
+
+	ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group);
+	if (ret)
+		return ret;
+
+	dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL);
+	if (!dp)
+		return -ENOMEM;
+
+	INIT_WORK(&dp->work, dp_altmode_work);
+	mutex_init(&dp->lock);
+	dp->port = port;
+	dp->alt = alt;
+
+	alt->desc = "DisplayPort";
+	alt->ops = &dp_altmode_ops;
+
+	typec_altmode_set_drvdata(alt, dp);
+
+	dp->state = DP_STATE_ENTER;
+	schedule_work(&dp->work);
+
+	return 0;
+}
+
+static void dp_altmode_remove(struct typec_altmode *alt)
+{
+	struct dp_altmode *dp = typec_altmode_get_drvdata(alt);
+
+	sysfs_remove_group(&alt->dev.kobj, &dp_altmode_group);
+	cancel_work_sync(&dp->work);
+}
+
+static const struct typec_device_id dp_typec_id[] = {
+	{ USB_TYPEC_DP_SID, USB_TYPEC_DP_MODE },
+	{ },
+};
+MODULE_DEVICE_TABLE(typec, dp_typec_id);
+
+static struct typec_altmode_driver dp_altmode_driver = {
+	.id_table = dp_typec_id,
+	.probe = dp_altmode_probe,
+	.remove = dp_altmode_remove,
+	.driver = {
+		.name = "typec_displayport",
+		.owner = THIS_MODULE,
+	},
+};
+module_typec_altmode_driver(dp_altmode_driver);
+
+MODULE_AUTHOR("Heikki Krogerus <heikki.krogerus@linux.intel.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DisplayPort Alternate Mode");
diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h
new file mode 100644
index 000000000000..55ae781d60a9
--- /dev/null
+++ b/include/linux/usb/typec_dp.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __USB_TYPEC_DP_H
+#define __USB_TYPEC_DP_H
+
+#include <linux/usb/typec_altmode.h>
+
+#define USB_TYPEC_DP_SID	0xff01
+#define USB_TYPEC_DP_MODE	1
+
+/*
+ * Connector states matching the pin assignments in DisplayPort Alt Mode
+ * Specification.
+ *
+ * These values are meant primarily to be used by the mux drivers, but they are
+ * also used as the "value" part in the alternate mode notification chain, so
+ * receivers of those notifications will always see them.
+ *
+ * Note. DisplayPort USB Type-C Alt Mode Specification version 1.0b deprecated
+ * pin assignments A, B and F, but they are still defined here for legacy
+ * purposes.
+ */
+enum {
+	TYPEC_DP_STATE_A = TYPEC_STATE_MODAL,	/* Not supported after v1.0b */
+	TYPEC_DP_STATE_B,			/* Not supported after v1.0b */
+	TYPEC_DP_STATE_C,
+	TYPEC_DP_STATE_D,
+	TYPEC_DP_STATE_E,
+	TYPEC_DP_STATE_F,			/* Not supported after v1.0b */
+};
+
+/*
+ * struct typec_displayport_data - DisplayPort Alt Mode specific data
+ * @status: Status Update command VDO content
+ * @conf: Configure command VDO content
+ *
+ * This structure is delivered as the data part with the notifications. It
+ * contains the VDOs from the two DisplayPort Type-C alternate mode specific
+ * commands: Status Update and Configure.
+ *
+ * @status will show for example the status of the HPD signal.
+ */
+struct typec_displayport_data {
+	u32 status;
+	u32 conf;
+};
+
+enum {
+	DP_PIN_ASSIGN_A, /* Not supported after v1.0b */
+	DP_PIN_ASSIGN_B, /* Not supported after v1.0b */
+	DP_PIN_ASSIGN_C,
+	DP_PIN_ASSIGN_D,
+	DP_PIN_ASSIGN_E,
+	DP_PIN_ASSIGN_F, /* Not supported after v1.0b */
+};
+
+/* DisplayPort alt mode specific commands */
+#define DP_CMD_STATUS_UPDATE		VDO_CMD_VENDOR(0)
+#define DP_CMD_CONFIGURE		VDO_CMD_VENDOR(1)
+
+/* DisplayPort Capabilities VDO bits (returned with Discover Modes) */
+#define DP_CAP_CAPABILITY(_cap_)	((_cap_) & 3)
+#define   DP_CAP_UFP_D			1
+#define   DP_CAP_DFP_D			2
+#define   DP_CAP_DFP_D_AND_UFP_D	3
+#define DP_CAP_DP_SIGNALING		BIT(2) /* Always set */
+#define DP_CAP_GEN2			BIT(3) /* Reserved after v1.0b */
+#define DP_CAP_RECEPTACLE		BIT(6)
+#define DP_CAP_USB			BIT(7)
+#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_)	(((_cap_) & GENMASK(15, 8)) >> 8)
+#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_)	(((_cap_) & GENMASK(23, 16)) >> 16)
+
+/* DisplayPort Status Update VDO bits */
+#define DP_STATUS_CONNECTION(_status_)	((_status_) & 3)
+#define   DP_STATUS_CON_DISABLED	0
+#define   DP_STATUS_CON_DFP_D		1
+#define   DP_STATUS_CON_UFP_D		2
+#define   DP_STATUS_CON_BOTH		3
+#define DP_STATUS_POWER_LOW		BIT(2)
+#define DP_STATUS_ENABLED		BIT(3)
+#define DP_STATUS_PREFER_MULTI_FUNC	BIT(4)
+#define DP_STATUS_SWITCH_TO_USB		BIT(5)
+#define DP_STATUS_EXIT_DP_MODE		BIT(6)
+#define DP_STATUS_HPD_STATE		BIT(7) /* 0 = HPD_Low, 1 = HPD_High */
+#define DP_STATUS_IRQ_HPD		BIT(8)
+
+/* DisplayPort Configurations VDO bits */
+#define DP_CONF_CURRENTLY(_conf_)	((_conf_) & 3)
+#define DP_CONF_UFP_U_AS_DFP_D		BIT(0)
+#define DP_CONF_UFP_U_AS_UFP_D		BIT(1)
+#define DP_CONF_SIGNALING_DP		BIT(2)
+#define DP_CONF_SIGNALING_GEN_2		BIT(3) /* Reserved after v1.0b */
+#define DP_CONF_PIN_ASSIGNEMENT_SHIFT	8
+#define DP_CONF_PIN_ASSIGNEMENT_MASK	GENMASK(15, 8)
+
+#endif /* __USB_TYPEC_DP_H */
-- 
cgit v1.2.3


From e9576fe8e605c4413beb91b290b8a473985710de Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:53 +0300
Subject: usb: typec: tcpm: Support for Alternate Modes

This adds more complete handling of VDMs and registration of
partner alternate modes, and introduces callbacks for
alternate mode operations.

Only DFP role is supported for now.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm.c | 165 ++++++++++++++++++++++++++++++++++++++---------
 include/linux/usb/tcpm.h |   9 ---
 2 files changed, 135 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index 6b57e7132e64..c732fd703961 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -26,7 +26,7 @@
 #include <linux/usb/pd_vdo.h>
 #include <linux/usb/role.h>
 #include <linux/usb/tcpm.h>
-#include <linux/usb/typec.h>
+#include <linux/usb/typec_altmode.h>
 #include <linux/workqueue.h>
 
 #define FOREACH_STATE(S)			\
@@ -169,13 +169,14 @@ enum pd_msg_request {
 /* Alternate mode support */
 
 #define SVID_DISCOVERY_MAX	16
+#define ALTMODE_DISCOVERY_MAX	(SVID_DISCOVERY_MAX * MODE_DISCOVERY_MAX)
 
 struct pd_mode_data {
 	int svid_index;		/* current SVID index		*/
 	int nsvids;
 	u16 svids[SVID_DISCOVERY_MAX];
 	int altmodes;		/* number of alternate modes	*/
-	struct typec_altmode_desc altmode_desc[SVID_DISCOVERY_MAX];
+	struct typec_altmode_desc altmode_desc[ALTMODE_DISCOVERY_MAX];
 };
 
 struct pd_pps_data {
@@ -310,8 +311,8 @@ struct tcpm_port {
 
 	/* Alternate mode data */
 	struct pd_mode_data mode_data;
-	struct typec_altmode *partner_altmode[SVID_DISCOVERY_MAX * 6];
-	struct typec_altmode *port_altmode[SVID_DISCOVERY_MAX * 6];
+	struct typec_altmode *partner_altmode[ALTMODE_DISCOVERY_MAX];
+	struct typec_altmode *port_altmode[ALTMODE_DISCOVERY_MAX];
 
 	/* Deadline in jiffies to exit src_try_wait state */
 	unsigned long max_wait;
@@ -641,14 +642,14 @@ void tcpm_pd_transmit_complete(struct tcpm_port *port,
 }
 EXPORT_SYMBOL_GPL(tcpm_pd_transmit_complete);
 
-static int tcpm_mux_set(struct tcpm_port *port, enum tcpc_mux_mode mode,
+static int tcpm_mux_set(struct tcpm_port *port, int state,
 			enum usb_role usb_role,
 			enum typec_orientation orientation)
 {
 	int ret;
 
-	tcpm_log(port, "Requesting mux mode %d, usb-role %d, orientation %d",
-		 mode, usb_role, orientation);
+	tcpm_log(port, "Requesting mux state %d, usb-role %d, orientation %d",
+		 state, usb_role, orientation);
 
 	ret = typec_set_orientation(port->typec_port, orientation);
 	if (ret)
@@ -660,7 +661,7 @@ static int tcpm_mux_set(struct tcpm_port *port, enum tcpc_mux_mode mode,
 			return ret;
 	}
 
-	return typec_set_mode(port->typec_port, mode);
+	return typec_set_mode(port->typec_port, state);
 }
 
 static int tcpm_set_polarity(struct tcpm_port *port,
@@ -787,7 +788,7 @@ static int tcpm_set_roles(struct tcpm_port *port, bool attached,
 	else
 		usb_role = USB_ROLE_DEVICE;
 
-	ret = tcpm_mux_set(port, TYPEC_MUX_USB, usb_role, orientation);
+	ret = tcpm_mux_set(port, TYPEC_STATE_USB, usb_role, orientation);
 	if (ret < 0)
 		return ret;
 
@@ -1014,36 +1015,57 @@ static void svdm_consume_modes(struct tcpm_port *port, const __le32 *payload,
 			 pmdata->altmodes, paltmode->svid,
 			 paltmode->mode, paltmode->vdo);
 
-		port->partner_altmode[pmdata->altmodes] =
-			typec_partner_register_altmode(port->partner, paltmode);
-		if (!port->partner_altmode[pmdata->altmodes]) {
-			tcpm_log(port,
-				 "Failed to register modes for SVID 0x%04x",
-				 paltmode->svid);
-			return;
-		}
 		pmdata->altmodes++;
 	}
 }
 
+static void tcpm_register_partner_altmodes(struct tcpm_port *port)
+{
+	struct pd_mode_data *modep = &port->mode_data;
+	struct typec_altmode *altmode;
+	int i;
+
+	for (i = 0; i < modep->altmodes; i++) {
+		altmode = typec_partner_register_altmode(port->partner,
+						&modep->altmode_desc[i]);
+		if (!altmode)
+			tcpm_log(port, "Failed to register partner SVID 0x%04x",
+				 modep->altmode_desc[i].svid);
+		port->partner_altmode[i] = altmode;
+	}
+}
+
 #define supports_modal(port)	PD_IDH_MODAL_SUPP((port)->partner_ident.id_header)
 
 static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 			u32 *response)
 {
-	u32 p0 = le32_to_cpu(payload[0]);
-	int cmd_type = PD_VDO_CMDT(p0);
-	int cmd = PD_VDO_CMD(p0);
+	struct typec_altmode *adev;
+	struct typec_altmode *pdev;
 	struct pd_mode_data *modep;
+	u32 p[PD_MAX_PAYLOAD];
 	int rlen = 0;
-	u16 svid;
+	int cmd_type;
+	int cmd;
 	int i;
 
+	for (i = 0; i < cnt; i++)
+		p[i] = le32_to_cpu(payload[i]);
+
+	cmd_type = PD_VDO_CMDT(p[0]);
+	cmd = PD_VDO_CMD(p[0]);
+
 	tcpm_log(port, "Rx VDM cmd 0x%x type %d cmd %d len %d",
-		 p0, cmd_type, cmd, cnt);
+		 p[0], cmd_type, cmd, cnt);
 
 	modep = &port->mode_data;
 
+	adev = typec_match_altmode(port->port_altmode, ALTMODE_DISCOVERY_MAX,
+				   PD_VDO_VID(p[0]), PD_VDO_OPOS(p[0]));
+
+	pdev = typec_match_altmode(port->partner_altmode, ALTMODE_DISCOVERY_MAX,
+				   PD_VDO_VID(p[0]), PD_VDO_OPOS(p[0]));
+
 	switch (cmd_type) {
 	case CMDT_INIT:
 		switch (cmd) {
@@ -1065,17 +1087,19 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 		case CMD_EXIT_MODE:
 			break;
 		case CMD_ATTENTION:
-			break;
+			/* Attention command does not have response */
+			typec_altmode_attention(adev, p[1]);
+			return 0;
 		default:
 			break;
 		}
 		if (rlen >= 1) {
-			response[0] = p0 | VDO_CMDT(CMDT_RSP_ACK);
+			response[0] = p[0] | VDO_CMDT(CMDT_RSP_ACK);
 		} else if (rlen == 0) {
-			response[0] = p0 | VDO_CMDT(CMDT_RSP_NAK);
+			response[0] = p[0] | VDO_CMDT(CMDT_RSP_NAK);
 			rlen = 1;
 		} else {
-			response[0] = p0 | VDO_CMDT(CMDT_RSP_BUSY);
+			response[0] = p[0] | VDO_CMDT(CMDT_RSP_BUSY);
 			rlen = 1;
 		}
 		break;
@@ -1108,14 +1132,39 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 			svdm_consume_modes(port, payload, cnt);
 			modep->svid_index++;
 			if (modep->svid_index < modep->nsvids) {
-				svid = modep->svids[modep->svid_index];
+				u16 svid = modep->svids[modep->svid_index];
 				response[0] = VDO(svid, 1, CMD_DISCOVER_MODES);
 				rlen = 1;
 			} else {
-				/* enter alternate mode if/when implemented */
+				tcpm_register_partner_altmodes(port);
 			}
 			break;
 		case CMD_ENTER_MODE:
+			typec_altmode_update_active(pdev, true);
+
+			if (typec_altmode_vdm(adev, p[0], &p[1], cnt)) {
+				response[0] = VDO(adev->svid, 1, CMD_EXIT_MODE);
+				response[0] |= VDO_OPOS(adev->mode);
+				return 1;
+			}
+			return 0;
+		case CMD_EXIT_MODE:
+			typec_altmode_update_active(pdev, false);
+
+			/* Back to USB Operation */
+			WARN_ON(typec_altmode_notify(adev, TYPEC_STATE_USB,
+						     NULL));
+			break;
+		default:
+			break;
+		}
+		break;
+	case CMDT_RSP_NAK:
+		switch (cmd) {
+		case CMD_ENTER_MODE:
+			/* Back to USB Operation */
+			WARN_ON(typec_altmode_notify(adev, TYPEC_STATE_USB,
+						     NULL));
 			break;
 		default:
 			break;
@@ -1125,6 +1174,9 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 		break;
 	}
 
+	/* Informing the alternate mode drivers about everything */
+	typec_altmode_vdm(adev, p[0], &p[1], cnt);
+
 	return rlen;
 }
 
@@ -1408,6 +1460,57 @@ static int tcpm_validate_caps(struct tcpm_port *port, const u32 *pdo,
 	return 0;
 }
 
+static int tcpm_altmode_enter(struct typec_altmode *altmode)
+{
+	struct tcpm_port *port = typec_altmode_get_drvdata(altmode);
+	u32 header;
+
+	mutex_lock(&port->lock);
+	header = VDO(altmode->svid, 1, CMD_ENTER_MODE);
+	header |= VDO_OPOS(altmode->mode);
+
+	tcpm_queue_vdm(port, header, NULL, 0);
+	mod_delayed_work(port->wq, &port->vdm_state_machine, 0);
+	mutex_unlock(&port->lock);
+
+	return 0;
+}
+
+static int tcpm_altmode_exit(struct typec_altmode *altmode)
+{
+	struct tcpm_port *port = typec_altmode_get_drvdata(altmode);
+	u32 header;
+
+	mutex_lock(&port->lock);
+	header = VDO(altmode->svid, 1, CMD_EXIT_MODE);
+	header |= VDO_OPOS(altmode->mode);
+
+	tcpm_queue_vdm(port, header, NULL, 0);
+	mod_delayed_work(port->wq, &port->vdm_state_machine, 0);
+	mutex_unlock(&port->lock);
+
+	return 0;
+}
+
+static int tcpm_altmode_vdm(struct typec_altmode *altmode,
+			    u32 header, const u32 *data, int count)
+{
+	struct tcpm_port *port = typec_altmode_get_drvdata(altmode);
+
+	mutex_lock(&port->lock);
+	tcpm_queue_vdm(port, header, data, count - 1);
+	mod_delayed_work(port->wq, &port->vdm_state_machine, 0);
+	mutex_unlock(&port->lock);
+
+	return 0;
+}
+
+static const struct typec_altmode_ops tcpm_altmode_ops = {
+	.enter = tcpm_altmode_enter,
+	.exit = tcpm_altmode_exit,
+	.vdm = tcpm_altmode_vdm,
+};
+
 /*
  * PD (data, control) command handling functions
  */
@@ -2539,7 +2642,7 @@ out_disable_vconn:
 out_disable_pd:
 	port->tcpc->set_pd_rx(port->tcpc, false);
 out_disable_mux:
-	tcpm_mux_set(port, TYPEC_MUX_NONE, USB_ROLE_NONE,
+	tcpm_mux_set(port, TYPEC_STATE_SAFE, USB_ROLE_NONE,
 		     TYPEC_ORIENTATION_NONE);
 	return ret;
 }
@@ -2585,7 +2688,7 @@ static void tcpm_reset_port(struct tcpm_port *port)
 	tcpm_init_vconn(port);
 	tcpm_set_current_limit(port, 0, 0);
 	tcpm_set_polarity(port, TYPEC_POLARITY_CC1);
-	tcpm_mux_set(port, TYPEC_MUX_NONE, USB_ROLE_NONE,
+	tcpm_mux_set(port, TYPEC_STATE_SAFE, USB_ROLE_NONE,
 		     TYPEC_ORIENTATION_NONE);
 	tcpm_set_attached_state(port, false);
 	port->try_src_count = 0;
@@ -4706,6 +4809,8 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 					 dev_name(dev), paltmode->svid);
 				break;
 			}
+			typec_altmode_set_drvdata(alt, port);
+			alt->ops = &tcpm_altmode_ops;
 			port->port_altmode[i] = alt;
 			i++;
 			paltmode++;
diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h
index 193920a2e05f..7e7fbfb84e8e 100644
--- a/include/linux/usb/tcpm.h
+++ b/include/linux/usb/tcpm.h
@@ -98,15 +98,6 @@ struct tcpc_config {
 #define TCPC_MUX_DP_ENABLED		BIT(1)	/* DP enabled */
 #define TCPC_MUX_POLARITY_INVERTED	BIT(2)	/* Polarity inverted */
 
-/* Mux modes, decoded to attributes */
-enum tcpc_mux_mode {
-	TYPEC_MUX_NONE	= 0,				/* Open switch */
-	TYPEC_MUX_USB	= TCPC_MUX_USB_ENABLED,		/* USB only */
-	TYPEC_MUX_DP	= TCPC_MUX_DP_ENABLED,		/* DP only */
-	TYPEC_MUX_DOCK	= TCPC_MUX_USB_ENABLED |	/* Both USB and DP */
-			  TCPC_MUX_DP_ENABLED,
-};
-
 /**
  * struct tcpc_dev - Port configuration and callback functions
  * @config:	Pointer to port configuration
-- 
cgit v1.2.3


From 60a866685006bae0c3db20e4bae31887eb452ff1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 26 Jun 2018 06:49:08 -0300
Subject: gpio.h: fix location of gpio legacy documentation

The location of this doc file was moved. Change its reference
accordingly.

Fixes: 7ee2c13080c9 ("Documentation: gpio: Move legacy documentation to driver-api")
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/gpio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 91ed23468530..39745b8bdd65 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -14,7 +14,7 @@
 
 #include <linux/errno.h>
 
-/* see Documentation/gpio/gpio-legacy.txt */
+/* see Documentation/driver-api/gpio/legacy.rst */
 
 /* make these flag values available regardless of GPIO kconfig options */
 #define GPIOF_DIR_OUT	(0 << 0)
-- 
cgit v1.2.3


From dc8fbeb0ffde1f2395449006019e2c89c177df50 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Fri, 22 Jun 2018 00:41:26 +0200
Subject: ARM: OMAP1: Get rid of <mach/ams-delta-fiq.h>

Split the header file into two parts and move them to directories where
they belong.

Information on internal structure of FIQ buffer is moved to
<linux/platform_data/ams-delta-fiq.h> for ams-delta-serio driver use.

Other information used by ams-delta board init file and FIQ code is
made local to mach-omap1 root directory.

Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 MAINTAINERS                                      |  1 +
 arch/arm/mach-omap1/ams-delta-fiq-handler.S      |  5 +-
 arch/arm/mach-omap1/ams-delta-fiq.c              |  7 ++-
 arch/arm/mach-omap1/ams-delta-fiq.h              | 41 ++++++++++++
 arch/arm/mach-omap1/board-ams-delta.c            |  2 +-
 arch/arm/mach-omap1/include/mach/ams-delta-fiq.h | 79 ------------------------
 drivers/input/serio/ams_delta_serio.c            |  3 +-
 include/linux/platform_data/ams-delta-fiq.h      | 62 +++++++++++++++++++
 8 files changed, 113 insertions(+), 87 deletions(-)
 create mode 100644 arch/arm/mach-omap1/ams-delta-fiq.h
 delete mode 100644 arch/arm/mach-omap1/include/mach/ams-delta-fiq.h
 create mode 100644 include/linux/platform_data/ams-delta-fiq.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 9d5eeff51b5f..c78b7dc42a81 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10391,6 +10391,7 @@ F:	arch/arm/plat-omap/
 F:	arch/arm/configs/omap1_defconfig
 F:	drivers/i2c/busses/i2c-omap.c
 F:	include/linux/platform_data/i2c-omap.h
+F:	include/linux/platform_data/ams-delta-fiq.h
 
 OMAP2+ SUPPORT
 M:	Tony Lindgren <tony@atomide.com>
diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S
index bf608441b357..ddc27638ba2a 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S
+++ b/arch/arm/mach-omap1/ams-delta-fiq-handler.S
@@ -14,11 +14,12 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/assembler.h>
+#include <linux/platform_data/ams-delta-fiq.h>
 
+#include <asm/assembler.h>
 #include <mach/board-ams-delta.h>
-#include <mach/ams-delta-fiq.h>
 
+#include "ams-delta-fiq.h"
 #include "iomap.h"
 #include "soc.h"
 
diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c
index 5a6c59ac9b5f..e72935034d42 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq.c
+++ b/arch/arm/mach-omap1/ams-delta-fiq.c
@@ -19,12 +19,13 @@
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/platform_data/ams-delta-fiq.h>
 
 #include <mach/board-ams-delta.h>
 
 #include <asm/fiq.h>
 
-#include <mach/ams-delta-fiq.h>
+#include "ams-delta-fiq.h"
 
 static struct fiq_handler fh = {
 	.name	= "ams-delta-fiq"
@@ -35,8 +36,8 @@ static struct fiq_handler fh = {
  * The FIQ and IRQ isrs can both read and write it.
  * It is structured as a header section several 32bit slots,
  * followed by the circular buffer where the FIQ isr stores
- * keystrokes received from the qwerty keyboard.
- * See ams-delta-fiq.h for details of offsets.
+ * keystrokes received from the qwerty keyboard.  See
+ * <linux/platform_data/ams-delta-fiq.h> for details of offsets.
  */
 unsigned int fiq_buffer[1024];
 EXPORT_SYMBOL(fiq_buffer);
diff --git a/arch/arm/mach-omap1/ams-delta-fiq.h b/arch/arm/mach-omap1/ams-delta-fiq.h
new file mode 100644
index 000000000000..3f691d68aa62
--- /dev/null
+++ b/arch/arm/mach-omap1/ams-delta-fiq.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * arch/arm/mach-omap1/ams-delta-fiq.h
+ *
+ * Taken from the original Amstrad modifications to fiq.h
+ *
+ * Copyright (c) 2004 Amstrad Plc
+ * Copyright (c) 2006 Matt Callow
+ * Copyright (c) 2010 Janusz Krzysztofik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __AMS_DELTA_FIQ_H
+#define __AMS_DELTA_FIQ_H
+
+#include <mach/irqs.h>
+
+/*
+ * Interrupt number used for passing control from FIQ to IRQ.
+ * IRQ12, described as reserved, has been selected.
+ */
+#define INT_DEFERRED_FIQ	INT_1510_RES12
+/*
+ * Base address of an interrupt handler that the INT_DEFERRED_FIQ belongs to.
+ */
+#if (INT_DEFERRED_FIQ < IH2_BASE)
+#define DEFERRED_FIQ_IH_BASE	OMAP_IH1_BASE
+#else
+#define DEFERRED_FIQ_IH_BASE	OMAP_IH2_BASE
+#endif
+
+#ifndef __ASSEMBLER__
+extern unsigned char qwerty_fiqin_start, qwerty_fiqin_end;
+
+extern void __init ams_delta_init_fiq(struct gpio_chip *chip);
+#endif
+
+#endif
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index e937372e704a..902c24cc8508 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -41,10 +41,10 @@
 #include <mach/mux.h>
 
 #include <mach/hardware.h>
-#include <mach/ams-delta-fiq.h>
 #include "camera.h"
 #include <mach/usb.h>
 
+#include "ams-delta-fiq.h"
 #include "iomap.h"
 #include "common.h"
 
diff --git a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h b/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h
deleted file mode 100644
index a9769ff396bc..000000000000
--- a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * arch/arm/mach-omap1/include/ams-delta-fiq.h
- *
- * Taken from the original Amstrad modifications to fiq.h
- *
- * Copyright (c) 2004 Amstrad Plc
- * Copyright (c) 2006 Matt Callow
- * Copyright (c) 2010 Janusz Krzysztofik
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __AMS_DELTA_FIQ_H
-#define __AMS_DELTA_FIQ_H
-
-#include <mach/irqs.h>
-
-/*
- * Interrupt number used for passing control from FIQ to IRQ.
- * IRQ12, described as reserved, has been selected.
- */
-#define INT_DEFERRED_FIQ	INT_1510_RES12
-/*
- * Base address of an interrupt handler that the INT_DEFERRED_FIQ belongs to.
- */
-#if (INT_DEFERRED_FIQ < IH2_BASE)
-#define DEFERRED_FIQ_IH_BASE	OMAP_IH1_BASE
-#else
-#define DEFERRED_FIQ_IH_BASE	OMAP_IH2_BASE
-#endif
-
-/*
- * These are the offsets from the beginning of the fiq_buffer. They are put here
- * since the buffer and header need to be accessed by drivers servicing devices
- * which generate GPIO interrupts - e.g. keyboard, modem, hook switch.
- */
-#define FIQ_MASK		 0
-#define FIQ_STATE		 1
-#define FIQ_KEYS_CNT		 2
-#define FIQ_TAIL_OFFSET		 3
-#define FIQ_HEAD_OFFSET		 4
-#define FIQ_BUF_LEN		 5
-#define FIQ_KEY			 6
-#define FIQ_MISSED_KEYS		 7
-#define FIQ_BUFFER_START	 8
-#define FIQ_GPIO_INT_MASK	 9
-#define FIQ_KEYS_HICNT		10
-#define FIQ_IRQ_PEND		11
-#define FIQ_SIR_CODE_L1		12
-#define IRQ_SIR_CODE_L2		13
-
-#define FIQ_CNT_INT_00		14
-#define FIQ_CNT_INT_KEY		15
-#define FIQ_CNT_INT_MDM		16
-#define FIQ_CNT_INT_03		17
-#define FIQ_CNT_INT_HSW		18
-#define FIQ_CNT_INT_05		19
-#define FIQ_CNT_INT_06		20
-#define FIQ_CNT_INT_07		21
-#define FIQ_CNT_INT_08		22
-#define FIQ_CNT_INT_09		23
-#define FIQ_CNT_INT_10		24
-#define FIQ_CNT_INT_11		25
-#define FIQ_CNT_INT_12		26
-#define FIQ_CNT_INT_13		27
-#define FIQ_CNT_INT_14		28
-#define FIQ_CNT_INT_15		29
-
-#define FIQ_CIRC_BUFF		30      /*Start of circular buffer */
-
-#ifndef __ASSEMBLER__
-extern unsigned int fiq_buffer[];
-extern unsigned char qwerty_fiqin_start, qwerty_fiqin_end;
-
-extern void __init ams_delta_init_fiq(struct gpio_chip *chip);
-#endif
-
-#endif
diff --git a/drivers/input/serio/ams_delta_serio.c b/drivers/input/serio/ams_delta_serio.c
index 7952a29f9540..2602f7cff5ae 100644
--- a/drivers/input/serio/ams_delta_serio.c
+++ b/drivers/input/serio/ams_delta_serio.c
@@ -22,6 +22,7 @@
  */
 #include <linux/gpio.h>
 #include <linux/irq.h>
+#include <linux/platform_data/ams-delta-fiq.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/serio.h>
@@ -30,8 +31,6 @@
 
 #include <mach/board-ams-delta.h>
 
-#include <mach/ams-delta-fiq.h>
-
 #define DRIVER_NAME	"ams-delta-serio"
 
 MODULE_AUTHOR("Matt Callow");
diff --git a/include/linux/platform_data/ams-delta-fiq.h b/include/linux/platform_data/ams-delta-fiq.h
new file mode 100644
index 000000000000..dc0f835ea918
--- /dev/null
+++ b/include/linux/platform_data/ams-delta-fiq.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * include/linux/platform_data/ams-delta-fiq.h
+ *
+ * Taken from the original Amstrad modifications to fiq.h
+ *
+ * Copyright (c) 2004 Amstrad Plc
+ * Copyright (c) 2006 Matt Callow
+ * Copyright (c) 2010 Janusz Krzysztofik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_PLATFORM_DATA_AMS_DELTA_FIQ_H
+#define __LINUX_PLATFORM_DATA_AMS_DELTA_FIQ_H
+
+/*
+ * These are the offsets from the beginning of the fiq_buffer. They are put here
+ * since the buffer and header need to be accessed by drivers servicing devices
+ * which generate GPIO interrupts - e.g. keyboard, modem, hook switch.
+ */
+#define FIQ_MASK		 0
+#define FIQ_STATE		 1
+#define FIQ_KEYS_CNT		 2
+#define FIQ_TAIL_OFFSET		 3
+#define FIQ_HEAD_OFFSET		 4
+#define FIQ_BUF_LEN		 5
+#define FIQ_KEY			 6
+#define FIQ_MISSED_KEYS		 7
+#define FIQ_BUFFER_START	 8
+#define FIQ_GPIO_INT_MASK	 9
+#define FIQ_KEYS_HICNT		10
+#define FIQ_IRQ_PEND		11
+#define FIQ_SIR_CODE_L1		12
+#define IRQ_SIR_CODE_L2		13
+
+#define FIQ_CNT_INT_00		14
+#define FIQ_CNT_INT_KEY		15
+#define FIQ_CNT_INT_MDM		16
+#define FIQ_CNT_INT_03		17
+#define FIQ_CNT_INT_HSW		18
+#define FIQ_CNT_INT_05		19
+#define FIQ_CNT_INT_06		20
+#define FIQ_CNT_INT_07		21
+#define FIQ_CNT_INT_08		22
+#define FIQ_CNT_INT_09		23
+#define FIQ_CNT_INT_10		24
+#define FIQ_CNT_INT_11		25
+#define FIQ_CNT_INT_12		26
+#define FIQ_CNT_INT_13		27
+#define FIQ_CNT_INT_14		28
+#define FIQ_CNT_INT_15		29
+
+#define FIQ_CIRC_BUFF		30      /*Start of circular buffer */
+
+#ifndef __ASSEMBLER__
+extern unsigned int fiq_buffer[];
+#endif
+
+#endif
-- 
cgit v1.2.3


From 5f73861fae087df19f7337620da65c99e4260c72 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Fri, 22 Jun 2018 00:41:28 +0200
Subject: Input: ams_delta_serio: Get FIQ buffer from platform_data

Instead of exporting the FIQ buffer symbol to be used in
ams-delta-serio driver, pass it to the driver as platform_data.

Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/ams-delta-fiq.c         |  6 +++---
 arch/arm/mach-omap1/board-ams-delta.c       |  8 ++++++++
 drivers/input/serio/ams_delta_serio.c       | 20 +++++++++++++-------
 include/linux/platform_data/ams-delta-fiq.h |  4 ----
 4 files changed, 24 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c
index 82ca4246a5e4..b0dc7ddf5877 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq.c
+++ b/arch/arm/mach-omap1/ams-delta-fiq.c
@@ -40,8 +40,7 @@ static struct fiq_handler fh = {
  * keystrokes received from the qwerty keyboard.  See
  * <linux/platform_data/ams-delta-fiq.h> for details of offsets.
  */
-unsigned int fiq_buffer[1024];
-EXPORT_SYMBOL(fiq_buffer);
+static unsigned int fiq_buffer[1024];
 
 static struct irq_chip *irq_chip;
 static struct irq_data *irq_data[16];
@@ -203,9 +202,10 @@ void __init ams_delta_init_fiq(struct gpio_chip *chip,
 	val = omap_readl(OMAP_IH1_BASE + offset) | 1;
 	omap_writel(val, OMAP_IH1_BASE + offset);
 
-	/* Initialize serio device IRQ resource */
+	/* Initialize serio device IRQ resource and platform_data */
 	serio->resource[0].start = gpiod_to_irq(clk);
 	serio->resource[0].end = serio->resource[0].start;
+	serio->dev.platform_data = fiq_buffer;
 
 	/*
 	 * Since FIQ handler performs handling of GPIO registers for
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index f453b08ed7ef..22f9be297c2a 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -537,6 +537,14 @@ static struct resource ams_delta_serio_resources[] = {
 static struct platform_device ams_delta_serio_device = {
 	.name		= "ams-delta-serio",
 	.id		= PLATFORM_DEVID_NONE,
+	.dev		= {
+		/*
+		 * Initialize .platform_data explicitly with NULL to
+		 * indicate it is going to be used.  It will be replaced
+		 * with FIQ buffer address as soon as FIQ is initialized.
+		 */
+		.platform_data = NULL,
+	},
 	.num_resources	= ARRAY_SIZE(ams_delta_serio_resources),
 	.resource	= ams_delta_serio_resources,
 };
diff --git a/drivers/input/serio/ams_delta_serio.c b/drivers/input/serio/ams_delta_serio.c
index c1f8226f172e..f8663d7891f2 100644
--- a/drivers/input/serio/ams_delta_serio.c
+++ b/drivers/input/serio/ams_delta_serio.c
@@ -37,6 +37,7 @@ MODULE_LICENSE("GPL");
 struct ams_delta_serio {
 	struct serio *serio;
 	struct regulator *vcc;
+	unsigned int *fiq_buffer;
 };
 
 static int check_data(struct serio *serio, int data)
@@ -66,22 +67,23 @@ static int check_data(struct serio *serio, int data)
 static irqreturn_t ams_delta_serio_interrupt(int irq, void *dev_id)
 {
 	struct ams_delta_serio *priv = dev_id;
-	int *circ_buff = &fiq_buffer[FIQ_CIRC_BUFF];
+	int *circ_buff = &priv->fiq_buffer[FIQ_CIRC_BUFF];
 	int data, dfl;
 	u8 scancode;
 
-	fiq_buffer[FIQ_IRQ_PEND] = 0;
+	priv->fiq_buffer[FIQ_IRQ_PEND] = 0;
 
 	/*
 	 * Read data from the circular buffer, check it
 	 * and then pass it on the serio
 	 */
-	while (fiq_buffer[FIQ_KEYS_CNT] > 0) {
+	while (priv->fiq_buffer[FIQ_KEYS_CNT] > 0) {
 
-		data = circ_buff[fiq_buffer[FIQ_HEAD_OFFSET]++];
-		fiq_buffer[FIQ_KEYS_CNT]--;
-		if (fiq_buffer[FIQ_HEAD_OFFSET] == fiq_buffer[FIQ_BUF_LEN])
-			fiq_buffer[FIQ_HEAD_OFFSET] = 0;
+		data = circ_buff[priv->fiq_buffer[FIQ_HEAD_OFFSET]++];
+		priv->fiq_buffer[FIQ_KEYS_CNT]--;
+		if (priv->fiq_buffer[FIQ_HEAD_OFFSET] ==
+		    priv->fiq_buffer[FIQ_BUF_LEN])
+			priv->fiq_buffer[FIQ_HEAD_OFFSET] = 0;
 
 		dfl = check_data(priv->serio, data);
 		scancode = (u8) (data >> 1) & 0xFF;
@@ -116,6 +118,10 @@ static int ams_delta_serio_init(struct platform_device *pdev)
 	if (!priv)
 		return -ENOMEM;
 
+	priv->fiq_buffer = pdev->dev.platform_data;
+	if (!priv->fiq_buffer)
+		return -EINVAL;
+
 	priv->vcc = devm_regulator_get(&pdev->dev, "vcc");
 	if (IS_ERR(priv->vcc)) {
 		err = PTR_ERR(priv->vcc);
diff --git a/include/linux/platform_data/ams-delta-fiq.h b/include/linux/platform_data/ams-delta-fiq.h
index dc0f835ea918..cf4589ccb720 100644
--- a/include/linux/platform_data/ams-delta-fiq.h
+++ b/include/linux/platform_data/ams-delta-fiq.h
@@ -55,8 +55,4 @@
 
 #define FIQ_CIRC_BUFF		30      /*Start of circular buffer */
 
-#ifndef __ASSEMBLER__
-extern unsigned int fiq_buffer[];
-#endif
-
 #endif
-- 
cgit v1.2.3


From d082852f40de5cf55a7a689bf582fced39f5443e Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Fri, 22 Jun 2018 13:32:48 +0800
Subject: ARM: imx: enable bus auto clock gating function for i.mx6sll

i.MX6SLL has HW bus auto clock gating function, enable
it by default to save VDD_SOC_IN power, about 5% ~ 20%
saved depends on different use cases.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/mach-imx/pm-imx6.c                 | 11 +++++++++--
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h |  3 +++
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c
index 791e1fda248e..b08e407d8d96 100644
--- a/arch/arm/mach-imx/pm-imx6.c
+++ b/arch/arm/mach-imx/pm-imx6.c
@@ -648,10 +648,17 @@ void __init imx6dl_pm_init(void)
 
 void __init imx6sl_pm_init(void)
 {
-	if (cpu_is_imx6sl())
+	struct regmap *gpr;
+
+	if (cpu_is_imx6sl()) {
 		imx6_pm_common_init(&imx6sl_pm_data);
-	else
+	} else {
 		imx6_pm_common_init(&imx6sll_pm_data);
+		gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
+		if (!IS_ERR(gpr))
+			regmap_update_bits(gpr, IOMUXC_GPR5,
+				IMX6SLL_GPR5_AFCG_X_BYPASS_MASK, 0);
+	}
 }
 
 void __init imx6sx_pm_init(void)
diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index e06f5f79eaef..6c1ad160ed87 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -457,4 +457,7 @@
 #define MCLK_DIR(x) (x == 1 ? IMX6UL_GPR1_SAI1_MCLK_DIR : x == 2 ? \
 		     IMX6UL_GPR1_SAI2_MCLK_DIR : IMX6UL_GPR1_SAI3_MCLK_DIR)
 
+/* For imx6sll iomux gpr register field define */
+#define IMX6SLL_GPR5_AFCG_X_BYPASS_MASK		(0x1f << 11)
+
 #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */
-- 
cgit v1.2.3


From 1cef1150ef40ec52f507436a14230cbc2623299c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 7 Jun 2018 11:45:49 +0200
Subject: kthread, sched/core: Fix kthread_parkme() (again...)

Gaurav reports that commit:

  85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue")

isn't working for him. Because of the following race:

> controller Thread                               CPUHP Thread
> takedown_cpu
> kthread_park
> kthread_parkme
> Set KTHREAD_SHOULD_PARK
>                                                 smpboot_thread_fn
>                                                 set Task interruptible
>
>
> wake_up_process
>  if (!(p->state & state))
>                 goto out;
>
>                                                 Kthread_parkme
>                                                 SET TASK_PARKED
>                                                 schedule
>                                                 raw_spin_lock(&rq->lock)
> ttwu_remote
> waiting for __task_rq_lock
>                                                 context_switch
>
>                                                 finish_lock_switch
>
>
>
>                                                 Case TASK_PARKED
>                                                 kthread_park_complete
>
>
> SET Running

Furthermore, Oleg noticed that the whole scheduler TASK_PARKED
handling is buggered because the TASK_DEAD thing is done with
preemption disabled, the current code can still complete early on
preemption :/

So basically revert that earlier fix and go with a variant of the
alternative mentioned in the commit. Promote TASK_PARKED to special
state to avoid the store-store issue on task->state leading to the
WARN in kthread_unpark() -> __kthread_bind().

But in addition, add wait_task_inactive() to kthread_park() to ensure
the task really is PARKED when we return from kthread_park(). This
avoids the whole kthread still gets migrated nonsense -- although it
would be really good to get this done differently.

Reported-by: Gaurav Kohli <gkohli@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kthread.h |  1 -
 include/linux/sched.h   |  2 +-
 kernel/kthread.c        | 30 ++++++++++++++++++++++++------
 kernel/sched/core.c     | 31 +++++++++++--------------------
 4 files changed, 36 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 2803264c512f..c1961761311d 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,7 +62,6 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
-void kthread_park_complete(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9256118bd40c..43731fe51c97 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,7 +118,7 @@ struct task_group;
  * the comment with set_special_state().
  */
 #define is_special_task_state(state)				\
-	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
 
 #define __set_current_state(state_value)			\
 	do {							\
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 481951bf091d..750cb8082694 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -177,9 +177,20 @@ void *kthread_probe_data(struct task_struct *task)
 static void __kthread_parkme(struct kthread *self)
 {
 	for (;;) {
-		set_current_state(TASK_PARKED);
+		/*
+		 * TASK_PARKED is a special state; we must serialize against
+		 * possible pending wakeups to avoid store-store collisions on
+		 * task->state.
+		 *
+		 * Such a collision might possibly result in the task state
+		 * changin from TASK_PARKED and us failing the
+		 * wait_task_inactive() in kthread_park().
+		 */
+		set_special_state(TASK_PARKED);
 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 			break;
+
+		complete_all(&self->parked);
 		schedule();
 	}
 	__set_current_state(TASK_RUNNING);
@@ -191,11 +202,6 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
-void kthread_park_complete(struct task_struct *k)
-{
-	complete_all(&to_kthread(k)->parked);
-}
-
 static int kthread(void *_create)
 {
 	/* Copy data: it's on kthread's stack */
@@ -461,6 +467,9 @@ void kthread_unpark(struct task_struct *k)
 
 	reinit_completion(&kthread->parked);
 	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	/*
+	 * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
+	 */
 	wake_up_state(k, TASK_PARKED);
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -487,7 +496,16 @@ int kthread_park(struct task_struct *k)
 	set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
 	if (k != current) {
 		wake_up_process(k);
+		/*
+		 * Wait for __kthread_parkme() to complete(), this means we
+		 * _will_ have TASK_PARKED and are about to call schedule().
+		 */
 		wait_for_completion(&kthread->parked);
+		/*
+		 * Now wait for that schedule() to complete and the task to
+		 * get scheduled out.
+		 */
+		WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED));
 	}
 
 	return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 22fce36426c0..fe365c9a08e9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,7 +7,6 @@
  */
 #include "sched.h"
 
-#include <linux/kthread.h>
 #include <linux/nospec.h>
 
 #include <linux/kcov.h>
@@ -2724,28 +2723,20 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		membarrier_mm_sync_core_before_usermode(mm);
 		mmdrop(mm);
 	}
-	if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
-		switch (prev_state) {
-		case TASK_DEAD:
-			if (prev->sched_class->task_dead)
-				prev->sched_class->task_dead(prev);
+	if (unlikely(prev_state == TASK_DEAD)) {
+		if (prev->sched_class->task_dead)
+			prev->sched_class->task_dead(prev);
 
-			/*
-			 * Remove function-return probe instances associated with this
-			 * task and put them back on the free list.
-			 */
-			kprobe_flush_task(prev);
-
-			/* Task is done with its stack. */
-			put_task_stack(prev);
+		/*
+		 * Remove function-return probe instances associated with this
+		 * task and put them back on the free list.
+		 */
+		kprobe_flush_task(prev);
 
-			put_task_struct(prev);
-			break;
+		/* Task is done with its stack. */
+		put_task_stack(prev);
 
-		case TASK_PARKED:
-			kthread_park_complete(prev);
-			break;
-		}
+		put_task_struct(prev);
 	}
 
 	tick_nohz_task_switch();
-- 
cgit v1.2.3


From 9cf57731b63e37ed995b46690adc604891a9a28f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 7 Jun 2018 10:52:03 +0200
Subject: watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work

Oleg suggested to replace the "watchdog/%u" threads with
cpu_stop_work. That removes one thread per CPU while at the same time
fixes softlockup vs SCHED_DEADLINE.

But more importantly, it does away with the single
smpboot_update_cpumask_percpu_thread() user, which allows
cleanups/shrinkage of the smpboot interface.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuhotplug.h |   1 +
 include/linux/nmi.h        |   5 ++
 kernel/cpu.c               |   5 ++
 kernel/watchdog.c          | 137 ++++++++++++++++++++-------------------------
 4 files changed, 71 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8796ba387152..4cf06a64bc02 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -164,6 +164,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+	CPUHP_AP_WATCHDOG_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b8d868d23e79..80664bbeca43 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -33,10 +33,15 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif /* !CONFIG_SMP */
 
+extern int lockup_detector_online_cpu(unsigned int cpu);
+extern int lockup_detector_offline_cpu(unsigned int cpu);
+
 #else /* CONFIG_LOCKUP_DETECTOR */
 static inline void lockup_detector_init(void) { }
 static inline void lockup_detector_soft_poweroff(void) { }
 static inline void lockup_detector_cleanup(void) { }
+#define lockup_detector_online_cpu	NULL
+#define lockup_detector_offline_cpu	NULL
 #endif /* !CONFIG_LOCKUP_DETECTOR */
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0db8938fbb23..191097c45fb1 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1344,6 +1344,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= perf_event_init_cpu,
 		.teardown.single	= perf_event_exit_cpu,
 	},
+	[CPUHP_AP_WATCHDOG_ONLINE] = {
+		.name			= "lockup_detector:online",
+		.startup.single		= lockup_detector_online_cpu,
+		.teardown.single	= lockup_detector_offline_cpu,
+	},
 	[CPUHP_AP_WORKQUEUE_ONLINE] = {
 		.name			= "workqueue:online",
 		.startup.single		= workqueue_online_cpu,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 576d18045811..b81f777838d5 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -18,18 +18,14 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
-#include <linux/smpboot.h>
-#include <linux/sched/rt.h>
-#include <uapi/linux/sched/types.h>
 #include <linux/tick.h>
-#include <linux/workqueue.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/isolation.h>
+#include <linux/stop_machine.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
-#include <linux/kthread.h>
 
 static DEFINE_MUTEX(watchdog_mutex);
 
@@ -169,11 +165,10 @@ static void lockup_detector_update_enable(void)
 unsigned int __read_mostly softlockup_panic =
 			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
 
-static bool softlockup_threads_initialized __read_mostly;
+static bool softlockup_initialized __read_mostly;
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
-static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
@@ -335,6 +330,25 @@ static void watchdog_interrupt_count(void)
 	__this_cpu_inc(hrtimer_interrupts);
 }
 
+/*
+ * The watchdog thread function - touches the timestamp.
+ *
+ * It only runs once every sample_period seconds (4 seconds by
+ * default) to reset the softlockup timestamp. If this gets delayed
+ * for more than 2*watchdog_thresh seconds then the debug-printout
+ * triggers in watchdog_timer_fn().
+ */
+static int softlockup_fn(void *data)
+{
+	__this_cpu_write(soft_lockup_hrtimer_cnt,
+			 __this_cpu_read(hrtimer_interrupts));
+	__touch_watchdog();
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -350,7 +364,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	watchdog_interrupt_count();
 
 	/* kick the softlockup detector */
-	wake_up_process(__this_cpu_read(softlockup_watchdog));
+	stop_one_cpu_nowait(smp_processor_id(),
+			softlockup_fn, NULL,
+			this_cpu_ptr(&softlockup_stop_work));
 
 	/* .. and repeat */
 	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
@@ -448,17 +464,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
-static void watchdog_set_prio(unsigned int policy, unsigned int prio)
-{
-	struct sched_param param = { .sched_priority = prio };
-
-	sched_setscheduler(current, policy, &param);
-}
-
 static void watchdog_enable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
 	/*
 	 * Start the timer first to prevent the NMI watchdog triggering
 	 * before the timer has a chance to fire.
@@ -473,15 +484,14 @@ static void watchdog_enable(unsigned int cpu)
 	/* Enable the perf event */
 	if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
 		watchdog_nmi_enable(cpu);
-
-	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
 }
 
 static void watchdog_disable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 
-	watchdog_set_prio(SCHED_NORMAL, 0);
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
 	/*
 	 * Disable the perf event first. That prevents that a large delay
 	 * between disabling the timer and disabling the perf event causes
@@ -491,77 +501,63 @@ static void watchdog_disable(unsigned int cpu)
 	hrtimer_cancel(hrtimer);
 }
 
-static void watchdog_cleanup(unsigned int cpu, bool online)
+static int softlockup_stop_fn(void *data)
 {
-	watchdog_disable(cpu);
+	watchdog_disable(smp_processor_id());
+	return 0;
 }
 
-static int watchdog_should_run(unsigned int cpu)
+static void softlockup_stop_all(void)
 {
-	return __this_cpu_read(hrtimer_interrupts) !=
-		__this_cpu_read(soft_lockup_hrtimer_cnt);
+	int cpu;
+
+	if (!softlockup_initialized)
+		return;
+
+	for_each_cpu(cpu, &watchdog_allowed_mask)
+		smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
+
+	cpumask_clear(&watchdog_allowed_mask);
 }
 
-/*
- * The watchdog thread function - touches the timestamp.
- *
- * It only runs once every sample_period seconds (4 seconds by
- * default) to reset the softlockup timestamp. If this gets delayed
- * for more than 2*watchdog_thresh seconds then the debug-printout
- * triggers in watchdog_timer_fn().
- */
-static void watchdog(unsigned int cpu)
+static int softlockup_start_fn(void *data)
 {
-	__this_cpu_write(soft_lockup_hrtimer_cnt,
-			 __this_cpu_read(hrtimer_interrupts));
-	__touch_watchdog();
+	watchdog_enable(smp_processor_id());
+	return 0;
 }
 
-static struct smp_hotplug_thread watchdog_threads = {
-	.store			= &softlockup_watchdog,
-	.thread_should_run	= watchdog_should_run,
-	.thread_fn		= watchdog,
-	.thread_comm		= "watchdog/%u",
-	.setup			= watchdog_enable,
-	.cleanup		= watchdog_cleanup,
-	.park			= watchdog_disable,
-	.unpark			= watchdog_enable,
-};
-
-static void softlockup_update_smpboot_threads(void)
+static void softlockup_start_all(void)
 {
-	lockdep_assert_held(&watchdog_mutex);
-
-	if (!softlockup_threads_initialized)
-		return;
+	int cpu;
 
-	smpboot_update_cpumask_percpu_thread(&watchdog_threads,
-					     &watchdog_allowed_mask);
+	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
+	for_each_cpu(cpu, &watchdog_allowed_mask)
+		smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
 }
 
-/* Temporarily park all watchdog threads */
-static void softlockup_park_all_threads(void)
+int lockup_detector_online_cpu(unsigned int cpu)
 {
-	cpumask_clear(&watchdog_allowed_mask);
-	softlockup_update_smpboot_threads();
+	watchdog_enable(cpu);
+	return 0;
 }
 
-/* Unpark enabled threads */
-static void softlockup_unpark_threads(void)
+int lockup_detector_offline_cpu(unsigned int cpu)
 {
-	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
-	softlockup_update_smpboot_threads();
+	watchdog_disable(cpu);
+	return 0;
 }
 
 static void lockup_detector_reconfigure(void)
 {
 	cpus_read_lock();
 	watchdog_nmi_stop();
-	softlockup_park_all_threads();
+
+	softlockup_stop_all();
 	set_sample_period();
 	lockup_detector_update_enable();
 	if (watchdog_enabled && watchdog_thresh)
-		softlockup_unpark_threads();
+		softlockup_start_all();
+
 	watchdog_nmi_start();
 	cpus_read_unlock();
 	/*
@@ -580,8 +576,6 @@ static void lockup_detector_reconfigure(void)
  */
 static __init void lockup_detector_setup(void)
 {
-	int ret;
-
 	/*
 	 * If sysctl is off and watchdog got disabled on the command line,
 	 * nothing to do here.
@@ -592,24 +586,13 @@ static __init void lockup_detector_setup(void)
 	    !(watchdog_enabled && watchdog_thresh))
 		return;
 
-	ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
-						     &watchdog_allowed_mask);
-	if (ret) {
-		pr_err("Failed to initialize soft lockup detector threads\n");
-		return;
-	}
-
 	mutex_lock(&watchdog_mutex);
-	softlockup_threads_initialized = true;
 	lockup_detector_reconfigure();
+	softlockup_initialized = true;
 	mutex_unlock(&watchdog_mutex);
 }
 
 #else /* CONFIG_SOFTLOCKUP_DETECTOR */
-static inline int watchdog_park_threads(void) { return 0; }
-static inline void watchdog_unpark_threads(void) { }
-static inline int watchdog_enable_all_cpus(void) { return 0; }
-static inline void watchdog_disable_all_cpus(void) { }
 static void lockup_detector_reconfigure(void)
 {
 	cpus_read_lock();
-- 
cgit v1.2.3


From 167a88677b05d6a810f23b871cfb2b5db1808e60 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 7 Jun 2018 10:53:01 +0200
Subject: smpboot: Remove cpumask from the API

Now that the sole use of the whole smpboot_*cpumask() API is gone,
remove it.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/smpboot.h | 15 +-------------
 kernel/smpboot.c        | 54 +++++--------------------------------------------
 2 files changed, 6 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index c174844cf663..d0884b525001 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -25,8 +25,6 @@ struct smpboot_thread_data;
  *			parked (cpu offline)
  * @unpark:		Optional unpark function, called when the thread is
  *			unparked (cpu online)
- * @cpumask:		Internal state.  To update which threads are unparked,
- *			call smpboot_update_cpumask_percpu_thread().
  * @selfparking:	Thread is not parked by the park function.
  * @thread_comm:	The base name of the thread
  */
@@ -40,23 +38,12 @@ struct smp_hotplug_thread {
 	void				(*cleanup)(unsigned int cpu, bool online);
 	void				(*park)(unsigned int cpu);
 	void				(*unpark)(unsigned int cpu);
-	cpumask_var_t			cpumask;
 	bool				selfparking;
 	const char			*thread_comm;
 };
 
-int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
-					   const struct cpumask *cpumask);
-
-static inline int
-smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
-{
-	return smpboot_register_percpu_thread_cpumask(plug_thread,
-						      cpu_possible_mask);
-}
+int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread);
 
 void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
-void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
-					  const struct cpumask *);
 
 #endif
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 5043e7433f4b..c230c2dd48e1 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -238,8 +238,7 @@ int smpboot_unpark_threads(unsigned int cpu)
 
 	mutex_lock(&smpboot_threads_lock);
 	list_for_each_entry(cur, &hotplug_threads, list)
-		if (cpumask_test_cpu(cpu, cur->cpumask))
-			smpboot_unpark_thread(cur, cpu);
+		smpboot_unpark_thread(cur, cpu);
 	mutex_unlock(&smpboot_threads_lock);
 	return 0;
 }
@@ -280,34 +279,26 @@ static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
 }
 
 /**
- * smpboot_register_percpu_thread_cpumask - Register a per_cpu thread related
+ * smpboot_register_percpu_thread - Register a per_cpu thread related
  * 					    to hotplug
  * @plug_thread:	Hotplug thread descriptor
- * @cpumask:		The cpumask where threads run
  *
  * Creates and starts the threads on all online cpus.
  */
-int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
-					   const struct cpumask *cpumask)
+int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
 {
 	unsigned int cpu;
 	int ret = 0;
 
-	if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL))
-		return -ENOMEM;
-	cpumask_copy(plug_thread->cpumask, cpumask);
-
 	get_online_cpus();
 	mutex_lock(&smpboot_threads_lock);
 	for_each_online_cpu(cpu) {
 		ret = __smpboot_create_thread(plug_thread, cpu);
 		if (ret) {
 			smpboot_destroy_threads(plug_thread);
-			free_cpumask_var(plug_thread->cpumask);
 			goto out;
 		}
-		if (cpumask_test_cpu(cpu, cpumask))
-			smpboot_unpark_thread(plug_thread, cpu);
+		smpboot_unpark_thread(plug_thread, cpu);
 	}
 	list_add(&plug_thread->list, &hotplug_threads);
 out:
@@ -315,7 +306,7 @@ out:
 	put_online_cpus();
 	return ret;
 }
-EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread_cpumask);
+EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
 
 /**
  * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug
@@ -331,44 +322,9 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
 	smpboot_destroy_threads(plug_thread);
 	mutex_unlock(&smpboot_threads_lock);
 	put_online_cpus();
-	free_cpumask_var(plug_thread->cpumask);
 }
 EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
 
-/**
- * smpboot_update_cpumask_percpu_thread - Adjust which per_cpu hotplug threads stay parked
- * @plug_thread:	Hotplug thread descriptor
- * @new:		Revised mask to use
- *
- * The cpumask field in the smp_hotplug_thread must not be updated directly
- * by the client, but only by calling this function.
- * This function can only be called on a registered smp_hotplug_thread.
- */
-void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
-					  const struct cpumask *new)
-{
-	struct cpumask *old = plug_thread->cpumask;
-	static struct cpumask tmp;
-	unsigned int cpu;
-
-	lockdep_assert_cpus_held();
-	mutex_lock(&smpboot_threads_lock);
-
-	/* Park threads that were exclusively enabled on the old mask. */
-	cpumask_andnot(&tmp, old, new);
-	for_each_cpu_and(cpu, &tmp, cpu_online_mask)
-		smpboot_park_thread(plug_thread, cpu);
-
-	/* Unpark threads that are exclusively enabled on the new mask. */
-	cpumask_andnot(&tmp, new, old);
-	for_each_cpu_and(cpu, &tmp, cpu_online_mask)
-		smpboot_unpark_thread(plug_thread, cpu);
-
-	cpumask_copy(old, new);
-
-	mutex_unlock(&smpboot_threads_lock);
-}
-
 static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
 
 /*
-- 
cgit v1.2.3


From d03db2bc26f0e4a6849ad649a09c9c73fccdc656 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:22 -0700
Subject: compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline
 declarations

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: sedat.dilek@gmail.com
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler-gcc.h | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index fd282c7d3e5e..573f5a7d42d4 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -65,6 +65,18 @@
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #endif
 
+/*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
 /*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
@@ -72,19 +84,22 @@
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline		__attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__	__attribute__((always_inline,unused)) notrace
-#define __inline __inline	__attribute__((always_inline,unused)) notrace
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline		__attribute__((unused)) notrace
-#define __inline__ __inline__	__attribute__((unused)) notrace
-#define __inline __inline	__attribute__((unused)) notrace
+#define inline inline		__attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline	inline __attribute__((always_inline))
 #define  noinline	__attribute__((noinline))
 
-- 
cgit v1.2.3


From 71b38245acb05a38d2d861792bdd99cd9f6a0f78 Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Tue, 5 Jun 2018 13:37:51 -0700
Subject: Drivers: hv: vmbus: Add comments on ring buffer signaling

Add comments describing intricacies of Hyper-V ring buffer
signaling code.  This information is not in Hyper-V public
documents, so include here to capture the knowledge for
future coders.

There are no code changes in this commit.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/ring_buffer.c | 65 ++++++++++++++++++++++++++++++++++++++++--------
 include/linux/hyperv.h   | 31 +++++++++++++++++------
 2 files changed, 77 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index be3c8b10b84a..3e90eb91db45 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -431,7 +431,24 @@ static u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi,
 }
 
 /*
- * Update host ring buffer after iterating over packets.
+ * Update host ring buffer after iterating over packets. If the host has
+ * stopped queuing new entries because it found the ring buffer full, and
+ * sufficient space is being freed up, signal the host. But be careful to
+ * only signal the host when necessary, both for performance reasons and
+ * because Hyper-V protects itself by throttling guests that signal
+ * inappropriately.
+ *
+ * Determining when to signal is tricky. There are three key data inputs
+ * that must be handled in this order to avoid race conditions:
+ *
+ * 1. Update the read_index
+ * 2. Read the pending_send_sz
+ * 3. Read the current write_index
+ *
+ * The interrupt_mask is not used to determine when to signal. The
+ * interrupt_mask is used only on the guest->host ring buffer when
+ * sending requests to the host. The host does not use it on the host->
+ * guest ring buffer to indicate whether it should be signaled.
  */
 void hv_pkt_iter_close(struct vmbus_channel *channel)
 {
@@ -447,22 +464,30 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
 	start_read_index = rbi->ring_buffer->read_index;
 	rbi->ring_buffer->read_index = rbi->priv_read_index;
 
+	/*
+	 * Older versions of Hyper-V (before WS2102 and Win8) do not
+	 * implement pending_send_sz and simply poll if the host->guest
+	 * ring buffer is full.  No signaling is needed or expected.
+	 */
 	if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz)
 		return;
 
 	/*
 	 * Issue a full memory barrier before making the signaling decision.
-	 * Here is the reason for having this barrier:
-	 * If the reading of the pend_sz (in this function)
-	 * were to be reordered and read before we commit the new read
-	 * index (in the calling function)  we could
-	 * have a problem. If the host were to set the pending_sz after we
-	 * have sampled pending_sz and go to sleep before we commit the
+	 * If reading pending_send_sz were to be reordered and happen
+	 * before we commit the new read_index, a race could occur.  If the
+	 * host were to set the pending_send_sz after we have sampled
+	 * pending_send_sz, and the ring buffer blocks before we commit the
 	 * read index, we could miss sending the interrupt. Issue a full
 	 * memory barrier to address this.
 	 */
 	virt_mb();
 
+	/*
+	 * If the pending_send_sz is zero, then the ring buffer is not
+	 * blocked and there is no need to signal.  This is far by the
+	 * most common case, so exit quickly for best performance.
+	 */
 	pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
 	if (!pending_sz)
 		return;
@@ -476,14 +501,32 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
 	bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index);
 
 	/*
-	 * If there was space before we began iteration,
-	 * then host was not blocked.
+	 * We want to signal the host only if we're transitioning
+	 * from a "not enough free space" state to a "enough free
+	 * space" state.  For example, it's possible that this function
+	 * could run and free up enough space to signal the host, and then
+	 * run again and free up additional space before the host has a
+	 * chance to clear the pending_send_sz.  The 2nd invocation would
+	 * be a null transition from "enough free space" to "enough free
+	 * space", which doesn't warrant a signal.
+	 *
+	 * Exactly filling the ring buffer is treated as "not enough
+	 * space". The ring buffer always must have at least one byte
+	 * empty so the empty and full conditions are distinguishable.
+	 * hv_get_bytes_to_write() doesn't fully tell the truth in
+	 * this regard.
+	 *
+	 * So first check if we were in the "enough free space" state
+	 * before we began the iteration. If so, the host was not
+	 * blocked, and there's no need to signal.
 	 */
-
 	if (curr_write_sz - bytes_read > pending_sz)
 		return;
 
-	/* If pending write will not fit, don't give false hope. */
+	/*
+	 * Similarly, if the new state is "not enough space", then
+	 * there's no need to signal.
+	 */
 	if (curr_write_sz <= pending_sz)
 		return;
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 3a3012f57be4..2330f08062c7 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -89,18 +89,33 @@ struct hv_ring_buffer {
 	u32 interrupt_mask;
 
 	/*
-	 * Win8 uses some of the reserved bits to implement
-	 * interrupt driven flow management. On the send side
-	 * we can request that the receiver interrupt the sender
-	 * when the ring transitions from being full to being able
-	 * to handle a message of size "pending_send_sz".
+	 * WS2012/Win8 and later versions of Hyper-V implement interrupt
+	 * driven flow management. The feature bit feat_pending_send_sz
+	 * is set by the host on the host->guest ring buffer, and by the
+	 * guest on the guest->host ring buffer.
 	 *
-	 * Add necessary state for this enhancement.
+	 * The meaning of the feature bit is a bit complex in that it has
+	 * semantics that apply to both ring buffers.  If the guest sets
+	 * the feature bit in the guest->host ring buffer, the guest is
+	 * telling the host that:
+	 * 1) It will set the pending_send_sz field in the guest->host ring
+	 *    buffer when it is waiting for space to become available, and
+	 * 2) It will read the pending_send_sz field in the host->guest
+	 *    ring buffer and interrupt the host when it frees enough space
+	 *
+	 * Similarly, if the host sets the feature bit in the host->guest
+	 * ring buffer, the host is telling the guest that:
+	 * 1) It will set the pending_send_sz field in the host->guest ring
+	 *    buffer when it is waiting for space to become available, and
+	 * 2) It will read the pending_send_sz field in the guest->host
+	 *    ring buffer and interrupt the guest when it frees enough space
+	 *
+	 * If either the guest or host does not set the feature bit that it
+	 * owns, that guest or host must do polling if it encounters a full
+	 * ring buffer, and not signal the other end with an interrupt.
 	 */
 	u32 pending_send_sz;
-
 	u32 reserved1[12];
-
 	union {
 		struct {
 			u32 feat_pending_send_sz:1;
-- 
cgit v1.2.3


From 63453b59e41173241c4efe9335815f6432fa8586 Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Wed, 20 Jun 2018 10:51:53 +0200
Subject: i2c: smbus: add unlocked __i2c_smbus_xfer variant

Removes all locking from i2c_smbus_xfer and renames it to __i2c_smbus_xfer,
then adds a new i2c_smbus_xfer function that simply grabs the lock while
calling the unlocked variant.

This is not perfectly equivalent, since i2c_smbus_xfer was callable from
atomic/irq context if you happened to end up emulating SMBus with an I2C
transfer, and that is no longer the case with this patch. It is unknown
(to me) if anything depends on that quirk, but it seems fragile enough to
simply break those cases and require them to call i2c_transfer directly
instead.

While at it, for consistency rename the 2nd to last argument (size) of
the i2c_smbus_xfer declaration to protocol and remove the surplus extern
marker.

Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-smbus.c | 28 ++++++++++++++++++++--------
 include/linux/i2c.h          | 11 ++++++++---
 2 files changed, 28 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c
index 51970bae3c4a..9cd66cabb84f 100644
--- a/drivers/i2c/i2c-core-smbus.c
+++ b/drivers/i2c/i2c-core-smbus.c
@@ -463,7 +463,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
 			msg[num-1].len++;
 	}
 
-	status = i2c_transfer(adapter, msg, num);
+	status = __i2c_transfer(adapter, msg, num);
 	if (status < 0)
 		goto cleanup;
 	if (status != num) {
@@ -524,9 +524,24 @@ cleanup:
  * This executes an SMBus protocol operation, and returns a negative
  * errno code else zero on success.
  */
-s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
-		   char read_write, u8 command, int protocol,
-		   union i2c_smbus_data *data)
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		   unsigned short flags, char read_write,
+		   u8 command, int protocol, union i2c_smbus_data *data)
+{
+	s32 res;
+
+	i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
+	res = __i2c_smbus_xfer(adapter, addr, flags, read_write,
+			       command, protocol, data);
+	i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
+
+	return res;
+}
+EXPORT_SYMBOL(i2c_smbus_xfer);
+
+s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		     unsigned short flags, char read_write,
+		     u8 command, int protocol, union i2c_smbus_data *data)
 {
 	unsigned long orig_jiffies;
 	int try;
@@ -543,8 +558,6 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
 	flags &= I2C_M_TEN | I2C_CLIENT_PEC | I2C_CLIENT_SCCB;
 
 	if (adapter->algo->smbus_xfer) {
-		i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
-
 		/* Retry automatically on arbitration loss */
 		orig_jiffies = jiffies;
 		for (res = 0, try = 0; try <= adapter->retries; try++) {
@@ -557,7 +570,6 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
 				       orig_jiffies + adapter->timeout))
 				break;
 		}
-		i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
 
 		if (res != -EOPNOTSUPP || !adapter->algo->master_xfer)
 			goto trace;
@@ -579,7 +591,7 @@ trace:
 
 	return res;
 }
-EXPORT_SYMBOL(i2c_smbus_xfer);
+EXPORT_SYMBOL(__i2c_smbus_xfer);
 
 /**
  * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 254cd34eeae2..465afb092fa7 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -140,9 +140,14 @@ extern int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
    and probably just as fast.
    Note that we use i2c_adapter here, because you do not need a specific
    smbus adapter to call this function. */
-extern s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
-			  unsigned short flags, char read_write, u8 command,
-			  int size, union i2c_smbus_data *data);
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		   unsigned short flags, char read_write, u8 command,
+		   int protocol, union i2c_smbus_data *data);
+
+/* Unlocked flavor */
+s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		     unsigned short flags, char read_write, u8 command,
+		     int protocol, union i2c_smbus_data *data);
 
 /* Now follow the 'nice' access routines. These also document the calling
    conventions of i2c_smbus_xfer. */
-- 
cgit v1.2.3


From 5ccba64a560fa6ca06008d4001f5d46ebeb34b41 Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Fri, 2 Feb 2018 10:14:49 +0800
Subject: ftrace: Nuke clear_ftrace_function

clear_ftrace_function is not used outside of ftrace.c and is not help to
use a function, so nuke it per Steve's suggestion.

Link: http://lkml.kernel.org/r/1517537689-34947-1-git-send-email-xieyisheng1@huawei.com

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  2 --
 kernel/trace/ftrace.c  | 13 +------------
 2 files changed, 1 insertion(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8154f4920fcb..ebb77674be90 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -223,7 +223,6 @@ extern enum ftrace_tracing_type_t ftrace_tracing_type;
  */
 int register_ftrace_function(struct ftrace_ops *ops);
 int unregister_ftrace_function(struct ftrace_ops *ops);
-void clear_ftrace_function(void);
 
 extern void ftrace_stub(unsigned long a0, unsigned long a1,
 			struct ftrace_ops *op, struct pt_regs *regs);
@@ -239,7 +238,6 @@ static inline int ftrace_nr_registered_ops(void)
 {
 	return 0;
 }
-static inline void clear_ftrace_function(void) { }
 static inline void ftrace_kill(void) { }
 static inline void ftrace_free_init_mem(void) { }
 static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index efed9c1cfb7e..caf9cbf35816 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -192,17 +192,6 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 	op->saved_func(ip, parent_ip, op, regs);
 }
 
-/**
- * clear_ftrace_function - reset the ftrace function
- *
- * This NULLs the ftrace function and in essence stops
- * tracing.  There may be lag
- */
-void clear_ftrace_function(void)
-{
-	ftrace_trace_function = ftrace_stub;
-}
-
 static void ftrace_sync(struct work_struct *work)
 {
 	/*
@@ -6689,7 +6678,7 @@ void ftrace_kill(void)
 {
 	ftrace_disabled = 1;
 	ftrace_enabled = 0;
-	clear_ftrace_function();
+	ftrace_trace_function = ftrace_stub;
 }
 
 /**
-- 
cgit v1.2.3


From 077772468ec141b22e1e7c0c58bc09e2f9dc8762 Mon Sep 17 00:00:00 2001
From: Wang Dongsheng <dongsheng.wang@hxt-semitech.com>
Date: Sun, 1 Jul 2018 23:15:46 -0700
Subject: net: phy: marvell: change default m88e1510 LED configuration

The m88e1121 LED default configuration does not apply m88e151x.
So add a function to relpace m88e1121 LED configuration.

Signed-off-by: Wang Dongsheng <dongsheng.wang@hxt-semitech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c   | 54 ++++++++++++++++++++++++++++++---------------
 include/linux/marvell_phy.h |  2 ++
 2 files changed, 38 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index b8f57e9b9379..1cd439bdf608 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -130,8 +130,9 @@
 #define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS		BIT(12)
 #define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE	BIT(14)
 
-#define MII_88E1121_PHY_LED_CTRL	16
+#define MII_PHY_LED_CTRL	        16
 #define MII_88E1121_PHY_LED_DEF		0x0030
+#define MII_88E1510_PHY_LED_DEF		0x1177
 
 #define MII_M1011_PHY_STATUS		0x11
 #define MII_M1011_PHY_STATUS_1000	0x8000
@@ -632,8 +633,40 @@ error:
 	return err;
 }
 
+static void marvell_config_led(struct phy_device *phydev)
+{
+	u16 def_config;
+	int err;
+
+	switch (MARVELL_PHY_FAMILY_ID(phydev->phy_id)) {
+	/* Default PHY LED config: LED[0] .. Link, LED[1] .. Activity */
+	case MARVELL_PHY_FAMILY_ID(MARVELL_PHY_ID_88E1121R):
+	case MARVELL_PHY_FAMILY_ID(MARVELL_PHY_ID_88E1318S):
+		def_config = MII_88E1121_PHY_LED_DEF;
+		break;
+	/* Default PHY LED config:
+	 * LED[0] .. 1000Mbps Link
+	 * LED[1] .. 100Mbps Link
+	 * LED[2] .. Blink, Activity
+	 */
+	case MARVELL_PHY_FAMILY_ID(MARVELL_PHY_ID_88E1510):
+		def_config = MII_88E1510_PHY_LED_DEF;
+		break;
+	default:
+		return;
+	}
+
+	err = phy_write_paged(phydev, MII_MARVELL_LED_PAGE, MII_PHY_LED_CTRL,
+			      def_config);
+	if (err < 0)
+		pr_warn("Fail to config marvell phy LED.\n");
+}
+
 static int marvell_config_init(struct phy_device *phydev)
 {
+	/* Set defalut LED */
+	marvell_config_led(phydev);
+
 	/* Set registers from marvell,reg-init DT property */
 	return marvell_of_reg_init(phydev);
 }
@@ -813,21 +846,6 @@ static int m88e1111_config_init(struct phy_device *phydev)
 	return genphy_soft_reset(phydev);
 }
 
-static int m88e1121_config_init(struct phy_device *phydev)
-{
-	int err;
-
-	/* Default PHY LED config: LED[0] .. Link, LED[1] .. Activity */
-	err = phy_write_paged(phydev, MII_MARVELL_LED_PAGE,
-			      MII_88E1121_PHY_LED_CTRL,
-			      MII_88E1121_PHY_LED_DEF);
-	if (err < 0)
-		return err;
-
-	/* Set marvell,reg-init configuration from device tree */
-	return marvell_config_init(phydev);
-}
-
 static int m88e1318_config_init(struct phy_device *phydev)
 {
 	if (phy_interrupt_is_valid(phydev)) {
@@ -841,7 +859,7 @@ static int m88e1318_config_init(struct phy_device *phydev)
 			return err;
 	}
 
-	return m88e1121_config_init(phydev);
+	return marvell_config_init(phydev);
 }
 
 static int m88e1510_config_init(struct phy_device *phydev)
@@ -2087,7 +2105,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = &m88e1121_probe,
-		.config_init = &m88e1121_config_init,
+		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1121_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 4f5f8c21e283..1eb6f244588d 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -27,6 +27,8 @@
  */
 #define MARVELL_PHY_ID_88E6390		0x01410f90
 
+#define MARVELL_PHY_FAMILY_ID(id)	((id) >> 4)
+
 /* struct phy_device dev_flags definitions */
 #define MARVELL_PHY_M1145_FLAGS_RESISTANCE	0x00000001
 #define MARVELL_PHY_M1118_DNS323_LEDS		0x00000002
-- 
cgit v1.2.3


From 8a9c58d28d0f66569737a3295116710ed24573cd Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 2 Jul 2018 18:21:12 +0800
Subject: sctp: add support for dscp and flowlabel per transport

Like some other per transport params, flowlabel and dscp are added
in transport, asoc and sctp_sock. By default, transport sets its
value from asoc's, and asoc does it from sctp_sock. flowlabel
only works for ipv6 transport.

Other than that they need to be passed down in sctp_xmit, flow4/6
also needs to set them before looking up route in get_dst.

Note that it uses '& 0x100000' to check if flowlabel is set and
'& 0x1' (tos 1st bit is unused) to check if dscp is set by users,
so that they could be set to 0 by sockopt in next patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h       |  7 +++++++
 include/net/sctp/structs.h |  9 +++++++++
 net/sctp/associola.c       |  7 +++++++
 net/sctp/ipv6.c            | 11 +++++++++--
 net/sctp/protocol.c        | 16 ++++++++++++----
 5 files changed, 44 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index b36c76635f18..83d94341e003 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -801,4 +801,11 @@ struct sctp_strreset_resptsn {
 	__be32 receivers_next_tsn;
 };
 
+enum {
+	SCTP_DSCP_SET_MASK = 0x1,
+	SCTP_DSCP_VAL_MASK = 0xfc,
+	SCTP_FLOWLABEL_SET_MASK = 0x100000,
+	SCTP_FLOWLABEL_VAL_MASK = 0xfffff
+};
+
 #endif /* __LINUX_SCTP_H__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 701a51736fa5..ab869e0d8326 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -193,6 +193,9 @@ struct sctp_sock {
 	/* This is the max_retrans value for new associations. */
 	__u16 pathmaxrxt;
 
+	__u32 flowlabel;
+	__u8  dscp;
+
 	/* The initial Path MTU to use for new associations. */
 	__u32 pathmtu;
 
@@ -895,6 +898,9 @@ struct sctp_transport {
 	 */
 	__u16 pathmaxrxt;
 
+	__u32 flowlabel;
+	__u8  dscp;
+
 	/* This is the partially failed retrans value for the transport
 	 * and will be initialized from the assocs value.  This can be changed
 	 * using the SCTP_PEER_ADDR_THLDS socket option
@@ -1772,6 +1778,9 @@ struct sctp_association {
 	 */
 	__u16 pathmaxrxt;
 
+	__u32 flowlabel;
+	__u8  dscp;
+
 	/* Flag that path mtu update is pending */
 	__u8   pmtu_pending;
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5d5a16204d50..16ecfbc95614 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -115,6 +115,9 @@ static struct sctp_association *sctp_association_init(
 	/* Initialize path max retrans value. */
 	asoc->pathmaxrxt = sp->pathmaxrxt;
 
+	asoc->flowlabel = sp->flowlabel;
+	asoc->dscp = sp->dscp;
+
 	/* Initialize default path MTU. */
 	asoc->pathmtu = sp->pathmtu;
 
@@ -647,6 +650,10 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	peer->sackdelay = asoc->sackdelay;
 	peer->sackfreq = asoc->sackfreq;
 
+	if (addr->sa.sa_family == AF_INET6)
+		peer->flowlabel = asoc->flowlabel;
+	peer->dscp = asoc->dscp;
+
 	/* Enable/disable heartbeat, SACK delay, and path MTU discovery
 	 * based on association setting.
 	 */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0cd2e764f47f..38102bf7f13e 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -209,12 +209,17 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 	struct sock *sk = skb->sk;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct flowi6 *fl6 = &transport->fl.u.ip6;
+	__u8 tclass = np->tclass;
 	int res;
 
 	pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
 		 skb->len, &fl6->saddr, &fl6->daddr);
 
-	IP6_ECN_flow_xmit(sk, fl6->flowlabel);
+	if (transport->dscp & SCTP_DSCP_SET_MASK)
+		tclass = transport->dscp & SCTP_DSCP_VAL_MASK;
+
+	if (INET_ECN_is_capable(tclass))
+		IP6_ECN_flow_xmit(sk, fl6->flowlabel);
 
 	if (!(transport->param_flags & SPP_PMTUD_ENABLE))
 		skb->ignore_df = 1;
@@ -223,7 +228,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 
 	rcu_read_lock();
 	res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
-		       np->tclass);
+		       tclass);
 	rcu_read_unlock();
 	return res;
 }
@@ -254,6 +259,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		fl6->flowi6_oif = daddr->v6.sin6_scope_id;
 	else if (asoc)
 		fl6->flowi6_oif = asoc->base.sk->sk_bound_dev_if;
+	if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK)
+		fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK);
 
 	pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr);
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 67f73d3a1356..e948db29ab53 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -426,13 +426,16 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	struct dst_entry *dst = NULL;
 	union sctp_addr *daddr = &t->ipaddr;
 	union sctp_addr dst_saddr;
+	__u8 tos = inet_sk(sk)->tos;
 
+	if (t->dscp & SCTP_DSCP_SET_MASK)
+		tos = t->dscp & SCTP_DSCP_VAL_MASK;
 	memset(fl4, 0x0, sizeof(struct flowi4));
 	fl4->daddr  = daddr->v4.sin_addr.s_addr;
 	fl4->fl4_dport = daddr->v4.sin_port;
 	fl4->flowi4_proto = IPPROTO_SCTP;
 	if (asoc) {
-		fl4->flowi4_tos = RT_CONN_FLAGS(asoc->base.sk);
+		fl4->flowi4_tos = RT_CONN_FLAGS_TOS(asoc->base.sk, tos);
 		fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
 		fl4->fl4_sport = htons(asoc->base.bind_addr.port);
 	}
@@ -495,7 +498,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		fl4->fl4_sport = laddr->a.v4.sin_port;
 		flowi4_update_output(fl4,
 				     asoc->base.sk->sk_bound_dev_if,
-				     RT_CONN_FLAGS(asoc->base.sk),
+				     RT_CONN_FLAGS_TOS(asoc->base.sk, tos),
 				     daddr->v4.sin_addr.s_addr,
 				     laddr->a.v4.sin_addr.s_addr);
 
@@ -971,16 +974,21 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
 			       struct sctp_transport *transport)
 {
 	struct inet_sock *inet = inet_sk(skb->sk);
+	__u8 dscp = inet->tos;
 
 	pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb,
-		 skb->len, &transport->fl.u.ip4.saddr, &transport->fl.u.ip4.daddr);
+		 skb->len, &transport->fl.u.ip4.saddr,
+		 &transport->fl.u.ip4.daddr);
+
+	if (transport->dscp & SCTP_DSCP_SET_MASK)
+		dscp = transport->dscp & SCTP_DSCP_VAL_MASK;
 
 	inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
 			 IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
 
 	SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
 
-	return ip_queue_xmit(&inet->sk, skb, &transport->fl);
+	return __ip_queue_xmit(&inet->sk, skb, &transport->fl, dscp);
 }
 
 static struct sctp_af sctp_af_inet;
-- 
cgit v1.2.3


From f6ad8c1bcdf014272d08c55b9469536952a0a771 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 2 Jul 2018 16:12:45 +0100
Subject: net: core: trivial netif_receive_skb_list() entry point

Just calls netif_receive_skb() in a loop.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 64480a0f2c16..f67258f057ca 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3477,6 +3477,7 @@ int netif_rx(struct sk_buff *skb);
 int netif_rx_ni(struct sk_buff *skb);
 int netif_receive_skb(struct sk_buff *skb);
 int netif_receive_skb_core(struct sk_buff *skb);
+void netif_receive_skb_list(struct list_head *head);
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
diff --git a/net/core/dev.c b/net/core/dev.c
index 08d58e0debe5..85c456a4b551 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4906,6 +4906,25 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
+/**
+ *	netif_receive_skb_list - process many receive buffers from network
+ *	@head: list of skbs to process.
+ *
+ *	For now, just calls netif_receive_skb() in a loop, ignoring the
+ *	return value.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ */
+void netif_receive_skb_list(struct list_head *head)
+{
+	struct sk_buff *skb, *next;
+
+	list_for_each_entry_safe(skb, next, head, list)
+		netif_receive_skb(skb);
+}
+EXPORT_SYMBOL(netif_receive_skb_list);
+
 DEFINE_PER_CPU(struct work_struct, flush_works);
 
 /* Network device is going away, flush any packets still pending */
-- 
cgit v1.2.3


From 4ce0017a373afaaa9ef17614d8fa4f6fde261d18 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 2 Jul 2018 16:13:40 +0100
Subject: net: core: another layer of lists, around PF_MEMALLOC skb handling

First example of a layer splitting the list (rather than merely taking
 individual packets off it).
Involves new list.h function, list_cut_before(), like list_cut_position()
 but cuts on the other side of the given entry.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/list.h | 30 ++++++++++++++++++++++++++++++
 net/core/dev.c       | 44 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 66 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 4b129df4d46b..de04cc5ed536 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -285,6 +285,36 @@ static inline void list_cut_position(struct list_head *list,
 		__list_cut_position(list, head, entry);
 }
 
+/**
+ * list_cut_before - cut a list into two, before given entry
+ * @list: a new list to add all removed entries
+ * @head: a list with entries
+ * @entry: an entry within head, could be the head itself
+ *
+ * This helper moves the initial part of @head, up to but
+ * excluding @entry, from @head to @list.  You should pass
+ * in @entry an element you know is on @head.  @list should
+ * be an empty list or a list you do not care about losing
+ * its data.
+ * If @entry == @head, all entries on @head are moved to
+ * @list.
+ */
+static inline void list_cut_before(struct list_head *list,
+				   struct list_head *head,
+				   struct list_head *entry)
+{
+	if (head->next == entry) {
+		INIT_LIST_HEAD(list);
+		return;
+	}
+	list->next = head->next;
+	list->next->prev = list;
+	list->prev = entry->prev;
+	list->prev->next = list;
+	head->next = entry;
+	entry->prev = head;
+}
+
 static inline void __list_splice(const struct list_head *list,
 				 struct list_head *prev,
 				 struct list_head *next)
diff --git a/net/core/dev.c b/net/core/dev.c
index 1e87361df2ab..9aadef976e8c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4784,6 +4784,14 @@ int netif_receive_skb_core(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb_core);
 
+static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
+{
+	struct sk_buff *skb, *next;
+
+	list_for_each_entry_safe(skb, next, head, list)
+		__netif_receive_skb_core(skb, pfmemalloc);
+}
+
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	int ret;
@@ -4809,6 +4817,34 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	return ret;
 }
 
+static void __netif_receive_skb_list(struct list_head *head)
+{
+	unsigned long noreclaim_flag = 0;
+	struct sk_buff *skb, *next;
+	bool pfmemalloc = false; /* Is current sublist PF_MEMALLOC? */
+
+	list_for_each_entry_safe(skb, next, head, list) {
+		if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) {
+			struct list_head sublist;
+
+			/* Handle the previous sublist */
+			list_cut_before(&sublist, head, &skb->list);
+			__netif_receive_skb_list_core(&sublist, pfmemalloc);
+			pfmemalloc = !pfmemalloc;
+			/* See comments in __netif_receive_skb */
+			if (pfmemalloc)
+				noreclaim_flag = memalloc_noreclaim_save();
+			else
+				memalloc_noreclaim_restore(noreclaim_flag);
+		}
+	}
+	/* Handle the remaining sublist */
+	__netif_receive_skb_list_core(head, pfmemalloc);
+	/* Restore pflags */
+	if (pfmemalloc)
+		memalloc_noreclaim_restore(noreclaim_flag);
+}
+
 static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
@@ -4843,14 +4879,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 	return ret;
 }
 
-static void __netif_receive_skb_list(struct list_head *head)
-{
-	struct sk_buff *skb, *next;
-
-	list_for_each_entry_safe(skb, next, head, list)
-		__netif_receive_skb(skb);
-}
-
 static int netif_receive_skb_internal(struct sk_buff *skb)
 {
 	int ret;
-- 
cgit v1.2.3


From 17266ee939849cb095ed7dd9edbec4162172226b Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 2 Jul 2018 16:14:12 +0100
Subject: net: ipv4: listified version of ip_rcv

Also involved adding a way to run a netfilter hook over a list of packets.
 Rather than attempting to make netfilter know about lists (which would be
 a major project in itself) we just let it call the regular okfn (in this
 case ip_rcv_finish()) for any packets it steals, and have it give us back
 a list of packets it's synchronously accepted (which normally NF_HOOK
 would automatically call okfn() on, but we want to be able to potentially
 pass the list to a listified version of okfn().)
The netfilter hooks themselves are indirect calls that still happen per-
 packet (see nf_hook_entry_hookfn()), but again, changing that can be left
 for future work.

There is potential for out-of-order receives if the netfilter hook ends up
 synchronously stealing packets, as they will be processed before any
 accepts earlier in the list.  However, it was already possible for an
 asynchronous accept to cause out-of-order receives, so presumably this is
 considered OK.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +++
 include/linux/netfilter.h | 22 +++++++++++++++
 include/net/ip.h          |  2 ++
 net/core/dev.c            |  8 +++---
 net/ipv4/af_inet.c        |  1 +
 net/ipv4/ip_input.c       | 68 ++++++++++++++++++++++++++++++++++++++++++-----
 6 files changed, 94 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f67258f057ca..c1ef749b6f9f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2297,6 +2297,9 @@ struct packet_type {
 					 struct net_device *,
 					 struct packet_type *,
 					 struct net_device *);
+	void			(*list_func) (struct list_head *,
+					      struct packet_type *,
+					      struct net_device *);
 	bool			(*id_match)(struct packet_type *ptype,
 					    struct sock *sk);
 	void			*af_packet_priv;
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index dd2052f0efb7..5a5e0a2ab2a3 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -288,6 +288,20 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct
 	return ret;
 }
 
+static inline void
+NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
+	     struct list_head *head, struct net_device *in, struct net_device *out,
+	     int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+{
+	struct sk_buff *skb, *next;
+
+	list_for_each_entry_safe(skb, next, head, list) {
+		int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
+		if (ret != 1)
+			list_del(&skb->list);
+	}
+}
+
 /* Call setsockopt() */
 int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
 		  unsigned int len);
@@ -369,6 +383,14 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	return okfn(net, sk, skb);
 }
 
+static inline void
+NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
+	     struct list_head *head, struct net_device *in, struct net_device *out,
+	     int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+{
+	/* nothing to do */
+}
+
 static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 			  struct sock *sk, struct sk_buff *skb,
 			  struct net_device *indev, struct net_device *outdev,
diff --git a/include/net/ip.h b/include/net/ip.h
index 09da79d8ceea..99d1b835d2aa 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -138,6 +138,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
 			  struct ip_options_rcu *opt);
 int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	   struct net_device *orig_dev);
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+		 struct net_device *orig_dev);
 int ip_local_deliver(struct sk_buff *skb);
 int ip_mr_input(struct sk_buff *skb);
 int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1bc485bb0678..5e22719ce71d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4806,9 +4806,11 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head,
 		return;
 	if (list_empty(head))
 		return;
-
-	list_for_each_entry_safe(skb, next, head, list)
-		pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+	if (pt_prev->list_func != NULL)
+		pt_prev->list_func(head, pt_prev, orig_dev);
+	else
+		list_for_each_entry_safe(skb, next, head, list)
+			pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
 static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9263a2c114e0..c716be13d58c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1882,6 +1882,7 @@ fs_initcall(ipv4_offload_init);
 static struct packet_type ip_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IP),
 	.func = ip_rcv,
+	.list_func = ip_list_rcv,
 };
 
 static int __init inet_init(void)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 7582713dd18f..914240830bdf 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -408,10 +408,9 @@ drop_error:
 /*
  * 	Main IP Receive routine.
  */
-int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
 {
 	const struct iphdr *iph;
-	struct net *net;
 	u32 len;
 
 	/* When the interface is in promisc. mode, drop all the crap
@@ -421,7 +420,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		goto drop;
 
 
-	net = dev_net(dev);
 	__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
@@ -489,9 +487,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	/* Must drop socket now because of tproxy. */
 	skb_orphan(skb);
 
-	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
-		       net, NULL, skb, dev, NULL,
-		       ip_rcv_finish);
+	return skb;
 
 csum_error:
 	__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
@@ -500,5 +496,63 @@ inhdr_error:
 drop:
 	kfree_skb(skb);
 out:
-	return NET_RX_DROP;
+	return NULL;
+}
+
+/*
+ * IP receive entry point
+ */
+int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
+	   struct net_device *orig_dev)
+{
+	struct net *net = dev_net(dev);
+
+	skb = ip_rcv_core(skb, net);
+	if (skb == NULL)
+		return NET_RX_DROP;
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+		       net, NULL, skb, dev, NULL,
+		       ip_rcv_finish);
+}
+
+static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
+			   struct net *net)
+{
+	struct sk_buff *skb, *next;
+
+	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
+		     head, dev, NULL, ip_rcv_finish);
+	list_for_each_entry_safe(skb, next, head, list)
+		ip_rcv_finish(net, NULL, skb);
+}
+
+/* Receive a list of IP packets */
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+		 struct net_device *orig_dev)
+{
+	struct net_device *curr_dev = NULL;
+	struct net *curr_net = NULL;
+	struct sk_buff *skb, *next;
+	struct list_head sublist;
+
+	list_for_each_entry_safe(skb, next, head, list) {
+		struct net_device *dev = skb->dev;
+		struct net *net = dev_net(dev);
+
+		skb = ip_rcv_core(skb, net);
+		if (skb == NULL)
+			continue;
+
+		if (curr_dev != dev || curr_net != net) {
+			/* dispatch old sublist */
+			list_cut_before(&sublist, head, &skb->list);
+			if (!list_empty(&sublist))
+				ip_sublist_rcv(&sublist, dev, net);
+			/* start new sublist */
+			curr_dev = dev;
+			curr_net = net;
+		}
+	}
+	/* dispatch final sublist */
+	ip_sublist_rcv(head, curr_dev, curr_net);
 }
-- 
cgit v1.2.3


From 06635894a371be69373fcb5484f559284c8d1ac3 Mon Sep 17 00:00:00 2001
From: Sameer Nanda <snanda@chromium.org>
Date: Wed, 2 May 2018 17:44:16 +0200
Subject: mfd: cros_ec: Add USBPD charger commands and struct definitions.

The USBPD charger driver gets information from the ChromeOS EC, this
patch adds the USBPD charger definitions needed by this driver.

Signed-off-by: Sameer Nanda <snanda@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec_commands.h | 132 +++++++++++++++++++++++++++++++++--
 1 file changed, 128 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index f2edd9969b40..0d926492ac3a 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -2593,14 +2593,18 @@ struct ec_params_current_limit {
 } __packed;
 
 /*
- * Set maximum external power current.
+ * Set maximum external voltage / current.
  */
-#define EC_CMD_EXT_POWER_CURRENT_LIMIT 0xa2
+#define EC_CMD_EXTERNAL_POWER_LIMIT 0x00A2
 
-struct ec_params_ext_power_current_limit {
-	uint32_t limit; /* in mA */
+/* Command v0 is used only on Spring and is obsolete + unsupported */
+struct ec_params_external_power_limit_v1 {
+	uint16_t current_lim; /* in mA, or EC_POWER_LIMIT_NONE to clear limit */
+	uint16_t voltage_lim; /* in mV, or EC_POWER_LIMIT_NONE to clear limit */
 } __packed;
 
+#define EC_POWER_LIMIT_NONE 0xffff
+
 /* Inform the EC when entering a sleep state */
 #define EC_CMD_HOST_SLEEP_EVENT 0xa9
 
@@ -2974,6 +2978,12 @@ enum usb_chg_type {
 	USB_CHG_TYPE_VBUS,
 	USB_CHG_TYPE_UNKNOWN,
 };
+enum usb_power_roles {
+	USB_PD_PORT_POWER_DISCONNECTED,
+	USB_PD_PORT_POWER_SOURCE,
+	USB_PD_PORT_POWER_SINK,
+	USB_PD_PORT_POWER_SINK_NOT_CHARGING,
+};
 
 struct usb_chg_measures {
 	uint16_t voltage_max;
@@ -2991,6 +3001,120 @@ struct ec_response_usb_pd_power_info {
 	uint32_t max_power;
 } __packed;
 
+struct ec_params_usb_pd_info_request {
+	uint8_t port;
+} __packed;
+
+/* Read USB-PD Device discovery info */
+#define EC_CMD_USB_PD_DISCOVERY 0x0113
+struct ec_params_usb_pd_discovery_entry {
+	uint16_t vid;  /* USB-IF VID */
+	uint16_t pid;  /* USB-IF PID */
+	uint8_t ptype; /* product type (hub,periph,cable,ama) */
+} __packed;
+
+/* Override default charge behavior */
+#define EC_CMD_PD_CHARGE_PORT_OVERRIDE 0x0114
+
+/* Negative port parameters have special meaning */
+enum usb_pd_override_ports {
+	OVERRIDE_DONT_CHARGE = -2,
+	OVERRIDE_OFF = -1,
+	/* [0, CONFIG_USB_PD_PORT_COUNT): Port# */
+};
+
+struct ec_params_charge_port_override {
+	int16_t override_port; /* Override port# */
+} __packed;
+
+/* Read (and delete) one entry of PD event log */
+#define EC_CMD_PD_GET_LOG_ENTRY 0x0115
+
+struct ec_response_pd_log {
+	uint32_t timestamp; /* relative timestamp in milliseconds */
+	uint8_t type;       /* event type : see PD_EVENT_xx below */
+	uint8_t size_port;  /* [7:5] port number [4:0] payload size in bytes */
+	uint16_t data;      /* type-defined data payload */
+	uint8_t payload[0]; /* optional additional data payload: 0..16 bytes */
+} __packed;
+
+/* The timestamp is the microsecond counter shifted to get about a ms. */
+#define PD_LOG_TIMESTAMP_SHIFT 10 /* 1 LSB = 1024us */
+
+#define PD_LOG_SIZE_MASK  0x1f
+#define PD_LOG_PORT_MASK  0xe0
+#define PD_LOG_PORT_SHIFT    5
+#define PD_LOG_PORT_SIZE(port, size) (((port) << PD_LOG_PORT_SHIFT) | \
+				      ((size) & PD_LOG_SIZE_MASK))
+#define PD_LOG_PORT(size_port) ((size_port) >> PD_LOG_PORT_SHIFT)
+#define PD_LOG_SIZE(size_port) ((size_port) & PD_LOG_SIZE_MASK)
+
+/* PD event log : entry types */
+/* PD MCU events */
+#define PD_EVENT_MCU_BASE       0x00
+#define PD_EVENT_MCU_CHARGE             (PD_EVENT_MCU_BASE+0)
+#define PD_EVENT_MCU_CONNECT            (PD_EVENT_MCU_BASE+1)
+/* Reserved for custom board event */
+#define PD_EVENT_MCU_BOARD_CUSTOM       (PD_EVENT_MCU_BASE+2)
+/* PD generic accessory events */
+#define PD_EVENT_ACC_BASE       0x20
+#define PD_EVENT_ACC_RW_FAIL   (PD_EVENT_ACC_BASE+0)
+#define PD_EVENT_ACC_RW_ERASE  (PD_EVENT_ACC_BASE+1)
+/* PD power supply events */
+#define PD_EVENT_PS_BASE        0x40
+#define PD_EVENT_PS_FAULT      (PD_EVENT_PS_BASE+0)
+/* PD video dongles events */
+#define PD_EVENT_VIDEO_BASE     0x60
+#define PD_EVENT_VIDEO_DP_MODE (PD_EVENT_VIDEO_BASE+0)
+#define PD_EVENT_VIDEO_CODEC   (PD_EVENT_VIDEO_BASE+1)
+/* Returned in the "type" field, when there is no entry available */
+#define PD_EVENT_NO_ENTRY       0xff
+
+/*
+ * PD_EVENT_MCU_CHARGE event definition :
+ * the payload is "struct usb_chg_measures"
+ * the data field contains the port state flags as defined below :
+ */
+/* Port partner is a dual role device */
+#define CHARGE_FLAGS_DUAL_ROLE         BIT(15)
+/* Port is the pending override port */
+#define CHARGE_FLAGS_DELAYED_OVERRIDE  BIT(14)
+/* Port is the override port */
+#define CHARGE_FLAGS_OVERRIDE          BIT(13)
+/* Charger type */
+#define CHARGE_FLAGS_TYPE_SHIFT               3
+#define CHARGE_FLAGS_TYPE_MASK       (0xf << CHARGE_FLAGS_TYPE_SHIFT)
+/* Power delivery role */
+#define CHARGE_FLAGS_ROLE_MASK         (7 <<  0)
+
+/*
+ * PD_EVENT_PS_FAULT data field flags definition :
+ */
+#define PS_FAULT_OCP                          1
+#define PS_FAULT_FAST_OCP                     2
+#define PS_FAULT_OVP                          3
+#define PS_FAULT_DISCH                        4
+
+/*
+ * PD_EVENT_VIDEO_CODEC payload is "struct mcdp_info".
+ */
+struct mcdp_version {
+	uint8_t major;
+	uint8_t minor;
+	uint16_t build;
+} __packed;
+
+struct mcdp_info {
+	uint8_t family[2];
+	uint8_t chipid[2];
+	struct mcdp_version irom;
+	struct mcdp_version fw;
+} __packed;
+
+/* struct mcdp_info field decoding */
+#define MCDP_CHIPID(chipid) ((chipid[0] << 8) | chipid[1])
+#define MCDP_FAMILY(family) ((family[0] << 8) | family[1])
+
 /* Get info about USB-C SS muxes */
 #define EC_CMD_USB_PD_MUX_INFO 0x11a
 
-- 
cgit v1.2.3


From 7494de0454af50215bc46c93c83b88a32ca39fab Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:38 +0200
Subject: mfd: da9063: Replace regmap_add_irq_chip with devm counterpart

Use devm_regmap_add_irq_chip() instead of plain regmap_add_irq_chip(),
which removes the need for da9063_irq_exit() altogether and also
fixes a bug in da9063_device_init() where the da9063_irq_exit() was
not called in a failpath.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-core.c       | 1 -
 drivers/mfd/da9063-irq.c        | 8 ++------
 include/linux/mfd/da9063/core.h | 1 -
 3 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c
index 6c2870d4e754..2647bb371d86 100644
--- a/drivers/mfd/da9063-core.c
+++ b/drivers/mfd/da9063-core.c
@@ -238,7 +238,6 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq)
 void da9063_device_exit(struct da9063 *da9063)
 {
 	mfd_remove_devices(da9063->dev);
-	da9063_irq_exit(da9063);
 }
 
 MODULE_DESCRIPTION("PMIC driver for Dialog DA9063");
diff --git a/drivers/mfd/da9063-irq.c b/drivers/mfd/da9063-irq.c
index 207bbfe55449..da6ceb41f0d1 100644
--- a/drivers/mfd/da9063-irq.c
+++ b/drivers/mfd/da9063-irq.c
@@ -170,7 +170,8 @@ int da9063_irq_init(struct da9063 *da9063)
 		return -EINVAL;
 	}
 
-	ret = regmap_add_irq_chip(da9063->regmap, da9063->chip_irq,
+	ret = devm_regmap_add_irq_chip(da9063->dev, da9063->regmap,
+			da9063->chip_irq,
 			IRQF_TRIGGER_LOW | IRQF_ONESHOT | IRQF_SHARED,
 			da9063->irq_base, &da9063_irq_chip,
 			&da9063->regmap_irq);
@@ -182,8 +183,3 @@ int da9063_irq_init(struct da9063 *da9063)
 
 	return 0;
 }
-
-void da9063_irq_exit(struct da9063 *da9063)
-{
-	regmap_del_irq_chip(da9063->chip_irq, da9063->regmap_irq);
-}
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index f3ae65db4c86..9ab7049977aa 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -95,6 +95,5 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq);
 int da9063_irq_init(struct da9063 *da9063);
 
 void da9063_device_exit(struct da9063 *da9063);
-void da9063_irq_exit(struct da9063 *da9063);
 
 #endif /* __MFD_DA9063_CORE_H__ */
-- 
cgit v1.2.3


From af8df945876c027969fafefb9ec07b79cfadb16f Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:39 +0200
Subject: mfd: da9063: Replace mfd_add_devices with devm counterpart

Use devm_mfd_add_devices() instead of plain mfd_add_devices(), which
removes the need for da9063_device_exit() altogether and also for the
.remove callback in da9063-i2c.c .

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-core.c       | 11 +++--------
 drivers/mfd/da9063-i2c.c        | 10 ----------
 include/linux/mfd/da9063/core.h |  2 --
 3 files changed, 3 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c
index 2647bb371d86..76258e5709f8 100644
--- a/drivers/mfd/da9063-core.c
+++ b/drivers/mfd/da9063-core.c
@@ -226,20 +226,15 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq)
 
 	da9063->irq_base = regmap_irq_chip_get_base(da9063->regmap_irq);
 
-	ret = mfd_add_devices(da9063->dev, -1, da9063_devs,
-			      ARRAY_SIZE(da9063_devs), NULL, da9063->irq_base,
-			      NULL);
+	ret = devm_mfd_add_devices(da9063->dev, -1, da9063_devs,
+				   ARRAY_SIZE(da9063_devs), NULL,
+				   da9063->irq_base, NULL);
 	if (ret)
 		dev_err(da9063->dev, "Cannot add MFD cells\n");
 
 	return ret;
 }
 
-void da9063_device_exit(struct da9063 *da9063)
-{
-	mfd_remove_devices(da9063->dev);
-}
-
 MODULE_DESCRIPTION("PMIC driver for Dialog DA9063");
 MODULE_AUTHOR("Krystian Garbaciak");
 MODULE_AUTHOR("Michal Hajduk");
diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c
index 981805a2c521..29456e807ed4 100644
--- a/drivers/mfd/da9063-i2c.c
+++ b/drivers/mfd/da9063-i2c.c
@@ -270,15 +270,6 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 	return da9063_device_init(da9063, i2c->irq);
 }
 
-static int da9063_i2c_remove(struct i2c_client *i2c)
-{
-	struct da9063 *da9063 = i2c_get_clientdata(i2c);
-
-	da9063_device_exit(da9063);
-
-	return 0;
-}
-
 static const struct i2c_device_id da9063_i2c_id[] = {
 	{"da9063", PMIC_DA9063},
 	{},
@@ -291,7 +282,6 @@ static struct i2c_driver da9063_i2c_driver = {
 		.of_match_table = of_match_ptr(da9063_dt_ids),
 	},
 	.probe    = da9063_i2c_probe,
-	.remove   = da9063_i2c_remove,
 	.id_table = da9063_i2c_id,
 };
 
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index 9ab7049977aa..8e6684d884e0 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -94,6 +94,4 @@ struct da9063 {
 int da9063_device_init(struct da9063 *da9063, unsigned int irq);
 int da9063_irq_init(struct da9063 *da9063);
 
-void da9063_device_exit(struct da9063 *da9063);
-
 #endif /* __MFD_DA9063_CORE_H__ */
-- 
cgit v1.2.3


From c727eea92c9232169c0d375309718fb398aaec67 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:43 +0200
Subject: mfd: da9063: Replace DA9063_NUM_IRQ with ARRAY_SIZE

Replace DA9063_NUM_IRQ macro which is not used anywhere with
plain ARRAY_SIZE().

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-irq.c        | 2 +-
 include/linux/mfd/da9063/core.h | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9063-irq.c b/drivers/mfd/da9063-irq.c
index 044bd867f540..579947f83486 100644
--- a/drivers/mfd/da9063-irq.c
+++ b/drivers/mfd/da9063-irq.c
@@ -94,7 +94,7 @@ static const struct regmap_irq da9063_irqs[] = {
 static const struct regmap_irq_chip da9063_irq_chip = {
 	.name = "da9063-irq",
 	.irqs = da9063_irqs,
-	.num_irqs = DA9063_NUM_IRQ,
+	.num_irqs = ARRAY_SIZE(da9063_irqs),
 	.num_regs = 4,
 	.status_base = DA9063_REG_EVENT_A,
 	.mask_base = DA9063_REG_IRQ_MASK_A,
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index 8e6684d884e0..260cd5834861 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -72,9 +72,6 @@ enum da9063_irqs {
 	DA9063_IRQ_GPI15,
 };
 
-#define DA9063_IRQ_BASE_OFFSET	0
-#define DA9063_NUM_IRQ		(DA9063_IRQ_GPI15 + 1 - DA9063_IRQ_BASE_OFFSET)
-
 struct da9063 {
 	/* Device */
 	struct device	*dev;
-- 
cgit v1.2.3


From df7878f9dc77a9f630893811f0401bc021df4fbf Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:44 +0200
Subject: mfd: da9063: Rename PMIC_DA9063 to PMIC_CHIP_ID_DA9063

The PMIC_DA9063 is a complete misnomer, it denotes the value of the
DA9063 chip ID register, so rename it as such. It is also the value
of chip ID register of DA9063L though, so drop the enum as all the
DA9063 "models" share the same chip ID and thus the distinction will
have to be made using DT or otherwise.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Acked-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Steve Twiss <stwiss.opensource@diasemi.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-core.c            | 2 +-
 drivers/mfd/da9063-i2c.c             | 2 +-
 drivers/regulator/da9063-regulator.c | 2 +-
 include/linux/mfd/da9063/core.h      | 4 +---
 4 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c
index f57558590283..c54777cc2f12 100644
--- a/drivers/mfd/da9063-core.c
+++ b/drivers/mfd/da9063-core.c
@@ -192,7 +192,7 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq)
 		dev_err(da9063->dev, "Cannot read chip model id.\n");
 		return -EIO;
 	}
-	if (model != PMIC_DA9063) {
+	if (model != PMIC_CHIP_ID_DA9063) {
 		dev_err(da9063->dev, "Invalid chip model id: 0x%02x\n", model);
 		return -ENODEV;
 	}
diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c
index e9797153bc19..d1fe88777e3f 100644
--- a/drivers/mfd/da9063-i2c.c
+++ b/drivers/mfd/da9063-i2c.c
@@ -181,7 +181,7 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 }
 
 static const struct i2c_device_id da9063_i2c_id[] = {
-	{"da9063", PMIC_DA9063},
+	{ "da9063", PMIC_CHIP_ID_DA9063 },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, da9063_i2c_id);
diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c
index 2df26f36c687..7f4ac0413182 100644
--- a/drivers/regulator/da9063-regulator.c
+++ b/drivers/regulator/da9063-regulator.c
@@ -585,7 +585,7 @@ static struct da9063_dev_model regulators_models[] = {
 	{
 		.regulator_info = da9063_regulator_info,
 		.n_regulators = ARRAY_SIZE(da9063_regulator_info),
-		.dev_model = PMIC_DA9063,
+		.dev_model = PMIC_CHIP_ID_DA9063,
 	},
 	{ }
 };
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index 260cd5834861..e015b065db7e 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -29,9 +29,7 @@
 #define DA9063_DRVNAME_RTC		"da9063-rtc"
 #define DA9063_DRVNAME_VIBRATION	"da9063-vibration"
 
-enum da9063_models {
-	PMIC_DA9063 = 0x61,
-};
+#define PMIC_CHIP_ID_DA9063		0x61
 
 enum da9063_variant_codes {
 	PMIC_DA9063_AD = 0x3,
-- 
cgit v1.2.3


From 492510dd7d39794e809d8218a2839e857c101ce5 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:45 +0200
Subject: mfd: da9063: Replace model with type

The model number stored in the struct da9063 is the same for all
variants of the da9063 since it is the chip ID, which is always
the same. Replace that with a separate identifier instead, which
allows us to discern the DA9063 variants by setting the type
based on either DT match or otherwise.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Acked-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-core.c            | 1 -
 drivers/mfd/da9063-i2c.c             | 5 +++--
 drivers/regulator/da9063-regulator.c | 8 ++++----
 include/linux/mfd/da9063/core.h      | 6 +++++-
 4 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c
index c54777cc2f12..ded59990f18c 100644
--- a/drivers/mfd/da9063-core.c
+++ b/drivers/mfd/da9063-core.c
@@ -215,7 +215,6 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq)
 		return -ENODEV;
 	}
 
-	da9063->model = model;
 	da9063->variant_code = variant_code;
 
 	ret = da9063_irq_init(da9063);
diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c
index d1fe88777e3f..6fe9c3464b41 100644
--- a/drivers/mfd/da9063-i2c.c
+++ b/drivers/mfd/da9063-i2c.c
@@ -146,7 +146,7 @@ static const struct of_device_id da9063_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, da9063_dt_ids);
 static int da9063_i2c_probe(struct i2c_client *i2c,
-	const struct i2c_device_id *id)
+			    const struct i2c_device_id *id)
 {
 	struct da9063 *da9063;
 	int ret;
@@ -158,6 +158,7 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 	i2c_set_clientdata(i2c, da9063);
 	da9063->dev = &i2c->dev;
 	da9063->chip_irq = i2c->irq;
+	da9063->type = id->driver_data;
 
 	if (da9063->variant_code == PMIC_DA9063_AD) {
 		da9063_regmap_config.rd_table = &da9063_ad_readable_table;
@@ -181,7 +182,7 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 }
 
 static const struct i2c_device_id da9063_i2c_id[] = {
-	{ "da9063", PMIC_CHIP_ID_DA9063 },
+	{ "da9063", PMIC_TYPE_DA9063 },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, da9063_i2c_id);
diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c
index 7f4ac0413182..27ba2a1d14a3 100644
--- a/drivers/regulator/da9063-regulator.c
+++ b/drivers/regulator/da9063-regulator.c
@@ -98,7 +98,7 @@ struct da9063_regulator_info {
 struct da9063_dev_model {
 	const struct da9063_regulator_info	*regulator_info;
 	unsigned				n_regulators;
-	unsigned				dev_model;
+	enum da9063_type			type;
 };
 
 /* Single regulator settings */
@@ -585,7 +585,7 @@ static struct da9063_dev_model regulators_models[] = {
 	{
 		.regulator_info = da9063_regulator_info,
 		.n_regulators = ARRAY_SIZE(da9063_regulator_info),
-		.dev_model = PMIC_CHIP_ID_DA9063,
+		.type = PMIC_TYPE_DA9063,
 	},
 	{ }
 };
@@ -741,12 +741,12 @@ static int da9063_regulator_probe(struct platform_device *pdev)
 
 	/* Find regulators set for particular device model */
 	for (model = regulators_models; model->regulator_info; model++) {
-		if (model->dev_model == da9063->model)
+		if (model->type == da9063->type)
 			break;
 	}
 	if (!model->regulator_info) {
 		dev_err(&pdev->dev, "Chip model not recognised (%u)\n",
-			da9063->model);
+			da9063->type);
 		return -ENODEV;
 	}
 
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index e015b065db7e..9e36097adc42 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -31,6 +31,10 @@
 
 #define PMIC_CHIP_ID_DA9063		0x61
 
+enum da9063_type {
+	PMIC_TYPE_DA9063 = 0,
+};
+
 enum da9063_variant_codes {
 	PMIC_DA9063_AD = 0x3,
 	PMIC_DA9063_BB = 0x5,
@@ -73,7 +77,7 @@ enum da9063_irqs {
 struct da9063 {
 	/* Device */
 	struct device	*dev;
-	unsigned short	model;
+	enum da9063_type type;
 	unsigned char	variant_code;
 	unsigned int	flags;
 
-- 
cgit v1.2.3


From 8ae81814cca96a4b7f66dcf1aeb904a0a077f7f4 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:46 +0200
Subject: mfd: da9063: Add DA9063L type

Add type for DA9063L, which is a reduced variant of the DA9063
without RTC block and with less regulators.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Steve Twiss <stwiss.opensource@diasemi.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/da9063/core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index 9e36097adc42..71b09154e2db 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -33,6 +33,7 @@
 
 enum da9063_type {
 	PMIC_TYPE_DA9063 = 0,
+	PMIC_TYPE_DA9063L,
 };
 
 enum da9063_variant_codes {
-- 
cgit v1.2.3


From 30000d80b506819ae5dae926f636412a34594ed6 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 3 May 2018 16:15:17 +0200
Subject: backlight: Remove obsolete comment for ->state

Jani spotted this when reviewing my earlier patch to remove the driver
internal usage of this field in:

  Commit 3cf91adaa594 ("backlight: Nuke BL_CORE_DRIVER1")

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/backlight.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 7fbf0539e14a..0b5897446dca 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -79,7 +79,6 @@ struct backlight_properties {
 	/* Backlight type */
 	enum backlight_type type;
 	/* Flags used to signal drivers of state changes */
-	/* Upper 4 bits are reserved for driver internal use */
 	unsigned int state;
 
 #define BL_CORE_SUSPENDED	(1 << 0)	/* backlight is suspended */
-- 
cgit v1.2.3


From 25db26a91364db00f5a30da2fea8e9afe14a163c Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Tue, 3 Jul 2018 15:42:53 -0700
Subject: net/sched: Introduce the ETF Qdisc

The ETF (Earliest TxTime First) qdisc uses the information added
earlier in this series (the socket option SO_TXTIME and the new
role of sk_buff->tstamp) to schedule packets transmission based
on absolute time.

For some workloads, just bandwidth enforcement is not enough, and
precise control of the transmission of packets is necessary.

Example:

$ tc qdisc replace dev enp2s0 parent root handle 100 mqprio num_tc 3 \
           map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 0

$ tc qdisc add dev enp2s0 parent 100:1 etf delta 100000 \
           clockid CLOCK_TAI

In this example, the Qdisc will provide SW best-effort for the control
of the transmission time to the network adapter, the time stamp in the
socket will be in reference to the clockid CLOCK_TAI and packets
will leave the qdisc "delta" (100000) nanoseconds before its transmission
time.

The ETF qdisc will buffer packets sorted by their txtime. It will drop
packets on enqueue() if their skbuff clockid does not match the clock
reference of the Qdisc. Moreover, on dequeue(), a packet will be dropped
if it expires while being enqueued.

The qdisc also supports the SO_TXTIME deadline mode. For this mode, it
will dequeue a packet as soon as possible and change the skb timestamp
to 'now' during etf_dequeue().

Note that both the qdisc's and the SO_TXTIME ABIs allow for a clockid
to be configured, but it's been decided that usage of CLOCK_TAI should
be enforced until we decide to allow for other clockids to be used.
The rationale here is that PTP times are usually in the TAI scale, thus
no other clocks should be necessary. For now, the qdisc will return
EINVAL if any clocks other than CLOCK_TAI are used.

Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h      |   1 +
 include/uapi/linux/pkt_sched.h |  17 ++
 net/sched/Kconfig              |  11 ++
 net/sched/Makefile             |   1 +
 net/sched/sch_etf.c            | 384 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 414 insertions(+)
 create mode 100644 net/sched/sch_etf.c

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c1ef749b6f9f..f06ee8f91e74 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -798,6 +798,7 @@ enum tc_setup_type {
 	TC_SETUP_QDISC_RED,
 	TC_SETUP_QDISC_PRIO,
 	TC_SETUP_QDISC_MQ,
+	TC_SETUP_QDISC_ETF,
 };
 
 /* These structures hold the attributes of bpf state that are being passed
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index bad3c03bcf43..d5e933ce1447 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -937,4 +937,21 @@ enum {
 
 #define TCA_CBS_MAX (__TCA_CBS_MAX - 1)
 
+
+/* ETF */
+struct tc_etf_qopt {
+	__s32 delta;
+	__s32 clockid;
+	__u32 flags;
+#define TC_ETF_DEADLINE_MODE_ON	BIT(0)
+};
+
+enum {
+	TCA_ETF_UNSPEC,
+	TCA_ETF_PARMS,
+	__TCA_ETF_MAX,
+};
+
+#define TCA_ETF_MAX (__TCA_ETF_MAX - 1)
+
 #endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a01169fb5325..fcc89706745b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -183,6 +183,17 @@ config NET_SCH_CBS
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_cbs.
 
+config NET_SCH_ETF
+	tristate "Earliest TxTime First (ETF)"
+	help
+	  Say Y here if you want to use the Earliest TxTime First (ETF) packet
+	  scheduling algorithm.
+
+	  See the top of <file:net/sched/sch_etf.c> for more details.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_etf.
+
 config NET_SCH_GRED
 	tristate "Generic Random Early Detection (GRED)"
 	---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8811d3804878..9a5a7077d217 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
 obj-$(CONFIG_NET_SCH_CBS)	+= sch_cbs.o
+obj-$(CONFIG_NET_SCH_ETF)	+= sch_etf.o
 
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
new file mode 100644
index 000000000000..4b7f4903ac17
--- /dev/null
+++ b/net/sched/sch_etf.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
+ *
+ * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
+ *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/rbtree.h>
+#include <linux/skbuff.h>
+#include <linux/posix-timers.h>
+#include <net/netlink.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+
+#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
+
+struct etf_sched_data {
+	bool deadline_mode;
+	int clockid;
+	int queue;
+	s32 delta; /* in ns */
+	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
+	struct rb_root head;
+	struct qdisc_watchdog watchdog;
+	ktime_t (*get_time)(void);
+};
+
+static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
+	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
+};
+
+static inline int validate_input_params(struct tc_etf_qopt *qopt,
+					struct netlink_ext_ack *extack)
+{
+	/* Check if params comply to the following rules:
+	 *	* Clockid and delta must be valid.
+	 *
+	 *	* Dynamic clockids are not supported.
+	 *
+	 *	* Delta must be a positive integer.
+	 */
+	if (qopt->clockid < 0) {
+		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
+		return -ENOTSUPP;
+	}
+
+	if (qopt->clockid != CLOCK_TAI) {
+		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
+		return -EINVAL;
+	}
+
+	if (qopt->delta < 0) {
+		NL_SET_ERR_MSG(extack, "Delta must be positive");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	ktime_t txtime = nskb->tstamp;
+	struct sock *sk = nskb->sk;
+	ktime_t now;
+
+	if (!sk)
+		return false;
+
+	if (!sock_flag(sk, SOCK_TXTIME))
+		return false;
+
+	/* We don't perform crosstimestamping.
+	 * Drop if packet's clockid differs from qdisc's.
+	 */
+	if (sk->sk_clockid != q->clockid)
+		return false;
+
+	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
+		return false;
+
+	now = q->get_time();
+	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
+		return false;
+
+	return true;
+}
+
+static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct rb_node *p;
+
+	p = rb_first(&q->head);
+	if (!p)
+		return NULL;
+
+	return rb_to_skb(p);
+}
+
+static void reset_watchdog(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb = etf_peek_timesortedlist(sch);
+	ktime_t next;
+
+	if (!skb)
+		return;
+
+	next = ktime_sub_ns(skb->tstamp, q->delta);
+	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
+}
+
+static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
+				      struct sk_buff **to_free)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct rb_node **p = &q->head.rb_node, *parent = NULL;
+	ktime_t txtime = nskb->tstamp;
+
+	if (!is_packet_valid(sch, nskb))
+		return qdisc_drop(nskb, sch, to_free);
+
+	while (*p) {
+		struct sk_buff *skb;
+
+		parent = *p;
+		skb = rb_to_skb(parent);
+		if (ktime_after(txtime, skb->tstamp))
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+	rb_link_node(&nskb->rbnode, parent, p);
+	rb_insert_color(&nskb->rbnode, &q->head);
+
+	qdisc_qstats_backlog_inc(sch, nskb);
+	sch->q.qlen++;
+
+	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
+	reset_watchdog(sch);
+
+	return NET_XMIT_SUCCESS;
+}
+
+static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
+				 bool drop)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+
+	rb_erase(&skb->rbnode, &q->head);
+
+	/* The rbnode field in the skb re-uses these fields, now that
+	 * we are done with the rbnode, reset them.
+	 */
+	skb->next = NULL;
+	skb->prev = NULL;
+	skb->dev = qdisc_dev(sch);
+
+	qdisc_qstats_backlog_dec(sch, skb);
+
+	if (drop) {
+		struct sk_buff *to_free = NULL;
+
+		qdisc_drop(skb, sch, &to_free);
+		kfree_skb_list(to_free);
+		qdisc_qstats_overlimit(sch);
+	} else {
+		qdisc_bstats_update(sch, skb);
+
+		q->last = skb->tstamp;
+	}
+
+	sch->q.qlen--;
+}
+
+static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	ktime_t now, next;
+
+	skb = etf_peek_timesortedlist(sch);
+	if (!skb)
+		return NULL;
+
+	now = q->get_time();
+
+	/* Drop if packet has expired while in queue. */
+	/* FIXME: Must return error on the socket's error queue */
+	if (ktime_before(skb->tstamp, now)) {
+		timesortedlist_erase(sch, skb, true);
+		skb = NULL;
+		goto out;
+	}
+
+	/* When in deadline mode, dequeue as soon as possible and change the
+	 * txtime from deadline to (now + delta).
+	 */
+	if (q->deadline_mode) {
+		timesortedlist_erase(sch, skb, false);
+		skb->tstamp = now;
+		goto out;
+	}
+
+	next = ktime_sub_ns(skb->tstamp, q->delta);
+
+	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
+	if (ktime_after(now, next))
+		timesortedlist_erase(sch, skb, false);
+	else
+		skb = NULL;
+
+out:
+	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
+	reset_watchdog(sch);
+
+	return skb;
+}
+
+static int etf_init(struct Qdisc *sch, struct nlattr *opt,
+		    struct netlink_ext_ack *extack)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct nlattr *tb[TCA_ETF_MAX + 1];
+	struct tc_etf_qopt *qopt;
+	int err;
+
+	if (!opt) {
+		NL_SET_ERR_MSG(extack,
+			       "Missing ETF qdisc options which are mandatory");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_ETF_PARMS]) {
+		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
+		return -EINVAL;
+	}
+
+	qopt = nla_data(tb[TCA_ETF_PARMS]);
+
+	pr_debug("delta %d clockid %d deadline %s\n",
+		 qopt->delta, qopt->clockid,
+		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
+
+	err = validate_input_params(qopt, extack);
+	if (err < 0)
+		return err;
+
+	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
+
+	/* Everything went OK, save the parameters used. */
+	q->delta = qopt->delta;
+	q->clockid = qopt->clockid;
+	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
+
+	switch (q->clockid) {
+	case CLOCK_REALTIME:
+		q->get_time = ktime_get_real;
+		break;
+	case CLOCK_MONOTONIC:
+		q->get_time = ktime_get;
+		break;
+	case CLOCK_BOOTTIME:
+		q->get_time = ktime_get_boottime;
+		break;
+	case CLOCK_TAI:
+		q->get_time = ktime_get_clocktai;
+		break;
+	default:
+		NL_SET_ERR_MSG(extack, "Clockid is not supported");
+		return -ENOTSUPP;
+	}
+
+	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
+
+	return 0;
+}
+
+static void timesortedlist_clear(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct rb_node *p = rb_first(&q->head);
+
+	while (p) {
+		struct sk_buff *skb = rb_to_skb(p);
+
+		p = rb_next(p);
+
+		rb_erase(&skb->rbnode, &q->head);
+		rtnl_kfree_skbs(skb, skb);
+		sch->q.qlen--;
+	}
+}
+
+static void etf_reset(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+
+	/* Only cancel watchdog if it's been initialized. */
+	if (q->watchdog.qdisc == sch)
+		qdisc_watchdog_cancel(&q->watchdog);
+
+	/* No matter which mode we are on, it's safe to clear both lists. */
+	timesortedlist_clear(sch);
+	__qdisc_reset_queue(&sch->q);
+
+	sch->qstats.backlog = 0;
+	sch->q.qlen = 0;
+
+	q->last = 0;
+}
+
+static void etf_destroy(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+
+	/* Only cancel watchdog if it's been initialized. */
+	if (q->watchdog.qdisc == sch)
+		qdisc_watchdog_cancel(&q->watchdog);
+}
+
+static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct tc_etf_qopt opt = { };
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	opt.delta = q->delta;
+	opt.clockid = q->clockid;
+	if (q->deadline_mode)
+		opt.flags |= TC_ETF_DEADLINE_MODE_ON;
+
+	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, nest);
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
+	.id		=	"etf",
+	.priv_size	=	sizeof(struct etf_sched_data),
+	.enqueue	=	etf_enqueue_timesortedlist,
+	.dequeue	=	etf_dequeue_timesortedlist,
+	.peek		=	etf_peek_timesortedlist,
+	.init		=	etf_init,
+	.reset		=	etf_reset,
+	.destroy	=	etf_destroy,
+	.dump		=	etf_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init etf_module_init(void)
+{
+	return register_qdisc(&etf_qdisc_ops);
+}
+
+static void __exit etf_module_exit(void)
+{
+	unregister_qdisc(&etf_qdisc_ops);
+}
+module_init(etf_module_init)
+module_exit(etf_module_exit)
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From b5cb15d9372abc9adc4e844c0c1bf594ca6a7695 Mon Sep 17 00:00:00 2001
From: Chris von Recklinghausen <crecklin@redhat.com>
Date: Tue, 3 Jul 2018 15:43:08 -0400
Subject: usercopy: Allow boot cmdline disabling of hardening

Enabling HARDENED_USERCOPY may cause measurable regressions in networking
performance: up to 8% under UDP flood.

I ran a small packet UDP flood using pktgen vs. a host b2b connected. On
the receiver side the UDP packets are processed by a simple user space
process that just reads and drops them:

https://github.com/netoptimizer/network-testing/blob/master/src/udp_sink.c

Not very useful from a functional PoV, but it helps to pin-point
bottlenecks in the networking stack.

When running a kernel with CONFIG_HARDENED_USERCOPY=y, I see a 5-8%
regression in the receive tput, compared to the same kernel without this
option enabled.

With CONFIG_HARDENED_USERCOPY=y, perf shows ~6% of CPU time spent
cumulatively in __check_object_size (~4%) and __virt_addr_valid (~2%).

The call-chain is:

__GI___libc_recvfrom
entry_SYSCALL_64_after_hwframe
do_syscall_64
__x64_sys_recvfrom
__sys_recvfrom
inet_recvmsg
udp_recvmsg
__check_object_size

udp_recvmsg() actually calls copy_to_iter() (inlined) and the latters
calls check_copy_size() (again, inlined).

A generic distro may want to enable HARDENED_USERCOPY in their default
kernel config, but at the same time, such distro may want to be able to
avoid the performance penalties in with the default configuration and
disable the stricter check on a per-boot basis.

This change adds a boot parameter that conditionally disables
HARDENED_USERCOPY via "hardened_usercopy=off".

Signed-off-by: Chris von Recklinghausen <crecklin@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 11 +++++++++++
 include/linux/jump_label.h                      |  6 ++++++
 mm/usercopy.c                                   | 25 +++++++++++++++++++++++++
 3 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index efc7aa7a0670..560d4dc66f02 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -816,6 +816,17 @@
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
+	hardened_usercopy=
+                        [KNL] Under CONFIG_HARDENED_USERCOPY, whether
+                        hardening is enabled for this boot. Hardened
+                        usercopy checking is used to protect the kernel
+                        from reading or writing beyond known memory
+                        allocation boundaries as a proactive defense
+                        against bounds-checking flaws in the kernel's
+                        copy_to_user()/copy_from_user() interface.
+                on      Perform hardened usercopy checks (default).
+                off     Disable hardened usercopy checks.
+
 	disable_radix	[PPC]
 			Disable RADIX MMU mode on POWER9
 
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index b46b541c67c4..1a0b6f17a5d6 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -299,12 +299,18 @@ struct static_key_false {
 #define DEFINE_STATIC_KEY_TRUE(name)	\
 	struct static_key_true name = STATIC_KEY_TRUE_INIT
 
+#define DEFINE_STATIC_KEY_TRUE_RO(name)	\
+	struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT
+
 #define DECLARE_STATIC_KEY_TRUE(name)	\
 	extern struct static_key_true name
 
 #define DEFINE_STATIC_KEY_FALSE(name)	\
 	struct static_key_false name = STATIC_KEY_FALSE_INIT
 
+#define DEFINE_STATIC_KEY_FALSE_RO(name)	\
+	struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT
+
 #define DECLARE_STATIC_KEY_FALSE(name)	\
 	extern struct static_key_false name
 
diff --git a/mm/usercopy.c b/mm/usercopy.c
index e9e9325f7638..852eb4e53f06 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -20,6 +20,8 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/thread_info.h>
+#include <linux/atomic.h>
+#include <linux/jump_label.h>
 #include <asm/sections.h>
 
 /*
@@ -240,6 +242,8 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
 	}
 }
 
+static DEFINE_STATIC_KEY_FALSE_RO(bypass_usercopy_checks);
+
 /*
  * Validates that the given object is:
  * - not bogus address
@@ -248,6 +252,9 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
  */
 void __check_object_size(const void *ptr, unsigned long n, bool to_user)
 {
+	if (static_branch_unlikely(&bypass_usercopy_checks))
+		return;
+
 	/* Skip all tests if size is zero. */
 	if (!n)
 		return;
@@ -279,3 +286,21 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user)
 	check_kernel_text_object((const unsigned long)ptr, n, to_user);
 }
 EXPORT_SYMBOL(__check_object_size);
+
+static bool enable_checks __initdata = true;
+
+static int __init parse_hardened_usercopy(char *str)
+{
+	return strtobool(str, &enable_checks);
+}
+
+__setup("hardened_usercopy=", parse_hardened_usercopy);
+
+static int __init set_hardened_usercopy(void)
+{
+	if (enable_checks == false)
+		static_branch_enable(&bypass_usercopy_checks);
+	return 1;
+}
+
+late_initcall(set_hardened_usercopy);
-- 
cgit v1.2.3


From 4d4fb5dc988a36307711be292bde6e39b8bdbceb Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Tue, 19 Jun 2018 08:47:22 +0300
Subject: net/mlx5: Limit scope of dump_fill_mkey function

mlx5_core_dump_fill_mkey() is going to be used in next
patch in IB and doesn't need to be visible to whole
mlx5_core. Move that command to mlx5_ib.

Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cmd.c             | 15 +++++++++++++++
 drivers/infiniband/hw/mlx5/cmd.h             |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/mr.c | 17 -----------------
 include/linux/mlx5/driver.h                  |  2 --
 4 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 188512bf46e6..ccc0b5d06a7d 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -32,6 +32,21 @@
 
 #include "cmd.h"
 
+int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey)
+{
+	u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)]   = {0};
+	int err;
+
+	MLX5_SET(query_special_contexts_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*mkey = MLX5_GET(query_special_contexts_out, out,
+				 dump_fill_mkey);
+	return err;
+}
+
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
 {
 	u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index e7206c8a8011..98ea4648c655 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,6 +37,7 @@
 #include <linux/kernel.h>
 #include <linux/mlx5/driver.h>
 
+int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
 			       void *out, int out_size);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index f4f02f775c93..0670165afd5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -146,23 +146,6 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
 }
 EXPORT_SYMBOL(mlx5_core_query_mkey);
 
-int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
-			     u32 *mkey)
-{
-	u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)]   = {0};
-	int err;
-
-	MLX5_SET(query_special_contexts_in, in, opcode,
-		 MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (!err)
-		*mkey = MLX5_GET(query_special_contexts_out, out,
-				 dump_fill_mkey);
-	return err;
-}
-EXPORT_SYMBOL(mlx5_core_dump_fill_mkey);
-
 static inline u32 mlx5_get_psv(u32 *out, int psv_index)
 {
 	switch (psv_index) {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 80cbb7fdce4a..1cb1c0317b77 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1067,8 +1067,6 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
 			   struct mlx5_core_mkey *mkey);
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
 			 u32 *out, int outlen);
-int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
-			     u32 *mkey);
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
 int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
-- 
cgit v1.2.3


From b183ee27f5fb07c8428e2fe45d5f35dac611c45d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 19 Jun 2018 08:47:23 +0300
Subject: net/mlx5: Add hardware definitions for dump_fill_mkey

MLX5 IB HCA offers the memory key, dump_fill_mkey to boost
performance by forcing local HCA operations to skip the PCI bus
access,

This patch adds needed hardware definitions.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index deb3a459a22e..1853e7fd6924 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -885,7 +885,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_eq_sz[0x8];
 	u8         reserved_at_e8[0x2];
 	u8         log_max_mkey[0x6];
-	u8         reserved_at_f0[0xc];
+	u8         reserved_at_f0[0x8];
+	u8         dump_fill_mkey[0x1];
+	u8         reserved_at_f9[0x3];
 	u8         log_max_eq[0x4];
 
 	u8         max_indirection[0x8];
-- 
cgit v1.2.3


From 7a872b6fb7fdc4213e9bb4e1c83a65e6b8af7ebd Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Wed, 4 Jul 2018 20:19:06 -0700
Subject: soc: ti: wkup_m3_ipc: Add rtc_only with ddr in self refresh mode
 support

Adds rtc_only support. This needs resume function to shutdown and
reboot the m3.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
---
 drivers/soc/ti/wkup_m3_ipc.c | 41 +++++++++++++++++++++++++++++++++++++++++
 include/linux/wkup_m3_ipc.h  |  3 +++
 2 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c
index 369aef5e7228..b732c39e2754 100644
--- a/drivers/soc/ti/wkup_m3_ipc.c
+++ b/drivers/soc/ti/wkup_m3_ipc.c
@@ -329,12 +329,24 @@ static int wkup_m3_finish_low_power(struct wkup_m3_ipc *m3_ipc)
 	return 0;
 }
 
+/**
+ * wkup_m3_set_rtc_only - Set the rtc_only flag
+ * @wkup_m3_wakeup: struct wkup_m3_wakeup_src * gets assigned the
+ *                  wakeup src value
+ */
+static void wkup_m3_set_rtc_only(struct wkup_m3_ipc *m3_ipc)
+{
+	if (m3_ipc_state)
+		m3_ipc_state->is_rtc_only = true;
+}
+
 static struct wkup_m3_ipc_ops ipc_ops = {
 	.set_mem_type = wkup_m3_set_mem_type,
 	.set_resume_address = wkup_m3_set_resume_address,
 	.prepare_low_power = wkup_m3_prepare_low_power,
 	.finish_low_power = wkup_m3_finish_low_power,
 	.request_pm_status = wkup_m3_request_pm_status,
+	.set_rtc_only = wkup_m3_set_rtc_only,
 };
 
 /**
@@ -484,6 +496,32 @@ static int wkup_m3_ipc_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int wkup_m3_ipc_suspend(struct device *dev)
+{
+	/*
+	 * Nothing needs to be done on suspend even with rtc_only flag set
+	 */
+	return 0;
+}
+
+static int wkup_m3_ipc_resume(struct device *dev)
+{
+	if (m3_ipc_state->is_rtc_only) {
+		rproc_shutdown(m3_ipc_state->rproc);
+		rproc_boot(m3_ipc_state->rproc);
+	}
+
+	m3_ipc_state->is_rtc_only = false;
+
+	return 0;
+}
+
+static const struct dev_pm_ops wkup_m3_ipc_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(wkup_m3_ipc_suspend, wkup_m3_ipc_resume)
+};
+#endif
+
 static const struct of_device_id wkup_m3_ipc_of_match[] = {
 	{ .compatible = "ti,am3352-wkup-m3-ipc", },
 	{ .compatible = "ti,am4372-wkup-m3-ipc", },
@@ -497,6 +535,9 @@ static struct platform_driver wkup_m3_ipc_driver = {
 	.driver = {
 		.name = "wkup_m3_ipc",
 		.of_match_table = wkup_m3_ipc_of_match,
+#ifdef CONFIG_PM
+		.pm = &wkup_m3_ipc_pm_ops,
+#endif
 	},
 };
 
diff --git a/include/linux/wkup_m3_ipc.h b/include/linux/wkup_m3_ipc.h
index d6ba7d39a62f..d639df15e8ba 100644
--- a/include/linux/wkup_m3_ipc.h
+++ b/include/linux/wkup_m3_ipc.h
@@ -40,6 +40,7 @@ struct wkup_m3_ipc {
 	struct mbox_chan *mbox;
 
 	struct wkup_m3_ipc_ops *ops;
+	int is_rtc_only;
 };
 
 struct wkup_m3_ipc_ops {
@@ -48,8 +49,10 @@ struct wkup_m3_ipc_ops {
 	int (*prepare_low_power)(struct wkup_m3_ipc *m3_ipc, int state);
 	int (*finish_low_power)(struct wkup_m3_ipc *m3_ipc);
 	int (*request_pm_status)(struct wkup_m3_ipc *m3_ipc);
+	void (*set_rtc_only)(struct wkup_m3_ipc *m3_ipc);
 };
 
 struct wkup_m3_ipc *wkup_m3_ipc_get(void);
 void wkup_m3_ipc_put(struct wkup_m3_ipc *m3_ipc);
+void wkup_m3_set_rtc_only_mode(void);
 #endif /* _LINUX_WKUP_M3_IPC_H */
-- 
cgit v1.2.3


From ec93b62fec9c7138d2b75334d192ecc12376f885 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Wed, 4 Jul 2018 20:19:06 -0700
Subject: soc: ti: wkup_m3_ipc: Add wkup_m3_request_wake_src

Add wkup_m3_request_wake_src to allow users to get the name of
the wakeup source after a DeepSleep or Standby transition.

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
---
 drivers/soc/ti/wkup_m3_ipc.c | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/wkup_m3_ipc.h  |  6 ++++++
 2 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c
index b732c39e2754..6840688236b9 100644
--- a/drivers/soc/ti/wkup_m3_ipc.c
+++ b/drivers/soc/ti/wkup_m3_ipc.c
@@ -46,6 +46,7 @@
 #define M3_BASELINE_VERSION		0x191
 #define M3_STATUS_RESP_MASK		(0xffff << 16)
 #define M3_FW_VERSION_MASK		0xffff
+#define M3_WAKE_SRC_MASK		0xff
 
 #define M3_STATE_UNKNOWN		0
 #define M3_STATE_RESET			1
@@ -55,6 +56,23 @@
 
 static struct wkup_m3_ipc *m3_ipc_state;
 
+static const struct wkup_m3_wakeup_src wakeups[] = {
+	{.irq_nr = 35,	.src = "USB0_PHY"},
+	{.irq_nr = 36,	.src = "USB1_PHY"},
+	{.irq_nr = 40,	.src = "I2C0"},
+	{.irq_nr = 41,	.src = "RTC Timer"},
+	{.irq_nr = 42,	.src = "RTC Alarm"},
+	{.irq_nr = 43,	.src = "Timer0"},
+	{.irq_nr = 44,	.src = "Timer1"},
+	{.irq_nr = 45,	.src = "UART"},
+	{.irq_nr = 46,	.src = "GPIO0"},
+	{.irq_nr = 48,	.src = "MPU_WAKE"},
+	{.irq_nr = 49,	.src = "WDT0"},
+	{.irq_nr = 50,	.src = "WDT1"},
+	{.irq_nr = 51,	.src = "ADC_TSC"},
+	{.irq_nr = 0,	.src = "Unknown"},
+};
+
 static void am33xx_txev_eoi(struct wkup_m3_ipc *m3_ipc)
 {
 	writel(AM33XX_M3_TXEV_ACK,
@@ -329,6 +347,26 @@ static int wkup_m3_finish_low_power(struct wkup_m3_ipc *m3_ipc)
 	return 0;
 }
 
+/**
+ * wkup_m3_request_wake_src - Get the wakeup source info passed from wkup_m3
+ * @m3_ipc: Pointer to wkup_m3_ipc context
+ */
+static const char *wkup_m3_request_wake_src(struct wkup_m3_ipc *m3_ipc)
+{
+	unsigned int wakeup_src_idx;
+	int j, val;
+
+	val = wkup_m3_ctrl_ipc_read(m3_ipc, 6);
+
+	wakeup_src_idx = val & M3_WAKE_SRC_MASK;
+
+	for (j = 0; j < ARRAY_SIZE(wakeups) - 1; j++) {
+		if (wakeups[j].irq_nr == wakeup_src_idx)
+			return wakeups[j].src;
+	}
+	return wakeups[j].src;
+}
+
 /**
  * wkup_m3_set_rtc_only - Set the rtc_only flag
  * @wkup_m3_wakeup: struct wkup_m3_wakeup_src * gets assigned the
@@ -346,6 +384,7 @@ static struct wkup_m3_ipc_ops ipc_ops = {
 	.prepare_low_power = wkup_m3_prepare_low_power,
 	.finish_low_power = wkup_m3_finish_low_power,
 	.request_pm_status = wkup_m3_request_pm_status,
+	.request_wake_src = wkup_m3_request_wake_src,
 	.set_rtc_only = wkup_m3_set_rtc_only,
 };
 
diff --git a/include/linux/wkup_m3_ipc.h b/include/linux/wkup_m3_ipc.h
index d639df15e8ba..e497e621dbb7 100644
--- a/include/linux/wkup_m3_ipc.h
+++ b/include/linux/wkup_m3_ipc.h
@@ -43,12 +43,18 @@ struct wkup_m3_ipc {
 	int is_rtc_only;
 };
 
+struct wkup_m3_wakeup_src {
+	int irq_nr;
+	char src[10];
+};
+
 struct wkup_m3_ipc_ops {
 	void (*set_mem_type)(struct wkup_m3_ipc *m3_ipc, int mem_type);
 	void (*set_resume_address)(struct wkup_m3_ipc *m3_ipc, void *addr);
 	int (*prepare_low_power)(struct wkup_m3_ipc *m3_ipc, int state);
 	int (*finish_low_power)(struct wkup_m3_ipc *m3_ipc);
 	int (*request_pm_status)(struct wkup_m3_ipc *m3_ipc);
+	const char *(*request_wake_src)(struct wkup_m3_ipc *m3_ipc);
 	void (*set_rtc_only)(struct wkup_m3_ipc *m3_ipc);
 };
 
-- 
cgit v1.2.3


From 6312fe77751f57d4fa2b28abeef84c6a95c28136 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Thu, 5 Jul 2018 14:34:32 +0800
Subject: net: limit each hash list length to MAX_GRO_SKBS

After commit 07d78363dcff ("net: Convert NAPI gro list into a small hash
table.")' there is 8 hash buckets, which allows more flows to be held for
merging.  but MAX_GRO_SKBS, the total held skb for merging, is 8 skb still,
limit the hash table performance.

keep MAX_GRO_SKBS as 8 skb, but limit each hash list length to 8 skb, not
the total 8 skb

Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +++++-
 net/core/dev.c            | 56 +++++++++++++++++++----------------------------
 2 files changed, 29 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f06ee8f91e74..b683971e500d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -302,6 +302,11 @@ struct netdev_boot_setup {
 
 int __init netdev_boot_setup(char *str);
 
+struct gro_list {
+	struct list_head	list;
+	int			count;
+};
+
 /*
  * Structure for NAPI scheduling similar to tasklet but with weighting
  */
@@ -323,7 +328,7 @@ struct napi_struct {
 	int			poll_owner;
 #endif
 	struct net_device	*dev;
-	struct list_head	gro_hash[GRO_HASH_BUCKETS];
+	struct gro_list		gro_hash[GRO_HASH_BUCKETS];
 	struct sk_buff		*skb;
 	struct hrtimer		timer;
 	struct list_head	dev_list;
diff --git a/net/core/dev.c b/net/core/dev.c
index 7e6a2f66db5c..89825c1eccdc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -149,7 +149,6 @@
 
 #include "net-sysfs.h"
 
-/* Instead of increasing this, you should create a hash table. */
 #define MAX_GRO_SKBS 8
 
 /* This should be increased if a protocol with a bigger head is added. */
@@ -5151,9 +5150,10 @@ out:
 	return netif_receive_skb_internal(skb);
 }
 
-static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *head,
+static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
 				   bool flush_old)
 {
+	struct list_head *head = &napi->gro_hash[index].list;
 	struct sk_buff *skb, *p;
 
 	list_for_each_entry_safe_reverse(skb, p, head, list) {
@@ -5162,22 +5162,20 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *h
 		list_del_init(&skb->list);
 		napi_gro_complete(skb);
 		napi->gro_count--;
+		napi->gro_hash[index].count--;
 	}
 }
 
-/* napi->gro_hash contains packets ordered by age.
+/* napi->gro_hash[].list contains packets ordered by age.
  * youngest packets at the head of it.
  * Complete skbs in reverse order to reduce latencies.
  */
 void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
-	int i;
-
-	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
-		struct list_head *head = &napi->gro_hash[i];
+	u32 i;
 
-		__napi_gro_flush_chain(napi, head, flush_old);
-	}
+	for (i = 0; i < GRO_HASH_BUCKETS; i++)
+		__napi_gro_flush_chain(napi, i, flush_old);
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
@@ -5189,7 +5187,7 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
 	struct list_head *head;
 	struct sk_buff *p;
 
-	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)];
+	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
 	list_for_each_entry(p, head, list) {
 		unsigned long diffs;
 
@@ -5257,27 +5255,13 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
 	}
 }
 
-static void gro_flush_oldest(struct napi_struct *napi)
+static void gro_flush_oldest(struct list_head *head)
 {
-	struct sk_buff *oldest = NULL;
-	unsigned long age = jiffies;
-	int i;
-
-	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
-		struct list_head *head = &napi->gro_hash[i];
-		struct sk_buff *skb;
-
-		if (list_empty(head))
-			continue;
+	struct sk_buff *oldest;
 
-		skb = list_last_entry(head, struct sk_buff, list);
-		if (!oldest || time_before(NAPI_GRO_CB(skb)->age, age)) {
-			oldest = skb;
-			age = NAPI_GRO_CB(skb)->age;
-		}
-	}
+	oldest = list_last_entry(head, struct sk_buff, list);
 
-	/* We are called with napi->gro_count >= MAX_GRO_SKBS, so this is
+	/* We are called with head length >= MAX_GRO_SKBS, so this is
 	 * impossible.
 	 */
 	if (WARN_ON_ONCE(!oldest))
@@ -5292,6 +5276,7 @@ static void gro_flush_oldest(struct napi_struct *napi)
 
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
 	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
@@ -5358,6 +5343,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		list_del_init(&pp->list);
 		napi_gro_complete(pp);
 		napi->gro_count--;
+		napi->gro_hash[hash].count--;
 	}
 
 	if (same_flow)
@@ -5366,10 +5352,11 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (NAPI_GRO_CB(skb)->flush)
 		goto normal;
 
-	if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
-		gro_flush_oldest(napi);
+	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
+		gro_flush_oldest(gro_head);
 	} else {
 		napi->gro_count++;
+		napi->gro_hash[hash].count++;
 	}
 	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
@@ -6006,8 +5993,10 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
 	napi->gro_count = 0;
-	for (i = 0; i < GRO_HASH_BUCKETS; i++)
-		INIT_LIST_HEAD(&napi->gro_hash[i]);
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		INIT_LIST_HEAD(&napi->gro_hash[i].list);
+		napi->gro_hash[i].count = 0;
+	}
 	napi->skb = NULL;
 	napi->poll = poll;
 	if (weight > NAPI_POLL_WEIGHT)
@@ -6047,8 +6036,9 @@ static void flush_gro_hash(struct napi_struct *napi)
 	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
 		struct sk_buff *skb, *n;
 
-		list_for_each_entry_safe(skb, n, &napi->gro_hash[i], list)
+		list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list)
 			kfree_skb(skb);
+		napi->gro_hash[i].count = 0;
 	}
 }
 
-- 
cgit v1.2.3


From 0380cf7dbaca75c524e34b30979f0806124fa8e6 Mon Sep 17 00:00:00 2001
From: pascal paillet <p.paillet@st.com>
Date: Thu, 5 Jul 2018 14:25:57 +0000
Subject: regulator: core: Change suspend_late to suspend

Change suspend_late ops to suspend normal ops. The goal is to avoid
requesting all the regulator drivers to be operational in suspend late
phase.

Signed-off-by: pascal paillet <p.paillet@st.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c         | 26 +++++++++++++-------------
 include/linux/regulator/driver.h |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 6ed568b96c0e..da9b0fed8330 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -4441,7 +4441,7 @@ void regulator_unregister(struct regulator_dev *rdev)
 EXPORT_SYMBOL_GPL(regulator_unregister);
 
 #ifdef CONFIG_SUSPEND
-static int _regulator_suspend_late(struct device *dev, void *data)
+static int _regulator_suspend(struct device *dev, void *data)
 {
 	struct regulator_dev *rdev = dev_to_rdev(dev);
 	suspend_state_t *state = data;
@@ -4455,20 +4455,20 @@ static int _regulator_suspend_late(struct device *dev, void *data)
 }
 
 /**
- * regulator_suspend_late - prepare regulators for system wide suspend
+ * regulator_suspend - prepare regulators for system wide suspend
  * @state: system suspend state
  *
  * Configure each regulator with it's suspend operating parameters for state.
  */
-static int regulator_suspend_late(struct device *dev)
+static int regulator_suspend(struct device *dev)
 {
 	suspend_state_t state = pm_suspend_target_state;
 
 	return class_for_each_device(&regulator_class, NULL, &state,
-				     _regulator_suspend_late);
+				     _regulator_suspend);
 }
 
-static int _regulator_resume_early(struct device *dev, void *data)
+static int _regulator_resume(struct device *dev, void *data)
 {
 	int ret = 0;
 	struct regulator_dev *rdev = dev_to_rdev(dev);
@@ -4481,35 +4481,35 @@ static int _regulator_resume_early(struct device *dev, void *data)
 
 	regulator_lock(rdev);
 
-	if (rdev->desc->ops->resume_early &&
+	if (rdev->desc->ops->resume &&
 	    (rstate->enabled == ENABLE_IN_SUSPEND ||
 	     rstate->enabled == DISABLE_IN_SUSPEND))
-		ret = rdev->desc->ops->resume_early(rdev);
+		ret = rdev->desc->ops->resume(rdev);
 
 	regulator_unlock(rdev);
 
 	return ret;
 }
 
-static int regulator_resume_early(struct device *dev)
+static int regulator_resume(struct device *dev)
 {
 	suspend_state_t state = pm_suspend_target_state;
 
 	return class_for_each_device(&regulator_class, NULL, &state,
-				     _regulator_resume_early);
+				     _regulator_resume);
 }
 
 #else /* !CONFIG_SUSPEND */
 
-#define regulator_suspend_late	NULL
-#define regulator_resume_early	NULL
+#define regulator_suspend	NULL
+#define regulator_resume	NULL
 
 #endif /* !CONFIG_SUSPEND */
 
 #ifdef CONFIG_PM
 static const struct dev_pm_ops __maybe_unused regulator_pm_ops = {
-	.suspend_late	= regulator_suspend_late,
-	.resume_early	= regulator_resume_early,
+	.suspend	= regulator_suspend,
+	.resume		= regulator_resume,
 };
 #endif
 
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index dea96ee39fdc..0fd8fbb74763 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -220,7 +220,7 @@ struct regulator_ops {
 	/* set regulator suspend operating mode (defined in consumer.h) */
 	int (*set_suspend_mode) (struct regulator_dev *, unsigned int mode);
 
-	int (*resume_early)(struct regulator_dev *rdev);
+	int (*resume)(struct regulator_dev *rdev);
 
 	int (*set_pull_down) (struct regulator_dev *);
 };
-- 
cgit v1.2.3


From d8842211b6f63d3f069df973d137de0a85964965 Mon Sep 17 00:00:00 2001
From: pascal paillet <p.paillet@st.com>
Date: Thu, 5 Jul 2018 14:25:56 +0000
Subject: driver core: Add device_link_remove function

Device_link_remove uses the same arguments than device_link_add. The Goal
is to avoid storing the link pointer.

Signed-off-by: pascal paillet <p.paillet@st.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/core.c    | 30 ++++++++++++++++++++++++++++++
 include/linux/device.h |  1 +
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 36622b52e419..3b380b1f2768 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -365,6 +365,36 @@ void device_link_del(struct device_link *link)
 }
 EXPORT_SYMBOL_GPL(device_link_del);
 
+/**
+ * device_link_remove - remove a link between two devices.
+ * @consumer: Consumer end of the link.
+ * @supplier: Supplier end of the link.
+ *
+ * The caller must ensure proper synchronization of this function with runtime
+ * PM.
+ */
+void device_link_remove(void *consumer, struct device *supplier)
+{
+	struct device_link *link;
+
+	if (WARN_ON(consumer == supplier))
+		return;
+
+	device_links_write_lock();
+	device_pm_lock();
+
+	list_for_each_entry(link, &supplier->links.consumers, s_node) {
+		if (link->consumer == consumer) {
+			kref_put(&link->kref, __device_link_del);
+			break;
+		}
+	}
+
+	device_pm_unlock();
+	device_links_write_unlock();
+}
+EXPORT_SYMBOL_GPL(device_link_remove);
+
 static void device_links_missing_supplier(struct device *dev)
 {
 	struct device_link *link;
diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..9c1c3b1d5e11 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1316,6 +1316,7 @@ extern const char *dev_driver_string(const struct device *dev);
 struct device_link *device_link_add(struct device *consumer,
 				    struct device *supplier, u32 flags);
 void device_link_del(struct device_link *link);
+void device_link_remove(void *consumer, struct device *supplier);
 
 #ifdef CONFIG_PRINTK
 
-- 
cgit v1.2.3


From 2282e125a406e09331c5a785e3df29035c99a607 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 2 Jul 2018 22:05:21 +0200
Subject: leds: triggers: let struct led_trigger::activate() return an error
 code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Given that activating a trigger can fail, let the callback return an
indication. This prevents to have a trigger active according to the
"trigger" sysfs attribute but not functional.

All users are changed accordingly to return 0 for now. There is no intended
change in behaviour.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 drivers/leds/led-triggers.c               | 24 +++++++++++++++++++++---
 drivers/leds/trigger/ledtrig-activity.c   |  8 +++++---
 drivers/leds/trigger/ledtrig-backlight.c  |  8 +++++---
 drivers/leds/trigger/ledtrig-default-on.c |  3 ++-
 drivers/leds/trigger/ledtrig-gpio.c       |  8 +++++---
 drivers/leds/trigger/ledtrig-heartbeat.c  |  8 +++++---
 drivers/leds/trigger/ledtrig-netdev.c     |  8 +++++---
 drivers/leds/trigger/ledtrig-oneshot.c    |  8 +++++---
 drivers/leds/trigger/ledtrig-timer.c      |  8 +++++---
 drivers/leds/trigger/ledtrig-transient.c  |  8 +++++---
 drivers/tty/vt/keyboard.c                 |  4 +++-
 drivers/usb/core/ledtrig-usbport.c        |  7 ++++---
 include/linux/leds.h                      | 14 +++++++++-----
 net/bluetooth/leds.c                      |  6 ++++--
 net/mac80211/led.c                        | 20 +++++++++++++++-----
 net/rfkill/core.c                         |  4 +++-
 16 files changed, 101 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 431123b048a2..a8786f4b3453 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -103,15 +103,16 @@ ssize_t led_trigger_show(struct device *dev, struct device_attribute *attr,
 EXPORT_SYMBOL_GPL(led_trigger_show);
 
 /* Caller must ensure led_cdev->trigger_lock held */
-void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
+int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 {
 	unsigned long flags;
 	char *event = NULL;
 	char *envp[2];
 	const char *name;
+	int ret;
 
 	if (!led_cdev->trigger && !trig)
-		return;
+		return 0;
 
 	name = trig ? trig->name : "none";
 	event = kasprintf(GFP_KERNEL, "TRIGGER=%s", name);
@@ -134,8 +135,14 @@ void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 		list_add_tail(&led_cdev->trig_list, &trig->led_cdevs);
 		write_unlock_irqrestore(&trig->leddev_list_lock, flags);
 		led_cdev->trigger = trig;
+
 		if (trig->activate)
-			trig->activate(led_cdev);
+			ret = trig->activate(led_cdev);
+		else
+			ret = 0;
+
+		if (ret)
+			goto err_activate;
 	}
 
 	if (event) {
@@ -146,6 +153,17 @@ void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 				"%s: Error sending uevent\n", __func__);
 		kfree(event);
 	}
+
+	return 0;
+
+err_activate:
+	led_cdev->trigger = NULL;
+	write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags);
+	list_del(&led_cdev->trig_list);
+	write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock, flags);
+	led_set_brightness(led_cdev, LED_OFF);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(led_trigger_set);
 
diff --git a/drivers/leds/trigger/ledtrig-activity.c b/drivers/leds/trigger/ledtrig-activity.c
index b39e1bb9a9ec..2fc065fb1754 100644
--- a/drivers/leds/trigger/ledtrig-activity.c
+++ b/drivers/leds/trigger/ledtrig-activity.c
@@ -178,20 +178,20 @@ static ssize_t led_invert_store(struct device *dev,
 
 static DEVICE_ATTR(invert, 0644, led_invert_show, led_invert_store);
 
-static void activity_activate(struct led_classdev *led_cdev)
+static int activity_activate(struct led_classdev *led_cdev)
 {
 	struct activity_data *activity_data;
 	int rc;
 
 	activity_data = kzalloc(sizeof(*activity_data), GFP_KERNEL);
 	if (!activity_data)
-		return;
+		return 0;
 
 	led_cdev->trigger_data = activity_data;
 	rc = device_create_file(led_cdev->dev, &dev_attr_invert);
 	if (rc) {
 		kfree(led_cdev->trigger_data);
-		return;
+		return 0;
 	}
 
 	activity_data->led_cdev = led_cdev;
@@ -201,6 +201,8 @@ static void activity_activate(struct led_classdev *led_cdev)
 	led_activity_function(&activity_data->timer);
 	set_bit(LED_BLINK_SW, &led_cdev->work_flags);
 	led_cdev->activated = true;
+
+	return 0;
 }
 
 static void activity_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/leds/trigger/ledtrig-backlight.c b/drivers/leds/trigger/ledtrig-backlight.c
index 1ca1f1608f76..84512960d630 100644
--- a/drivers/leds/trigger/ledtrig-backlight.c
+++ b/drivers/leds/trigger/ledtrig-backlight.c
@@ -97,7 +97,7 @@ static ssize_t bl_trig_invert_store(struct device *dev,
 }
 static DEVICE_ATTR(inverted, 0644, bl_trig_invert_show, bl_trig_invert_store);
 
-static void bl_trig_activate(struct led_classdev *led)
+static int bl_trig_activate(struct led_classdev *led)
 {
 	int ret;
 
@@ -107,7 +107,7 @@ static void bl_trig_activate(struct led_classdev *led)
 	led->trigger_data = n;
 	if (!n) {
 		dev_err(led->dev, "unable to allocate backlight trigger\n");
-		return;
+		return 0;
 	}
 
 	ret = device_create_file(led->dev, &dev_attr_inverted);
@@ -124,11 +124,13 @@ static void bl_trig_activate(struct led_classdev *led)
 		dev_err(led->dev, "unable to register backlight trigger\n");
 	led->activated = true;
 
-	return;
+	return 0;
 
 err_invert:
 	led->trigger_data = NULL;
 	kfree(n);
+
+	return 0;
 }
 
 static void bl_trig_deactivate(struct led_classdev *led)
diff --git a/drivers/leds/trigger/ledtrig-default-on.c b/drivers/leds/trigger/ledtrig-default-on.c
index 4ccea04b7a6b..2996fe672198 100644
--- a/drivers/leds/trigger/ledtrig-default-on.c
+++ b/drivers/leds/trigger/ledtrig-default-on.c
@@ -16,9 +16,10 @@
 #include <linux/leds.h>
 #include "../leds.h"
 
-static void defon_trig_activate(struct led_classdev *led_cdev)
+static int defon_trig_activate(struct led_classdev *led_cdev)
 {
 	led_set_brightness_nosleep(led_cdev, led_cdev->max_brightness);
+	return 0;
 }
 
 static struct led_trigger defon_led_trigger = {
diff --git a/drivers/leds/trigger/ledtrig-gpio.c b/drivers/leds/trigger/ledtrig-gpio.c
index 93906a17a4b6..f5358c40d03f 100644
--- a/drivers/leds/trigger/ledtrig-gpio.c
+++ b/drivers/leds/trigger/ledtrig-gpio.c
@@ -162,14 +162,14 @@ static ssize_t gpio_trig_gpio_store(struct device *dev,
 }
 static DEVICE_ATTR(gpio, 0644, gpio_trig_gpio_show, gpio_trig_gpio_store);
 
-static void gpio_trig_activate(struct led_classdev *led)
+static int gpio_trig_activate(struct led_classdev *led)
 {
 	struct gpio_trig_data *gpio_data;
 	int ret;
 
 	gpio_data = kzalloc(sizeof(*gpio_data), GFP_KERNEL);
 	if (!gpio_data)
-		return;
+		return 0;
 
 	ret = device_create_file(led->dev, &dev_attr_gpio);
 	if (ret)
@@ -187,7 +187,7 @@ static void gpio_trig_activate(struct led_classdev *led)
 	led->trigger_data = gpio_data;
 	led->activated = true;
 
-	return;
+	return 0;
 
 err_brightness:
 	device_remove_file(led->dev, &dev_attr_inverted);
@@ -197,6 +197,8 @@ err_inverted:
 
 err_gpio:
 	kfree(gpio_data);
+
+	return 0;
 }
 
 static void gpio_trig_deactivate(struct led_classdev *led)
diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c
index 304b929edb8e..03a8dfce8fb9 100644
--- a/drivers/leds/trigger/ledtrig-heartbeat.c
+++ b/drivers/leds/trigger/ledtrig-heartbeat.c
@@ -121,21 +121,21 @@ static ssize_t led_invert_store(struct device *dev,
 
 static DEVICE_ATTR(invert, 0644, led_invert_show, led_invert_store);
 
-static void heartbeat_trig_activate(struct led_classdev *led_cdev)
+static int heartbeat_trig_activate(struct led_classdev *led_cdev)
 {
 	struct heartbeat_trig_data *heartbeat_data;
 	int rc;
 
 	heartbeat_data = kzalloc(sizeof(*heartbeat_data), GFP_KERNEL);
 	if (!heartbeat_data)
-		return;
+		return 0;
 
 	led_cdev->trigger_data = heartbeat_data;
 	heartbeat_data->led_cdev = led_cdev;
 	rc = device_create_file(led_cdev->dev, &dev_attr_invert);
 	if (rc) {
 		kfree(led_cdev->trigger_data);
-		return;
+		return 0;
 	}
 
 	timer_setup(&heartbeat_data->timer, led_heartbeat_function, 0);
@@ -145,6 +145,8 @@ static void heartbeat_trig_activate(struct led_classdev *led_cdev)
 	led_heartbeat_function(&heartbeat_data->timer);
 	set_bit(LED_BLINK_SW, &led_cdev->work_flags);
 	led_cdev->activated = true;
+
+	return 0;
 }
 
 static void heartbeat_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c
index 6df4781a6308..bb05937255de 100644
--- a/drivers/leds/trigger/ledtrig-netdev.c
+++ b/drivers/leds/trigger/ledtrig-netdev.c
@@ -388,14 +388,14 @@ static void netdev_trig_work(struct work_struct *work)
 			(atomic_read(&trigger_data->interval)*2));
 }
 
-static void netdev_trig_activate(struct led_classdev *led_cdev)
+static int netdev_trig_activate(struct led_classdev *led_cdev)
 {
 	struct led_netdev_data *trigger_data;
 	int rc;
 
 	trigger_data = kzalloc(sizeof(struct led_netdev_data), GFP_KERNEL);
 	if (!trigger_data)
-		return;
+		return 0;
 
 	spin_lock_init(&trigger_data->lock);
 
@@ -432,7 +432,7 @@ static void netdev_trig_activate(struct led_classdev *led_cdev)
 	rc = register_netdevice_notifier(&trigger_data->notifier);
 	if (rc)
 		goto err_out_interval;
-	return;
+	return 0;
 
 err_out_interval:
 	device_remove_file(led_cdev->dev, &dev_attr_interval);
@@ -447,6 +447,8 @@ err_out_device_name:
 err_out:
 	led_cdev->trigger_data = NULL;
 	kfree(trigger_data);
+
+	return 0;
 }
 
 static void netdev_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/leds/trigger/ledtrig-oneshot.c b/drivers/leds/trigger/ledtrig-oneshot.c
index b8ea9f0f1e19..7bbf92bd7f80 100644
--- a/drivers/leds/trigger/ledtrig-oneshot.c
+++ b/drivers/leds/trigger/ledtrig-oneshot.c
@@ -122,14 +122,14 @@ static DEVICE_ATTR(delay_off, 0644, led_delay_off_show, led_delay_off_store);
 static DEVICE_ATTR(invert, 0644, led_invert_show, led_invert_store);
 static DEVICE_ATTR(shot, 0200, NULL, led_shot);
 
-static void oneshot_trig_activate(struct led_classdev *led_cdev)
+static int oneshot_trig_activate(struct led_classdev *led_cdev)
 {
 	struct oneshot_trig_data *oneshot_data;
 	int rc;
 
 	oneshot_data = kzalloc(sizeof(*oneshot_data), GFP_KERNEL);
 	if (!oneshot_data)
-		return;
+		return 0;
 
 	led_cdev->trigger_data = oneshot_data;
 
@@ -151,7 +151,7 @@ static void oneshot_trig_activate(struct led_classdev *led_cdev)
 
 	led_cdev->activated = true;
 
-	return;
+	return 0;
 
 err_out_invert:
 	device_remove_file(led_cdev->dev, &dev_attr_invert);
@@ -161,6 +161,8 @@ err_out_delayon:
 	device_remove_file(led_cdev->dev, &dev_attr_delay_on);
 err_out_trig_data:
 	kfree(led_cdev->trigger_data);
+
+	return 0;
 }
 
 static void oneshot_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/leds/trigger/ledtrig-timer.c b/drivers/leds/trigger/ledtrig-timer.c
index 10fc0966b0e3..527055d815ad 100644
--- a/drivers/leds/trigger/ledtrig-timer.c
+++ b/drivers/leds/trigger/ledtrig-timer.c
@@ -70,7 +70,7 @@ static ssize_t led_delay_off_store(struct device *dev,
 static DEVICE_ATTR(delay_on, 0644, led_delay_on_show, led_delay_on_store);
 static DEVICE_ATTR(delay_off, 0644, led_delay_off_show, led_delay_off_store);
 
-static void timer_trig_activate(struct led_classdev *led_cdev)
+static int timer_trig_activate(struct led_classdev *led_cdev)
 {
 	int rc;
 
@@ -78,7 +78,7 @@ static void timer_trig_activate(struct led_classdev *led_cdev)
 
 	rc = device_create_file(led_cdev->dev, &dev_attr_delay_on);
 	if (rc)
-		return;
+		return 0;
 	rc = device_create_file(led_cdev->dev, &dev_attr_delay_off);
 	if (rc)
 		goto err_out_delayon;
@@ -87,10 +87,12 @@ static void timer_trig_activate(struct led_classdev *led_cdev)
 		      &led_cdev->blink_delay_off);
 	led_cdev->activated = true;
 
-	return;
+	return 0;
 
 err_out_delayon:
 	device_remove_file(led_cdev->dev, &dev_attr_delay_on);
+
+	return 0;
 }
 
 static void timer_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/leds/trigger/ledtrig-transient.c b/drivers/leds/trigger/ledtrig-transient.c
index 9d1769073562..a55fc58179a9 100644
--- a/drivers/leds/trigger/ledtrig-transient.c
+++ b/drivers/leds/trigger/ledtrig-transient.c
@@ -152,7 +152,7 @@ static DEVICE_ATTR(duration, 0644, transient_duration_show,
 		   transient_duration_store);
 static DEVICE_ATTR(state, 0644, transient_state_show, transient_state_store);
 
-static void transient_trig_activate(struct led_classdev *led_cdev)
+static int transient_trig_activate(struct led_classdev *led_cdev)
 {
 	int rc;
 	struct transient_trig_data *tdata;
@@ -161,7 +161,7 @@ static void transient_trig_activate(struct led_classdev *led_cdev)
 	if (!tdata) {
 		dev_err(led_cdev->dev,
 			"unable to allocate transient trigger\n");
-		return;
+		return 0;
 	}
 	led_cdev->trigger_data = tdata;
 	tdata->led_cdev = led_cdev;
@@ -181,7 +181,7 @@ static void transient_trig_activate(struct led_classdev *led_cdev)
 	timer_setup(&tdata->timer, transient_timer_function, 0);
 	led_cdev->activated = true;
 
-	return;
+	return 0;
 
 err_out_state:
 	device_remove_file(led_cdev->dev, &dev_attr_duration);
@@ -191,6 +191,8 @@ err_out:
 	dev_err(led_cdev->dev, "unable to register transient trigger\n");
 	led_cdev->trigger_data = NULL;
 	kfree(tdata);
+
+	return 0;
 }
 
 static void transient_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index d5b4a2b44ab8..de310621b8e7 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -959,7 +959,7 @@ struct kbd_led_trigger {
 	unsigned int mask;
 };
 
-static void kbd_led_trigger_activate(struct led_classdev *cdev)
+static int kbd_led_trigger_activate(struct led_classdev *cdev)
 {
 	struct kbd_led_trigger *trigger =
 		container_of(cdev->trigger, struct kbd_led_trigger, trigger);
@@ -970,6 +970,8 @@ static void kbd_led_trigger_activate(struct led_classdev *cdev)
 				  ledstate & trigger->mask ?
 					LED_FULL : LED_OFF);
 	tasklet_enable(&keyboard_tasklet);
+
+	return 0;
 }
 
 #define KBD_LED_TRIGGER(_led_bit, _name) {			\
diff --git a/drivers/usb/core/ledtrig-usbport.c b/drivers/usb/core/ledtrig-usbport.c
index d775ffea20c3..aa1d51458da7 100644
--- a/drivers/usb/core/ledtrig-usbport.c
+++ b/drivers/usb/core/ledtrig-usbport.c
@@ -298,14 +298,14 @@ static int usbport_trig_notify(struct notifier_block *nb, unsigned long action,
 	return NOTIFY_DONE;
 }
 
-static void usbport_trig_activate(struct led_classdev *led_cdev)
+static int usbport_trig_activate(struct led_classdev *led_cdev)
 {
 	struct usbport_trig_data *usbport_data;
 	int err;
 
 	usbport_data = kzalloc(sizeof(*usbport_data), GFP_KERNEL);
 	if (!usbport_data)
-		return;
+		return 0;
 	usbport_data->led_cdev = led_cdev;
 
 	/* List of ports */
@@ -322,10 +322,11 @@ static void usbport_trig_activate(struct led_classdev *led_cdev)
 	usb_register_notify(&usbport_data->nb);
 
 	led_cdev->activated = true;
-	return;
+	return 0;
 
 err_free:
 	kfree(usbport_data);
+	return 0;
 }
 
 static void usbport_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/include/linux/leds.h b/include/linux/leds.h
index b7e82550e655..ba5baaaa43bf 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -253,7 +253,7 @@ static inline bool led_sysfs_is_disabled(struct led_classdev *led_cdev)
 struct led_trigger {
 	/* Trigger Properties */
 	const char	 *name;
-	void		(*activate)(struct led_classdev *led_cdev);
+	int		(*activate)(struct led_classdev *led_cdev);
 	void		(*deactivate)(struct led_classdev *led_cdev);
 
 	/* LEDs under control by this trigger (for simple triggers) */
@@ -288,8 +288,8 @@ extern void led_trigger_blink_oneshot(struct led_trigger *trigger,
 				      unsigned long *delay_off,
 				      int invert);
 extern void led_trigger_set_default(struct led_classdev *led_cdev);
-extern void led_trigger_set(struct led_classdev *led_cdev,
-			struct led_trigger *trigger);
+extern int led_trigger_set(struct led_classdev *led_cdev,
+			   struct led_trigger *trigger);
 extern void led_trigger_remove(struct led_classdev *led_cdev);
 
 static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
@@ -334,8 +334,12 @@ static inline void led_trigger_blink_oneshot(struct led_trigger *trigger,
 				      unsigned long *delay_off,
 				      int invert) {}
 static inline void led_trigger_set_default(struct led_classdev *led_cdev) {}
-static inline void led_trigger_set(struct led_classdev *led_cdev,
-				struct led_trigger *trigger) {}
+static inline int led_trigger_set(struct led_classdev *led_cdev,
+				  struct led_trigger *trigger)
+{
+	return 0;
+}
+
 static inline void led_trigger_remove(struct led_classdev *led_cdev) {}
 static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
 {
diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c
index cb670b5594eb..6d59a5023231 100644
--- a/net/bluetooth/leds.c
+++ b/net/bluetooth/leds.c
@@ -43,7 +43,7 @@ void hci_leds_update_powered(struct hci_dev *hdev, bool enabled)
 	led_trigger_event(bt_power_led_trigger, enabled ? LED_FULL : LED_OFF);
 }
 
-static void power_activate(struct led_classdev *led_cdev)
+static int power_activate(struct led_classdev *led_cdev)
 {
 	struct hci_basic_led_trigger *htrig;
 	bool powered;
@@ -52,10 +52,12 @@ static void power_activate(struct led_classdev *led_cdev)
 	powered = test_bit(HCI_UP, &htrig->hdev->flags);
 
 	led_trigger_event(led_cdev->trigger, powered ? LED_FULL : LED_OFF);
+
+	return 0;
 }
 
 static struct led_trigger *led_allocate_basic(struct hci_dev *hdev,
-			void (*activate)(struct led_classdev *led_cdev),
+			int (*activate)(struct led_classdev *led_cdev),
 			const char *name)
 {
 	struct hci_basic_led_trigger *htrig;
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index ba0b507ea691..d6c66fc19716 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -52,13 +52,15 @@ void ieee80211_free_led_names(struct ieee80211_local *local)
 	kfree(local->radio_led.name);
 }
 
-static void ieee80211_tx_led_activate(struct led_classdev *led_cdev)
+static int ieee80211_tx_led_activate(struct led_classdev *led_cdev)
 {
 	struct ieee80211_local *local = container_of(led_cdev->trigger,
 						     struct ieee80211_local,
 						     tx_led);
 
 	atomic_inc(&local->tx_led_active);
+
+	return 0;
 }
 
 static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev)
@@ -70,13 +72,15 @@ static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev)
 	atomic_dec(&local->tx_led_active);
 }
 
-static void ieee80211_rx_led_activate(struct led_classdev *led_cdev)
+static int ieee80211_rx_led_activate(struct led_classdev *led_cdev)
 {
 	struct ieee80211_local *local = container_of(led_cdev->trigger,
 						     struct ieee80211_local,
 						     rx_led);
 
 	atomic_inc(&local->rx_led_active);
+
+	return 0;
 }
 
 static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev)
@@ -88,13 +92,15 @@ static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev)
 	atomic_dec(&local->rx_led_active);
 }
 
-static void ieee80211_assoc_led_activate(struct led_classdev *led_cdev)
+static int ieee80211_assoc_led_activate(struct led_classdev *led_cdev)
 {
 	struct ieee80211_local *local = container_of(led_cdev->trigger,
 						     struct ieee80211_local,
 						     assoc_led);
 
 	atomic_inc(&local->assoc_led_active);
+
+	return 0;
 }
 
 static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev)
@@ -106,13 +112,15 @@ static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev)
 	atomic_dec(&local->assoc_led_active);
 }
 
-static void ieee80211_radio_led_activate(struct led_classdev *led_cdev)
+static int ieee80211_radio_led_activate(struct led_classdev *led_cdev)
 {
 	struct ieee80211_local *local = container_of(led_cdev->trigger,
 						     struct ieee80211_local,
 						     radio_led);
 
 	atomic_inc(&local->radio_led_active);
+
+	return 0;
 }
 
 static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev)
@@ -124,13 +132,15 @@ static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev)
 	atomic_dec(&local->radio_led_active);
 }
 
-static void ieee80211_tpt_led_activate(struct led_classdev *led_cdev)
+static int ieee80211_tpt_led_activate(struct led_classdev *led_cdev)
 {
 	struct ieee80211_local *local = container_of(led_cdev->trigger,
 						     struct ieee80211_local,
 						     tpt_led);
 
 	atomic_inc(&local->tpt_led_active);
+
+	return 0;
 }
 
 static void ieee80211_tpt_led_deactivate(struct led_classdev *led_cdev)
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index a7a4e6ff9be2..1355f5ca8d22 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -141,13 +141,15 @@ static void rfkill_led_trigger_event(struct rfkill *rfkill)
 		led_trigger_event(trigger, LED_FULL);
 }
 
-static void rfkill_led_trigger_activate(struct led_classdev *led)
+static int rfkill_led_trigger_activate(struct led_classdev *led)
 {
 	struct rfkill *rfkill;
 
 	rfkill = container_of(led->trigger, struct rfkill, led_trigger);
 
 	rfkill_led_trigger_event(rfkill);
+
+	return 0;
 }
 
 const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
-- 
cgit v1.2.3


From a7e7a3156300a7e1982b03cc9cb8fb0c86434c49 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 2 Jul 2018 22:05:22 +0200
Subject: leds: triggers: add device attribute support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As many triggers use device attributes, add support for these in
led_trigger_set which allows simplifying the drivers accordingly.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 drivers/leds/led-triggers.c | 12 ++++++++++++
 include/linux/leds.h        | 11 +++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index a8786f4b3453..3f3e8728d82c 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -127,6 +127,7 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 		led_stop_software_blink(led_cdev);
 		if (led_cdev->trigger->deactivate)
 			led_cdev->trigger->deactivate(led_cdev);
+		device_remove_groups(led_cdev->dev, led_cdev->trigger->groups);
 		led_cdev->trigger = NULL;
 		led_set_brightness(led_cdev, LED_OFF);
 	}
@@ -143,6 +144,12 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 
 		if (ret)
 			goto err_activate;
+
+		ret = device_add_groups(led_cdev->dev, trig->groups);
+		if (ret) {
+			dev_err(led_cdev->dev, "Failed to add trigger attributes\n");
+			goto err_add_groups;
+		}
 	}
 
 	if (event) {
@@ -156,7 +163,12 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 
 	return 0;
 
+err_add_groups:
+
+	if (trig->deactivate)
+		trig->deactivate(led_cdev);
 err_activate:
+
 	led_cdev->trigger = NULL;
 	write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags);
 	list_del(&led_cdev->trig_list);
diff --git a/include/linux/leds.h b/include/linux/leds.h
index ba5baaaa43bf..33484a5c7478 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -262,8 +262,19 @@ struct led_trigger {
 
 	/* Link to next registered trigger */
 	struct list_head  next_trig;
+
+	const struct attribute_group **groups;
 };
 
+/*
+ * Currently the attributes in struct led_trigger::groups are added directly to
+ * the LED device. As this might change in the future, the following
+ * macros abstract getting the LED device and its trigger_data from the dev
+ * parameter passed to the attribute accessor functions.
+ */
+#define led_trigger_get_led(dev)	((struct led_classdev *)dev_get_drvdata((dev)))
+#define led_trigger_get_drvdata(dev)	(led_get_trigger_data(led_trigger_get_led(dev)))
+
 ssize_t led_trigger_store(struct device *dev, struct device_attribute *attr,
 			const char *buf, size_t count);
 ssize_t led_trigger_show(struct device *dev, struct device_attribute *attr,
-- 
cgit v1.2.3


From a0b750768371e410d77b60bcf49c18bd45078d55 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 2 Jul 2018 22:05:24 +0200
Subject: leds: triggers: define module_led_trigger helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This helps to simplify modules that provide a simple led_trigger. It's
inspired by module_platform_driver, module_i2c_driver et al.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 include/linux/leds.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 33484a5c7478..a3ee10846a4b 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -326,6 +326,10 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
 extern void led_trigger_rename_static(const char *name,
 				      struct led_trigger *trig);
 
+#define module_led_trigger(__led_trigger) \
+	module_driver(__led_trigger, led_trigger_register, \
+		      led_trigger_unregister)
+
 #else
 
 /* Trigger has no members */
-- 
cgit v1.2.3


From 9acc560de2aac73ef99c54f0fdfb86b4684296b5 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 2 Jul 2018 22:05:25 +0200
Subject: leds: triggers: new function led_set_trigger_data()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is the natural counter part to the already existing
led_get_trigger_data().

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 include/linux/leds.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index a3ee10846a4b..834683d603f9 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -303,6 +303,12 @@ extern int led_trigger_set(struct led_classdev *led_cdev,
 			   struct led_trigger *trigger);
 extern void led_trigger_remove(struct led_classdev *led_cdev);
 
+static inline void led_set_trigger_data(struct led_classdev *led_cdev,
+					void *trigger_data)
+{
+	led_cdev->trigger_data = trigger_data;
+}
+
 static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
 {
 	return led_cdev->trigger_data;
@@ -356,6 +362,7 @@ static inline int led_trigger_set(struct led_classdev *led_cdev,
 }
 
 static inline void led_trigger_remove(struct led_classdev *led_cdev) {}
+static inline void led_set_trigger_data(struct led_classdev *led_cdev) {}
 static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
 {
 	return NULL;
-- 
cgit v1.2.3


From 576c7218a1546e0153480b208b125509cec71470 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 25 Jun 2018 13:17:41 -0500
Subject: PCI: Export pcie_get_speed_cap and pcie_get_width_cap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So drivers can use them.  This can be used to replace
duplicate code in the drm subsystem.

Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/pci/pci.c   | 2 ++
 include/linux/pci.h | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 97acba712e4e..22adaf35b136 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5222,6 +5222,7 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev)
 
 	return PCI_SPEED_UNKNOWN;
 }
+EXPORT_SYMBOL(pcie_get_speed_cap);
 
 /**
  * pcie_get_width_cap - query for the PCI device's link width capability
@@ -5240,6 +5241,7 @@ enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev)
 
 	return PCIE_LNK_WIDTH_UNKNOWN;
 }
+EXPORT_SYMBOL(pcie_get_width_cap);
 
 /**
  * pcie_bandwidth_capable - calculate a PCI device's link bandwidth capability
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..6e0c0803b241 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -261,6 +261,9 @@ enum pci_bus_speed {
 	PCI_SPEED_UNKNOWN		= 0xff,
 };
 
+enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev);
+enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev);
+
 struct pci_cap_saved_data {
 	u16		cap_nr;
 	bool		cap_extended;
-- 
cgit v1.2.3


From 1b0707a781eeaeeaacb464e18bb3f049b99b496b Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Thu, 7 Jun 2018 19:50:38 +0000
Subject: Bluetooth: remove unused bt-nokia-h4p.h header

Nothing in tree use this header which seems a remains of a staging
driver.
This patch remove it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/linux/platform_data/bt-nokia-h4p.h | 38 ------------------------------
 1 file changed, 38 deletions(-)
 delete mode 100644 include/linux/platform_data/bt-nokia-h4p.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/bt-nokia-h4p.h b/include/linux/platform_data/bt-nokia-h4p.h
deleted file mode 100644
index 30d169dfadf3..000000000000
--- a/include/linux/platform_data/bt-nokia-h4p.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * This file is part of Nokia H4P bluetooth driver
- *
- * Copyright (C) 2010 Nokia Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-
-/**
- * struct hci_h4p_platform data - hci_h4p Platform data structure
- */
-struct hci_h4p_platform_data {
-	int chip_type;
-	int bt_sysclk;
-	unsigned int bt_wakeup_gpio;
-	unsigned int host_wakeup_gpio;
-	unsigned int reset_gpio;
-	int reset_gpio_shared;
-	unsigned int uart_irq;
-	phys_addr_t uart_base;
-	const char *uart_iclk;
-	const char *uart_fclk;
-	void (*set_pm_limits)(struct device *dev, bool set);
-};
-- 
cgit v1.2.3


From 0f725561e168485eff7277d683405c05b192f537 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Thu, 7 Jun 2018 09:56:59 -0700
Subject: iommu/vt-d: Add definitions for PFSID

When SRIOV VF device IOTLB is invalidated, we need to provide
the PF source ID such that IOMMU hardware can gauge the depth
of invalidation queue which is shared among VFs. This is needed
when device invalidation throttle (DIT) capability is supported.

This patch adds bit definitions for checking and tracking PFSID.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: stable@vger.kernel.org
Cc: "Ashok Raj" <ashok.raj@intel.com>
Cc: "Lu Baolu" <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 1 +
 include/linux/intel-iommu.h | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 14e4b3722428..7f6194ef48f4 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -422,6 +422,7 @@ struct device_domain_info {
 	struct list_head global; /* link to global list */
 	u8 bus;			/* PCI bus number */
 	u8 devfn;		/* PCI devfn number */
+	u16 pfsid;		/* SRIOV physical function source ID */
 	u8 pasid_supported:3;
 	u8 pasid_enabled:1;
 	u8 pri_supported:1;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 1df940196ab2..3b1c37155572 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -114,6 +114,7 @@
  * Extended Capability Register
  */
 
+#define ecap_dit(e)		((e >> 41) & 0x1)
 #define ecap_pasid(e)		((e >> 40) & 0x1)
 #define ecap_pss(e)		((e >> 35) & 0x1f)
 #define ecap_eafs(e)		((e >> 34) & 0x1)
@@ -283,6 +284,7 @@ enum {
 #define QI_DEV_IOTLB_SID(sid)	((u64)((sid) & 0xffff) << 32)
 #define QI_DEV_IOTLB_QDEP(qdep)	(((qdep) & 0x1f) << 16)
 #define QI_DEV_IOTLB_ADDR(addr)	((u64)(addr) & VTD_PAGE_MASK)
+#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
 #define QI_DEV_IOTLB_SIZE	1
 #define QI_DEV_IOTLB_MAX_INVS	32
 
@@ -307,6 +309,7 @@ enum {
 #define QI_DEV_EIOTLB_PASID(p)	(((u64)p) << 32)
 #define QI_DEV_EIOTLB_SID(sid)	((u64)((sid) & 0xffff) << 16)
 #define QI_DEV_EIOTLB_QDEP(qd)	((u64)((qd) & 0x1f) << 4)
+#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
 #define QI_DEV_EIOTLB_MAX_INVS	32
 
 #define QI_PGRP_IDX(idx)	(((u64)(idx)) << 55)
-- 
cgit v1.2.3


From 1c48db44924298ad0cb5a6386b88017539be8822 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Thu, 7 Jun 2018 09:57:00 -0700
Subject: iommu/vt-d: Fix dev iotlb pfsid use

PFSID should be used in the invalidation descriptor for flushing
device IOTLBs on SRIOV VFs.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: stable@vger.kernel.org
Cc: "Ashok Raj" <ashok.raj@intel.com>
Cc: "Lu Baolu" <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c        |  6 +++---
 drivers/iommu/intel-iommu.c | 17 ++++++++++++++++-
 include/linux/intel-iommu.h |  5 ++---
 3 files changed, 21 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 75456b5aa825..d9c748b6f9e4 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1339,8 +1339,8 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 	qi_submit_sync(&desc, iommu);
 }
 
-void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
-			u64 addr, unsigned mask)
+void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+			u16 qdep, u64 addr, unsigned mask)
 {
 	struct qi_desc desc;
 
@@ -1355,7 +1355,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
 		qdep = 0;
 
 	desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
-		   QI_DIOTLB_TYPE;
+		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
 
 	qi_submit_sync(&desc, iommu);
 }
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 7f6194ef48f4..497ef94c5a8c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1475,6 +1475,20 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info)
 		return;
 
 	pdev = to_pci_dev(info->dev);
+	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
+	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
+	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
+	 * reserved, which should be set to 0.
+	 */
+	if (!ecap_dit(info->iommu->ecap))
+		info->pfsid = 0;
+	else {
+		struct pci_dev *pf_pdev;
+
+		/* pdev will be returned if device is not a vf */
+		pf_pdev = pci_physfn(pdev);
+		info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
+	}
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 	/* The PCIe spec, in its wisdom, declares that the behaviour of
@@ -1540,7 +1554,8 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 
 		sid = info->bus << 8 | info->devfn;
 		qdep = info->ats_qdep;
-		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
+		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+				qdep, addr, mask);
 	}
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 }
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3b1c37155572..6692b40ca814 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -455,9 +455,8 @@ extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
 			     u8 fm, u64 type);
 extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 			  unsigned int size_order, u64 type);
-extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
-			       u64 addr, unsigned mask);
-
+extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+			u16 qdep, u64 addr, unsigned mask);
 extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
 extern int dmar_ir_support(void);
-- 
cgit v1.2.3


From bad614b24293ae463e74d2465685f0e4e229baca Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 12 Jun 2018 16:41:21 -0500
Subject: iommu: Enable debugfs exposure of IOMMU driver internals

Provide base enablement for using debugfs to expose internal data of an
IOMMU driver. When called, create the /sys/kernel/debug/iommu directory.

Emit a strong warning at boot time to indicate that this feature is
enabled.

This function is called from iommu_init, and creates the initial DebugFS
directory. Drivers may then call iommu_debugfs_new_driver_dir() to
instantiate a device-specific directory to expose internal data.
It will return a pointer to the new dentry structure created in
/sys/kernel/debug/iommu, or NULL in the event of a failure.

Since the IOMMU driver can not be removed from the running system, there
is no need for an "off" function.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig         | 10 +++++++
 drivers/iommu/Makefile        |  1 +
 drivers/iommu/iommu-debugfs.c | 66 +++++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/iommu.c         |  2 ++
 include/linux/iommu.h         |  7 +++++
 5 files changed, 86 insertions(+)
 create mode 100644 drivers/iommu/iommu-debugfs.c

(limited to 'include/linux')

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index e055d228bfb9..ab9181d8af3b 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -60,6 +60,16 @@ config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
 
 endmenu
 
+config IOMMU_DEBUGFS
+	bool "Export IOMMU internals in DebugFS"
+	depends on DEBUG_FS
+	help
+	  Allows exposure of IOMMU device internals. This option enables
+	  the use of debugfs by IOMMU drivers as required. Devices can,
+	  at initialization time, cause the IOMMU code to create a top-level
+	  debug/iommu directory, and then populate a subdirectory with
+	  entries as required.
+
 config IOMMU_IOVA
 	tristate
 
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 1fb695854809..74cfbc392862 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -2,6 +2,7 @@
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
+obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o
 obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
diff --git a/drivers/iommu/iommu-debugfs.c b/drivers/iommu/iommu-debugfs.c
new file mode 100644
index 000000000000..3b1bf88fd1b0
--- /dev/null
+++ b/drivers/iommu/iommu-debugfs.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IOMMU debugfs core infrastructure
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/debugfs.h>
+
+struct dentry *iommu_debugfs_dir;
+
+/**
+ * iommu_debugfs_setup - create the top-level iommu directory in debugfs
+ *
+ * Provide base enablement for using debugfs to expose internal data of an
+ * IOMMU driver. When called, this function creates the
+ * /sys/kernel/debug/iommu directory.
+ *
+ * Emit a strong warning at boot time to indicate that this feature is
+ * enabled.
+ *
+ * This function is called from iommu_init; drivers may then call
+ * iommu_debugfs_new_driver_dir() to instantiate a vendor-specific
+ * directory to be used to expose internal data.
+ */
+void iommu_debugfs_setup(void)
+{
+	if (!iommu_debugfs_dir) {
+		iommu_debugfs_dir = debugfs_create_dir("iommu", NULL);
+		pr_warn("\n");
+		pr_warn("*************************************************************\n");
+		pr_warn("**     NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE    **\n");
+		pr_warn("**                                                         **\n");
+		pr_warn("**  IOMMU DebugFS SUPPORT HAS BEEN ENABLED IN THIS KERNEL  **\n");
+		pr_warn("**                                                         **\n");
+		pr_warn("** This means that this kernel is built to expose internal **\n");
+		pr_warn("** IOMMU data structures, which may compromise security on **\n");
+		pr_warn("** your system.                                            **\n");
+		pr_warn("**                                                         **\n");
+		pr_warn("** If you see this message and you are not debugging the   **\n");
+		pr_warn("** kernel, report this immediately to your vendor!         **\n");
+		pr_warn("**                                                         **\n");
+		pr_warn("**     NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE    **\n");
+		pr_warn("*************************************************************\n");
+	}
+}
+
+/**
+ * iommu_debugfs_new_driver_dir - create a vendor directory under debugfs/iommu
+ * @vendor: name of the vendor-specific subdirectory to create
+ *
+ * This function is called by an IOMMU driver to create the top-level debugfs
+ * directory for that driver.
+ *
+ * Return: upon success, a pointer to the dentry for the new directory.
+ *         NULL in case of failure.
+ */
+struct dentry *iommu_debugfs_new_driver_dir(const char *vendor)
+{
+	return debugfs_create_dir(vendor, iommu_debugfs_dir);
+}
+EXPORT_SYMBOL_GPL(iommu_debugfs_new_driver_dir);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 63b37563db7e..d227b864a109 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1748,6 +1748,8 @@ static int __init iommu_init(void)
 					       NULL, kernel_kobj);
 	BUG_ON(!iommu_group_kset);
 
+	iommu_debugfs_setup();
+
 	return 0;
 }
 core_initcall(iommu_init);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 19938ee6eb31..7447b0b0579a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -698,4 +698,11 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 
 #endif /* CONFIG_IOMMU_API */
 
+#ifdef CONFIG_IOMMU_DEBUGFS
+extern	struct dentry *iommu_debugfs_dir;
+void iommu_debugfs_setup(void);
+#else
+static inline void iommu_debugfs_setup(void) {}
+#endif
+
 #endif /* __LINUX_IOMMU_H */
-- 
cgit v1.2.3


From 818b7587b4d34e989ea6c042eeb8d50ffa5be13e Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Date: Wed, 27 Jun 2018 10:31:20 -0500
Subject: x86: irq_remapping: Move irq remapping mode enum

The enum is currently defined in Intel-specific DMAR header file,
but it is also used by APIC common code. Therefore, move it to
a more appropriate interrupt-remapping common header file.
This will also be used by subsequent patches.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/x86/include/asm/irq_remapping.h | 5 +++++
 include/linux/dmar.h                 | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 023b4a9fc846..5f26962eff42 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -33,6 +33,11 @@ enum irq_remap_cap {
 	IRQ_POSTING_CAP = 0,
 };
 
+enum {
+	IRQ_REMAP_XAPIC_MODE,
+	IRQ_REMAP_X2APIC_MODE,
+};
+
 struct vcpu_data {
 	u64 pi_desc_addr;	/* Physical address of PI Descriptor */
 	u32 vector;		/* Guest vector of the interrupt */
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e2433bc50210..843a41ba7e28 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -265,11 +265,6 @@ static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src)
 #define PDA_LOW_BIT    26
 #define PDA_HIGH_BIT   32
 
-enum {
-	IRQ_REMAP_XAPIC_MODE,
-	IRQ_REMAP_X2APIC_MODE,
-};
-
 /* Can't use the common MSI interrupt functions
  * since DMAR is not a pci device
  */
-- 
cgit v1.2.3


From 3ffa6583e24e1ad1abab836d24bfc9d2308074e5 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Mon, 25 Jun 2018 09:51:48 +0200
Subject: power: remove possible deadlock when unregistering power_supply

If a device gets removed right after having registered a power_supply node,
we might enter in a deadlock between the remove call (that has a lock on
the parent device) and the deferred register work.

Allow the deferred register work to exit without taking the lock when
we are in the remove state.

Stack trace on a Ubuntu 16.04:

[16072.109121] INFO: task kworker/u16:2:1180 blocked for more than 120 seconds.
[16072.109127]       Not tainted 4.13.0-41-generic #46~16.04.1-Ubuntu
[16072.109129] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[16072.109132] kworker/u16:2   D    0  1180      2 0x80000000
[16072.109142] Workqueue: events_power_efficient power_supply_deferred_register_work
[16072.109144] Call Trace:
[16072.109152]  __schedule+0x3d6/0x8b0
[16072.109155]  schedule+0x36/0x80
[16072.109158]  schedule_preempt_disabled+0xe/0x10
[16072.109161]  __mutex_lock.isra.2+0x2ab/0x4e0
[16072.109166]  __mutex_lock_slowpath+0x13/0x20
[16072.109168]  ? __mutex_lock_slowpath+0x13/0x20
[16072.109171]  mutex_lock+0x2f/0x40
[16072.109174]  power_supply_deferred_register_work+0x2b/0x50
[16072.109179]  process_one_work+0x15b/0x410
[16072.109182]  worker_thread+0x4b/0x460
[16072.109186]  kthread+0x10c/0x140
[16072.109189]  ? process_one_work+0x410/0x410
[16072.109191]  ? kthread_create_on_node+0x70/0x70
[16072.109194]  ret_from_fork+0x35/0x40
[16072.109199] INFO: task test:2257 blocked for more than 120 seconds.
[16072.109202]       Not tainted 4.13.0-41-generic #46~16.04.1-Ubuntu
[16072.109204] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[16072.109206] test            D    0  2257   2256 0x00000004
[16072.109208] Call Trace:
[16072.109211]  __schedule+0x3d6/0x8b0
[16072.109215]  schedule+0x36/0x80
[16072.109218]  schedule_timeout+0x1f3/0x360
[16072.109221]  ? check_preempt_curr+0x5a/0xa0
[16072.109224]  ? ttwu_do_wakeup+0x1e/0x150
[16072.109227]  wait_for_completion+0xb4/0x140
[16072.109230]  ? wait_for_completion+0xb4/0x140
[16072.109233]  ? wake_up_q+0x70/0x70
[16072.109236]  flush_work+0x129/0x1e0
[16072.109240]  ? worker_detach_from_pool+0xb0/0xb0
[16072.109243]  __cancel_work_timer+0x10f/0x190
[16072.109247]  ? device_del+0x264/0x310
[16072.109250]  ? __wake_up+0x44/0x50
[16072.109253]  cancel_delayed_work_sync+0x13/0x20
[16072.109257]  power_supply_unregister+0x37/0xb0
[16072.109260]  devm_power_supply_release+0x11/0x20
[16072.109263]  release_nodes+0x110/0x200
[16072.109266]  devres_release_group+0x7c/0xb0
[16072.109274]  wacom_remove+0xc2/0x110 [wacom]
[16072.109279]  hid_device_remove+0x6e/0xd0 [hid]
[16072.109284]  device_release_driver_internal+0x158/0x210
[16072.109288]  device_release_driver+0x12/0x20
[16072.109291]  bus_remove_device+0xec/0x160
[16072.109293]  device_del+0x1de/0x310
[16072.109298]  hid_destroy_device+0x27/0x60 [hid]
[16072.109303]  usbhid_disconnect+0x51/0x70 [usbhid]
[16072.109308]  usb_unbind_interface+0x77/0x270
[16072.109311]  device_release_driver_internal+0x158/0x210
[16072.109315]  device_release_driver+0x12/0x20
[16072.109318]  usb_driver_release_interface+0x77/0x80
[16072.109321]  proc_ioctl+0x20f/0x250
[16072.109325]  usbdev_do_ioctl+0x57f/0x1140
[16072.109327]  ? __wake_up+0x44/0x50
[16072.109331]  usbdev_ioctl+0xe/0x20
[16072.109336]  do_vfs_ioctl+0xa4/0x600
[16072.109339]  ? vfs_write+0x15a/0x1b0
[16072.109343]  SyS_ioctl+0x79/0x90
[16072.109347]  entry_SYSCALL_64_fastpath+0x24/0xab
[16072.109349] RIP: 0033:0x7f20da807f47
[16072.109351] RSP: 002b:00007ffc422ae398 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[16072.109353] RAX: ffffffffffffffda RBX: 00000000010b8560 RCX: 00007f20da807f47
[16072.109355] RDX: 00007ffc422ae3a0 RSI: 00000000c0105512 RDI: 0000000000000009
[16072.109356] RBP: 0000000000000000 R08: 00007ffc422ae3e0 R09: 0000000000000010
[16072.109357] R10: 00000000000000a6 R11: 0000000000000246 R12: 0000000000000000
[16072.109359] R13: 00000000010b8560 R14: 00007ffc422ae2e0 R15: 0000000000000000

Reported-and-tested-by: Richard Hughes <rhughes@redhat.com>
Tested-by: Aaron Skomra <Aaron.Skomra@wacom.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Fixes: 7f1a57fdd6cb ("power_supply: Fix possible NULL pointer dereference on early uevent")
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/power_supply_core.c | 11 +++++++++--
 include/linux/power_supply.h             |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index d21f478741c1..e85361878450 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/err.h>
@@ -140,8 +141,13 @@ static void power_supply_deferred_register_work(struct work_struct *work)
 	struct power_supply *psy = container_of(work, struct power_supply,
 						deferred_register_work.work);
 
-	if (psy->dev.parent)
-		mutex_lock(&psy->dev.parent->mutex);
+	if (psy->dev.parent) {
+		while (!mutex_trylock(&psy->dev.parent->mutex)) {
+			if (psy->removing)
+				return;
+			msleep(10);
+		}
+	}
 
 	power_supply_changed(psy);
 
@@ -1082,6 +1088,7 @@ EXPORT_SYMBOL_GPL(devm_power_supply_register_no_ws);
 void power_supply_unregister(struct power_supply *psy)
 {
 	WARN_ON(atomic_dec_return(&psy->use_cnt));
+	psy->removing = true;
 	cancel_work_sync(&psy->changed_work);
 	cancel_delayed_work_sync(&psy->deferred_register_work);
 	sysfs_remove_link(&psy->dev.kobj, "powers");
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index b21c4bd96b84..f80769175c56 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -269,6 +269,7 @@ struct power_supply {
 	spinlock_t changed_lock;
 	bool changed;
 	bool initialized;
+	bool removing;
 	atomic_t use_cnt;
 #ifdef CONFIG_THERMAL
 	struct thermal_zone_device *tzd;
-- 
cgit v1.2.3


From 7efe25a70c37395b41b034335345d6855926c2a6 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Thu, 7 Jun 2018 19:44:56 +0000
Subject: iommu/shmobile: Remove unused include/linux/platform_data/sh_ipmmu.h
 header

include/linux/platform_data/sh_ipmmu.h is unused since commit

	ae50dc4874c5 ("iommu/shmobile: Remove unused Renesas IPMMU/IPMMUI")

Let's remove it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/platform_data/sh_ipmmu.h | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 include/linux/platform_data/sh_ipmmu.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/sh_ipmmu.h b/include/linux/platform_data/sh_ipmmu.h
deleted file mode 100644
index 39f7405cdac5..000000000000
--- a/include/linux/platform_data/sh_ipmmu.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* sh_ipmmu.h
- *
- * Copyright (C) 2012  Hideki EIRAKU
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- */
-
-#ifndef __SH_IPMMU_H__
-#define __SH_IPMMU_H__
-
-struct shmobile_ipmmu_platform_data {
-	const char * const *dev_names;
-	unsigned int num_dev_names;
-};
-
-#endif /* __SH_IPMMU_H__ */
-- 
cgit v1.2.3


From 663336ee2628096df0ce2b546b148cb74b5249fe Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 9 May 2018 08:15:46 -0700
Subject: device: Add #define dev_fmt similar to #define pr_fmt

Add a prefixing macro to dev_<level> uses similar to the pr_fmt
prefixing macro used in pr_<level> calls.

This can help avoid some string duplication in dev_<level> uses.

The default, like pr_fmt, is an empty #define dev_fmt(fmt) fmt

Rename the existing dev_<level> functions to _dev_<level> and
introduce #define dev_<level> _dev_<level> macros that use the
new #define dev_fmt

Miscellanea:

o Consistently use #defines with fmt, ... and ##__VA_ARGS__
o Remove unnecessary externs

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    |  12 +++---
 include/linux/device.h | 103 ++++++++++++++++++++++++++++---------------------
 2 files changed, 64 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index df3e1a44707a..ceb8ce90aebb 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -3002,12 +3002,12 @@ void func(const struct device *dev, const char *fmt, ...)	\
 }								\
 EXPORT_SYMBOL(func);
 
-define_dev_printk_level(dev_emerg, KERN_EMERG);
-define_dev_printk_level(dev_alert, KERN_ALERT);
-define_dev_printk_level(dev_crit, KERN_CRIT);
-define_dev_printk_level(dev_err, KERN_ERR);
-define_dev_printk_level(dev_warn, KERN_WARNING);
-define_dev_printk_level(dev_notice, KERN_NOTICE);
+define_dev_printk_level(_dev_emerg, KERN_EMERG);
+define_dev_printk_level(_dev_alert, KERN_ALERT);
+define_dev_printk_level(_dev_crit, KERN_CRIT);
+define_dev_printk_level(_dev_err, KERN_ERR);
+define_dev_printk_level(_dev_warn, KERN_WARNING);
+define_dev_printk_level(_dev_notice, KERN_NOTICE);
 define_dev_printk_level(_dev_info, KERN_INFO);
 
 #endif
diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..2eaa9ea13c09 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1317,30 +1317,34 @@ struct device_link *device_link_add(struct device *consumer,
 				    struct device *supplier, u32 flags);
 void device_link_del(struct device_link *link);
 
+#ifndef dev_fmt
+#define dev_fmt(fmt) fmt
+#endif
+
 #ifdef CONFIG_PRINTK
 
-extern __printf(3, 0)
+__printf(3, 0)
 int dev_vprintk_emit(int level, const struct device *dev,
 		     const char *fmt, va_list args);
-extern __printf(3, 4)
+__printf(3, 4)
 int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...);
 
-extern __printf(3, 4)
+__printf(3, 4)
 void dev_printk(const char *level, const struct device *dev,
 		const char *fmt, ...);
-extern __printf(2, 3)
-void dev_emerg(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
-void dev_alert(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
-void dev_crit(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
-void dev_err(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
-void dev_warn(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
-void dev_notice(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
+__printf(2, 3)
+void _dev_emerg(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
+void _dev_alert(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
+void _dev_crit(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
+void _dev_err(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
+void _dev_warn(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
+void _dev_notice(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
 void _dev_info(const struct device *dev, const char *fmt, ...);
 
 #else
@@ -1358,26 +1362,26 @@ static inline void __dev_printk(const char *level, const struct device *dev,
 {}
 static inline __printf(3, 4)
 void dev_printk(const char *level, const struct device *dev,
-		const char *fmt, ...)
+		 const char *fmt, ...)
 {}
 
 static inline __printf(2, 3)
-void dev_emerg(const struct device *dev, const char *fmt, ...)
+void _dev_emerg(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
-void dev_crit(const struct device *dev, const char *fmt, ...)
+void _dev_crit(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
-void dev_alert(const struct device *dev, const char *fmt, ...)
+void _dev_alert(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
-void dev_err(const struct device *dev, const char *fmt, ...)
+void _dev_err(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
-void dev_warn(const struct device *dev, const char *fmt, ...)
+void _dev_warn(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
-void dev_notice(const struct device *dev, const char *fmt, ...)
+void _dev_notice(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
 void _dev_info(const struct device *dev, const char *fmt, ...)
@@ -1386,27 +1390,36 @@ void _dev_info(const struct device *dev, const char *fmt, ...)
 #endif
 
 /*
- * Stupid hackaround for existing uses of non-printk uses dev_info
- *
- * Note that the definition of dev_info below is actually _dev_info
- * and a macro is used to avoid redefining dev_info
+ * #defines for all the dev_<level> macros to prefix with whatever
+ * possible use of #define dev_fmt(fmt) ...
  */
 
-#define dev_info(dev, fmt, arg...) _dev_info(dev, fmt, ##arg)
+#define dev_emerg(dev, fmt, ...)					\
+	_dev_emerg(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_crit(dev, fmt, ...)						\
+	_dev_crit(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_alert(dev, fmt, ...)					\
+	_dev_alert(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_err(dev, fmt, ...)						\
+	_dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_warn(dev, fmt, ...)						\
+	_dev_warn(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_notice(dev, fmt, ...)					\
+	_dev_notice(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_info(dev, fmt, ...)						\
+	_dev_info(dev, dev_fmt(fmt), ##__VA_ARGS__)
 
 #if defined(CONFIG_DYNAMIC_DEBUG)
-#define dev_dbg(dev, format, ...)		     \
-do {						     \
-	dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \
-} while (0)
+#define dev_dbg(dev, fmt, ...)						\
+	dynamic_dev_dbg(dev, dev_fmt(fmt), ##__VA_ARGS__)
 #elif defined(DEBUG)
-#define dev_dbg(dev, format, arg...)		\
-	dev_printk(KERN_DEBUG, dev, format, ##arg)
+#define dev_dbg(dev, fmt, ...)						\
+	dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__)
 #else
-#define dev_dbg(dev, format, arg...)				\
-({								\
-	if (0)							\
-		dev_printk(KERN_DEBUG, dev, format, ##arg);	\
+#define dev_dbg(dev, fmt, ...)						\
+({									\
+	if (0)								\
+		dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \
 })
 #endif
 
@@ -1478,7 +1491,7 @@ do {									\
 	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
 	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&	\
 	    __ratelimit(&_rs))						\
-		__dynamic_dev_dbg(&descriptor, dev, fmt,		\
+		__dynamic_dev_dbg(&descriptor, dev, dev_fmt(fmt),	\
 				  ##__VA_ARGS__);			\
 } while (0)
 #elif defined(DEBUG)
@@ -1488,23 +1501,23 @@ do {									\
 				      DEFAULT_RATELIMIT_INTERVAL,	\
 				      DEFAULT_RATELIMIT_BURST);		\
 	if (__ratelimit(&_rs))						\
-		dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);	\
+		dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \
 } while (0)
 #else
 #define dev_dbg_ratelimited(dev, fmt, ...)				\
 do {									\
 	if (0)								\
-		dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);	\
+		dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \
 } while (0)
 #endif
 
 #ifdef VERBOSE_DEBUG
 #define dev_vdbg	dev_dbg
 #else
-#define dev_vdbg(dev, format, arg...)				\
-({								\
-	if (0)							\
-		dev_printk(KERN_DEBUG, dev, format, ##arg);	\
+#define dev_vdbg(dev, fmt, ...)						\
+({									\
+	if (0)								\
+		dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \
 })
 #endif
 
-- 
cgit v1.2.3


From 5740c99e9d30b81fcc478797e7215c61e241f44e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 6 Jul 2018 23:57:03 +0200
Subject: vfs: dedupe: return int

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/btrfs/ctree.h   |  4 ++--
 fs/btrfs/ioctl.c   | 10 +++-------
 fs/ocfs2/file.c    | 17 ++++++-----------
 fs/read_write.c    |  4 ++--
 fs/xfs/xfs_file.c  | 19 ++-----------------
 include/linux/fs.h |  2 +-
 6 files changed, 16 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 118346aceea9..1c7c13334423 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3247,8 +3247,8 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
 				struct btrfs_ioctl_space_info *space);
 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 			       struct btrfs_ioctl_balance_args *bargs);
-ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
-			   struct file *dst_file, u64 dst_loff);
+int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+			    struct file *dst_file, u64 dst_loff);
 
 /* file.c */
 int __init btrfs_auto_defrag_init(void);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c2837a32d689..94dc8e6c44ce 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3600,13 +3600,12 @@ out_free:
 	return ret;
 }
 
-ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
-				struct file *dst_file, u64 dst_loff)
+int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+			    struct file *dst_file, u64 dst_loff)
 {
 	struct inode *src = file_inode(src_file);
 	struct inode *dst = file_inode(dst_file);
 	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
-	ssize_t res;
 
 	if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
 		/*
@@ -3617,10 +3616,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
 		return -EINVAL;
 	}
 
-	res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
-	if (res)
-		return res;
-	return olen;
+	return btrfs_extent_same(src, loff, olen, dst, dst_loff);
 }
 
 static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 255f758af03a..f96f018463f7 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2537,19 +2537,14 @@ static int ocfs2_file_clone_range(struct file *file_in,
 					 len, false);
 }
 
-static ssize_t ocfs2_file_dedupe_range(struct file *src_file,
-				       u64 loff,
-				       u64 len,
-				       struct file *dst_file,
-				       u64 dst_loff)
+static int ocfs2_file_dedupe_range(struct file *src_file,
+				   u64 loff,
+				   u64 len,
+				   struct file *dst_file,
+				   u64 dst_loff)
 {
-	int error;
-
-	error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+	return ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
 					  len, true);
-	if (error)
-		return error;
-	return len;
 }
 
 const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/read_write.c b/fs/read_write.c
index f43bb12b4759..fa64e51ef4cf 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1976,7 +1976,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 	u16 count = same->dest_count;
 	struct file *dst_file;
 	loff_t dst_off;
-	ssize_t deduped;
+	int deduped;
 
 	if (!(file->f_mode & FMODE_READ))
 		return -EINVAL;
@@ -2056,7 +2056,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 			else if (deduped < 0)
 				info->status = deduped;
 			else
-				info->bytes_deduped += deduped;
+				info->bytes_deduped += len;
 		}
 
 next_file:
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a3e7767a5715..547ef7e8aec1 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -933,7 +933,7 @@ xfs_file_clone_range(
 				     len, false);
 }
 
-STATIC ssize_t
+STATIC int
 xfs_file_dedupe_range(
 	struct file	*src_file,
 	u64		loff,
@@ -941,23 +941,8 @@ xfs_file_dedupe_range(
 	struct file	*dst_file,
 	u64		dst_loff)
 {
-	struct inode	*srci = file_inode(src_file);
-	u64		max_dedupe;
-	int		error;
-
-	/*
-	 * Since we have to read all these pages in to compare them, cut
-	 * it off at MAX_RW_COUNT/2 rounded down to the nearest block.
-	 * That means we won't do more than MAX_RW_COUNT IO per request.
-	 */
-	max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1);
-	if (len > max_dedupe)
-		len = max_dedupe;
-	error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+	return xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
 				     len, true);
-	if (error)
-		return error;
-	return len;
 }
 
 STATIC int
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..b81c4b7e339f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1749,7 +1749,7 @@ struct file_operations {
 			loff_t, size_t, unsigned int);
 	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
 			u64);
-	ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
+	int (*dedupe_file_range)(struct file *, u64, u64, struct file *,
 			u64);
 } __randomize_layout;
 
-- 
cgit v1.2.3


From 87eb5eb2423213ac0e7315ce5d275f1ff80e0263 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 6 Jul 2018 23:57:03 +0200
Subject: vfs: dedupe: rationalize args

Clean up f_op->dedupe_file_range() interface.

1) Use loff_t for offsets and length instead of u64
2) Order the arguments the same way as {copy|clone}_file_range().

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/btrfs/ctree.h   |  5 +++--
 fs/btrfs/ioctl.c   |  7 ++++---
 fs/ocfs2/file.c    | 12 ++++++------
 fs/read_write.c    |  4 ++--
 fs/xfs/xfs_file.c  | 12 ++++++------
 include/linux/fs.h |  2 +-
 6 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1c7c13334423..d9d924017dfb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3247,8 +3247,9 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
 				struct btrfs_ioctl_space_info *space);
 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 			       struct btrfs_ioctl_balance_args *bargs);
-int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
-			    struct file *dst_file, u64 dst_loff);
+int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
+			    struct file *dst_file, loff_t dst_loff,
+			    u64 olen);
 
 /* file.c */
 int __init btrfs_auto_defrag_init(void);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 94dc8e6c44ce..755c9a306321 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3600,8 +3600,9 @@ out_free:
 	return ret;
 }
 
-int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
-			    struct file *dst_file, u64 dst_loff)
+int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
+			    struct file *dst_file, loff_t dst_loff,
+			    u64 olen)
 {
 	struct inode *src = file_inode(src_file);
 	struct inode *dst = file_inode(dst_file);
@@ -3616,7 +3617,7 @@ int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
 		return -EINVAL;
 	}
 
-	return btrfs_extent_same(src, loff, olen, dst, dst_loff);
+	return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
 }
 
 static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f96f018463f7..9fa35cb6f6e0 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2537,13 +2537,13 @@ static int ocfs2_file_clone_range(struct file *file_in,
 					 len, false);
 }
 
-static int ocfs2_file_dedupe_range(struct file *src_file,
-				   u64 loff,
-				   u64 len,
-				   struct file *dst_file,
-				   u64 dst_loff)
+static int ocfs2_file_dedupe_range(struct file *file_in,
+				   loff_t pos_in,
+				   struct file *file_out,
+				   loff_t pos_out,
+				   u64 len)
 {
-	return ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
 					  len, true);
 }
 
diff --git a/fs/read_write.c b/fs/read_write.c
index fa64e51ef4cf..c31794f92c2c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -2049,8 +2049,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 			info->status = -EINVAL;
 		} else {
 			deduped = dst_file->f_op->dedupe_file_range(file, off,
-							len, dst_file,
-							info->dest_offset);
+							dst_file,
+							info->dest_offset, len);
 			if (deduped == -EBADE)
 				info->status = FILE_DEDUPE_RANGE_DIFFERS;
 			else if (deduped < 0)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 547ef7e8aec1..0f40ba54d83f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -935,13 +935,13 @@ xfs_file_clone_range(
 
 STATIC int
 xfs_file_dedupe_range(
-	struct file	*src_file,
-	u64		loff,
-	u64		len,
-	struct file	*dst_file,
-	u64		dst_loff)
+	struct file	*file_in,
+	loff_t		pos_in,
+	struct file	*file_out,
+	loff_t		pos_out,
+	u64		len)
 {
-	return xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
 				     len, true);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b81c4b7e339f..a8fee2f44981 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1749,7 +1749,7 @@ struct file_operations {
 			loff_t, size_t, unsigned int);
 	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
 			u64);
-	int (*dedupe_file_range)(struct file *, u64, u64, struct file *,
+	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
 			u64);
 } __randomize_layout;
 
-- 
cgit v1.2.3


From 06ae48269d1e0324d806fca30fe77112f4a4a14a Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Fri, 6 Jul 2018 15:13:18 -0700
Subject: lib: reciprocal_div: implement the improved algorithm on the paper
 mentioned

The new added "reciprocal_value_adv" implements the advanced version of the
algorithm described in Figure 4.2 of the paper except when
"divisor > (1U << 31)" whose ceil(log2(d)) result will be 32 which then
requires u128 divide on host. The exception case could be easily handled
before calling "reciprocal_value_adv".

The advanced version requires more complex calculation to get the
reciprocal multiplier and other control variables, but then could reduce
the required emulation operations.

It makes no sense to use this advanced version for host divide emulation,
those extra complexities for calculating multiplier etc could completely
waive our saving on emulation operations.

However, it makes sense to use it for JIT divide code generation (for
example eBPF JIT backends) for which we are willing to trade performance of
JITed code with that of host. As shown by the following pseudo code, the
required emulation operations could go down from 6 (the basic version) to 3
or 4.

To use the result of "reciprocal_value_adv", suppose we want to calculate
n/d, the C-style pseudo code will be the following, it could be easily
changed to real code generation for other JIT targets.

  struct reciprocal_value_adv rvalue;
  u8 pre_shift, exp;

  // handle exception case.
  if (d >= (1U << 31)) {
    result = n >= d;
    return;
  }
  rvalue = reciprocal_value_adv(d, 32)
  exp = rvalue.exp;
  if (rvalue.is_wide_m && !(d & 1)) {
    // floor(log2(d & (2^32 -d)))
    pre_shift = fls(d & -d) - 1;
    rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
  } else {
    pre_shift = 0;
  }

  // code generation starts.
  if (imm == 1U << exp) {
    result = n >> exp;
  } else if (rvalue.is_wide_m) {
    // pre_shift must be zero when reached here.
    t = (n * rvalue.m) >> 32;
    result = n - t;
    result >>= 1;
    result += t;
    result >>= rvalue.sh - 1;
  } else {
    if (pre_shift)
      result = n >> pre_shift;
    result = ((u64)result * rvalue.m) >> 32;
    result >>= rvalue.sh;
  }

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/reciprocal_div.h | 68 ++++++++++++++++++++++++++++++++++++++++++
 lib/reciprocal_div.c           | 41 +++++++++++++++++++++++++
 2 files changed, 109 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h
index e031e9f2f9d8..585ce89c0f33 100644
--- a/include/linux/reciprocal_div.h
+++ b/include/linux/reciprocal_div.h
@@ -25,6 +25,9 @@ struct reciprocal_value {
 	u8 sh1, sh2;
 };
 
+/* "reciprocal_value" and "reciprocal_divide" together implement the basic
+ * version of the algorithm described in Figure 4.1 of the paper.
+ */
 struct reciprocal_value reciprocal_value(u32 d);
 
 static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
@@ -33,4 +36,69 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
 	return (t + ((a - t) >> R.sh1)) >> R.sh2;
 }
 
+struct reciprocal_value_adv {
+	u32 m;
+	u8 sh, exp;
+	bool is_wide_m;
+};
+
+/* "reciprocal_value_adv" implements the advanced version of the algorithm
+ * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose
+ * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The
+ * exception case could be easily handled before calling "reciprocal_value_adv".
+ *
+ * The advanced version requires more complex calculation to get the reciprocal
+ * multiplier and other control variables, but then could reduce the required
+ * emulation operations.
+ *
+ * It makes no sense to use this advanced version for host divide emulation,
+ * those extra complexities for calculating multiplier etc could completely
+ * waive our saving on emulation operations.
+ *
+ * However, it makes sense to use it for JIT divide code generation for which
+ * we are willing to trade performance of JITed code with that of host. As shown
+ * by the following pseudo code, the required emulation operations could go down
+ * from 6 (the basic version) to 3 or 4.
+ *
+ * To use the result of "reciprocal_value_adv", suppose we want to calculate
+ * n/d, the pseudo C code will be:
+ *
+ *   struct reciprocal_value_adv rvalue;
+ *   u8 pre_shift, exp;
+ *
+ *   // handle exception case.
+ *   if (d >= (1U << 31)) {
+ *     result = n >= d;
+ *     return;
+ *   }
+ *
+ *   rvalue = reciprocal_value_adv(d, 32)
+ *   exp = rvalue.exp;
+ *   if (rvalue.is_wide_m && !(d & 1)) {
+ *     // floor(log2(d & (2^32 -d)))
+ *     pre_shift = fls(d & -d) - 1;
+ *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
+ *   } else {
+ *     pre_shift = 0;
+ *   }
+ *
+ *   // code generation starts.
+ *   if (imm == 1U << exp) {
+ *     result = n >> exp;
+ *   } else if (rvalue.is_wide_m) {
+ *     // pre_shift must be zero when reached here.
+ *     t = (n * rvalue.m) >> 32;
+ *     result = n - t;
+ *     result >>= 1;
+ *     result += t;
+ *     result >>= rvalue.sh - 1;
+ *   } else {
+ *     if (pre_shift)
+ *       result = n >> pre_shift;
+ *     result = ((u64)result * rvalue.m) >> 32;
+ *     result >>= rvalue.sh;
+ *   }
+ */
+struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
+
 #endif /* _LINUX_RECIPROCAL_DIV_H */
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
index fcb4ce682c6f..bf043258fa00 100644
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
 #include <linux/kernel.h>
 #include <asm/div64.h>
 #include <linux/reciprocal_div.h>
@@ -26,3 +27,43 @@ struct reciprocal_value reciprocal_value(u32 d)
 	return R;
 }
 EXPORT_SYMBOL(reciprocal_value);
+
+struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec)
+{
+	struct reciprocal_value_adv R;
+	u32 l, post_shift;
+	u64 mhigh, mlow;
+
+	/* ceil(log2(d)) */
+	l = fls(d - 1);
+	/* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to
+	 * be handled before calling "reciprocal_value_adv", please see the
+	 * comment at include/linux/reciprocal_div.h.
+	 */
+	WARN(l == 32,
+	     "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor",
+	     d, __func__);
+	post_shift = l;
+	mlow = 1ULL << (32 + l);
+	do_div(mlow, d);
+	mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec));
+	do_div(mhigh, d);
+
+	for (; post_shift > 0; post_shift--) {
+		u64 lo = mlow >> 1, hi = mhigh >> 1;
+
+		if (lo >= hi)
+			break;
+
+		mlow = lo;
+		mhigh = hi;
+	}
+
+	R.m = (u32)mhigh;
+	R.sh = post_shift;
+	R.exp = l;
+	R.is_wide_m = mhigh > U32_MAX;
+
+	return R;
+}
+EXPORT_SYMBOL(reciprocal_value_adv);
-- 
cgit v1.2.3


From 000244d3dc1f8114e38fe9ee2d9a0986404d9cbe Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 6 Jul 2018 14:44:02 +0200
Subject: net: bridge: fix br_vlan_get_{pvid,info} return values

These two functions return the regular -EINVAL failure in the normal
code path, but return a nonstandard '-1' error otherwise, which gets
interpreted as -EPERM.

Let's change it to -EINVAL for the dummy functions as well.

Fixes: 4d4fd36126d6 ("net: bridge: Publish bridge accessor functions")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 7843b98e1c6e..c20c7e197d07 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -105,13 +105,13 @@ static inline bool br_vlan_enabled(const struct net_device *dev)
 
 static inline int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
 {
-	return -1;
+	return -EINVAL;
 }
 
 static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
 				   struct bridge_vlan_info *p_vinfo)
 {
-	return -1;
+	return -EINVAL;
 }
 #endif
 
-- 
cgit v1.2.3


From 543af5861f41af0a5d2432f6fb5976af50f9cee5 Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli@redhat.com>
Date: Fri, 6 Jul 2018 22:05:38 -0400
Subject: uio: change to use the mutex lock instead of the spin lock

We are hitting a regression with the following commit:

commit a93e7b331568227500186a465fee3c2cb5dffd1f
Author: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Date:   Mon May 14 13:32:23 2018 +1200

    uio: Prevent device destruction while fds are open

The problem is the addition of spin_lock_irqsave in uio_write. This
leads to hitting  uio_write -> copy_from_user -> _copy_from_user ->
might_fault and the logs filling up with sleeping warnings.

I also noticed some uio drivers allocate memory, sleep, grab mutexes
from callouts like open() and release and uio is now doing
spin_lock_irqsave while calling them.

Reported-by: Mike Christie <mchristi@redhat.com>
CC: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Reviewed-by: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio.c          | 32 +++++++++++++-------------------
 include/linux/uio_driver.h |  2 +-
 2 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index b4b2ae1e0473..655ade4fb3b1 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -433,7 +433,6 @@ static int uio_open(struct inode *inode, struct file *filep)
 	struct uio_device *idev;
 	struct uio_listener *listener;
 	int ret = 0;
-	unsigned long flags;
 
 	mutex_lock(&minor_lock);
 	idev = idr_find(&uio_idr, iminor(inode));
@@ -460,10 +459,10 @@ static int uio_open(struct inode *inode, struct file *filep)
 	listener->event_count = atomic_read(&idev->event);
 	filep->private_data = listener;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (idev->info && idev->info->open)
 		ret = idev->info->open(idev->info, inode);
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 	if (ret)
 		goto err_infoopen;
 
@@ -495,12 +494,11 @@ static int uio_release(struct inode *inode, struct file *filep)
 	int ret = 0;
 	struct uio_listener *listener = filep->private_data;
 	struct uio_device *idev = listener->dev;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (idev->info && idev->info->release)
 		ret = idev->info->release(idev->info, inode);
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	module_put(idev->owner);
 	kfree(listener);
@@ -513,12 +511,11 @@ static __poll_t uio_poll(struct file *filep, poll_table *wait)
 	struct uio_listener *listener = filep->private_data;
 	struct uio_device *idev = listener->dev;
 	__poll_t ret = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (!idev->info || !idev->info->irq)
 		ret = -EIO;
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	if (ret)
 		return ret;
@@ -537,12 +534,11 @@ static ssize_t uio_read(struct file *filep, char __user *buf,
 	DECLARE_WAITQUEUE(wait, current);
 	ssize_t retval = 0;
 	s32 event_count;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (!idev->info || !idev->info->irq)
 		retval = -EIO;
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	if (retval)
 		return retval;
@@ -592,9 +588,8 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
 	struct uio_device *idev = listener->dev;
 	ssize_t retval;
 	s32 irq_on;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (!idev->info || !idev->info->irq) {
 		retval = -EIO;
 		goto out;
@@ -618,7 +613,7 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
 	retval = idev->info->irqcontrol(idev->info, irq_on);
 
 out:
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 	return retval ? retval : sizeof(s32);
 }
 
@@ -865,7 +860,7 @@ int __uio_register_device(struct module *owner,
 
 	idev->owner = owner;
 	idev->info = info;
-	spin_lock_init(&idev->info_lock);
+	mutex_init(&idev->info_lock);
 	init_waitqueue_head(&idev->wait);
 	atomic_set(&idev->event, 0);
 
@@ -929,7 +924,6 @@ EXPORT_SYMBOL_GPL(__uio_register_device);
 void uio_unregister_device(struct uio_info *info)
 {
 	struct uio_device *idev;
-	unsigned long flags;
 
 	if (!info || !info->uio_dev)
 		return;
@@ -943,9 +937,9 @@ void uio_unregister_device(struct uio_info *info)
 	if (info->irq && info->irq != UIO_IRQ_CUSTOM)
 		free_irq(info->irq, idev);
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	idev->info = NULL;
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	device_unregister(&idev->dev);
 
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 6c5f2074e14f..6f8b68cd460f 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -75,7 +75,7 @@ struct uio_device {
         struct fasync_struct    *async_queue;
         wait_queue_head_t       wait;
         struct uio_info         *info;
-	spinlock_t		info_lock;
+	struct mutex		info_lock;
         struct kobject          *map_dir;
         struct kobject          *portio_dir;
 };
-- 
cgit v1.2.3


From e0772de8a48b69d39624cdf99fac8f4a3fcc387b Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 19 Jun 2018 17:12:57 +0100
Subject: slimbus: core: add of_slim_device_get() helper

On SLIMBus controllers like Qcom NGD(non ported device), controller
can request logical address once the remote side is powered, having a
helper function like this to explicitly enumerate the bus is helpful.
Also codec drivers which are taking to interface device would need
such a helper too.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/core.c  | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/slimbus.h |  2 ++
 2 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c
index 7ddfc675b131..88248a4ecad9 100644
--- a/drivers/slimbus/core.c
+++ b/drivers/slimbus/core.c
@@ -356,6 +356,45 @@ struct slim_device *slim_get_device(struct slim_controller *ctrl,
 }
 EXPORT_SYMBOL_GPL(slim_get_device);
 
+static int of_slim_match_dev(struct device *dev, void *data)
+{
+	struct device_node *np = data;
+	struct slim_device *sbdev = to_slim_device(dev);
+
+	return (sbdev->dev.of_node == np);
+}
+
+static struct slim_device *of_find_slim_device(struct slim_controller *ctrl,
+					       struct device_node *np)
+{
+	struct slim_device *sbdev;
+	struct device *dev;
+
+	dev = device_find_child(ctrl->dev, np, of_slim_match_dev);
+	if (dev) {
+		sbdev = to_slim_device(dev);
+		return sbdev;
+	}
+
+	return NULL;
+}
+
+/**
+ * of_slim_get_device() - get handle to a device using dt node.
+ *
+ * @ctrl: Controller on which this device will be added/queried
+ * @np: node pointer to device
+ *
+ * Return: pointer to a device if it has already reported. Creates a new
+ * device and returns pointer to it if the device has not yet enumerated.
+ */
+struct slim_device *of_slim_get_device(struct slim_controller *ctrl,
+				       struct device_node *np)
+{
+	return of_find_slim_device(ctrl, np);
+}
+EXPORT_SYMBOL_GPL(of_slim_get_device);
+
 static int slim_device_alloc_laddr(struct slim_device *sbdev,
 				   bool report_present)
 {
diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h
index c36cf121d2cd..efa36a852dc3 100644
--- a/include/linux/slimbus.h
+++ b/include/linux/slimbus.h
@@ -138,6 +138,8 @@ static inline void slim_set_devicedata(struct slim_device *dev, void *data)
 	dev_set_drvdata(&dev->dev, data);
 }
 
+struct slim_device *of_slim_get_device(struct slim_controller *ctrl,
+				       struct device_node *np);
 struct slim_device *slim_get_device(struct slim_controller *ctrl,
 				    struct slim_eaddr *e_addr);
 int slim_get_logical_addr(struct slim_device *sbdev);
-- 
cgit v1.2.3


From db455d66b9900a8a81fe474f11c78c8c8a95eefa Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 19 Jun 2018 17:12:58 +0100
Subject: slimbus: core: rearrange slim_eaddr structure

Rearrange struct slim_eaddr so that the structure is packed correctly
to be able to send in SLIMBus messages.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/slimbus.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h
index efa36a852dc3..63801bcc5e60 100644
--- a/include/linux/slimbus.h
+++ b/include/linux/slimbus.h
@@ -14,16 +14,16 @@ extern struct bus_type slimbus_bus;
 
 /**
  * struct slim_eaddr - Enumeration address for a SLIMbus device
- * @manf_id: Manufacturer Id for the device
- * @prod_code: Product code
- * @dev_index: Device index
  * @instance: Instance value
+ * @dev_index: Device index
+ * @prod_code: Product code
+ * @manf_id: Manufacturer Id for the device
  */
 struct slim_eaddr {
-	u16 manf_id;
-	u16 prod_code;
-	u8 dev_index;
 	u8 instance;
+	u8 dev_index;
+	u16 prod_code;
+	u16 manf_id;
 } __packed;
 
 /**
-- 
cgit v1.2.3


From abb9c9b8b51ba53b47ad7685ad2a0a64dbbf7bf5 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Thu, 5 Jul 2018 14:54:25 +0100
Subject: slimbus: stream: add stream support

This patch adds support to SLIMbus stream apis for slimbus device.
SLIMbus streaming involves adding support to Data Channel Management and
channel Reconfiguration Messages to slim core plus few stream apis.
>From slim device side the apis are very simple mostly inline with other
stream apis.

Currently it only supports Isochronous and Push/Pull transport protocols,
which are sufficient for audio use cases.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/slimbus.rst |   5 +
 drivers/slimbus/Makefile             |   2 +-
 drivers/slimbus/core.c               |   2 +
 drivers/slimbus/slimbus.h            | 189 ++++++++++++++
 drivers/slimbus/stream.c             | 477 +++++++++++++++++++++++++++++++++++
 include/linux/slimbus.h              |  46 ++++
 6 files changed, 720 insertions(+), 1 deletion(-)
 create mode 100644 drivers/slimbus/stream.c

(limited to 'include/linux')

diff --git a/Documentation/driver-api/slimbus.rst b/Documentation/driver-api/slimbus.rst
index a97449cf603a..410eec79b2a1 100644
--- a/Documentation/driver-api/slimbus.rst
+++ b/Documentation/driver-api/slimbus.rst
@@ -125,3 +125,8 @@ Messaging APIs:
 ~~~~~~~~~~~~~~~
 .. kernel-doc:: drivers/slimbus/messaging.c
    :export:
+
+Streaming APIs:
+~~~~~~~~~~~~~~~
+.. kernel-doc:: drivers/slimbus/stream.c
+   :export:
diff --git a/drivers/slimbus/Makefile b/drivers/slimbus/Makefile
index c78c6e16c2df..d9aa011b6804 100644
--- a/drivers/slimbus/Makefile
+++ b/drivers/slimbus/Makefile
@@ -3,7 +3,7 @@
 # Makefile for kernel SLIMbus framework.
 #
 obj-$(CONFIG_SLIMBUS)			+= slimbus.o
-slimbus-y				:= core.o messaging.o sched.o
+slimbus-y				:= core.o messaging.o sched.o stream.o
 
 #Controllers
 obj-$(CONFIG_SLIM_QCOM_CTRL)		+= slim-qcom-ctrl.o
diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c
index 88248a4ecad9..95b00d28ad6e 100644
--- a/drivers/slimbus/core.c
+++ b/drivers/slimbus/core.c
@@ -114,6 +114,8 @@ static int slim_add_device(struct slim_controller *ctrl,
 	sbdev->dev.release = slim_dev_release;
 	sbdev->dev.driver = NULL;
 	sbdev->ctrl = ctrl;
+	INIT_LIST_HEAD(&sbdev->stream_list);
+	spin_lock_init(&sbdev->stream_list_lock);
 
 	if (node)
 		sbdev->dev.of_node = of_node_get(node);
diff --git a/drivers/slimbus/slimbus.h b/drivers/slimbus/slimbus.h
index 63229e8cd050..6dbc2b320704 100644
--- a/drivers/slimbus/slimbus.h
+++ b/drivers/slimbus/slimbus.h
@@ -45,9 +45,20 @@
 #define SLIM_MSG_MC_ASSIGN_LOGICAL_ADDRESS       0x2
 #define SLIM_MSG_MC_REPORT_ABSENT                0xF
 
+/* Data channel management messages */
+#define SLIM_MSG_MC_CONNECT_SOURCE		0x10
+#define SLIM_MSG_MC_CONNECT_SINK		0x11
+#define SLIM_MSG_MC_DISCONNECT_PORT		0x14
+#define SLIM_MSG_MC_CHANGE_CONTENT		0x18
+
 /* Clock pause Reconfiguration messages */
 #define SLIM_MSG_MC_BEGIN_RECONFIGURATION        0x40
 #define SLIM_MSG_MC_NEXT_PAUSE_CLOCK             0x4A
+#define SLIM_MSG_MC_NEXT_DEFINE_CHANNEL          0x50
+#define SLIM_MSG_MC_NEXT_DEFINE_CONTENT          0x51
+#define SLIM_MSG_MC_NEXT_ACTIVATE_CHANNEL        0x54
+#define SLIM_MSG_MC_NEXT_DEACTIVATE_CHANNEL      0x55
+#define SLIM_MSG_MC_NEXT_REMOVE_CHANNEL          0x58
 #define SLIM_MSG_MC_RECONFIGURE_NOW              0x5F
 
 /*
@@ -69,7 +80,15 @@
 /* Standard values per SLIMbus spec needed by controllers and devices */
 #define SLIM_MAX_CLK_GEAR		10
 #define SLIM_MIN_CLK_GEAR		1
+#define SLIM_SLOT_LEN_BITS		4
+
+/* Indicate that the frequency of the flow and the bus frequency are locked */
+#define SLIM_CHANNEL_CONTENT_FL		BIT(7)
 
+/* Standard values per SLIMbus spec needed by controllers and devices */
+#define SLIM_CL_PER_SUPERFRAME		6144
+#define SLIM_SLOTS_PER_SUPERFRAME	(SLIM_CL_PER_SUPERFRAME >> 2)
+#define SLIM_SL_PER_SUPERFRAME		(SLIM_CL_PER_SUPERFRAME >> 2)
 /* Manager's logical address is set to 0xFF per spec */
 #define SLIM_LA_MANAGER 0xFF
 
@@ -167,6 +186,170 @@ struct slim_sched {
 	struct mutex		m_reconf;
 };
 
+/**
+ * enum slim_port_direction: SLIMbus port direction
+ *
+ * @SLIM_PORT_SINK: SLIMbus port is a sink
+ * @SLIM_PORT_SOURCE: SLIMbus port is a source
+ */
+enum slim_port_direction {
+	SLIM_PORT_SINK = 0,
+	SLIM_PORT_SOURCE,
+};
+/**
+ * enum slim_port_state: SLIMbus Port/Endpoint state machine
+ *	according to SLIMbus Spec 2.0
+ * @SLIM_PORT_DISCONNECTED: SLIMbus port is disconnected
+ *	entered from Unconfigure/configured state after
+ *	DISCONNECT_PORT or REMOVE_CHANNEL core command
+ * @SLIM_PORT_UNCONFIGURED: SLIMbus port is in unconfigured state.
+ *	entered from disconnect state after CONNECT_SOURCE/SINK core command
+ * @SLIM_PORT_CONFIGURED: SLIMbus port is in configured state.
+ *	entered from unconfigured state after DEFINE_CHANNEL, DEFINE_CONTENT
+ *	and ACTIVATE_CHANNEL core commands. Ready for data transmission.
+ */
+enum slim_port_state {
+	SLIM_PORT_DISCONNECTED = 0,
+	SLIM_PORT_UNCONFIGURED,
+	SLIM_PORT_CONFIGURED,
+};
+
+/**
+ * enum slim_channel_state: SLIMbus channel state machine used by core.
+ * @SLIM_CH_STATE_DISCONNECTED: SLIMbus channel is disconnected
+ * @SLIM_CH_STATE_ALLOCATED: SLIMbus channel is allocated
+ * @SLIM_CH_STATE_ASSOCIATED: SLIMbus channel is associated with port
+ * @SLIM_CH_STATE_DEFINED: SLIMbus channel parameters are defined
+ * @SLIM_CH_STATE_CONTENT_DEFINED: SLIMbus channel content is defined
+ * @SLIM_CH_STATE_ACTIVE: SLIMbus channel is active and ready for data
+ * @SLIM_CH_STATE_REMOVED: SLIMbus channel is inactive and removed
+ */
+enum slim_channel_state {
+	SLIM_CH_STATE_DISCONNECTED = 0,
+	SLIM_CH_STATE_ALLOCATED,
+	SLIM_CH_STATE_ASSOCIATED,
+	SLIM_CH_STATE_DEFINED,
+	SLIM_CH_STATE_CONTENT_DEFINED,
+	SLIM_CH_STATE_ACTIVE,
+	SLIM_CH_STATE_REMOVED,
+};
+
+/**
+ * enum slim_ch_data_fmt: SLIMbus channel data Type identifiers according to
+ *	Table 60 of SLIMbus Spec 1.01.01
+ * @SLIM_CH_DATA_FMT_NOT_DEFINED: Undefined
+ * @SLIM_CH_DATA_FMT_LPCM_AUDIO: LPCM audio
+ * @SLIM_CH_DATA_FMT_IEC61937_COMP_AUDIO: IEC61937 Compressed audio
+ * @SLIM_CH_DATA_FMT_PACKED_PDM_AUDIO: Packed PDM audio
+ */
+enum slim_ch_data_fmt {
+	SLIM_CH_DATA_FMT_NOT_DEFINED = 0,
+	SLIM_CH_DATA_FMT_LPCM_AUDIO = 1,
+	SLIM_CH_DATA_FMT_IEC61937_COMP_AUDIO = 2,
+	SLIM_CH_DATA_FMT_PACKED_PDM_AUDIO = 3,
+};
+
+/**
+ * enum slim_ch_aux_fmt: SLIMbus channel Aux Field format IDs according to
+ *	Table 63 of SLIMbus Spec 2.0
+ * @SLIM_CH_AUX_FMT_NOT_APPLICABLE: Undefined
+ * @SLIM_CH_AUX_FMT_ZCUV_TUNNEL_IEC60958: ZCUV for tunneling IEC60958
+ * @SLIM_CH_AUX_FMT_USER_DEFINED: User defined
+ */
+enum slim_ch_aux_bit_fmt {
+	SLIM_CH_AUX_FMT_NOT_APPLICABLE = 0,
+	SLIM_CH_AUX_FMT_ZCUV_TUNNEL_IEC60958 = 1,
+	SLIM_CH_AUX_FMT_USER_DEFINED = 0xF,
+};
+
+/**
+ * struct slim_channel  - SLIMbus channel, used for state machine
+ *
+ * @id: ID of channel
+ * @prrate: Presense rate of channel from Table 66 of SLIMbus 2.0 Specs
+ * @seg_dist: segment distribution code from Table 20 of SLIMbus 2.0 Specs
+ * @data_fmt: Data format of channel.
+ * @aux_fmt: Aux format for this channel.
+ * @state: channel state machine
+ */
+struct slim_channel {
+	int id;
+	int prrate;
+	int seg_dist;
+	enum slim_ch_data_fmt data_fmt;
+	enum slim_ch_aux_bit_fmt aux_fmt;
+	enum slim_channel_state state;
+};
+
+/**
+ * struct slim_port  - SLIMbus port
+ *
+ * @id: Port id
+ * @direction: Port direction, Source or Sink.
+ * @state: state machine of port.
+ * @ch: channel associated with this port.
+ */
+struct slim_port {
+	int id;
+	enum slim_port_direction direction;
+	enum slim_port_state state;
+	struct slim_channel ch;
+};
+
+/**
+ * enum slim_transport_protocol: SLIMbus Transport protocol list from
+ *	Table 47 of SLIMbus 2.0 specs.
+ * @SLIM_PROTO_ISO: Isochronous Protocol, no flow control as data rate match
+ *		channel rate flow control embedded in the data.
+ * @SLIM_PROTO_PUSH: Pushed Protocol, includes flow control, Used to carry
+ *		data whose rate	is equal to, or lower than the channel rate.
+ * @SLIM_PROTO_PULL: Pulled Protocol, similar usage as pushed protocol
+ *		but pull is a unicast.
+ * @SLIM_PROTO_LOCKED: Locked Protocol
+ * @SLIM_PROTO_ASYNC_SMPLX: Asynchronous Protocol-Simplex
+ * @SLIM_PROTO_ASYNC_HALF_DUP: Asynchronous Protocol-Half-duplex
+ * @SLIM_PROTO_EXT_SMPLX: Extended Asynchronous Protocol-Simplex
+ * @SLIM_PROTO_EXT_HALF_DUP: Extended Asynchronous Protocol-Half-duplex
+ */
+enum slim_transport_protocol {
+	SLIM_PROTO_ISO = 0,
+	SLIM_PROTO_PUSH,
+	SLIM_PROTO_PULL,
+	SLIM_PROTO_LOCKED,
+	SLIM_PROTO_ASYNC_SMPLX,
+	SLIM_PROTO_ASYNC_HALF_DUP,
+	SLIM_PROTO_EXT_SMPLX,
+	SLIM_PROTO_EXT_HALF_DUP,
+};
+
+/**
+ * struct slim_stream_runtime  - SLIMbus stream runtime instance
+ *
+ * @dev: Name of the stream
+ * @dev: SLIM Device instance associated with this stream
+ * @state: state of stream
+ * @direction: direction of stream
+ * @prot: Transport protocol used in this stream
+ * @rate: Data rate of samples *
+ * @bps: bits per sample
+ * @ratem: rate multipler which is super frame rate/data rate
+ * @num_ports: number of ports
+ * @ports: pointer to instance of ports
+ * @node: list head for stream associated with slim device.
+ */
+struct slim_stream_runtime {
+	const char *name;
+	struct slim_device *dev;
+	int direction;
+	enum slim_transport_protocol prot;
+	unsigned int rate;
+	unsigned int bps;
+	unsigned int ratem;
+	int num_ports;
+	struct slim_port *ports;
+	struct list_head node;
+};
+
 /**
  * struct slim_controller  - Controls every instance of SLIMbus
  *				(similar to 'master' on SPI)
@@ -196,6 +379,10 @@ struct slim_sched {
  * @wakeup: This function pointer implements controller-specific procedure
  *	to wake it up from clock-pause. Framework will call this to bring
  *	the controller out of clock pause.
+ * @enable_stream: This function pointer implements controller-specific procedure
+ *	to enable a stream.
+ * @disable_stream: This function pointer implements controller-specific procedure
+ *	to disable stream.
  *
  *	'Manager device' is responsible for  device management, bandwidth
  *	allocation, channel setup, and port associations per channel.
@@ -237,6 +424,8 @@ struct slim_controller {
 					     struct slim_eaddr *ea, u8 laddr);
 	int			(*get_laddr)(struct slim_controller *ctrl,
 					     struct slim_eaddr *ea, u8 *laddr);
+	int		(*enable_stream)(struct slim_stream_runtime *rt);
+	int		(*disable_stream)(struct slim_stream_runtime *rt);
 	int			(*wakeup)(struct slim_controller *ctrl);
 };
 
diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c
new file mode 100644
index 000000000000..2fa05324ed07
--- /dev/null
+++ b/drivers/slimbus/stream.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/slimbus.h>
+#include <uapi/sound/asound.h>
+#include "slimbus.h"
+
+/**
+ * struct segdist_code - Segment Distributions code from
+ *	Table 20 of SLIMbus Specs Version 2.0
+ *
+ * @ratem: Channel Rate Multipler(Segments per Superframe)
+ * @seg_interval: Number of slots between the first Slot of Segment
+ *		and the first slot of the next  consecutive Segment.
+ * @segdist_code: Segment Distribution Code SD[11:0]
+ * @seg_offset_mask: Segment offset mask in SD[11:0]
+ * @segdist_codes: List of all possible Segmet Distribution codes.
+ */
+static const struct segdist_code {
+	int ratem;
+	int seg_interval;
+	int segdist_code;
+	u32 seg_offset_mask;
+
+} segdist_codes[] = {
+	{1,	1536,	0x200,	 0xdff},
+	{2,	768,	0x100,	 0xcff},
+	{4,	384,	0x080,	 0xc7f},
+	{8,	192,	0x040,	 0xc3f},
+	{16,	96,	0x020,	 0xc1f},
+	{32,	48,	0x010,	 0xc0f},
+	{64,	24,	0x008,	 0xc07},
+	{128,	12,	0x004,	 0xc03},
+	{256,	6,	0x002,	 0xc01},
+	{512,	3,	0x001,	 0xc00},
+	{3,	512,	0xe00,	 0x1ff},
+	{6,	256,	0xd00,	 0x0ff},
+	{12,	128,	0xc80,	 0x07f},
+	{24,	64,	0xc40,	 0x03f},
+	{48,	32,	0xc20,	 0x01f},
+	{96,	16,	0xc10,	 0x00f},
+	{192,	8,	0xc08,	 0x007},
+	{364,	4,	0xc04,	 0x003},
+	{768,	2,	0xc02,	 0x001},
+};
+
+/*
+ * Presence Rate table for all Natural Frequencies
+ * The Presence rate of a constant bitrate stream is mean flow rate of the
+ * stream expressed in occupied Segments of that Data Channel per second.
+ * Table 66 from SLIMbus 2.0 Specs
+ *
+ * Index of the table corresponds to Presence rate code for the respective rate
+ * in the table.
+ */
+static const int slim_presence_rate_table[] = {
+	0, /* Not Indicated */
+	12000,
+	24000,
+	48000,
+	96000,
+	192000,
+	384000,
+	768000,
+	0, /* Reserved */
+	110250,
+	220500,
+	441000,
+	882000,
+	176400,
+	352800,
+	705600,
+	4000,
+	8000,
+	16000,
+	32000,
+	64000,
+	128000,
+	256000,
+	512000,
+};
+
+/*
+ * slim_stream_allocate() - Allocate a new SLIMbus Stream
+ * @dev:Slim device to be associated with
+ * @name: name of the stream
+ *
+ * This is very first call for SLIMbus streaming, this API will allocate
+ * a new SLIMbus stream and return a valid stream runtime pointer for client
+ * to use it in subsequent stream apis. state of stream is set to ALLOCATED
+ *
+ * Return: valid pointer on success and error code on failure.
+ * From ASoC DPCM framework, this state is linked to startup() operation.
+ */
+struct slim_stream_runtime *slim_stream_allocate(struct slim_device *dev,
+						 const char *name)
+{
+	struct slim_stream_runtime *rt;
+
+	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
+	if (!rt)
+		return ERR_PTR(-ENOMEM);
+
+	rt->name = kasprintf(GFP_KERNEL, "slim-%s", name);
+	if (!rt->name) {
+		kfree(rt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	rt->dev = dev;
+	spin_lock(&dev->stream_list_lock);
+	list_add_tail(&rt->node, &dev->stream_list);
+	spin_unlock(&dev->stream_list_lock);
+
+	return rt;
+}
+EXPORT_SYMBOL_GPL(slim_stream_allocate);
+
+static int slim_connect_port_channel(struct slim_stream_runtime *stream,
+				     struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[2];
+	struct slim_val_inf msg = {0, 2, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_CONNECT_SOURCE;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 6, stream->dev->laddr, &msg);
+
+	if (port->direction == SLIM_PORT_SINK)
+		txn.mc = SLIM_MSG_MC_CONNECT_SINK;
+
+	wbuf[0] = port->id;
+	wbuf[1] = port->ch.id;
+	port->ch.state = SLIM_CH_STATE_ASSOCIATED;
+	port->state = SLIM_PORT_UNCONFIGURED;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+static int slim_disconnect_port(struct slim_stream_runtime *stream,
+				struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[1];
+	struct slim_val_inf msg = {0, 1, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_DISCONNECT_PORT;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 5, stream->dev->laddr, &msg);
+
+	wbuf[0] = port->id;
+	port->ch.state = SLIM_CH_STATE_DISCONNECTED;
+	port->state = SLIM_PORT_DISCONNECTED;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+static int slim_deactivate_remove_channel(struct slim_stream_runtime *stream,
+					  struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[1];
+	struct slim_val_inf msg = {0, 1, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_NEXT_DEACTIVATE_CHANNEL;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 5, stream->dev->laddr, &msg);
+	int ret;
+
+	wbuf[0] = port->ch.id;
+	ret = slim_do_transfer(sdev->ctrl, &txn);
+	if (ret)
+		return ret;
+
+	txn.mc = SLIM_MSG_MC_NEXT_REMOVE_CHANNEL;
+	port->ch.state = SLIM_CH_STATE_REMOVED;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+static int slim_get_prate_code(int rate)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(slim_presence_rate_table); i++) {
+		if (rate == slim_presence_rate_table[i])
+			return i;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * slim_stream_prepare() - Prepare a SLIMbus Stream
+ *
+ * @rt: instance of slim stream runtime to configure
+ * @cfg: new configuration for the stream
+ *
+ * This API will configure SLIMbus stream with config parameters from cfg.
+ * return zero on success and error code on failure. From ASoC DPCM framework,
+ * this state is linked to hw_params() operation.
+ */
+int slim_stream_prepare(struct slim_stream_runtime *rt,
+			struct slim_stream_config *cfg)
+{
+	struct slim_controller *ctrl = rt->dev->ctrl;
+	struct slim_port *port;
+	int num_ports, i, port_id;
+
+	if (rt->ports) {
+		dev_err(&rt->dev->dev, "Stream already Prepared\n");
+		return -EINVAL;
+	}
+
+	num_ports = hweight32(cfg->port_mask);
+	rt->ports = kcalloc(num_ports, sizeof(*port), GFP_KERNEL);
+	if (!rt->ports)
+		return -ENOMEM;
+
+	rt->num_ports = num_ports;
+	rt->rate = cfg->rate;
+	rt->bps = cfg->bps;
+	rt->direction = cfg->direction;
+
+	if (cfg->rate % ctrl->a_framer->superfreq) {
+		/*
+		 * data rate not exactly multiple of super frame,
+		 * use PUSH/PULL protocol
+		 */
+		if (cfg->direction == SNDRV_PCM_STREAM_PLAYBACK)
+			rt->prot = SLIM_PROTO_PUSH;
+		else
+			rt->prot = SLIM_PROTO_PULL;
+	} else {
+		rt->prot = SLIM_PROTO_ISO;
+	}
+
+	rt->ratem = cfg->rate/ctrl->a_framer->superfreq;
+
+	i = 0;
+	for_each_set_bit(port_id, &cfg->port_mask, SLIM_DEVICE_MAX_PORTS) {
+		port = &rt->ports[i];
+		port->state = SLIM_PORT_DISCONNECTED;
+		port->id = port_id;
+		port->ch.prrate = slim_get_prate_code(cfg->rate);
+		port->ch.id = cfg->chs[i];
+		port->ch.data_fmt = SLIM_CH_DATA_FMT_NOT_DEFINED;
+		port->ch.aux_fmt = SLIM_CH_AUX_FMT_NOT_APPLICABLE;
+		port->ch.state = SLIM_CH_STATE_ALLOCATED;
+
+		if (cfg->direction == SNDRV_PCM_STREAM_PLAYBACK)
+			port->direction = SLIM_PORT_SINK;
+		else
+			port->direction = SLIM_PORT_SOURCE;
+
+		slim_connect_port_channel(rt, port);
+		i++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(slim_stream_prepare);
+
+static int slim_define_channel_content(struct slim_stream_runtime *stream,
+				       struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[4];
+	struct slim_val_inf msg = {0, 4, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_NEXT_DEFINE_CONTENT;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 8, stream->dev->laddr, &msg);
+
+	wbuf[0] = port->ch.id;
+	wbuf[1] = port->ch.prrate;
+
+	/* Frequency Locked for ISO Protocol */
+	if (stream->prot != SLIM_PROTO_ISO)
+		wbuf[1] |= SLIM_CHANNEL_CONTENT_FL;
+
+	wbuf[2] = port->ch.data_fmt | (port->ch.aux_fmt << 4);
+	wbuf[3] = stream->bps/SLIM_SLOT_LEN_BITS;
+	port->ch.state = SLIM_CH_STATE_CONTENT_DEFINED;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+static int slim_get_segdist_code(int ratem)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(segdist_codes); i++) {
+		if (segdist_codes[i].ratem == ratem)
+			return segdist_codes[i].segdist_code;
+	}
+
+	return -EINVAL;
+}
+
+static int slim_define_channel(struct slim_stream_runtime *stream,
+				       struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[4];
+	struct slim_val_inf msg = {0, 4, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_NEXT_DEFINE_CHANNEL;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 8, stream->dev->laddr, &msg);
+
+	port->ch.seg_dist = slim_get_segdist_code(stream->ratem);
+
+	wbuf[0] = port->ch.id;
+	wbuf[1] = port->ch.seg_dist & 0xFF;
+	wbuf[2] = (stream->prot << 4) | ((port->ch.seg_dist & 0xF00) >> 8);
+	if (stream->prot == SLIM_PROTO_ISO)
+		wbuf[3] = stream->bps/SLIM_SLOT_LEN_BITS;
+	else
+		wbuf[3] = stream->bps/SLIM_SLOT_LEN_BITS + 1;
+
+	port->ch.state = SLIM_CH_STATE_DEFINED;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+static int slim_activate_channel(struct slim_stream_runtime *stream,
+				 struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[1];
+	struct slim_val_inf msg = {0, 1, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_NEXT_ACTIVATE_CHANNEL;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 5, stream->dev->laddr, &msg);
+
+	txn.msg->num_bytes = 1;
+	txn.msg->wbuf = wbuf;
+	wbuf[0] = port->ch.id;
+	port->ch.state = SLIM_CH_STATE_ACTIVE;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+/*
+ * slim_stream_enable() - Enable a prepared SLIMbus Stream
+ *
+ * @stream: instance of slim stream runtime to enable
+ *
+ * This API will enable all the ports and channels associated with
+ * SLIMbus stream
+ *
+ * Return: zero on success and error code on failure. From ASoC DPCM framework,
+ * this state is linked to trigger() start operation.
+ */
+int slim_stream_enable(struct slim_stream_runtime *stream)
+{
+	DEFINE_SLIM_BCAST_TXN(txn, SLIM_MSG_MC_BEGIN_RECONFIGURATION,
+				3, SLIM_LA_MANAGER, NULL);
+	struct slim_controller *ctrl = stream->dev->ctrl;
+	int ret, i;
+
+	if (ctrl->enable_stream) {
+		ret = ctrl->enable_stream(stream);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < stream->num_ports; i++)
+			stream->ports[i].ch.state = SLIM_CH_STATE_ACTIVE;
+
+		return ret;
+	}
+
+	ret = slim_do_transfer(ctrl, &txn);
+	if (ret)
+		return ret;
+
+	/* define channels first before activating them */
+	for (i = 0; i < stream->num_ports; i++) {
+		struct slim_port *port = &stream->ports[i];
+
+		slim_define_channel(stream, port);
+		slim_define_channel_content(stream, port);
+	}
+
+	for (i = 0; i < stream->num_ports; i++) {
+		struct slim_port *port = &stream->ports[i];
+
+		slim_activate_channel(stream, port);
+		port->state = SLIM_PORT_CONFIGURED;
+	}
+	txn.mc = SLIM_MSG_MC_RECONFIGURE_NOW;
+
+	return slim_do_transfer(ctrl, &txn);
+}
+EXPORT_SYMBOL_GPL(slim_stream_enable);
+
+/*
+ * slim_stream_disable() - Disable a SLIMbus Stream
+ *
+ * @stream: instance of slim stream runtime to disable
+ *
+ * This API will disable all the ports and channels associated with
+ * SLIMbus stream
+ *
+ * Return: zero on success and error code on failure. From ASoC DPCM framework,
+ * this state is linked to trigger() pause operation.
+ */
+int slim_stream_disable(struct slim_stream_runtime *stream)
+{
+	DEFINE_SLIM_BCAST_TXN(txn, SLIM_MSG_MC_BEGIN_RECONFIGURATION,
+				3, SLIM_LA_MANAGER, NULL);
+	struct slim_controller *ctrl = stream->dev->ctrl;
+	int ret, i;
+
+	if (ctrl->disable_stream)
+		ctrl->disable_stream(stream);
+
+	ret = slim_do_transfer(ctrl, &txn);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < stream->num_ports; i++)
+		slim_deactivate_remove_channel(stream, &stream->ports[i]);
+
+	txn.mc = SLIM_MSG_MC_RECONFIGURE_NOW;
+
+	return slim_do_transfer(ctrl, &txn);
+}
+EXPORT_SYMBOL_GPL(slim_stream_disable);
+
+/*
+ * slim_stream_unprepare() - Un-prepare a SLIMbus Stream
+ *
+ * @stream: instance of slim stream runtime to unprepare
+ *
+ * This API will un allocate all the ports and channels associated with
+ * SLIMbus stream
+ *
+ * Return: zero on success and error code on failure. From ASoC DPCM framework,
+ * this state is linked to trigger() stop operation.
+ */
+int slim_stream_unprepare(struct slim_stream_runtime *stream)
+{
+	int i;
+
+	for (i = 0; i < stream->num_ports; i++)
+		slim_disconnect_port(stream, &stream->ports[i]);
+
+	kfree(stream->ports);
+	stream->ports = NULL;
+	stream->num_ports = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(slim_stream_unprepare);
+
+/*
+ * slim_stream_free() - Free a SLIMbus Stream
+ *
+ * @stream: instance of slim stream runtime to free
+ *
+ * This API will un allocate all the memory associated with
+ * slim stream runtime, user is not allowed to make an dereference
+ * to stream after this call.
+ *
+ * Return: zero on success and error code on failure. From ASoC DPCM framework,
+ * this state is linked to shutdown() operation.
+ */
+int slim_stream_free(struct slim_stream_runtime *stream)
+{
+	struct slim_device *sdev = stream->dev;
+
+	spin_lock(&sdev->stream_list_lock);
+	list_del(&stream->node);
+	spin_unlock(&sdev->stream_list_lock);
+
+	kfree(stream->name);
+	kfree(stream);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(slim_stream_free);
diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h
index 63801bcc5e60..12c9719b2a55 100644
--- a/include/linux/slimbus.h
+++ b/include/linux/slimbus.h
@@ -48,6 +48,8 @@ struct slim_controller;
  * @ctrl: slim controller instance.
  * @laddr: 1-byte Logical address of this device.
  * @is_laddr_valid: indicates if the laddr is valid or not
+ * @stream_list: List of streams on this device
+ * @stream_list_lock: lock to protect the stream list
  *
  * This is the client/device handle returned when a SLIMbus
  * device is registered with a controller.
@@ -60,6 +62,8 @@ struct slim_device {
 	enum slim_device_status	status;
 	u8			laddr;
 	bool			is_laddr_valid;
+	struct list_head	stream_list;
+	spinlock_t stream_list_lock;
 };
 
 #define to_slim_device(d) container_of(d, struct slim_device, dev)
@@ -108,6 +112,36 @@ struct slim_val_inf {
 	struct	completion	*comp;
 };
 
+#define SLIM_DEVICE_MAX_CHANNELS	256
+/* A SLIMBus Device may have frmo 0 to 31 Ports (inclusive) */
+#define SLIM_DEVICE_MAX_PORTS		32
+
+/**
+ * struct slim_stream_config - SLIMbus stream configuration
+ *	Configuring a stream is done at hw_params or prepare call
+ *	from audio drivers where they have all the required information
+ *	regarding rate, number of channels and so on.
+ *	There is a 1:1 mapping of channel and ports.
+ *
+ * @rate: data rate
+ * @bps: bits per data sample
+ * @ch_count: number of channels
+ * @chs: pointer to list of channel numbers
+ * @port_mask: port mask of ports to use for this stream
+ * @direction: direction of the stream, SNDRV_PCM_STREAM_PLAYBACK
+ *	or SNDRV_PCM_STREAM_CAPTURE.
+ */
+struct slim_stream_config {
+	unsigned int rate;
+	unsigned int bps;
+	/* MAX 256 channels */
+	unsigned int ch_count;
+	unsigned int *chs;
+	/* Max 32 ports per device */
+	unsigned long port_mask;
+	int direction;
+};
+
 /*
  * use a macro to avoid include chaining to get THIS_MODULE
  */
@@ -163,4 +197,16 @@ int slim_readb(struct slim_device *sdev, u32 addr);
 int slim_writeb(struct slim_device *sdev, u32 addr, u8 value);
 int slim_read(struct slim_device *sdev, u32 addr, size_t count, u8 *val);
 int slim_write(struct slim_device *sdev, u32 addr, size_t count, u8 *val);
+
+/* SLIMbus Stream apis */
+struct slim_stream_runtime;
+struct slim_stream_runtime *slim_stream_allocate(struct slim_device *dev,
+						 const char *sname);
+int slim_stream_prepare(struct slim_stream_runtime *stream,
+			struct slim_stream_config *c);
+int slim_stream_enable(struct slim_stream_runtime *stream);
+int slim_stream_disable(struct slim_stream_runtime *stream);
+int slim_stream_unprepare(struct slim_stream_runtime *stream);
+int slim_stream_free(struct slim_stream_runtime *stream);
+
 #endif /* _LINUX_SLIMBUS_H */
-- 
cgit v1.2.3


From ea614629c69c2c0258ef6414b017c998e3cc61a7 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 16 Jun 2018 18:53:57 -0700
Subject: linux/device.h: fix kernel-doc notation warning

Fix kernel-doc build warning (missing " *" at beginning of line):

../include/linux/device.h:93: warning: bad line:                         this bus.

Fixes: 07397df29e57c ("dma-mapping: move dma configuration to bus infrastructure")

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Nipun Gupta <nipun.gupta@nxp.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 2eaa9ea13c09..575c5a35ece5 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -90,7 +90,7 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  * @num_vf:	Called to find out how many virtual functions a device on this
  *		bus supports.
  * @dma_configure:	Called to setup DMA configuration on a device on
-			this bus.
+ *			this bus.
  * @pm:		Power management operations of this bus, callback the specific
  *		device driver's pm-ops.
  * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
-- 
cgit v1.2.3


From ac3167257b9fe16c9426c2087ead1c9f1b0992b1 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 19 Jun 2018 22:47:28 -0700
Subject: headers: separate linux/mod_devicetable.h from
 linux/platform_device.h

At over 4000 #includes, <linux/platform_device.h> is the 9th most
#included header file in the Linux kernel.  It does not need
<linux/mod_devicetable.h>, so drop that header and explicitly add
<linux/mod_devicetable.h> to source files that need it.

   4146 #include <linux/platform_device.h>

After this patch, there are 225 files that use <linux/mod_devicetable.h>,
for a reduction of around 3900 times that <linux/mod_devicetable.h>
does not have to be read & parsed.

    225 #include <linux/mod_devicetable.h>

This patch was build-tested on 20 different arch-es.

It also makes these drivers SubmitChecklist#1 compliant.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kbuild test robot <lkp@intel.com> # drivers/media/platform/vimc/
Reported-by: kbuild test robot <lkp@intel.com> # drivers/pinctrl/pinctrl-u300.c
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-mmp/sram.c                                      | 1 +
 drivers/ata/pata_imx.c                                        | 1 +
 drivers/ata/pata_samsung_cf.c                                 | 1 +
 drivers/auxdisplay/hd44780.c                                  | 1 +
 drivers/char/hw_random/atmel-rng.c                            | 1 +
 drivers/char/hw_random/exynos-trng.c                          | 1 +
 drivers/char/hw_random/imx-rngc.c                             | 1 +
 drivers/char/hw_random/powernv-rng.c                          | 1 +
 drivers/crypto/mediatek/mtk-platform.c                        | 1 +
 drivers/crypto/qce/core.c                                     | 1 +
 drivers/crypto/stm32/stm32_crc32.c                            | 1 +
 drivers/crypto/ux500/cryp/cryp_core.c                         | 1 +
 drivers/crypto/ux500/hash/hash_core.c                         | 1 +
 drivers/devfreq/tegra-devfreq.c                               | 1 +
 drivers/dma/ep93xx_dma.c                                      | 1 +
 drivers/dma/s3c24xx-dma.c                                     | 1 +
 drivers/extcon/extcon-intel-cht-wc.c                          | 1 +
 drivers/extcon/extcon-qcom-spmi-misc.c                        | 1 +
 drivers/gpu/drm/mediatek/mtk_cec.c                            | 1 +
 drivers/gpu/drm/sun4i/sun6i_drc.c                             | 1 +
 drivers/hsi/controllers/omap_ssi_port.c                       | 1 +
 drivers/hwmon/max197.c                                        | 1 +
 drivers/hwmon/mc13783-adc.c                                   | 1 +
 drivers/media/platform/coda/imx-vdoa.c                        | 1 +
 drivers/media/platform/rcar-fcp.c                             | 1 +
 drivers/media/platform/vimc/vimc-capture.c                    | 1 +
 drivers/media/platform/vimc/vimc-debayer.c                    | 1 +
 drivers/media/platform/vimc/vimc-scaler.c                     | 1 +
 drivers/media/platform/vimc/vimc-sensor.c                     | 1 +
 drivers/memory/tegra/tegra186.c                               | 1 +
 drivers/mfd/atmel-hlcdc.c                                     | 1 +
 drivers/mfd/cros_ec_dev.c                                     | 1 +
 drivers/mtd/nand/raw/brcmnand/brcmstb_nand.c                  | 1 +
 drivers/net/ethernet/calxeda/xgmac.c                          | 1 +
 drivers/net/ethernet/faraday/ftmac100.c                       | 1 +
 drivers/net/wireless/ath/ath9k/ahb.c                          | 1 +
 drivers/net/wireless/ti/wl12xx/main.c                         | 1 +
 drivers/net/wireless/ti/wl18xx/main.c                         | 1 +
 drivers/nvmem/lpc18xx_eeprom.c                                | 1 +
 drivers/nvmem/mtk-efuse.c                                     | 1 +
 drivers/nvmem/qfprom.c                                        | 1 +
 drivers/nvmem/uniphier-efuse.c                                | 1 +
 drivers/perf/arm-ccn.c                                        | 1 +
 drivers/pinctrl/intel/pinctrl-merrifield.c                    | 1 +
 drivers/pinctrl/pinctrl-u300.c                                | 1 +
 drivers/pinctrl/sprd/pinctrl-sprd-sc9860.c                    | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c               | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c              | 1 +
 drivers/platform/goldfish/goldfish_pipe.c                     | 1 +
 drivers/platform/x86/intel_bxtwc_tmu.c                        | 1 +
 drivers/power/avs/smartreflex.c                               | 1 +
 drivers/power/reset/ltc2952-poweroff.c                        | 1 +
 drivers/power/supply/max8998_charger.c                        | 1 +
 drivers/power/supply/olpc_battery.c                           | 1 +
 drivers/ptp/ptp_dte.c                                         | 1 +
 drivers/regulator/tps65912-regulator.c                        | 1 +
 drivers/reset/reset-ath79.c                                   | 1 +
 drivers/reset/reset-axs10x.c                                  | 1 +
 drivers/reset/reset-imx7.c                                    | 1 +
 drivers/rtc/rtc-coh901331.c                                   | 1 +
 drivers/rtc/rtc-cpcap.c                                       | 1 +
 drivers/rtc/rtc-ftrtc010.c                                    | 1 +
 drivers/rtc/rtc-mc13xxx.c                                     | 1 +
 drivers/rtc/rtc-mxc_v2.c                                      | 1 +
 drivers/rtc/rtc-r7301.c                                       | 1 +
 drivers/rtc/rtc-sh.c                                          | 1 +
 drivers/rtc/rtc-tegra.c                                       | 1 +
 drivers/siox/siox-bus-gpio.c                                  | 1 +
 drivers/tty/goldfish.c                                        | 1 +
 drivers/tty/serial/8250/8250_em.c                             | 1 +
 drivers/tty/serial/sccnxp.c                                   | 1 +
 drivers/usb/gadget/udc/fsl_mxc_udc.c                          | 1 +
 drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c | 1 +
 drivers/w1/masters/mxc_w1.c                                   | 1 +
 drivers/watchdog/coh901327_wdt.c                              | 1 +
 drivers/watchdog/davinci_wdt.c                                | 1 +
 drivers/watchdog/imgpdc_wdt.c                                 | 1 +
 drivers/watchdog/max63xx_wdt.c                                | 1 +
 drivers/watchdog/max77620_wdt.c                               | 1 +
 drivers/watchdog/moxart_wdt.c                                 | 1 +
 drivers/watchdog/omap_wdt.c                                   | 1 +
 drivers/watchdog/tangox_wdt.c                                 | 1 +
 include/linux/platform_device.h                               | 2 +-
 90 files changed, 90 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-mmp/sram.c b/arch/arm/mach-mmp/sram.c
index bf5e64906e65..ba91e4fe444d 100644
--- a/arch/arm/mach-mmp/sram.c
+++ b/arch/arm/mach-mmp/sram.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c
index d4caa23f5a88..6f0534047c6d 100644
--- a/drivers/ata/pata_imx.c
+++ b/drivers/ata/pata_imx.c
@@ -17,6 +17,7 @@
 #include <linux/clk.h>
 #include <linux/libata.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #define DRV_NAME "pata_imx"
diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c
index bb96dc35950d..f5bd44b8bd63 100644
--- a/drivers/ata/pata_samsung_cf.c
+++ b/drivers/ata/pata_samsung_cf.c
@@ -17,6 +17,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/clk.h>
 #include <linux/libata.h>
diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c
index 78d8f1986fec..f1a42f0f1ded 100644
--- a/drivers/auxdisplay/hd44780.c
+++ b/drivers/auxdisplay/hd44780.c
@@ -9,6 +9,7 @@
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
 #include <linux/slab.h>
diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index 661c82cde0f2..433426242b87 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -8,6 +8,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/clk.h>
diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c
index 1947aed7c044..94235761955c 100644
--- a/drivers/char/hw_random/exynos-trng.c
+++ b/drivers/char/hw_random/exynos-trng.c
@@ -19,6 +19,7 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c
index 250123bc4905..14730be54edf 100644
--- a/drivers/char/hw_random/imx-rngc.c
+++ b/drivers/char/hw_random/imx-rngc.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/clk.h>
diff --git a/drivers/char/hw_random/powernv-rng.c b/drivers/char/hw_random/powernv-rng.c
index 263a5bb8e605..791182aa8e04 100644
--- a/drivers/char/hw_random/powernv-rng.c
+++ b/drivers/char/hw_random/powernv-rng.c
@@ -10,6 +10,7 @@
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/random.h>
diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c
index b182e941b0cd..ee0404e27a0f 100644
--- a/drivers/crypto/mediatek/mtk-platform.c
+++ b/drivers/crypto/mediatek/mtk-platform.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include "mtk-platform.h"
diff --git a/drivers/crypto/qce/core.c b/drivers/crypto/qce/core.c
index 718b32a3112e..1c3b36b75467 100644
--- a/drivers/crypto/qce/core.c
+++ b/drivers/crypto/qce/core.c
@@ -14,6 +14,7 @@
 #include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32_crc32.c
index 8f09b8430893..a4a21fcf1e17 100644
--- a/drivers/crypto/stm32/stm32_crc32.c
+++ b/drivers/crypto/stm32/stm32_crc32.c
@@ -7,6 +7,7 @@
 #include <linux/bitrev.h>
 #include <linux/clk.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include <crypto/internal/hash.h>
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index cb31b59c9d53..d2663a4e1f5e 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -20,6 +20,7 @@
 #include <linux/irqreturn.h>
 #include <linux/klist.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/semaphore.h>
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 2d0a677bcc76..b141b74a084e 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -21,6 +21,7 @@
 #include <linux/klist.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/crypto.h>
 
diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c
index ae712159246f..c59d2eee5d30 100644
--- a/drivers/devfreq/tegra-devfreq.c
+++ b/drivers/devfreq/tegra-devfreq.c
@@ -24,6 +24,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/reset.h>
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index ec240592f5c8..a15592383d4e 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index 7056fe7513b4..64744eb88720 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -35,6 +35,7 @@
 #include <linux/interrupt.h>
 #include <linux/clk.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/platform_data/dma-s3c24xx.h>
 
diff --git a/drivers/extcon/extcon-intel-cht-wc.c b/drivers/extcon/extcon-intel-cht-wc.c
index b7e9ea377d70..5e1dd2772278 100644
--- a/drivers/extcon/extcon-intel-cht-wc.c
+++ b/drivers/extcon/extcon-intel-cht-wc.c
@@ -20,6 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/mfd/intel_soc_pmic.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
diff --git a/drivers/extcon/extcon-qcom-spmi-misc.c b/drivers/extcon/extcon-qcom-spmi-misc.c
index 660bbf163bf5..72bc0f2478e2 100644
--- a/drivers/extcon/extcon-qcom-spmi-misc.c
+++ b/drivers/extcon/extcon-qcom-spmi-misc.c
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c
index 7a3eb8c17ef9..5ce84d0dbf81 100644
--- a/drivers/gpu/drm/mediatek/mtk_cec.c
+++ b/drivers/gpu/drm/mediatek/mtk_cec.c
@@ -15,6 +15,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include "mtk_cec.h"
diff --git a/drivers/gpu/drm/sun4i/sun6i_drc.c b/drivers/gpu/drm/sun4i/sun6i_drc.c
index b5e071a49045..88eb268fdf73 100644
--- a/drivers/gpu/drm/sun4i/sun6i_drc.c
+++ b/drivers/gpu/drm/sun4i/sun6i_drc.c
@@ -12,6 +12,7 @@
 #include <linux/clk.h>
 #include <linux/component.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
diff --git a/drivers/hsi/controllers/omap_ssi_port.c b/drivers/hsi/controllers/omap_ssi_port.c
index 7765de2f1ef1..2ada82d2ec8c 100644
--- a/drivers/hsi/controllers/omap_ssi_port.c
+++ b/drivers/hsi/controllers/omap_ssi_port.c
@@ -20,6 +20,7 @@
  * 02110-1301 USA
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/hwmon/max197.c b/drivers/hwmon/max197.c
index 638567fb7cd8..3d9e210beedf 100644
--- a/drivers/hwmon/max197.c
+++ b/drivers/hwmon/max197.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/slab.h>
diff --git a/drivers/hwmon/mc13783-adc.c b/drivers/hwmon/mc13783-adc.c
index 67860ad2e3d9..78fe8759d2a9 100644
--- a/drivers/hwmon/mc13783-adc.c
+++ b/drivers/hwmon/mc13783-adc.c
@@ -23,6 +23,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/hwmon.h>
 #include <linux/slab.h>
 #include <linux/init.h>
diff --git a/drivers/media/platform/coda/imx-vdoa.c b/drivers/media/platform/coda/imx-vdoa.c
index 85a66e4e2f9a..96ab4b61669a 100644
--- a/drivers/media/platform/coda/imx-vdoa.c
+++ b/drivers/media/platform/coda/imx-vdoa.c
@@ -18,6 +18,7 @@
 #include <linux/device.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/videodev2.h>
diff --git a/drivers/media/platform/rcar-fcp.c b/drivers/media/platform/rcar-fcp.c
index 2988031d285d..3ad9d0dc6fa7 100644
--- a/drivers/media/platform/rcar-fcp.c
+++ b/drivers/media/platform/rcar-fcp.c
@@ -14,6 +14,7 @@
 #include <linux/device.h>
 #include <linux/list.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/media/platform/vimc/vimc-capture.c b/drivers/media/platform/vimc/vimc-capture.c
index 88a1e5670c72..ec68feaac378 100644
--- a/drivers/media/platform/vimc/vimc-capture.c
+++ b/drivers/media/platform/vimc/vimc-capture.c
@@ -17,6 +17,7 @@
 
 #include <linux/component.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <media/v4l2-ioctl.h>
 #include <media/videobuf2-core.h>
diff --git a/drivers/media/platform/vimc/vimc-debayer.c b/drivers/media/platform/vimc/vimc-debayer.c
index 6e10b63ba9ec..77887f66f323 100644
--- a/drivers/media/platform/vimc/vimc-debayer.c
+++ b/drivers/media/platform/vimc/vimc-debayer.c
@@ -17,6 +17,7 @@
 
 #include <linux/component.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/vmalloc.h>
 #include <linux/v4l2-mediabus.h>
diff --git a/drivers/media/platform/vimc/vimc-scaler.c b/drivers/media/platform/vimc/vimc-scaler.c
index e583ec7a91da..b0952ee86296 100644
--- a/drivers/media/platform/vimc/vimc-scaler.c
+++ b/drivers/media/platform/vimc/vimc-scaler.c
@@ -17,6 +17,7 @@
 
 #include <linux/component.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/vmalloc.h>
 #include <linux/v4l2-mediabus.h>
diff --git a/drivers/media/platform/vimc/vimc-sensor.c b/drivers/media/platform/vimc/vimc-sensor.c
index 605e2a2d5dd5..b2b89315e7ba 100644
--- a/drivers/media/platform/vimc/vimc-sensor.c
+++ b/drivers/media/platform/vimc/vimc-sensor.c
@@ -19,6 +19,7 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/v4l2-mediabus.h>
 #include <linux/vmalloc.h>
diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
index 7254fb596979..ffda903c49bb 100644
--- a/drivers/memory/tegra/tegra186.c
+++ b/drivers/memory/tegra/tegra186.c
@@ -8,6 +8,7 @@
 
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include <dt-bindings/memory/tegra186-mc.h>
diff --git a/drivers/mfd/atmel-hlcdc.c b/drivers/mfd/atmel-hlcdc.c
index 4b15b0840f16..e82543bcfdc8 100644
--- a/drivers/mfd/atmel-hlcdc.c
+++ b/drivers/mfd/atmel-hlcdc.c
@@ -22,6 +22,7 @@
 #include <linux/mfd/atmel-hlcdc.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 306e1fd109bd..27af62ed480a 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -20,6 +20,7 @@
 #include <linux/fs.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmstb_nand.c b/drivers/mtd/nand/raw/brcmnand/brcmstb_nand.c
index 5c271077ac87..489af7bc005a 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmstb_nand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmstb_nand.c
@@ -13,6 +13,7 @@
 
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include "brcmnand.h"
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index 2c63afff1382..13741ee49b9b 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -14,6 +14,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/kernel.h>
 #include <linux/circ_buf.h>
 #include <linux/interrupt.h>
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index aecc76504b69..a1197d3adbe0 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -29,6 +29,7 @@
 #include <linux/io.h>
 #include <linux/mii.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/netdevice.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/net/wireless/ath/ath9k/ahb.c b/drivers/net/wireless/ath/ath9k/ahb.c
index 2bd982c3a479..63019c3de034 100644
--- a/drivers/net/wireless/ath/ath9k/ahb.c
+++ b/drivers/net/wireless/ath/ath9k/ahb.c
@@ -19,6 +19,7 @@
 #include <linux/nl80211.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include "ath9k.h"
 
 static const struct platform_device_id ath9k_platform_id_table[] = {
diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c
index 22009e14a8fc..4a4f797bb10f 100644
--- a/drivers/net/wireless/ti/wl12xx/main.c
+++ b/drivers/net/wireless/ti/wl12xx/main.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include <linux/err.h>
diff --git a/drivers/net/wireless/ti/wl18xx/main.c b/drivers/net/wireless/ti/wl18xx/main.c
index ca0f936fc119..496b9b63cea1 100644
--- a/drivers/net/wireless/ti/wl18xx/main.c
+++ b/drivers/net/wireless/ti/wl18xx/main.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/ip.h>
 #include <linux/firmware.h>
diff --git a/drivers/nvmem/lpc18xx_eeprom.c b/drivers/nvmem/lpc18xx_eeprom.c
index b1af966206a6..a9534a6e8636 100644
--- a/drivers/nvmem/lpc18xx_eeprom.c
+++ b/drivers/nvmem/lpc18xx_eeprom.c
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/nvmem-provider.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
diff --git a/drivers/nvmem/mtk-efuse.c b/drivers/nvmem/mtk-efuse.c
index e66adf17a747..58c998b2e3bc 100644
--- a/drivers/nvmem/mtk-efuse.c
+++ b/drivers/nvmem/mtk-efuse.c
@@ -14,6 +14,7 @@
 
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/io.h>
 #include <linux/nvmem-provider.h>
 #include <linux/platform_device.h>
diff --git a/drivers/nvmem/qfprom.c b/drivers/nvmem/qfprom.c
index 4f650baad983..fbb1f1df6fc7 100644
--- a/drivers/nvmem/qfprom.c
+++ b/drivers/nvmem/qfprom.c
@@ -13,6 +13,7 @@
 
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/io.h>
 #include <linux/nvmem-provider.h>
 #include <linux/platform_device.h>
diff --git a/drivers/nvmem/uniphier-efuse.c b/drivers/nvmem/uniphier-efuse.c
index 271f0b2ff86a..286910336ef6 100644
--- a/drivers/nvmem/uniphier-efuse.c
+++ b/drivers/nvmem/uniphier-efuse.c
@@ -16,6 +16,7 @@
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/nvmem-provider.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index b416ee18e6bb..860f5bd8dce8 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c
index d9357054d41d..acea41073dfd 100644
--- a/drivers/pinctrl/intel/pinctrl-merrifield.c
+++ b/drivers/pinctrl/intel/pinctrl-merrifield.c
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pinctrl/pinconf.h>
 #include <linux/pinctrl/pinconf-generic.h>
diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c
index 9cc80a500880..2b1a61dba224 100644
--- a/drivers/pinctrl/pinctrl-u300.c
+++ b/drivers/pinctrl/pinctrl-u300.c
@@ -13,6 +13,7 @@
  */
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/slab.h>
diff --git a/drivers/pinctrl/sprd/pinctrl-sprd-sc9860.c b/drivers/pinctrl/sprd/pinctrl-sprd-sc9860.c
index 3cdad8bc8f93..5702b6704137 100644
--- a/drivers/pinctrl/sprd/pinctrl-sprd-sc9860.c
+++ b/drivers/pinctrl/sprd/pinctrl-sprd-sc9860.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include "pinctrl-sprd.h"
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
index 58825f68b58b..c93752cd51c5 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c
index 9f449b35e300..130ce2484705 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c
index 0b10ebc07eb8..5a20121d2c93 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c
index 8e4d45fea885..76c57b0d5d88 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
index 24788a74c254..4a0027373247 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c
index d5d5e579cb08..9a55972e9c16 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c
index 032619ad0e73..544f4b465308 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c
index 535bb2e935e4..9a15998b4f73 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c
@@ -15,6 +15,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c
index 0f921a653164..0402077d3b3a 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 4866a8b12e98..2da567540c2d 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -48,6 +48,7 @@
 
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
diff --git a/drivers/platform/x86/intel_bxtwc_tmu.c b/drivers/platform/x86/intel_bxtwc_tmu.c
index ea865d4ca220..227943a20212 100644
--- a/drivers/platform/x86/intel_bxtwc_tmu.c
+++ b/drivers/platform/x86/intel_bxtwc_tmu.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/intel_soc_pmic.h>
diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c
index cb0237143dbe..1360a7fa542c 100644
--- a/drivers/power/avs/smartreflex.c
+++ b/drivers/power/avs/smartreflex.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/interrupt.h>
 #include <linux/clk.h>
 #include <linux/io.h>
diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c
index bfcd6fba6363..6b911b6b10a6 100644
--- a/drivers/power/reset/ltc2952-poweroff.c
+++ b/drivers/power/reset/ltc2952-poweroff.c
@@ -62,6 +62,7 @@
 #include <linux/slab.h>
 #include <linux/kmod.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/gpio/consumer.h>
 #include <linux/reboot.h>
 
diff --git a/drivers/power/supply/max8998_charger.c b/drivers/power/supply/max8998_charger.c
index b64cf0f14142..cad7d1a8feec 100644
--- a/drivers/power/supply/max8998_charger.c
+++ b/drivers/power/supply/max8998_charger.c
@@ -21,6 +21,7 @@
 
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
diff --git a/drivers/power/supply/olpc_battery.c b/drivers/power/supply/olpc_battery.c
index 3bc2eea7b3b7..6da79ae14860 100644
--- a/drivers/power/supply/olpc_battery.c
+++ b/drivers/power/supply/olpc_battery.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 #include <linux/err.h>
 #include <linux/device.h>
diff --git a/drivers/ptp/ptp_dte.c b/drivers/ptp/ptp_dte.c
index 6edd3b9c7f01..a7dc43368df4 100644
--- a/drivers/ptp/ptp_dte.c
+++ b/drivers/ptp/ptp_dte.c
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/types.h>
diff --git a/drivers/regulator/tps65912-regulator.c b/drivers/regulator/tps65912-regulator.c
index a4921a70da55..276faeddc370 100644
--- a/drivers/regulator/tps65912-regulator.c
+++ b/drivers/regulator/tps65912-regulator.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 
diff --git a/drivers/reset/reset-ath79.c b/drivers/reset/reset-ath79.c
index 2674880e5492..a7455916e396 100644
--- a/drivers/reset/reset-ath79.c
+++ b/drivers/reset/reset-ath79.c
@@ -17,6 +17,7 @@
 
 #include <linux/io.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 #include <linux/reboot.h>
diff --git a/drivers/reset/reset-axs10x.c b/drivers/reset/reset-axs10x.c
index afb298e46bd9..a854ef41364d 100644
--- a/drivers/reset/reset-axs10x.c
+++ b/drivers/reset/reset-axs10x.c
@@ -10,6 +10,7 @@
 
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 
diff --git a/drivers/reset/reset-imx7.c b/drivers/reset/reset-imx7.c
index 4db177bc89bc..14bc78d28707 100644
--- a/drivers/reset/reset-imx7.c
+++ b/drivers/reset/reset-imx7.c
@@ -16,6 +16,7 @@
  */
 
 #include <linux/mfd/syscon.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 #include <linux/regmap.h>
diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c
index 2fc517498a5d..fc5cf5c44ae7 100644
--- a/drivers/rtc/rtc-coh901331.c
+++ b/drivers/rtc/rtc-coh901331.c
@@ -8,6 +8,7 @@
  */
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/rtc.h>
 #include <linux/clk.h>
 #include <linux/interrupt.h>
diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c
index a8856f2b9bc2..6b477174a82f 100644
--- a/drivers/rtc/rtc-cpcap.c
+++ b/drivers/rtc/rtc-cpcap.c
@@ -24,6 +24,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
diff --git a/drivers/rtc/rtc-ftrtc010.c b/drivers/rtc/rtc-ftrtc010.c
index 61f798c6101f..8f1dd88fa827 100644
--- a/drivers/rtc/rtc-ftrtc010.c
+++ b/drivers/rtc/rtc-ftrtc010.c
@@ -26,6 +26,7 @@
 #include <linux/platform_device.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/clk.h>
 
 #define DRV_NAME        "rtc-ftrtc010"
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index 1f892b238ddb..0fa33708fc49 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -13,6 +13,7 @@
 #include <linux/platform_device.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/rtc.h>
 
diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c
index c75f26dc8fcc..007879a5042d 100644
--- a/drivers/rtc/rtc-mxc_v2.c
+++ b/drivers/rtc/rtc-mxc_v2.c
@@ -8,6 +8,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 
diff --git a/drivers/rtc/rtc-r7301.c b/drivers/rtc/rtc-r7301.c
index 169704b2ce13..1943c8151152 100644
--- a/drivers/rtc/rtc-r7301.c
+++ b/drivers/rtc/rtc-r7301.c
@@ -11,6 +11,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/delay.h>
 #include <linux/regmap.h>
 #include <linux/platform_device.h>
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 4f98543d1ea5..776b70a14e03 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -15,6 +15,7 @@
  * for more details.
  */
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/kernel.h>
 #include <linux/bcd.h>
 #include <linux/rtc.h>
diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c
index 66efff60c4d5..8dc48fe7fc35 100644
--- a/drivers/rtc/rtc-tegra.c
+++ b/drivers/rtc/rtc-tegra.c
@@ -25,6 +25,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/rtc.h>
diff --git a/drivers/siox/siox-bus-gpio.c b/drivers/siox/siox-bus-gpio.c
index ea7ef982968b..46b4cda36bac 100644
--- a/drivers/siox/siox-bus-gpio.c
+++ b/drivers/siox/siox-bus-gpio.c
@@ -5,6 +5,7 @@
 
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include <linux/delay.h>
diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c
index 37caba7c3aff..c8c5cdfc5e19 100644
--- a/drivers/tty/goldfish.c
+++ b/drivers/tty/goldfish.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/goldfish.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
diff --git a/drivers/tty/serial/8250/8250_em.c b/drivers/tty/serial/8250/8250_em.c
index f6a86f2bc4e5..2a76e22d2ec0 100644
--- a/drivers/tty/serial/8250/8250_em.c
+++ b/drivers/tty/serial/8250/8250_em.c
@@ -8,6 +8,7 @@
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/serial_8250.h>
 #include <linux/serial_reg.h>
 #include <linux/platform_device.h>
diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c
index d6ae3086c2a2..339befdd2f4d 100644
--- a/drivers/tty/serial/sccnxp.c
+++ b/drivers/tty/serial/sccnxp.c
@@ -14,6 +14,7 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/device.h>
 #include <linux/console.h>
 #include <linux/serial_core.h>
diff --git a/drivers/usb/gadget/udc/fsl_mxc_udc.c b/drivers/usb/gadget/udc/fsl_mxc_udc.c
index f29cf5c6160c..5a321992decc 100644
--- a/drivers/usb/gadget/udc/fsl_mxc_udc.c
+++ b/drivers/usb/gadget/udc/fsl_mxc_udc.c
@@ -11,6 +11,7 @@
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/fsl_devices.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c
index 80dc47347e21..3079a3df8c37 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c
@@ -12,6 +12,7 @@
 #include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/gpio/consumer.h>
diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c
index 8851d441e5fd..50b46c4399ea 100644
--- a/drivers/w1/masters/mxc_w1.c
+++ b/drivers/w1/masters/mxc_w1.c
@@ -17,6 +17,7 @@
 #include <linux/io.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include <linux/w1.h>
diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c
index e3a78f927f83..f29d1edc5bad 100644
--- a/drivers/watchdog/coh901327_wdt.c
+++ b/drivers/watchdog/coh901327_wdt.c
@@ -7,6 +7,7 @@
  * Author: Linus Walleij <linus.walleij@stericsson.com>
  */
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
 #include <linux/interrupt.h>
diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index 6c6594261cb7..ebb85d60b6d5 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c
@@ -13,6 +13,7 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/watchdog.h>
diff --git a/drivers/watchdog/imgpdc_wdt.c b/drivers/watchdog/imgpdc_wdt.c
index 6ed39dee995f..a3134ffa59f8 100644
--- a/drivers/watchdog/imgpdc_wdt.c
+++ b/drivers/watchdog/imgpdc_wdt.c
@@ -44,6 +44,7 @@
 #include <linux/io.h>
 #include <linux/log2.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/watchdog.h>
diff --git a/drivers/watchdog/max63xx_wdt.c b/drivers/watchdog/max63xx_wdt.c
index ac5840d9689a..bf6a068245ba 100644
--- a/drivers/watchdog/max63xx_wdt.c
+++ b/drivers/watchdog/max63xx_wdt.c
@@ -17,6 +17,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/watchdog.h>
diff --git a/drivers/watchdog/max77620_wdt.c b/drivers/watchdog/max77620_wdt.c
index 2c9f53eaff4f..70c9cd3ba938 100644
--- a/drivers/watchdog/max77620_wdt.c
+++ b/drivers/watchdog/max77620_wdt.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/mfd/max77620.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
diff --git a/drivers/watchdog/moxart_wdt.c b/drivers/watchdog/moxart_wdt.c
index 2c4a73d1e214..430c3ab84c07 100644
--- a/drivers/watchdog/moxart_wdt.c
+++ b/drivers/watchdog/moxart_wdt.c
@@ -13,6 +13,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index ae77112ce97f..cbd752f9ac56 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -29,6 +29,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
diff --git a/drivers/watchdog/tangox_wdt.c b/drivers/watchdog/tangox_wdt.c
index b1de8297fa40..d0b53f3c0d17 100644
--- a/drivers/watchdog/tangox_wdt.c
+++ b/drivers/watchdog/tangox_wdt.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/watchdog.h>
 
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 3097c943fab9..1a9f38f27f65 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -12,13 +12,13 @@
 #define _PLATFORM_DEVICE_H_
 
 #include <linux/device.h>
-#include <linux/mod_devicetable.h>
 
 #define PLATFORM_DEVID_NONE	(-1)
 #define PLATFORM_DEVID_AUTO	(-2)
 
 struct mfd_cell;
 struct property_entry;
+struct platform_device_id;
 
 struct platform_device {
 	const char	*name;
-- 
cgit v1.2.3


From d8d7218ad842e18fc6976b87c08ed749e8d56313 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 6 Jul 2018 11:49:00 +0900
Subject: xdp: XDP_REDIRECT should check IFF_UP and MTU

Otherwise we end up with attempting to send packets from down devices
or to send oversized packets, which may cause unexpected driver/device
behaviour. Generic XDP has already done this check, so reuse the logic
in native XDP.

Fixes: 814abfabef3c ("xdp: add bpf_redirect helper function")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 6 +++---
 kernel/bpf/devmap.c    | 7 ++++++-
 net/core/filter.c      | 9 +++++++--
 3 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 300baad62c88..c73dd7396886 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -765,8 +765,8 @@ static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 
-static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
-					   struct net_device *fwd)
+static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
+				 unsigned int pktlen)
 {
 	unsigned int len;
 
@@ -774,7 +774,7 @@ static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
 		return -ENETDOWN;
 
 	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
-	if (skb->len > len)
+	if (pktlen > len)
 		return -EMSGSIZE;
 
 	return 0;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 642c97f6d1b8..d361fc1e3bf3 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -334,10 +334,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 {
 	struct net_device *dev = dst->dev;
 	struct xdp_frame *xdpf;
+	int err;
 
 	if (!dev->netdev_ops->ndo_xdp_xmit)
 		return -EOPNOTSUPP;
 
+	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+	if (unlikely(err))
+		return err;
+
 	xdpf = convert_to_xdp_frame(xdp);
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
@@ -350,7 +355,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 {
 	int err;
 
-	err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
+	err = xdp_ok_fwd_dev(dst->dev, skb->len);
 	if (unlikely(err))
 		return err;
 	skb->dev = dst->dev;
diff --git a/net/core/filter.c b/net/core/filter.c
index 470268024a40..5fa66a33927f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3128,12 +3128,16 @@ static int __bpf_tx_xdp(struct net_device *dev,
 			u32 index)
 {
 	struct xdp_frame *xdpf;
-	int sent;
+	int err, sent;
 
 	if (!dev->netdev_ops->ndo_xdp_xmit) {
 		return -EOPNOTSUPP;
 	}
 
+	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+	if (unlikely(err))
+		return err;
+
 	xdpf = convert_to_xdp_frame(xdp);
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
@@ -3367,7 +3371,8 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 		goto err;
 	}
 
-	if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+	err = xdp_ok_fwd_dev(fwd, skb->len);
+	if (unlikely(err))
 		goto err;
 
 	skb->dev = fwd;
-- 
cgit v1.2.3


From b233504033dbd65740e59681820ccfd0a2a8ec53 Mon Sep 17 00:00:00 2001
From: Yifeng Sun <pkusunyifeng@gmail.com>
Date: Mon, 2 Jul 2018 08:18:03 -0700
Subject: openvswitch: kernel datapath clone action

Add 'clone' action to kernel datapath by using existing functions.
When actions within clone don't modify the current flow, the flow
key is not cloned before executing clone actions.

This is a follow up patch for this incomplete work:
https://patchwork.ozlabs.org/patch/722096/

v1 -> v2:
Refactor as advised by reviewer.

Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/openvswitch.h      |  5 +++
 include/uapi/linux/openvswitch.h |  3 ++
 net/openvswitch/actions.c        | 33 ++++++++++++++++++
 net/openvswitch/flow_netlink.c   | 73 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 114 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index e6b240b6196c..379affc63e24 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -21,4 +21,9 @@
 
 #include <uapi/linux/openvswitch.h>
 
+#define OVS_CLONE_ATTR_EXEC      0   /* Specify an u32 value. When nonzero,
+				      * actions in clone will not change flow
+				      * keys. False otherwise.
+				      */
+
 #endif /* _LINUX_OPENVSWITCH_H */
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 863aabaa5cc9..dbe0cbe4f1b7 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -840,6 +840,8 @@ struct ovs_action_push_eth {
  * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
  * @OVS_ACTION_ATTR_METER: Run packet through a meter, which may drop the
  * packet, or modify the packet (e.g., change the DSCP field).
+ * @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of
+ * actions without affecting the original packet and key.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -873,6 +875,7 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_PUSH_NSH,     /* Nested OVS_NSH_KEY_ATTR_*. */
 	OVS_ACTION_ATTR_POP_NSH,      /* No argument. */
 	OVS_ACTION_ATTR_METER,        /* u32 meter ID. */
+	OVS_ACTION_ATTR_CLONE,        /* Nested OVS_CLONE_ATTR_*.  */
 
 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
 				       * from userspace. */
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 30a5df27116e..85ae53d8fd09 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1057,6 +1057,28 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
 			     clone_flow_key);
 }
 
+/* When 'last' is true, clone() should always consume the 'skb'.
+ * Otherwise, clone() should keep 'skb' intact regardless what
+ * actions are executed within clone().
+ */
+static int clone(struct datapath *dp, struct sk_buff *skb,
+		 struct sw_flow_key *key, const struct nlattr *attr,
+		 bool last)
+{
+	struct nlattr *actions;
+	struct nlattr *clone_arg;
+	int rem = nla_len(attr);
+	bool dont_clone_flow_key;
+
+	/* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+	clone_arg = nla_data(attr);
+	dont_clone_flow_key = nla_get_u32(clone_arg);
+	actions = nla_next(clone_arg, &rem);
+
+	return clone_execute(dp, skb, key, 0, actions, rem, last,
+			     !dont_clone_flow_key);
+}
+
 static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 			 const struct nlattr *attr)
 {
@@ -1336,6 +1358,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 				consume_skb(skb);
 				return 0;
 			}
+			break;
+
+		case OVS_ACTION_ATTR_CLONE: {
+			bool last = nla_is_last(a, rem);
+
+			err = clone(dp, skb, key, a, last);
+			if (last)
+				return err;
+
+			break;
+		}
 		}
 
 		if (unlikely(err)) {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 391c4073a6dc..a70097ecf33c 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2460,6 +2460,40 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 	return 0;
 }
 
+static int validate_and_copy_clone(struct net *net,
+				   const struct nlattr *attr,
+				   const struct sw_flow_key *key,
+				   struct sw_flow_actions **sfa,
+				   __be16 eth_type, __be16 vlan_tci,
+				   bool log, bool last)
+{
+	int start, err;
+	u32 exec;
+
+	if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
+		return -EINVAL;
+
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
+	if (start < 0)
+		return start;
+
+	exec = last || !actions_may_change_flow(attr);
+
+	err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
+				 sizeof(exec), log);
+	if (err)
+		return err;
+
+	err = __ovs_nla_copy_actions(net, attr, key, sfa,
+				     eth_type, vlan_tci, log);
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, start);
+
+	return 0;
+}
+
 void ovs_match_init(struct sw_flow_match *match,
 		    struct sw_flow_key *key,
 		    bool reset_key,
@@ -2849,6 +2883,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
 			[OVS_ACTION_ATTR_POP_NSH] = 0,
 			[OVS_ACTION_ATTR_METER] = sizeof(u32),
+			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -3038,6 +3073,18 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			/* Non-existent meters are simply ignored.  */
 			break;
 
+		case OVS_ACTION_ATTR_CLONE: {
+			bool last = nla_is_last(a, rem);
+
+			err = validate_and_copy_clone(net, a, key, sfa,
+						      eth_type, vlan_tci,
+						      log, last);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+		}
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
@@ -3116,6 +3163,26 @@ out:
 	return err;
 }
 
+static int clone_action_to_attr(const struct nlattr *attr,
+				struct sk_buff *skb)
+{
+	struct nlattr *start;
+	int err = 0, rem = nla_len(attr);
+
+	start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE);
+	if (!start)
+		return -EMSGSIZE;
+
+	err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+
+	if (err)
+		nla_nest_cancel(skb, start);
+	else
+		nla_nest_end(skb, start);
+
+	return err;
+}
+
 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 {
 	const struct nlattr *ovs_key = nla_data(a);
@@ -3204,6 +3271,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_CLONE:
+			err = clone_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		default:
 			if (nla_put(skb, type, nla_len(a), nla_data(a)))
 				return -EMSGSIZE;
-- 
cgit v1.2.3


From 69448867abcb231afaa7891ff9d9fd04b2b94a0d Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 5 Jul 2018 09:25:43 +0300
Subject: fs: shave 8 bytes off of struct inode

Here is a link to Linus' reply to Jan's concern about making
i_blkbibts byte addressable:
https://marc.info/?l=linux-fsdevel&m=152882624707975&w=2

Here is a link to an lkp.org report about potential performance
improvement in some workload, which could(?) be related to packing
i_blkbits closer to i_bytes/i_lock:
https://marc.info/?l=linux-fsdevel&m=153077048108198&w=2

Changes since v1:
- Add links to relevant discussions

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f140c11d35dd..9b00676f5d9d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -275,6 +275,7 @@ struct writeback_control;
 
 /*
  * Write life time hint values.
+ * Stored in struct inode as u8.
  */
 enum rw_hint {
 	WRITE_LIFE_NOT_SET	= 0,
@@ -609,8 +610,8 @@ struct inode {
 	struct timespec64	i_ctime;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
-	unsigned int		i_blkbits;
-	enum rw_hint		i_write_hint;
+	u8			i_blkbits;
+	u8			i_write_hint;
 	blkcnt_t		i_blocks;
 
 #ifdef __NEED_I_SIZE_ORDERED
-- 
cgit v1.2.3


From f292b87d3ac020418644d8a4bbf29814890505cb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 6 Jul 2018 14:34:29 -0700
Subject: bpf: include errno.h from bpf-cgroup.h

Commit fdb5c4531c1e ("bpf: fix attach type BPF_LIRC_MODE2 dependency
wrt CONFIG_CGROUP_BPF") caused some build issues, detected by 0-DAY
kernel test infrastructure.

The problem is that cgroup_bpf_prog_attach/detach/query() functions
can return -EINVAL error code, which is not defined. Fix this adding
errno.h to includes.

Fixes: fdb5c4531c1e ("bpf: fix attach type BPF_LIRC_MODE2 dependency wrt CONFIG_CGROUP_BPF")
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Sean Young <sean@mess.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 79795c5fa7c3..d50c2f0a655a 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -2,6 +2,7 @@
 #ifndef _BPF_CGROUP_H
 #define _BPF_CGROUP_H
 
+#include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <uapi/linux/bpf.h>
 
-- 
cgit v1.2.3


From 5d6be70add65e3f236642ab0029e356261617cd0 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 29 Jun 2018 13:04:31 +0200
Subject: PM / Domains: Introduce option to attach a device by name to genpd

For the multiple PM domain case, let's introduce a new function called
genpd_dev_pm_attach_by_name(). This allows a device to be associated with
its PM domain through genpd, by using a name based lookup.

Note that, genpd_dev_pm_attach_by_name() shall only be called by the driver
core / PM core, similar to how the existing dev_pm_domain_attach_by_id()
makes use of genpd_dev_pm_attach_by_id(). However, this is implemented by
following changes on top.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Rajendra Nayak <rnayak@codeaurora.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 24 ++++++++++++++++++++++++
 include/linux/pm_domain.h   |  8 ++++++++
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 9e8484189034..79bdca70a81a 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2374,6 +2374,30 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id);
 
+/**
+ * genpd_dev_pm_attach_by_name - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @name: The name of the PM domain.
+ *
+ * Parse device's OF node to find a PM domain specifier using the
+ * power-domain-names DT property. For further description see
+ * genpd_dev_pm_attach_by_id().
+ */
+struct device *genpd_dev_pm_attach_by_name(struct device *dev, char *name)
+{
+	int index;
+
+	if (!dev->of_node)
+		return NULL;
+
+	index = of_property_match_string(dev->of_node, "power-domain-names",
+					 name);
+	if (index < 0)
+		return NULL;
+
+	return genpd_dev_pm_attach_by_id(dev, index);
+}
+
 static const struct of_device_id idle_state_match[] = {
 	{ .compatible = "domain-idle-state", },
 	{ }
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index cb8d84090cfb..03e14a38462d 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -239,6 +239,8 @@ unsigned int of_genpd_opp_to_performance_state(struct device *dev,
 int genpd_dev_pm_attach(struct device *dev);
 struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 					 unsigned int index);
+struct device *genpd_dev_pm_attach_by_name(struct device *dev,
+					   char *name);
 #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
 static inline int of_genpd_add_provider_simple(struct device_node *np,
 					struct generic_pm_domain *genpd)
@@ -290,6 +292,12 @@ static inline struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 	return NULL;
 }
 
+static inline struct device *genpd_dev_pm_attach_by_name(struct device *dev,
+							 char *name)
+{
+	return NULL;
+}
+
 static inline
 struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 {
-- 
cgit v1.2.3


From 27dceb81f445c58b1d10d27d26eaf4ac2e9e0b00 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 29 Jun 2018 13:04:32 +0200
Subject: PM / Domains: Introduce dev_pm_domain_attach_by_name()

For the multiple PM domain case, let's introduce a new API called
dev_pm_domain_attach_by_name(). This allows a consumer driver to associate
its device with one of its PM domains, by using a name based lookup.

Do note that, currently it's only genpd that supports multiple PM domains
per device, but dev_pm_domain_attach_by_name() can easily by extended to
cover other PM domain types, if/when needed.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Rajendra Nayak <rnayak@codeaurora.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/common.c | 17 +++++++++++++++++
 include/linux/pm_domain.h   |  7 +++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index df41b4780b3b..b413951c6abc 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -152,6 +152,23 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id);
 
+/**
+ * dev_pm_domain_attach_by_name - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @name: The name of the PM domain.
+ *
+ * For a detailed function description, see dev_pm_domain_attach_by_id().
+ */
+struct device *dev_pm_domain_attach_by_name(struct device *dev,
+					    char *name)
+{
+	if (dev->pm_domain)
+		return ERR_PTR(-EEXIST);
+
+	return genpd_dev_pm_attach_by_name(dev, name);
+}
+EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name);
+
 /**
  * dev_pm_domain_detach - Detach a device from its PM domain.
  * @dev: Device to detach.
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 03e14a38462d..776c546d581a 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -309,6 +309,8 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 int dev_pm_domain_attach(struct device *dev, bool power_on);
 struct device *dev_pm_domain_attach_by_id(struct device *dev,
 					  unsigned int index);
+struct device *dev_pm_domain_attach_by_name(struct device *dev,
+					    char *name);
 void dev_pm_domain_detach(struct device *dev, bool power_off);
 void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
 #else
@@ -321,6 +323,11 @@ static inline struct device *dev_pm_domain_attach_by_id(struct device *dev,
 {
 	return NULL;
 }
+static inline struct device *dev_pm_domain_attach_by_name(struct device *dev,
+							  char *name)
+{
+	return NULL;
+}
 static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
 static inline void dev_pm_domain_set(struct device *dev,
 				     struct dev_pm_domain *pd) {}
-- 
cgit v1.2.3


From e88728f46cfbb59cc7e7acf1d230c05ec093764e Mon Sep 17 00:00:00 2001
From: Vivek Gautam <vivek.gautam@codeaurora.org>
Date: Wed, 27 Jun 2018 18:20:55 +0530
Subject: driver core: Rename flag AUTOREMOVE to AUTOREMOVE_CONSUMER

Now that we want to add another flag to autoremove the device link
on supplier unbind, it's fair to rename the existing flag from
DL_FLAG_AUTOREMOVE to DL_FLAG_AUTOREMOVE_CONSUMER so that we can
add similar flag for supplier later.
And, while we are touching device.h, fix a doc build warning.

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/driver-api/device_link.rst |  8 ++++----
 drivers/base/core.c                      | 15 ++++++++-------
 drivers/gpu/drm/tegra/dc.c               |  2 +-
 drivers/gpu/ipu-v3/ipu-pre.c             |  3 ++-
 drivers/gpu/ipu-v3/ipu-prg.c             |  3 ++-
 drivers/soc/imx/gpc.c                    |  2 +-
 include/linux/device.h                   | 12 ++++++------
 7 files changed, 24 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst
index 70e328e16aad..a005b904a264 100644
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@@ -81,10 +81,10 @@ integration is desired.
 Two other flags are specifically targeted at use cases where the device
 link is added from the consumer's ``->probe`` callback:  ``DL_FLAG_RPM_ACTIVE``
 can be specified to runtime resume the supplier upon addition of the
-device link.  ``DL_FLAG_AUTOREMOVE`` causes the device link to be automatically
-purged when the consumer fails to probe or later unbinds.  This obviates
-the need to explicitly delete the link in the ``->remove`` callback or in
-the error path of the ``->probe`` callback.
+device link.  ``DL_FLAG_AUTOREMOVE_CONSUMER`` causes the device link to be
+automatically purged when the consumer fails to probe or later unbinds.
+This obviates the need to explicitly delete the link in the ``->remove``
+callback or in the error path of the ``->probe`` callback.
 
 Limitations
 ===========
diff --git a/drivers/base/core.c b/drivers/base/core.c
index df3e1a44707a..14c1e3151e08 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -178,10 +178,10 @@ void device_pm_move_to_tail(struct device *dev)
  * of the link.  If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be
  * ignored.
  *
- * If the DL_FLAG_AUTOREMOVE is set, the link will be removed automatically
- * when the consumer device driver unbinds from it.  The combination of both
- * DL_FLAG_AUTOREMOVE and DL_FLAG_STATELESS set is invalid and will cause NULL
- * to be returned.
+ * If the DL_FLAG_AUTOREMOVE_CONSUMER is set, the link will be removed
+ * automatically when the consumer device driver unbinds from it.
+ * The combination of both DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_STATELESS
+ * set is invalid and will cause NULL to be returned.
  *
  * A side effect of the link creation is re-ordering of dpm_list and the
  * devices_kset list by moving the consumer device and all devices depending
@@ -198,7 +198,8 @@ struct device_link *device_link_add(struct device *consumer,
 	struct device_link *link;
 
 	if (!consumer || !supplier ||
-	    ((flags & DL_FLAG_STATELESS) && (flags & DL_FLAG_AUTOREMOVE)))
+	    ((flags & DL_FLAG_STATELESS) &&
+	     (flags & DL_FLAG_AUTOREMOVE_CONSUMER)))
 		return NULL;
 
 	device_links_write_lock();
@@ -479,7 +480,7 @@ static void __device_links_no_driver(struct device *dev)
 		if (link->flags & DL_FLAG_STATELESS)
 			continue;
 
-		if (link->flags & DL_FLAG_AUTOREMOVE)
+		if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER)
 			kref_put(&link->kref, __device_link_del);
 		else if (link->status != DL_STATE_SUPPLIER_UNBIND)
 			WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
@@ -515,7 +516,7 @@ void device_links_driver_cleanup(struct device *dev)
 		if (link->flags & DL_FLAG_STATELESS)
 			continue;
 
-		WARN_ON(link->flags & DL_FLAG_AUTOREMOVE);
+		WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER);
 		WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND);
 		WRITE_ONCE(link->status, DL_STATE_DORMANT);
 	}
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index c3afe7b2237e..965088afcfad 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -2312,7 +2312,7 @@ static int tegra_dc_couple(struct tegra_dc *dc)
 	 * POWER_CONTROL registers during CRTC enabling.
 	 */
 	if (dc->soc->coupled_pm && dc->pipe == 1) {
-		u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE;
+		u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER;
 		struct device_link *link;
 		struct device *partner;
 
diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c
index 0f70e8847540..2f8db9d62551 100644
--- a/drivers/gpu/ipu-v3/ipu-pre.c
+++ b/drivers/gpu/ipu-v3/ipu-pre.c
@@ -128,7 +128,8 @@ ipu_pre_lookup_by_phandle(struct device *dev, const char *name, int index)
 	list_for_each_entry(pre, &ipu_pre_list, list) {
 		if (pre_node == pre->dev->of_node) {
 			mutex_unlock(&ipu_pre_list_mutex);
-			device_link_add(dev, pre->dev, DL_FLAG_AUTOREMOVE);
+			device_link_add(dev, pre->dev,
+					DL_FLAG_AUTOREMOVE_CONSUMER);
 			of_node_put(pre_node);
 			return pre;
 		}
diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c
index 83f9dd934a5d..38a3a9764e49 100644
--- a/drivers/gpu/ipu-v3/ipu-prg.c
+++ b/drivers/gpu/ipu-v3/ipu-prg.c
@@ -100,7 +100,8 @@ ipu_prg_lookup_by_phandle(struct device *dev, const char *name, int ipu_id)
 	list_for_each_entry(prg, &ipu_prg_list, list) {
 		if (prg_node == prg->dev->of_node) {
 			mutex_unlock(&ipu_prg_list_mutex);
-			device_link_add(dev, prg->dev, DL_FLAG_AUTOREMOVE);
+			device_link_add(dev, prg->dev,
+					DL_FLAG_AUTOREMOVE_CONSUMER);
 			prg->id = ipu_id;
 			of_node_put(prg_node);
 			return prg;
diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c
index 32f0748fd067..aa9e65bc965e 100644
--- a/drivers/soc/imx/gpc.c
+++ b/drivers/soc/imx/gpc.c
@@ -202,7 +202,7 @@ static int imx_pgc_power_domain_probe(struct platform_device *pdev)
 			goto genpd_err;
 	}
 
-	device_link_add(dev, dev->parent, DL_FLAG_AUTOREMOVE);
+	device_link_add(dev, dev->parent, DL_FLAG_AUTOREMOVE_CONSUMER);
 
 	return 0;
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..3929805cdd59 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -90,7 +90,7 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  * @num_vf:	Called to find out how many virtual functions a device on this
  *		bus supports.
  * @dma_configure:	Called to setup DMA configuration on a device on
-			this bus.
+ *			this bus.
  * @pm:		Power management operations of this bus, callback the specific
  *		device driver's pm-ops.
  * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
@@ -784,14 +784,14 @@ enum device_link_state {
  * Device link flags.
  *
  * STATELESS: The core won't track the presence of supplier/consumer drivers.
- * AUTOREMOVE: Remove this link automatically on consumer driver unbind.
+ * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind.
  * PM_RUNTIME: If set, the runtime PM framework will use this link.
  * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation.
  */
-#define DL_FLAG_STATELESS	BIT(0)
-#define DL_FLAG_AUTOREMOVE	BIT(1)
-#define DL_FLAG_PM_RUNTIME	BIT(2)
-#define DL_FLAG_RPM_ACTIVE	BIT(3)
+#define DL_FLAG_STATELESS		BIT(0)
+#define DL_FLAG_AUTOREMOVE_CONSUMER	BIT(1)
+#define DL_FLAG_PM_RUNTIME		BIT(2)
+#define DL_FLAG_RPM_ACTIVE		BIT(3)
 
 /**
  * struct device_link - Device link representation.
-- 
cgit v1.2.3


From 1689cac5b32a6db6f812e8063ea418a7cf023d03 Mon Sep 17 00:00:00 2001
From: Vivek Gautam <vivek.gautam@codeaurora.org>
Date: Wed, 27 Jun 2018 18:20:56 +0530
Subject: driver core: Add flag to autoremove device link on supplier unbind

Add a flag to autoremove the device links on supplier driver
unbind. This obviates the need to explicitly delete the link
in the remove path.
We remove these links only when the supplier's link to its
consumers has gone to DL_STATE_SUPPLIER_UNBIND state.

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Suggested-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/driver-api/device_link.rst |  4 ++++
 drivers/base/core.c                      | 10 ++++++++++
 include/linux/device.h                   |  2 ++
 3 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst
index a005b904a264..d6763272e747 100644
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@@ -86,6 +86,10 @@ automatically purged when the consumer fails to probe or later unbinds.
 This obviates the need to explicitly delete the link in the ``->remove``
 callback or in the error path of the ``->probe`` callback.
 
+Similarly, when the device link is added from supplier's ``->probe`` callback,
+``DL_FLAG_AUTOREMOVE_SUPPLIER`` causes the device link to be automatically
+purged when the supplier fails to probe or later unbinds.
+
 Limitations
 ===========
 
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 14c1e3151e08..e721218bf352 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -518,6 +518,16 @@ void device_links_driver_cleanup(struct device *dev)
 
 		WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER);
 		WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND);
+
+		/*
+		 * autoremove the links between this @dev and its consumer
+		 * devices that are not active, i.e. where the link state
+		 * has moved to DL_STATE_SUPPLIER_UNBIND.
+		 */
+		if (link->status == DL_STATE_SUPPLIER_UNBIND &&
+		    link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER)
+			kref_put(&link->kref, __device_link_del);
+
 		WRITE_ONCE(link->status, DL_STATE_DORMANT);
 	}
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 3929805cdd59..e80920452b49 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -787,11 +787,13 @@ enum device_link_state {
  * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind.
  * PM_RUNTIME: If set, the runtime PM framework will use this link.
  * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation.
+ * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind.
  */
 #define DL_FLAG_STATELESS		BIT(0)
 #define DL_FLAG_AUTOREMOVE_CONSUMER	BIT(1)
 #define DL_FLAG_PM_RUNTIME		BIT(2)
 #define DL_FLAG_RPM_ACTIVE		BIT(3)
+#define DL_FLAG_AUTOREMOVE_SUPPLIER	BIT(4)
 
 /**
  * struct device_link - Device link representation.
-- 
cgit v1.2.3


From 63f3fb8d7ccb813e0a1f2ceedb9a3ebe2685b620 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Mon, 2 Jul 2018 15:59:37 -0700
Subject: pinctrl: Document pin_config_group_get() return codes like
 pin_config_get()

The pinconf_generic_dump_one() function makes the assumption that
pin_config_group_get() should return -EINVAL and -ENOTSUPP just like
pin_config_get() does.  Document that so it's more obvious.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinconf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h
index 09eb80f2574a..8dd85d302b90 100644
--- a/include/linux/pinctrl/pinconf.h
+++ b/include/linux/pinctrl/pinconf.h
@@ -28,7 +28,8 @@ struct seq_file;
  *	is not available on this controller this should return -ENOTSUPP
  *	and if it is available but disabled it should return -EINVAL
  * @pin_config_set: configure an individual pin
- * @pin_config_group_get: get configurations for an entire pin group
+ * @pin_config_group_get: get configurations for an entire pin group; should
+ *	return -ENOTSUPP and -EINVAL using the same rules as pin_config_get.
  * @pin_config_group_set: configure all pins in a group
  * @pin_config_dbg_parse_modify: optional debugfs to modify a pin configuration
  * @pin_config_dbg_show: optional debugfs display hook that will provide
-- 
cgit v1.2.3


From 03fc7f9c99c1e7ae2925d459e8487f1a6f199f79 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Wed, 27 Jun 2018 16:20:28 +0200
Subject: printk/nmi: Prevent deadlock when accessing the main log buffer in
 NMI

The commit 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI
when logbuf_lock is available") brought back the possible deadlocks
in printk() and NMI.

The check of logbuf_lock is done only in printk_nmi_enter() to prevent
mixed output. But another CPU might take the lock later, enter NMI, and:

      + Both NMIs might be serialized by yet another lock, for example,
	the one in nmi_cpu_backtrace().

      + The other CPU might get stopped in NMI, see smp_send_stop()
	in panic().

The only safe solution is to use trylock when storing the message
into the main log-buffer. It might cause reordering when some lines
go to the main lock buffer directly and others are delayed via
the per-CPU buffer. It means that it is not useful in general.

This patch replaces the problematic NMI deferred context with NMI
direct context. It can be used to mark a code that might produce
many messages in NMI and the risk of losing them is more critical
than problems with eventual reordering.

The context is then used when dumping trace buffers on oops. It was
the primary motivation for the original fix. Also the reordering is
even smaller issue there because some traces have their own time stamps.

Finally, nmi_cpu_backtrace() need not longer be serialized because
it will always us the per-CPU buffers again.

Fixes: 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available")
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20180627142028.11259-1-pmladek@suse.com
To: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/printk.h      |  4 ++++
 kernel/printk/internal.h    |  9 ++++++-
 kernel/printk/printk_safe.c | 58 +++++++++++++++++++++++++++++----------------
 kernel/trace/trace.c        |  4 +++-
 lib/nmi_backtrace.c         |  3 ---
 5 files changed, 52 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 6d7e800affd8..3ede9f46a494 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -148,9 +148,13 @@ void early_printk(const char *s, ...) { }
 #ifdef CONFIG_PRINTK_NMI
 extern void printk_nmi_enter(void);
 extern void printk_nmi_exit(void);
+extern void printk_nmi_direct_enter(void);
+extern void printk_nmi_direct_exit(void);
 #else
 static inline void printk_nmi_enter(void) { }
 static inline void printk_nmi_exit(void) { }
+static inline void printk_nmi_direct_enter(void) { }
+static inline void printk_nmi_direct_exit(void) { }
 #endif /* PRINTK_NMI */
 
 #ifdef CONFIG_PRINTK
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 2a7d04049af4..0f1898820cba 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -19,11 +19,16 @@
 #ifdef CONFIG_PRINTK
 
 #define PRINTK_SAFE_CONTEXT_MASK	 0x3fffffff
-#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000
+#define PRINTK_NMI_DIRECT_CONTEXT_MASK	 0x40000000
 #define PRINTK_NMI_CONTEXT_MASK		 0x80000000
 
 extern raw_spinlock_t logbuf_lock;
 
+__printf(5, 0)
+int vprintk_store(int facility, int level,
+		  const char *dict, size_t dictlen,
+		  const char *fmt, va_list args);
+
 __printf(1, 0) int vprintk_default(const char *fmt, va_list args);
 __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
 __printf(1, 0) int vprintk_func(const char *fmt, va_list args);
@@ -54,6 +59,8 @@ void __printk_safe_exit(void);
 		local_irq_enable();		\
 	} while (0)
 
+void defer_console_output(void);
+
 #else
 
 __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index d7d091309054..a0a74c533e4b 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -308,24 +308,33 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
 
 void printk_nmi_enter(void)
 {
-	/*
-	 * The size of the extra per-CPU buffer is limited. Use it only when
-	 * the main one is locked. If this CPU is not in the safe context,
-	 * the lock must be taken on another CPU and we could wait for it.
-	 */
-	if ((this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) &&
-	    raw_spin_is_locked(&logbuf_lock)) {
-		this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
-	} else {
-		this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK);
-	}
+	this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
 }
 
 void printk_nmi_exit(void)
 {
-	this_cpu_and(printk_context,
-		     ~(PRINTK_NMI_CONTEXT_MASK |
-		       PRINTK_NMI_DEFERRED_CONTEXT_MASK));
+	this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
+}
+
+/*
+ * Marks a code that might produce many messages in NMI context
+ * and the risk of losing them is more critical than eventual
+ * reordering.
+ *
+ * It has effect only when called in NMI context. Then printk()
+ * will try to store the messages into the main logbuf directly
+ * and use the per-CPU buffers only as a fallback when the lock
+ * is not available.
+ */
+void printk_nmi_direct_enter(void)
+{
+	if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
+		this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK);
+}
+
+void printk_nmi_direct_exit(void)
+{
+	this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK);
 }
 
 #else
@@ -363,6 +372,20 @@ void __printk_safe_exit(void)
 
 __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
 {
+	/*
+	 * Try to use the main logbuf even in NMI. But avoid calling console
+	 * drivers that might have their own locks.
+	 */
+	if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
+	    raw_spin_trylock(&logbuf_lock)) {
+		int len;
+
+		len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
+		raw_spin_unlock(&logbuf_lock);
+		defer_console_output();
+		return len;
+	}
+
 	/* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
 	if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
 		return vprintk_nmi(fmt, args);
@@ -371,13 +394,6 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
 	if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
 		return vprintk_safe(fmt, args);
 
-	/*
-	 * Use the main logbuf when logbuf_lock is available in NMI.
-	 * But avoid calling console drivers that might have their own locks.
-	 */
-	if (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK)
-		return vprintk_deferred(fmt, args);
-
 	/* No obstacles. */
 	return vprintk_default(fmt, args);
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bcd93031d042..f106ad12f72f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8265,6 +8265,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 	tracing_off();
 
 	local_irq_save(flags);
+	printk_nmi_direct_enter();
 
 	/* Simulate the iterator */
 	trace_init_global_iter(&iter);
@@ -8344,7 +8345,8 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 	for_each_tracing_cpu(cpu) {
 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
 	}
- 	atomic_dec(&dump_running);
+	atomic_dec(&dump_running);
+	printk_nmi_direct_exit();
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(ftrace_dump);
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 61a6b5aab07e..15ca78e1c7d4 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -87,11 +87,9 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 
 bool nmi_cpu_backtrace(struct pt_regs *regs)
 {
-	static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
 	int cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-		arch_spin_lock(&lock);
 		if (regs && cpu_in_idle(instruction_pointer(regs))) {
 			pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
 				cpu, (void *)instruction_pointer(regs));
@@ -102,7 +100,6 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
 			else
 				dump_stack();
 		}
-		arch_spin_unlock(&lock);
 		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
 		return true;
 	}
-- 
cgit v1.2.3


From 6b1d83d274486615cc341e410467a98fd9c27c0a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 15 Jun 2018 14:55:19 -0700
Subject: block: Remove bdev_nr_zones()

Remove this function since it has no callers. This function was
introduced in commit 6cc77e9cb080 ("block: introduce zoned block
devices zone write locking").

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Matias Bjorling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 79226ca8f80f..49a400afb146 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1639,15 +1639,6 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
 	return 0;
 }
 
-static inline unsigned int bdev_nr_zones(struct block_device *bdev)
-{
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	if (q)
-		return blk_queue_nr_zones(q);
-	return 0;
-}
-
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
-- 
cgit v1.2.3


From 7c8542b7982264226cf94102950343185869b584 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 15 Jun 2018 14:55:20 -0700
Subject: block: Inline blk_queue_nr_zones()

Since the implementation of blk_queue_nr_zones() is trivial and since
it only has a single caller, inline this function.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Matias Bjorling <mb@lightnvm.io>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c | 2 +-
 include/linux/blkdev.h | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 1c4532e92938..26e1f8e425a8 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -214,7 +214,7 @@ static int queue_zone_wlock_show(void *data, struct seq_file *m)
 	if (!q->seq_zones_wlock)
 		return 0;
 
-	for (i = 0; i < blk_queue_nr_zones(q); i++)
+	for (i = 0; i < q->nr_zones; i++)
 		if (test_bit(i, q->seq_zones_wlock))
 			seq_printf(m, "%u\n", i);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 49a400afb146..905daa7c647e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -800,11 +800,6 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
-static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
-{
-	return q->nr_zones;
-}
-
 static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 					     sector_t sector)
 {
-- 
cgit v1.2.3


From 6a5ac9846508ad7d1d23881d9d5add35f2e6ae71 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 15 Jun 2018 14:55:21 -0700
Subject: block: Make struct request_queue smaller for CONFIG_BLK_DEV_ZONED=n

Exclude zoned block device members from struct request_queue for
CONFIG_BLK_DEV_ZONED == n. Avoid breaking the build by only building
the code that uses these struct request_queue members if
CONFIG_BLK_DEV_ZONED != n.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Matias Bjorling <mb@lightnvm.io>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig                |  4 ++++
 block/Makefile               |  1 +
 block/blk-mq-debugfs-zoned.c | 24 ++++++++++++++++++++++++
 block/blk-mq-debugfs.c       | 15 ---------------
 block/blk-mq-debugfs.h       |  9 +++++++++
 include/linux/blkdev.h       |  6 ++++++
 6 files changed, 44 insertions(+), 15 deletions(-)
 create mode 100644 block/blk-mq-debugfs-zoned.c

(limited to 'include/linux')

diff --git a/block/Kconfig b/block/Kconfig
index eb50fd4977c2..dfe7bc770fc9 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -177,6 +177,10 @@ config BLK_DEBUG_FS
 	Unless you are building a kernel for a tiny system, you should
 	say Y here.
 
+config BLK_DEBUG_FS_ZONED
+       bool
+       default BLK_DEBUG_FS && BLK_DEV_ZONED
+
 config BLK_SED_OPAL
 	bool "Logic for interfacing with Opal enabled SEDs"
 	---help---
diff --git a/block/Makefile b/block/Makefile
index 6a56303b9925..a8f94cdb75c3 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -34,4 +34,5 @@ obj-$(CONFIG_BLK_MQ_RDMA)	+= blk-mq-rdma.o
 obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
 obj-$(CONFIG_BLK_WBT)		+= blk-wbt.o
 obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
+obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
 obj-$(CONFIG_BLK_SED_OPAL)	+= sed-opal.o
diff --git a/block/blk-mq-debugfs-zoned.c b/block/blk-mq-debugfs-zoned.c
new file mode 100644
index 000000000000..fb2c82c351e4
--- /dev/null
+++ b/block/blk-mq-debugfs-zoned.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 Western Digital Corporation or its affiliates.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/blkdev.h>
+#include "blk-mq-debugfs.h"
+
+int queue_zone_wlock_show(void *data, struct seq_file *m)
+{
+	struct request_queue *q = data;
+	unsigned int i;
+
+	if (!q->seq_zones_wlock)
+		return 0;
+
+	for (i = 0; i < q->nr_zones; i++)
+		if (test_bit(i, q->seq_zones_wlock))
+			seq_printf(m, "%u\n", i);
+
+	return 0;
+}
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 26e1f8e425a8..7efe268e4447 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -206,21 +206,6 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
 	return count;
 }
 
-static int queue_zone_wlock_show(void *data, struct seq_file *m)
-{
-	struct request_queue *q = data;
-	unsigned int i;
-
-	if (!q->seq_zones_wlock)
-		return 0;
-
-	for (i = 0; i < q->nr_zones; i++)
-		if (test_bit(i, q->seq_zones_wlock))
-			seq_printf(m, "%u\n", i);
-
-	return 0;
-}
-
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
 	{ "poll_stat", 0400, queue_poll_stat_show },
 	{ "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index b9d366e57097..a9160be12be0 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -80,4 +80,13 @@ static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hc
 }
 #endif
 
+#ifdef CONFIG_BLK_DEBUG_FS_ZONED
+int queue_zone_wlock_show(void *data, struct seq_file *m);
+#else
+static inline int queue_zone_wlock_show(void *data, struct seq_file *m)
+{
+	return 0;
+}
+#endif
+
 #endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 905daa7c647e..ca5a8b046894 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -592,6 +592,7 @@ struct request_queue {
 
 	struct queue_limits	limits;
 
+#ifdef CONFIG_BLK_DEV_ZONED
 	/*
 	 * Zoned block device information for request dispatch control.
 	 * nr_zones is the total number of zones of the device. This is always
@@ -612,6 +613,7 @@ struct request_queue {
 	unsigned int		nr_zones;
 	unsigned long		*seq_zones_bitmap;
 	unsigned long		*seq_zones_wlock;
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 	/*
 	 * sg stuff
@@ -800,6 +802,7 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 					     sector_t sector)
 {
@@ -815,6 +818,7 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 		return false;
 	return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
 }
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline bool rq_is_sync(struct request *rq)
 {
@@ -1065,6 +1069,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
 	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
@@ -1074,6 +1079,7 @@ static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
 {
 	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
 }
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 /*
  * Some commands like WRITE SAME have a payload or data transfer size which
-- 
cgit v1.2.3


From 5815839b3ca16bb1d45939270871169f6803a121 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 25 Jun 2018 19:31:47 +0800
Subject: blk-mq: introduce new lock for protecting hctx->dispatch_wait

Now hctx->lock is only acquired when adding hctx->dispatch_wait to
one wait queue, but not held when removing it from the wait queue.

IO hang can be observed easily if SCHED RESTART is disabled, that means
now RESTART exits just for fixing the issue in blk_mq_mark_tag_wait().

This patch fixes the issue by introducing hctx->dispatch_wait_lock and
holding it for removing hctx->dispatch_wait in blk_mq_dispatch_wake(),
since we need to avoid acquiring hctx->lock in irq context.

Fixes: eb619fdb2d4cb8b3d3419 ("blk-mq: fix issue with shared tag queue re-running")
Cc: Christoph Hellwig <hch@lst.de>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Tested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 26 +++++++++++++++++---------
 include/linux/blk-mq.h |  1 +
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9eee896a3592..df84281f6af6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -998,7 +998,10 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
 
 	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
 
+	spin_lock(&hctx->dispatch_wait_lock);
 	list_del_init(&wait->entry);
+	spin_unlock(&hctx->dispatch_wait_lock);
+
 	blk_mq_run_hw_queue(hctx, true);
 	return 1;
 }
@@ -1012,7 +1015,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
 static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 				 struct request *rq)
 {
-	struct sbq_wait_state *ws;
+	struct wait_queue_head *wq;
 	wait_queue_entry_t *wait;
 	bool ret;
 
@@ -1035,14 +1038,18 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	if (!list_empty_careful(&wait->entry))
 		return false;
 
-	spin_lock(&hctx->lock);
+	wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait;
+
+	spin_lock_irq(&wq->lock);
+	spin_lock(&hctx->dispatch_wait_lock);
 	if (!list_empty(&wait->entry)) {
-		spin_unlock(&hctx->lock);
+		spin_unlock(&hctx->dispatch_wait_lock);
+		spin_unlock_irq(&wq->lock);
 		return false;
 	}
 
-	ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);
-	add_wait_queue(&ws->wait, wait);
+	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+	__add_wait_queue(wq, wait);
 
 	/*
 	 * It's possible that a tag was freed in the window between the
@@ -1051,7 +1058,8 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	 */
 	ret = blk_mq_get_driver_tag(rq);
 	if (!ret) {
-		spin_unlock(&hctx->lock);
+		spin_unlock(&hctx->dispatch_wait_lock);
+		spin_unlock_irq(&wq->lock);
 		return false;
 	}
 
@@ -1059,10 +1067,9 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	 * We got a tag, remove ourselves from the wait queue to ensure
 	 * someone else gets the wakeup.
 	 */
-	spin_lock_irq(&ws->wait.lock);
 	list_del_init(&wait->entry);
-	spin_unlock_irq(&ws->wait.lock);
-	spin_unlock(&hctx->lock);
+	spin_unlock(&hctx->dispatch_wait_lock);
+	spin_unlock_irq(&wq->lock);
 
 	return true;
 }
@@ -2142,6 +2149,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 
 	hctx->nr_ctx = 0;
 
+	spin_lock_init(&hctx->dispatch_wait_lock);
 	init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
 	INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e3147eb74222..ea690254dab7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -39,6 +39,7 @@ struct blk_mq_hw_ctx {
 	struct blk_mq_ctx	**ctxs;
 	unsigned int		nr_ctx;
 
+	spinlock_t		dispatch_wait_lock;
 	wait_queue_entry_t	dispatch_wait;
 	atomic_t		wait_index;
 
-- 
cgit v1.2.3


From 97889f9ac24f8d2fc8e703ea7f80c162bab10d4d Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 25 Jun 2018 19:31:48 +0800
Subject: blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set()

We have to remove synchronize_rcu() from blk_queue_cleanup(),
otherwise long delay can be caused during lun probe. For removing
it, we have to avoid to iterate the set->tag_list in IO path, eg,
blk_mq_sched_restart().

This patch reverts 5b79413946d (Revert "blk-mq: don't handle
TAG_SHARED in restart"). Given we have fixed enough IO hang issue,
and there isn't any reason to restart all queues in one tags any more,
see the following reasons:

1) blk-mq core can deal with shared-tags case well via blk_mq_get_driver_tag(),
which can wake up queues waiting for driver tag.

2) SCSI is a bit special because it may return BLK_STS_RESOURCE if queue,
target or host is ready, but SCSI built-in restart can cover all these well,
see scsi_end_request(), queue will be rerun after any request initiated from
this host/target is completed.

In my test on scsi_debug(8 luns), this patch may improve IOPS by 20% ~ 30%
when running I/O on these 8 luns concurrently.

Fixes: 705cda97ee3a ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list")
Cc: Omar Sandoval <osandov@fb.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org
Reported-by: Andrew Jones <drjones@redhat.com>
Tested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c   | 85 +++-----------------------------------------------
 block/blk-mq.c         | 10 ++----
 include/linux/blkdev.h |  2 --
 3 files changed, 7 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 56c493c6cd90..4e027f6108ae 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -59,29 +59,16 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
 		return;
 
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_inc(&q->shared_hctx_restart);
-	} else
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 {
 	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		return false;
-
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_dec(&q->shared_hctx_restart);
-	} else
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+		return;
+	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-	return blk_mq_run_hw_queue(hctx, true);
+	blk_mq_run_hw_queue(hctx, true);
 }
 
 /*
@@ -380,68 +367,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-/**
- * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
- * @pos:    loop cursor.
- * @skip:   the list element that will not be examined. Iteration starts at
- *          @skip->next.
- * @head:   head of the list to examine. This list must have at least one
- *          element, namely @skip.
- * @member: name of the list_head structure within typeof(*pos).
- */
-#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
-	for ((pos) = (skip);						\
-	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
-			(pos)->member.next, typeof(*pos), member) :	\
-	      list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
-	     (pos) != (skip); )
-
-/*
- * Called after a driver tag has been freed to check whether a hctx needs to
- * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
- * queues in a round-robin fashion if the tag set of @hctx is shared with other
- * hardware queues.
- */
-void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
-{
-	struct blk_mq_tags *const tags = hctx->tags;
-	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
-	struct request_queue *const queue = hctx->queue, *q;
-	struct blk_mq_hw_ctx *hctx2;
-	unsigned int i, j;
-
-	if (set->flags & BLK_MQ_F_TAG_SHARED) {
-		/*
-		 * If this is 0, then we know that no hardware queues
-		 * have RESTART marked. We're done.
-		 */
-		if (!atomic_read(&queue->shared_hctx_restart))
-			return;
-
-		rcu_read_lock();
-		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
-					   tag_set_list) {
-			queue_for_each_hw_ctx(q, hctx2, i)
-				if (hctx2->tags == tags &&
-				    blk_mq_sched_restart_hctx(hctx2))
-					goto done;
-		}
-		j = hctx->queue_num + 1;
-		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
-			if (j == queue->nr_hw_queues)
-				j = 0;
-			hctx2 = queue->queue_hw_ctx[j];
-			if (hctx2->tags == tags &&
-			    blk_mq_sched_restart_hctx(hctx2))
-				break;
-		}
-done:
-		rcu_read_unlock();
-	} else {
-		blk_mq_sched_restart_hctx(hctx);
-	}
-}
-
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 				 bool run_queue, bool async)
 {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index df84281f6af6..7c6ff13171ef 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2335,15 +2335,10 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if (shared) {
-			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-				atomic_inc(&q->shared_hctx_restart);
+		if (shared)
 			hctx->flags |= BLK_MQ_F_TAG_SHARED;
-		} else {
-			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-				atomic_dec(&q->shared_hctx_restart);
+		else
 			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
-		}
 	}
 }
 
@@ -2374,7 +2369,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 		blk_mq_update_tag_set_depth(set, false);
 	}
 	mutex_unlock(&set->tag_list_lock);
-	synchronize_rcu();
 	INIT_LIST_HEAD(&q->tag_set_list);
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ca5a8b046894..9d05646d5059 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -442,8 +442,6 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
-	atomic_t		shared_hctx_restart;
-
 	struct blk_queue_stats	*stats;
 	struct rq_wb		*rq_wb;
 
-- 
cgit v1.2.3


From 6e768717304bdbe8d2897ca8298f6b58863fdc41 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 3 Jul 2018 09:03:16 -0600
Subject: blk-mq: dequeue request one by one from sw queue if hctx is busy

It won't be efficient to dequeue request one by one from sw queue,
but we have to do that when queue is busy for better merge performance.

This patch takes the Exponential Weighted Moving Average(EWMA) to figure
out if queue is busy, then only dequeue request one by one from sw queue
when queue is busy.

Fixes: b347689ffbca ("blk-mq-sched: improve dispatching from sw queue")
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Reported-by: Kashyap Desai <kashyap.desai@broadcom.com>
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c |  9 +++++++++
 block/blk-mq-sched.c   | 11 ++---------
 block/blk-mq.c         | 33 ++++++++++++++++++++++++++++++++-
 include/linux/blk-mq.h |  3 ++-
 4 files changed, 45 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 7efe268e4447..cb1e6cf7ac48 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -622,6 +622,14 @@ static int hctx_active_show(void *data, struct seq_file *m)
 	return 0;
 }
 
+static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
+{
+	struct blk_mq_hw_ctx *hctx = data;
+
+	seq_printf(m, "%u\n", hctx->dispatch_busy);
+	return 0;
+}
+
 static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
 	__acquires(&ctx->lock)
 {
@@ -783,6 +791,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 	{"queued", 0600, hctx_queued_show, hctx_queued_write},
 	{"run", 0600, hctx_run_show, hctx_run_write},
 	{"active", 0400, hctx_active_show},
+	{"dispatch_busy", 0400, hctx_dispatch_busy_show},
 	{},
 };
 
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index f3b4b5ceb4d1..fdc129e64cc4 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -206,15 +206,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 		}
 	} else if (has_sched_dispatch) {
 		blk_mq_do_dispatch_sched(hctx);
-	} else if (q->mq_ops->get_budget) {
-		/*
-		 * If we need to get budget before queuing request, we
-		 * dequeue request one by one from sw queue for avoiding
-		 * to mess up I/O merge when dispatch runs out of resource.
-		 *
-		 * TODO: get more budgets, and dequeue more requests in
-		 * one time.
-		 */
+	} else if (hctx->dispatch_busy) {
+		/* dequeue request one by one from sw queue if queue is busy */
 		blk_mq_do_dispatch_ctx(hctx);
 	} else {
 		blk_mq_flush_busy_ctxs(hctx, &rq_list);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 795ba859b16b..850fdd02c385 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1074,6 +1074,35 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	return true;
 }
 
+#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
+#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4
+/*
+ * Update dispatch busy with the Exponential Weighted Moving Average(EWMA):
+ * - EWMA is one simple way to compute running average value
+ * - weight(7/8 and 1/8) is applied so that it can decrease exponentially
+ * - take 4 as factor for avoiding to get too small(0) result, and this
+ *   factor doesn't matter because EWMA decreases exponentially
+ */
+static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
+{
+	unsigned int ewma;
+
+	if (hctx->queue->elevator)
+		return;
+
+	ewma = hctx->dispatch_busy;
+
+	if (!ewma && !busy)
+		return;
+
+	ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
+	if (busy)
+		ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
+	ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
+
+	hctx->dispatch_busy = ewma;
+}
+
 #define BLK_MQ_RESOURCE_DELAY	3		/* ms units */
 
 /*
@@ -1210,8 +1239,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		else if (needs_restart && (ret == BLK_STS_RESOURCE))
 			blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
 
+		blk_mq_update_dispatch_busy(hctx, true);
 		return false;
-	}
+	} else
+		blk_mq_update_dispatch_busy(hctx, false);
 
 	/*
 	 * If the host/device is unable to accept more work, inform the
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ea690254dab7..d710e92874cc 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -35,9 +35,10 @@ struct blk_mq_hw_ctx {
 	struct sbitmap		ctx_map;
 
 	struct blk_mq_ctx	*dispatch_from;
+	unsigned int		dispatch_busy;
 
-	struct blk_mq_ctx	**ctxs;
 	unsigned int		nr_ctx;
+	struct blk_mq_ctx	**ctxs;
 
 	spinlock_t		dispatch_wait_lock;
 	wait_queue_entry_t	dispatch_wait;
-- 
cgit v1.2.3


From 08e18eab0c579ad84399c1899c11899734854eb2 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:14:50 -0400
Subject: block: add bi_blkg to the bio for cgroups

Currently io.low uses a bi_cg_private to stash its private data for the
blkg, however other blkcg policies may want to use this as well.  Since
we can get the private data out of the blkg, move this to bi_blkg in the
bio and make it generic, then we can use bio_associate_blkg() to attach
the blkg to the bio.

Theoretically we could simply replace the bi_css with this since we can
get to all the same information from the blkg, however you have to
lookup the blkg, so for example wbc_init_bio() would have to lookup and
possibly allocate the blkg for the css it was trying to attach to the
bio.  This could be problematic and result in us either not attaching
the css at all to the bio, or falling back to the root blkcg if we are
unable to allocate the corresponding blkg.

So for now do this, and in the future if possible we could just replace
the bi_css with bi_blkg and update the helpers to do the correct
translation.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c               | 23 +++++++++++++++++++++++
 block/blk-throttle.c      | 21 +++++++--------------
 include/linux/bio.h       |  1 +
 include/linux/blk_types.h |  2 +-
 4 files changed, 32 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 67eff5eddc49..044571538574 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -28,6 +28,7 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/cgroup.h>
+#include <linux/blk-cgroup.h>
 
 #include <trace/events/block.h>
 #include "blk.h"
@@ -2036,6 +2037,24 @@ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
 }
 EXPORT_SYMBOL_GPL(bio_associate_blkcg);
 
+/**
+ * bio_associate_blkg - associate a bio with the specified blkg
+ * @bio: target bio
+ * @blkg: the blkg to associate
+ *
+ * Associate @bio with the blkg specified by @blkg.  This is the queue specific
+ * blkcg information associated with the @bio, a reference will be taken on the
+ * @blkg and will be freed when the bio is freed.
+ */
+int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
+{
+	if (unlikely(bio->bi_blkg))
+		return -EBUSY;
+	blkg_get(blkg);
+	bio->bi_blkg = blkg;
+	return 0;
+}
+
 /**
  * bio_disassociate_task - undo bio_associate_current()
  * @bio: target bio
@@ -2050,6 +2069,10 @@ void bio_disassociate_task(struct bio *bio)
 		css_put(bio->bi_css);
 		bio->bi_css = NULL;
 	}
+	if (bio->bi_blkg) {
+		blkg_put(bio->bi_blkg);
+		bio->bi_blkg = NULL;
+	}
 }
 
 /**
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 63bb261811dd..caaabbe8a7a5 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2134,12 +2134,8 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
 {
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	if (bio->bi_css) {
-		if (bio->bi_cg_private)
-			blkg_put(tg_to_blkg(bio->bi_cg_private));
-		bio->bi_cg_private = tg;
-		blkg_get(tg_to_blkg(tg));
-	}
+	if (bio->bi_css)
+		bio_associate_blkg(bio, tg_to_blkg(tg));
 	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
 #endif
 }
@@ -2287,6 +2283,7 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
 
 void blk_throtl_bio_endio(struct bio *bio)
 {
+	struct blkcg_gq *blkg;
 	struct throtl_grp *tg;
 	u64 finish_time_ns;
 	unsigned long finish_time;
@@ -2294,20 +2291,18 @@ void blk_throtl_bio_endio(struct bio *bio)
 	unsigned long lat;
 	int rw = bio_data_dir(bio);
 
-	tg = bio->bi_cg_private;
-	if (!tg)
+	blkg = bio->bi_blkg;
+	if (!blkg)
 		return;
-	bio->bi_cg_private = NULL;
+	tg = blkg_to_tg(blkg);
 
 	finish_time_ns = ktime_get_ns();
 	tg->last_finish_time = finish_time_ns >> 10;
 
 	start_time = bio_issue_time(&bio->bi_issue) >> 10;
 	finish_time = __bio_issue_time(finish_time_ns) >> 10;
-	if (!start_time || finish_time <= start_time) {
-		blkg_put(tg_to_blkg(tg));
+	if (!start_time || finish_time <= start_time)
 		return;
-	}
 
 	lat = finish_time - start_time;
 	/* this is only for bio based driver */
@@ -2336,8 +2331,6 @@ void blk_throtl_bio_endio(struct bio *bio)
 		tg->bio_cnt /= 2;
 		tg->bad_bio_cnt /= 2;
 	}
-
-	blkg_put(tg_to_blkg(tg));
 }
 #endif
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f08f5fe7bd08..a279ba384da9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -555,6 +555,7 @@ do {						\
 
 #ifdef CONFIG_BLK_CGROUP
 int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
+int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
 void bio_disassociate_task(struct bio *bio);
 void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
 #else	/* CONFIG_BLK_CGROUP */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3c4f390aea4b..3364d42ebe08 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -179,8 +179,8 @@ struct bio {
 	 */
 	struct io_context	*bi_ioc;
 	struct cgroup_subsys_state *bi_css;
+	struct blkcg_gq		*bi_blkg;
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	void			*bi_cg_private;
 	struct bio_issue	bi_issue;
 #endif
 #endif
-- 
cgit v1.2.3


From c7c98fd37653955d3a17dd4f1fa67aba070096a9 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:14:51 -0400
Subject: block: introduce bio_issue_as_root_blkg

Instead of forcing all file systems to get the right context on their
bio's, simply check for REQ_META to see if we need to issue as the root
blkg.  We don't want to force all bio's to have the root blkg associated
with them if REQ_META is set, as some controllers (blk-iolatency) need
to know who the originating cgroup is so it can backcharge them for the
work they are doing.  This helper will make sure that the controllers do
the proper thing wrt the IO priority and backcharging.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6c666fd7de3c..69aa71dc0c04 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -238,6 +238,22 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
 	return css_to_blkcg(task_css(current, io_cgrp_id));
 }
 
+/**
+ * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
+ * @return: true if this bio needs to be submitted with the root blkg context.
+ *
+ * In order to avoid priority inversions we sometimes need to issue a bio as if
+ * it were attached to the root blkg, and then backcharge to the actual owning
+ * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
+ * bio and attach the appropriate blkg to the bio.  Then we call this helper and
+ * if it is true run with the root blkg for that queue and then do any
+ * backcharging to the originating cgroup once the io is complete.
+ */
+static inline bool bio_issue_as_root_blkg(struct bio *bio)
+{
+	return (bio->bi_opf & REQ_META);
+}
+
 /**
  * blkcg_parent - get the parent of a blkcg
  * @blkcg: blkcg of interest
-- 
cgit v1.2.3


From 903d23f0a354f226fa78f1c1c34b60aaf992e812 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:14:52 -0400
Subject: blk-cgroup: allow controllers to output their own stats

blk-iolatency has a few stats that it would like to print out, and
instead of adding a bunch of crap to the generic code just provide a
helper so that controllers can add stuff to the stat line if they want
to.

Hide it behind a boot option since it changes the output of io.stat from
normal, and these stats are only interesting to developers.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c         | 47 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/blk-cgroup.h |  3 +++
 2 files changed, 47 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index eb85cb87c40f..7dc6f05cc44b 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -50,6 +50,8 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
 static LIST_HEAD(all_blkcgs);		/* protected by blkcg_pol_mutex */
 
+static bool blkcg_debug_stats = false;
+
 static bool blkcg_policy_enabled(struct request_queue *q,
 				 const struct blkcg_policy *pol)
 {
@@ -954,13 +956,25 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 
 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
 		const char *dname;
+		char *buf;
 		struct blkg_rwstat rwstat;
 		u64 rbytes, wbytes, rios, wios;
+		size_t size = seq_get_buf(sf, &buf), off = 0;
+		int i;
+		bool has_stats = false;
 
 		dname = blkg_dev_name(blkg);
 		if (!dname)
 			continue;
 
+		/*
+		 * Hooray string manipulation, count is the size written NOT
+		 * INCLUDING THE \0, so size is now count+1 less than what we
+		 * had before, but we want to start writing the next bit from
+		 * the \0 so we only add count to buf.
+		 */
+		off += scnprintf(buf+off, size-off, "%s ", dname);
+
 		spin_lock_irq(blkg->q->queue_lock);
 
 		rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
@@ -975,9 +989,33 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 
 		spin_unlock_irq(blkg->q->queue_lock);
 
-		if (rbytes || wbytes || rios || wios)
-			seq_printf(sf, "%s rbytes=%llu wbytes=%llu rios=%llu wios=%llu\n",
-				   dname, rbytes, wbytes, rios, wios);
+		if (rbytes || wbytes || rios || wios) {
+			has_stats = true;
+			off += scnprintf(buf+off, size-off,
+					 "rbytes=%llu wbytes=%llu rios=%llu wios=%llu",
+					 rbytes, wbytes, rios, wios);
+		}
+
+		if (!blkcg_debug_stats)
+			goto next;
+
+		for (i = 0; i < BLKCG_MAX_POLS; i++) {
+			struct blkcg_policy *pol = blkcg_policy[i];
+			size_t written;
+
+			if (!blkg->pd[i] || !pol->pd_stat_fn)
+				continue;
+
+			written = pol->pd_stat_fn(blkg->pd[i], buf+off, size-off);
+			if (written)
+				has_stats = true;
+			off += written;
+		}
+next:
+		if (has_stats) {
+			off += scnprintf(buf+off, size-off, "\n");
+			seq_commit(sf, off);
+		}
 	}
 
 	rcu_read_unlock();
@@ -1547,3 +1585,6 @@ out_unlock:
 	mutex_unlock(&blkcg_pol_register_mutex);
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
+
+module_param(blkcg_debug_stats, bool, 0644);
+MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 69aa71dc0c04..b41292726c0f 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -148,6 +148,8 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
+typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
+				      size_t size);
 
 struct blkcg_policy {
 	int				plid;
@@ -167,6 +169,7 @@ struct blkcg_policy {
 	blkcg_pol_offline_pd_fn		*pd_offline_fn;
 	blkcg_pol_free_pd_fn		*pd_free_fn;
 	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
+	blkcg_pol_stat_pd_fn		*pd_stat_fn;
 };
 
 extern struct blkcg blkcg_root;
-- 
cgit v1.2.3


From 0d1e0c7cd5909d6c6aa0957179318e13fcca971a Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:14:53 -0400
Subject: blk: introduce REQ_SWAP

Just like REQ_META, it's important to know the IO coming down is swap
in order to guard against potential IO priority inversion issues with
cgroups.  Add REQ_SWAP and use it for all swap IO, and add it to our
bio_issue_as_root_blkg helper.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 2 +-
 include/linux/blk_types.h  | 3 ++-
 mm/page_io.c               | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index b41292726c0f..a8f9ba8f33a4 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -254,7 +254,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
  */
 static inline bool bio_issue_as_root_blkg(struct bio *bio)
 {
-	return (bio->bi_opf & REQ_META);
+	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
 }
 
 /**
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3364d42ebe08..0ffc34c5cc83 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -329,7 +329,7 @@ enum req_flag_bits {
 
 	/* for driver use */
 	__REQ_DRV,
-
+	__REQ_SWAP,		/* swapping request. */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -351,6 +351,7 @@ enum req_flag_bits {
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
 
 #define REQ_DRV			(1ULL << __REQ_DRV)
+#define REQ_SWAP		(1ULL << __REQ_SWAP)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
diff --git a/mm/page_io.c b/mm/page_io.c
index b41cf9644585..a552cb37e220 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -338,7 +338,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		ret = -ENOMEM;
 		goto out;
 	}
-	bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
 	count_swpout_vm_event(page);
 	set_page_writeback(page);
 	unlock_page(page);
-- 
cgit v1.2.3


From 0d3bd88d54f513723602b361dccfc71639f50779 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 3 Jul 2018 11:14:54 -0400
Subject: swap,blkcg: issue swap io with the appropriate context

For backcharging we need to know who the page belongs to when swapping
it out.  We don't worry about things that do ->rw_page (zram etc) at the
moment, we're only worried about pages that actually go to a block
device.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 24 ++++++++++++++++++++++++
 include/linux/bio.h |  7 +++++++
 mm/page_io.c        |  1 +
 3 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 044571538574..5f84f5c3887b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -2015,6 +2015,30 @@ EXPORT_SYMBOL(bioset_init_from_src);
 
 #ifdef CONFIG_BLK_CGROUP
 
+#ifdef CONFIG_MEMCG
+/**
+ * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
+ * @bio: target bio
+ * @page: the page to lookup the blkcg from
+ *
+ * Associate @bio with the blkcg from @page's owning memcg.  This works like
+ * every other associate function wrt references.
+ */
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
+{
+	struct cgroup_subsys_state *blkcg_css;
+
+	if (unlikely(bio->bi_css))
+		return -EBUSY;
+	if (!page->mem_cgroup)
+		return 0;
+	blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
+				     &io_cgrp_subsys);
+	bio->bi_css = blkcg_css;
+	return 0;
+}
+#endif /* CONFIG_MEMCG */
+
 /**
  * bio_associate_blkcg - associate a bio with the specified blkcg
  * @bio: target bio
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a279ba384da9..a00dfff51aa5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -553,6 +553,13 @@ do {						\
 #define bio_dev(bio) \
 	disk_devt((bio)->bi_disk)
 
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
+#else
+static inline int bio_associate_blkcg_from_page(struct bio *bio,
+						struct page *page) {  return 0; }
+#endif
+
 #ifdef CONFIG_BLK_CGROUP
 int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
diff --git a/mm/page_io.c b/mm/page_io.c
index a552cb37e220..aafd19ec1db4 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -339,6 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		goto out;
 	}
 	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
+	bio_associate_blkcg_from_page(bio, page);
 	count_swpout_vm_event(page);
 	set_page_writeback(page);
 	unlock_page(page);
-- 
cgit v1.2.3


From d09d8df3a29403693d9d20cc34ed101f2c558e2b Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:14:55 -0400
Subject: blkcg: add generic throttling mechanism

Since IO can be issued from literally anywhere it's almost impossible to
do throttling without having some sort of adverse effect somewhere else
in the system because of locking or other dependencies.  The best way to
solve this is to do the throttling when we know we aren't holding any
other kernel resources.  Do this by tracking throttling in a per-blkg
basis, and if we require throttling flag the task that it needs to check
before it returns to user space and possibly sleep there.

This is to address the case where a process is doing work that is
generating IO that can't be throttled, whether that is directly with a
lot of REQ_META IO, or indirectly by allocating so much memory that it
is swamping the disk with REQ_SWAP.  We can't use task_add_work as we
don't want to induce a memory allocation in the IO path, so simply
saving the request queue in the task and flagging it to do the
notify_resume thing achieves the same result without the overhead of a
memory allocation.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c          | 220 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blk-cgroup.h  |  99 ++++++++++++++++++++
 include/linux/cgroup-defs.h |   3 +
 include/linux/sched.h       |   8 ++
 include/linux/tracehook.h   |   2 +
 5 files changed, 332 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7dc6f05cc44b..d3310ec96c2a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -27,6 +27,7 @@
 #include <linux/atomic.h>
 #include <linux/ctype.h>
 #include <linux/blk-cgroup.h>
+#include <linux/tracehook.h>
 #include "blk.h"
 
 #define MAX_KEY_LEN 100
@@ -999,6 +1000,14 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 		if (!blkcg_debug_stats)
 			goto next;
 
+		if (atomic_read(&blkg->use_delay)) {
+			has_stats = true;
+			off += scnprintf(buf+off, size-off,
+					 " use_delay=%d delay_nsec=%llu",
+					 atomic_read(&blkg->use_delay),
+					(unsigned long long)atomic64_read(&blkg->delay_nsec));
+		}
+
 		for (i = 0; i < BLKCG_MAX_POLS; i++) {
 			struct blkcg_policy *pol = blkcg_policy[i];
 			size_t written;
@@ -1326,6 +1335,13 @@ static void blkcg_bind(struct cgroup_subsys_state *root_css)
 	mutex_unlock(&blkcg_pol_mutex);
 }
 
+static void blkcg_exit(struct task_struct *tsk)
+{
+	if (tsk->throttle_queue)
+		blk_put_queue(tsk->throttle_queue);
+	tsk->throttle_queue = NULL;
+}
+
 struct cgroup_subsys io_cgrp_subsys = {
 	.css_alloc = blkcg_css_alloc,
 	.css_offline = blkcg_css_offline,
@@ -1335,6 +1351,7 @@ struct cgroup_subsys io_cgrp_subsys = {
 	.dfl_cftypes = blkcg_files,
 	.legacy_cftypes = blkcg_legacy_files,
 	.legacy_name = "blkio",
+	.exit = blkcg_exit,
 #ifdef CONFIG_MEMCG
 	/*
 	 * This ensures that, if available, memcg is automatically enabled
@@ -1586,5 +1603,208 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
 
+/*
+ * Scale the accumulated delay based on how long it has been since we updated
+ * the delay.  We only call this when we are adding delay, in case it's been a
+ * while since we added delay, and when we are checking to see if we need to
+ * delay a task, to account for any delays that may have occurred.
+ */
+static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
+{
+	u64 old = atomic64_read(&blkg->delay_start);
+
+	/*
+	 * We only want to scale down every second.  The idea here is that we
+	 * want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain
+	 * time window.  We only want to throttle tasks for recent delay that
+	 * has occurred, in 1 second time windows since that's the maximum
+	 * things can be throttled.  We save the current delay window in
+	 * blkg->last_delay so we know what amount is still left to be charged
+	 * to the blkg from this point onward.  blkg->last_use keeps track of
+	 * the use_delay counter.  The idea is if we're unthrottling the blkg we
+	 * are ok with whatever is happening now, and we can take away more of
+	 * the accumulated delay as we've already throttled enough that
+	 * everybody is happy with their IO latencies.
+	 */
+	if (time_before64(old + NSEC_PER_SEC, now) &&
+	    atomic64_cmpxchg(&blkg->delay_start, old, now) == old) {
+		u64 cur = atomic64_read(&blkg->delay_nsec);
+		u64 sub = min_t(u64, blkg->last_delay, now - old);
+		int cur_use = atomic_read(&blkg->use_delay);
+
+		/*
+		 * We've been unthrottled, subtract a larger chunk of our
+		 * accumulated delay.
+		 */
+		if (cur_use < blkg->last_use)
+			sub = max_t(u64, sub, blkg->last_delay >> 1);
+
+		/*
+		 * This shouldn't happen, but handle it anyway.  Our delay_nsec
+		 * should only ever be growing except here where we subtract out
+		 * min(last_delay, 1 second), but lord knows bugs happen and I'd
+		 * rather not end up with negative numbers.
+		 */
+		if (unlikely(cur < sub)) {
+			atomic64_set(&blkg->delay_nsec, 0);
+			blkg->last_delay = 0;
+		} else {
+			atomic64_sub(sub, &blkg->delay_nsec);
+			blkg->last_delay = cur - sub;
+		}
+		blkg->last_use = cur_use;
+	}
+}
+
+/*
+ * This is called when we want to actually walk up the hierarchy and check to
+ * see if we need to throttle, and then actually throttle if there is some
+ * accumulated delay.  This should only be called upon return to user space so
+ * we're not holding some lock that would induce a priority inversion.
+ */
+static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
+{
+	u64 now = ktime_to_ns(ktime_get());
+	u64 exp;
+	u64 delay_nsec = 0;
+	int tok;
+
+	while (blkg->parent) {
+		if (atomic_read(&blkg->use_delay)) {
+			blkcg_scale_delay(blkg, now);
+			delay_nsec = max_t(u64, delay_nsec,
+					   atomic64_read(&blkg->delay_nsec));
+		}
+		blkg = blkg->parent;
+	}
+
+	if (!delay_nsec)
+		return;
+
+	/*
+	 * Let's not sleep for all eternity if we've amassed a huge delay.
+	 * Swapping or metadata IO can accumulate 10's of seconds worth of
+	 * delay, and we want userspace to be able to do _something_ so cap the
+	 * delays at 1 second.  If there's 10's of seconds worth of delay then
+	 * the tasks will be delayed for 1 second for every syscall.
+	 */
+	delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
+
+	/*
+	 * TODO: the use_memdelay flag is going to be for the upcoming psi stuff
+	 * that hasn't landed upstream yet.  Once that stuff is in place we need
+	 * to do a psi_memstall_enter/leave if memdelay is set.
+	 */
+
+	exp = ktime_add_ns(now, delay_nsec);
+	tok = io_schedule_prepare();
+	do {
+		__set_current_state(TASK_KILLABLE);
+		if (!schedule_hrtimeout(&exp, HRTIMER_MODE_ABS))
+			break;
+	} while (!fatal_signal_pending(current));
+	io_schedule_finish(tok);
+}
+
+/**
+ * blkcg_maybe_throttle_current - throttle the current task if it has been marked
+ *
+ * This is only called if we've been marked with set_notify_resume().  Obviously
+ * we can be set_notify_resume() for reasons other than blkcg throttling, so we
+ * check to see if current->throttle_queue is set and if not this doesn't do
+ * anything.  This should only ever be called by the resume code, it's not meant
+ * to be called by people willy-nilly as it will actually do the work to
+ * throttle the task if it is setup for throttling.
+ */
+void blkcg_maybe_throttle_current(void)
+{
+	struct request_queue *q = current->throttle_queue;
+	struct cgroup_subsys_state *css;
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+	bool use_memdelay = current->use_memdelay;
+
+	if (!q)
+		return;
+
+	current->throttle_queue = NULL;
+	current->use_memdelay = false;
+
+	rcu_read_lock();
+	css = kthread_blkcg();
+	if (css)
+		blkcg = css_to_blkcg(css);
+	else
+		blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
+
+	if (!blkcg)
+		goto out;
+	blkg = blkg_lookup(blkcg, q);
+	if (!blkg)
+		goto out;
+	blkg = blkg_try_get(blkg);
+	if (!blkg)
+		goto out;
+	rcu_read_unlock();
+	blk_put_queue(q);
+
+	blkcg_maybe_throttle_blkg(blkg, use_memdelay);
+	blkg_put(blkg);
+	return;
+out:
+	rcu_read_unlock();
+	blk_put_queue(q);
+}
+EXPORT_SYMBOL_GPL(blkcg_maybe_throttle_current);
+
+/**
+ * blkcg_schedule_throttle - this task needs to check for throttling
+ * @q - the request queue IO was submitted on
+ * @use_memdelay - do we charge this to memory delay for PSI
+ *
+ * This is called by the IO controller when we know there's delay accumulated
+ * for the blkg for this task.  We do not pass the blkg because there are places
+ * we call this that may not have that information, the swapping code for
+ * instance will only have a request_queue at that point.  This set's the
+ * notify_resume for the task to check and see if it requires throttling before
+ * returning to user space.
+ *
+ * We will only schedule once per syscall.  You can call this over and over
+ * again and it will only do the check once upon return to user space, and only
+ * throttle once.  If the task needs to be throttled again it'll need to be
+ * re-set at the next time we see the task.
+ */
+void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
+{
+	if (unlikely(current->flags & PF_KTHREAD))
+		return;
+
+	if (!blk_get_queue(q))
+		return;
+
+	if (current->throttle_queue)
+		blk_put_queue(current->throttle_queue);
+	current->throttle_queue = q;
+	if (use_memdelay)
+		current->use_memdelay = use_memdelay;
+	set_notify_resume(current);
+}
+EXPORT_SYMBOL_GPL(blkcg_schedule_throttle);
+
+/**
+ * blkcg_add_delay - add delay to this blkg
+ * @now - the current time in nanoseconds
+ * @delta - how many nanoseconds of delay to add
+ *
+ * Charge @delta to the blkg's current delay accumulation.  This is used to
+ * throttle tasks if an IO controller thinks we need more throttling.
+ */
+void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
+{
+	blkcg_scale_delay(blkg, now);
+	atomic64_add(delta, &blkg->delay_nsec);
+}
+EXPORT_SYMBOL_GPL(blkcg_add_delay);
+
 module_param(blkcg_debug_stats, bool, 0644);
 MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index a8f9ba8f33a4..de57de4831d5 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -136,6 +136,12 @@ struct blkcg_gq {
 	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];
 
 	struct rcu_head			rcu_head;
+
+	atomic_t			use_delay;
+	atomic64_t			delay_nsec;
+	atomic64_t			delay_start;
+	u64				last_delay;
+	int				last_use;
 };
 
 typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
@@ -241,6 +247,26 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
 	return css_to_blkcg(task_css(current, io_cgrp_id));
 }
 
+static inline bool blk_cgroup_congested(void)
+{
+	struct cgroup_subsys_state *css;
+	bool ret = false;
+
+	rcu_read_lock();
+	css = kthread_blkcg();
+	if (!css)
+		css = task_css(current, io_cgrp_id);
+	while (css) {
+		if (atomic_read(&css->cgroup->congestion_count)) {
+			ret = true;
+			break;
+		}
+		css = css->parent;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
 /**
  * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
  * @return: true if this bio needs to be submitted with the root blkg context.
@@ -374,6 +400,21 @@ static inline void blkg_get(struct blkcg_gq *blkg)
 	atomic_inc(&blkg->refcnt);
 }
 
+/**
+ * blkg_try_get - try and get a blkg reference
+ * @blkg: blkg to get
+ *
+ * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
+ * of freeing this blkg, so we can only use it if the refcnt is not zero.
+ */
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
+{
+	if (atomic_inc_not_zero(&blkg->refcnt))
+		return blkg;
+	return NULL;
+}
+
+
 void __blkg_release_rcu(struct rcu_head *rcu);
 
 /**
@@ -734,6 +775,59 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 	return !throtl;
 }
 
+static inline void blkcg_use_delay(struct blkcg_gq *blkg)
+{
+	if (atomic_add_return(1, &blkg->use_delay) == 1)
+		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+}
+
+static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
+{
+	int old = atomic_read(&blkg->use_delay);
+
+	if (old == 0)
+		return 0;
+
+	/*
+	 * We do this song and dance because we can race with somebody else
+	 * adding or removing delay.  If we just did an atomic_dec we'd end up
+	 * negative and we'd already be in trouble.  We need to subtract 1 and
+	 * then check to see if we were the last delay so we can drop the
+	 * congestion count on the cgroup.
+	 */
+	while (old) {
+		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
+		if (cur == old)
+			break;
+		old = cur;
+	}
+
+	if (old == 0)
+		return 0;
+	if (old == 1)
+		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+	return 1;
+}
+
+static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
+{
+	int old = atomic_read(&blkg->use_delay);
+	if (!old)
+		return;
+	/* We only want 1 person clearing the congestion count for this blkg. */
+	while (old) {
+		int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
+		if (cur == old) {
+			atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+			break;
+		}
+		old = cur;
+	}
+}
+
+void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
+void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
+void blkcg_maybe_throttle_current(void);
 #else	/* CONFIG_BLK_CGROUP */
 
 struct blkcg {
@@ -753,8 +847,13 @@ struct blkcg_policy {
 
 #define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
 
+static inline void blkcg_maybe_throttle_current(void) { }
+static inline bool blk_cgroup_congested(void) { return false; }
+
 #ifdef CONFIG_BLOCK
 
+static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
+
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index c0e68f903011..ff20b677fb9f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -438,6 +438,9 @@ struct cgroup {
 	/* used to store eBPF programs */
 	struct cgroup_bpf bpf;
 
+	/* If there is block congestion on this cgroup. */
+	atomic_t congestion_count;
+
 	/* ids of the ancestors at each level including self */
 	int ancestor_ids[];
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43731fe51c97..c2e993de67ec 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,6 +734,10 @@ struct task_struct {
 	/* disallow userland-initiated cgroup migration */
 	unsigned			no_cgroup_migration:1;
 #endif
+#ifdef CONFIG_BLK_CGROUP
+	/* to be used once the psi infrastructure lands upstream. */
+	unsigned			use_memdelay:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
@@ -1151,6 +1155,10 @@ struct task_struct {
 	unsigned int			memcg_nr_pages_over_high;
 #endif
 
+#ifdef CONFIG_BLK_CGROUP
+	struct request_queue		*throttle_queue;
+#endif
+
 #ifdef CONFIG_UPROBES
 	struct uprobe_task		*utask;
 #endif
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 4a8841963c2e..05589a3e37f4 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -51,6 +51,7 @@
 #include <linux/security.h>
 #include <linux/task_work.h>
 #include <linux/memcontrol.h>
+#include <linux/blk-cgroup.h>
 struct linux_binprm;
 
 /*
@@ -192,6 +193,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
 		task_work_run();
 
 	mem_cgroup_handle_over_high();
+	blkcg_maybe_throttle_current();
 }
 
 #endif	/* <linux/tracehook.h> */
-- 
cgit v1.2.3


From 2cf855837b89d92996cf264713f3bed2bf9b0b4f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 3 Jul 2018 11:14:56 -0400
Subject: memcontrol: schedule throttling if we are congested

Memory allocations can induce swapping via kswapd or direct reclaim.  If
we are having IO done for us by kswapd and don't actually go into direct
reclaim we may never get scheduled for throttling.  So instead check to
see if our cgroup is congested, and if so schedule the throttling.
Before we return to user space the throttling stuff will only throttle
if we actually required it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/memcontrol.h | 13 +++++++++++++
 include/linux/swap.h       | 11 ++++++++++-
 mm/huge_memory.c           |  6 +++---
 mm/memcontrol.c            | 13 +++++++++++++
 mm/memory.c                | 11 ++++++-----
 mm/shmem.c                 | 10 +++++-----
 mm/swapfile.c              | 31 +++++++++++++++++++++++++++++++
 7 files changed, 81 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6c6fb116e925..680d3395fc83 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -317,6 +317,9 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
 			  bool compound);
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
+			  bool compound);
 void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 			      bool lrucare, bool compound);
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
@@ -789,6 +792,16 @@ static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 	return 0;
 }
 
+static inline int mem_cgroup_try_charge_delay(struct page *page,
+					      struct mm_struct *mm,
+					      gfp_t gfp_mask,
+					      struct mem_cgroup **memcgp,
+					      bool compound)
+{
+	*memcgp = NULL;
+	return 0;
+}
+
 static inline void mem_cgroup_commit_charge(struct page *page,
 					    struct mem_cgroup *memcg,
 					    bool lrucare, bool compound)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c063443d8638..1a8bd05a335e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -629,7 +629,6 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 
 	return memcg->swappiness;
 }
-
 #else
 static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 {
@@ -637,6 +636,16 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 }
 #endif
 
+#if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+extern void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+					 gfp_t gfp_mask);
+#else
+static inline void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg,
+						int node, gfp_t gfp_mask)
+{
+}
+#endif
+
 #ifdef CONFIG_MEMCG_SWAP
 extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
 extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1cd7c1a57a14..b87d5b151db2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -552,7 +552,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg, true)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1142,7 +1142,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
 		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
 					       vmf->address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
-			     mem_cgroup_try_charge(pages[i], vma->vm_mm,
+			     mem_cgroup_try_charge_delay(pages[i], vma->vm_mm,
 				     GFP_KERNEL, &memcg, false))) {
 			if (pages[i])
 				put_page(pages[i]);
@@ -1312,7 +1312,7 @@ alloc:
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
+	if (unlikely(mem_cgroup_try_charge_delay(new_page, vma->vm_mm,
 					huge_gfp, &memcg, true))) {
 		put_page(new_page);
 		split_huge_pmd(vma, vmf->pmd, vmf->address);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e6f0d5ef320a..64bd28d35388 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5593,6 +5593,19 @@ out:
 	return ret;
 }
 
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
+			  bool compound)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	ret = mem_cgroup_try_charge(page, mm, gfp_mask, memcgp, compound);
+	memcg = *memcgp;
+	mem_cgroup_throttle_swaprate(memcg, page_to_nid(page), gfp_mask);
+	return ret;
+}
+
 /**
  * mem_cgroup_commit_charge - commit a page charge
  * @page: page to charge
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..dfe80c574282 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2503,7 +2503,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 		cow_user_page(new_page, old_page, vmf->address, vma);
 	}
 
-	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
 		goto oom_free_new;
 
 	__SetPageUptodate(new_page);
@@ -3003,8 +3003,8 @@ int do_swap_page(struct vm_fault *vmf)
 		goto out_page;
 	}
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
-				&memcg, false)) {
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL,
+					&memcg, false)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
 	}
@@ -3165,7 +3165,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	if (!page)
 		goto oom;
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg,
+					false))
 		goto oom_free_page;
 
 	/*
@@ -3661,7 +3662,7 @@ static int do_cow_fault(struct vm_fault *vmf)
 	if (!vmf->cow_page)
 		return VM_FAULT_OOM;
 
-	if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
+	if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
 				&vmf->memcg, false)) {
 		put_page(vmf->cow_page);
 		return VM_FAULT_OOM;
diff --git a/mm/shmem.c b/mm/shmem.c
index 2cab84403055..6206ca3510cf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1239,8 +1239,8 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
 	 * Charged back to the user (not to caller) when swap account is used.
 	 */
-	error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
-			false);
+	error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL,
+					    &memcg, false);
 	if (error)
 		goto out;
 	/* No radix_tree_preload: swap entry keeps a place for page in tree */
@@ -1712,7 +1712,7 @@ repeat:
 				goto failed;
 		}
 
-		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
 				false);
 		if (!error) {
 			error = shmem_add_to_page_cache(page, mapping, index,
@@ -1818,7 +1818,7 @@ alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, inode,
 		if (sgp == SGP_WRITE)
 			__SetPageReferenced(page);
 
-		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
 				PageTransHuge(page));
 		if (error)
 			goto unacct;
@@ -2291,7 +2291,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	__SetPageSwapBacked(page);
 	__SetPageUptodate(page);
 
-	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
+	ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
 	if (ret)
 		goto out_release;
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2cc2972eedaf..db4ec8ae1c8c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3731,6 +3731,37 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
 	}
 }
 
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+				  gfp_t gfp_mask)
+{
+	struct swap_info_struct *si, *next;
+	if (!(gfp_mask & __GFP_IO) || !memcg)
+		return;
+
+	if (!blk_cgroup_congested())
+		return;
+
+	/*
+	 * We've already scheduled a throttle, avoid taking the global swap
+	 * lock.
+	 */
+	if (current->throttle_queue)
+		return;
+
+	spin_lock(&swap_avail_lock);
+	plist_for_each_entry_safe(si, next, &swap_avail_heads[node],
+				  avail_lists[node]) {
+		if (si->bdev) {
+			blkcg_schedule_throttle(bdev_get_queue(si->bdev),
+						true);
+			break;
+		}
+	}
+	spin_unlock(&swap_avail_lock);
+}
+#endif
+
 static int __init swapfile_init(void)
 {
 	int nid;
-- 
cgit v1.2.3


From a79050434b45959f397042080fd1d70ffa9bd9df Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 09:32:35 -0600
Subject: blk-rq-qos: refactor out common elements of blk-wbt

blkcg-qos is going to do essentially what wbt does, only on a cgroup
basis.  Break out the common code that will be shared between blkcg-qos
and wbt into blk-rq-qos.* so they can both utilize the same
infrastructure.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Makefile         |   2 +-
 block/blk-core.c       |  12 +-
 block/blk-mq.c         |  12 +-
 block/blk-rq-qos.c     | 178 +++++++++++++++++++++++++++
 block/blk-rq-qos.h     | 106 ++++++++++++++++
 block/blk-settings.c   |   4 +-
 block/blk-sysfs.c      |  22 ++--
 block/blk-wbt.c        | 326 ++++++++++++++++++++++---------------------------
 block/blk-wbt.h        |  63 ++++------
 include/linux/blkdev.h |   4 +-
 10 files changed, 478 insertions(+), 251 deletions(-)
 create mode 100644 block/blk-rq-qos.c
 create mode 100644 block/blk-rq-qos.h

(limited to 'include/linux')

diff --git a/block/Makefile b/block/Makefile
index a8f94cdb75c3..57d0f47ab05f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
 			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
 			genhd.o partition-generic.o ioprio.o \
-			badblocks.o partitions/
+			badblocks.o partitions/ blk-rq-qos.o
 
 obj-$(CONFIG_BOUNCE)		+= bounce.o
 obj-$(CONFIG_BLK_SCSI_REQUEST)	+= scsi_ioctl.o
diff --git a/block/blk-core.c b/block/blk-core.c
index 2ff8e131a892..b33a73bcf2d0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1645,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, rq);
+	rq_qos_requeue(q, rq);
 
 	if (rq->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, rq);
@@ -1752,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	/* this is a bio leak */
 	WARN_ON(req->bio != NULL);
 
-	wbt_done(q->rq_wb, req);
+	rq_qos_done(q, req);
 
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
@@ -2044,7 +2044,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	}
 
 get_rq:
-	wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
+	wb_acct = rq_qos_throttle(q, bio, q->queue_lock);
 
 	/*
 	 * Grab a free request. This is might sleep but can not fail.
@@ -2054,7 +2054,7 @@ get_rq:
 	req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
 	if (IS_ERR(req)) {
 		blk_queue_exit(q);
-		__wbt_done(q->rq_wb, wb_acct);
+		rq_qos_cleanup(q, wb_acct);
 		if (PTR_ERR(req) == -ENOMEM)
 			bio->bi_status = BLK_STS_RESOURCE;
 		else
@@ -2983,7 +2983,7 @@ void blk_start_request(struct request *req)
 		req->throtl_size = blk_rq_sectors(req);
 #endif
 		req->rq_flags |= RQF_STATS;
-		wbt_issue(req->q->rq_wb, req);
+		rq_qos_issue(req->q, req);
 	}
 
 	BUG_ON(blk_rq_is_complete(req));
@@ -3207,7 +3207,7 @@ void blk_finish_request(struct request *req, blk_status_t error)
 	blk_account_io_done(req, now);
 
 	if (req->end_io) {
-		wbt_done(req->q->rq_wb, req);
+		rq_qos_done(q, req);
 		req->end_io(req, error);
 	} else {
 		if (blk_bidi_rq(req))
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 850fdd02c385..ea2a226457fa 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -504,7 +504,7 @@ void blk_mq_free_request(struct request *rq)
 	if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
 		laptop_io_completion(q->backing_dev_info);
 
-	wbt_done(q->rq_wb, rq);
+	rq_qos_done(q, rq);
 
 	if (blk_rq_rl(rq))
 		blk_put_rl(blk_rq_rl(rq));
@@ -527,7 +527,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 	blk_account_io_done(rq, now);
 
 	if (rq->end_io) {
-		wbt_done(rq->q->rq_wb, rq);
+		rq_qos_done(rq->q, rq);
 		rq->end_io(rq, error);
 	} else {
 		if (unlikely(blk_bidi_rq(rq)))
@@ -641,7 +641,7 @@ void blk_mq_start_request(struct request *rq)
 		rq->throtl_size = blk_rq_sectors(rq);
 #endif
 		rq->rq_flags |= RQF_STATS;
-		wbt_issue(q->rq_wb, rq);
+		rq_qos_issue(q, rq);
 	}
 
 	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
@@ -667,7 +667,7 @@ static void __blk_mq_requeue_request(struct request *rq)
 	blk_mq_put_driver_tag(rq);
 
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, rq);
+	rq_qos_requeue(q, rq);
 
 	if (blk_mq_request_started(rq)) {
 		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
@@ -1806,13 +1806,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	if (blk_mq_sched_bio_merge(q, bio))
 		return BLK_QC_T_NONE;
 
-	wb_acct = wbt_wait(q->rq_wb, bio, NULL);
+	wb_acct = rq_qos_throttle(q, bio, NULL);
 
 	trace_block_getrq(q, bio, bio->bi_opf);
 
 	rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
 	if (unlikely(!rq)) {
-		__wbt_done(q->rq_wb, wb_acct);
+		rq_qos_cleanup(q, wb_acct);
 		if (bio->bi_opf & REQ_NOWAIT)
 			bio_wouldblock_error(bio);
 		return BLK_QC_T_NONE;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
new file mode 100644
index 000000000000..d2f2af8aa10c
--- /dev/null
+++ b/block/blk-rq-qos.c
@@ -0,0 +1,178 @@
+#include "blk-rq-qos.h"
+
+#include "blk-wbt.h"
+
+/*
+ * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
+ * false if 'v' + 1 would be bigger than 'below'.
+ */
+static bool atomic_inc_below(atomic_t *v, int below)
+{
+	int cur = atomic_read(v);
+
+	for (;;) {
+		int old;
+
+		if (cur >= below)
+			return false;
+		old = atomic_cmpxchg(v, cur, cur + 1);
+		if (old == cur)
+			break;
+		cur = old;
+	}
+
+	return true;
+}
+
+bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit)
+{
+	return atomic_inc_below(&rq_wait->inflight, limit);
+}
+
+void rq_qos_cleanup(struct request_queue *q, enum wbt_flags wb_acct)
+{
+	struct rq_qos *rqos;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->cleanup)
+			rqos->ops->cleanup(rqos, wb_acct);
+	}
+}
+
+void rq_qos_done(struct request_queue *q, struct request *rq)
+{
+	struct rq_qos *rqos;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->done)
+			rqos->ops->done(rqos, rq);
+	}
+}
+
+void rq_qos_issue(struct request_queue *q, struct request *rq)
+{
+	struct rq_qos *rqos;
+
+	for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->issue)
+			rqos->ops->issue(rqos, rq);
+	}
+}
+
+void rq_qos_requeue(struct request_queue *q, struct request *rq)
+{
+	struct rq_qos *rqos;
+
+	for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->requeue)
+			rqos->ops->requeue(rqos, rq);
+	}
+}
+
+enum wbt_flags rq_qos_throttle(struct request_queue *q, struct bio *bio,
+			       spinlock_t *lock)
+{
+	struct rq_qos *rqos;
+	enum wbt_flags flags = 0;
+
+	for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->throttle)
+			flags |= rqos->ops->throttle(rqos, bio, lock);
+	}
+	return flags;
+}
+
+/*
+ * Return true, if we can't increase the depth further by scaling
+ */
+bool rq_depth_calc_max_depth(struct rq_depth *rqd)
+{
+	unsigned int depth;
+	bool ret = false;
+
+	/*
+	 * For QD=1 devices, this is a special case. It's important for those
+	 * to have one request ready when one completes, so force a depth of
+	 * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
+	 * since the device can't have more than that in flight. If we're
+	 * scaling down, then keep a setting of 1/1/1.
+	 */
+	if (rqd->queue_depth == 1) {
+		if (rqd->scale_step > 0)
+			rqd->max_depth = 1;
+		else {
+			rqd->max_depth = 2;
+			ret = true;
+		}
+	} else {
+		/*
+		 * scale_step == 0 is our default state. If we have suffered
+		 * latency spikes, step will be > 0, and we shrink the
+		 * allowed write depths. If step is < 0, we're only doing
+		 * writes, and we allow a temporarily higher depth to
+		 * increase performance.
+		 */
+		depth = min_t(unsigned int, rqd->default_depth,
+			      rqd->queue_depth);
+		if (rqd->scale_step > 0)
+			depth = 1 + ((depth - 1) >> min(31, rqd->scale_step));
+		else if (rqd->scale_step < 0) {
+			unsigned int maxd = 3 * rqd->queue_depth / 4;
+
+			depth = 1 + ((depth - 1) << -rqd->scale_step);
+			if (depth > maxd) {
+				depth = maxd;
+				ret = true;
+			}
+		}
+
+		rqd->max_depth = depth;
+	}
+
+	return ret;
+}
+
+void rq_depth_scale_up(struct rq_depth *rqd)
+{
+	/*
+	 * Hit max in previous round, stop here
+	 */
+	if (rqd->scaled_max)
+		return;
+
+	rqd->scale_step--;
+
+	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
+}
+
+/*
+ * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
+ * had a latency violation.
+ */
+void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
+{
+	/*
+	 * Stop scaling down when we've hit the limit. This also prevents
+	 * ->scale_step from going to crazy values, if the device can't
+	 * keep up.
+	 */
+	if (rqd->max_depth == 1)
+		return;
+
+	if (rqd->scale_step < 0 && hard_throttle)
+		rqd->scale_step = 0;
+	else
+		rqd->scale_step++;
+
+	rqd->scaled_max = false;
+	rq_depth_calc_max_depth(rqd);
+}
+
+void rq_qos_exit(struct request_queue *q)
+{
+	while (q->rq_qos) {
+		struct rq_qos *rqos = q->rq_qos;
+		q->rq_qos = rqos->next;
+		rqos->ops->exit(rqos);
+	}
+}
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
new file mode 100644
index 000000000000..f9a39bd6ece3
--- /dev/null
+++ b/block/blk-rq-qos.h
@@ -0,0 +1,106 @@
+#ifndef RQ_QOS_H
+#define RQ_QOS_H
+
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/blk_types.h>
+#include <linux/atomic.h>
+#include <linux/wait.h>
+
+enum rq_qos_id {
+	RQ_QOS_WBT,
+	RQ_QOS_CGROUP,
+};
+
+struct rq_wait {
+	wait_queue_head_t wait;
+	atomic_t inflight;
+};
+
+struct rq_qos {
+	struct rq_qos_ops *ops;
+	struct request_queue *q;
+	enum rq_qos_id id;
+	struct rq_qos *next;
+};
+
+struct rq_qos_ops {
+	enum wbt_flags (*throttle)(struct rq_qos *, struct bio *,
+				   spinlock_t *);
+	void (*issue)(struct rq_qos *, struct request *);
+	void (*requeue)(struct rq_qos *, struct request *);
+	void (*done)(struct rq_qos *, struct request *);
+	void (*cleanup)(struct rq_qos *, enum wbt_flags);
+	void (*exit)(struct rq_qos *);
+};
+
+struct rq_depth {
+	unsigned int max_depth;
+
+	int scale_step;
+	bool scaled_max;
+
+	unsigned int queue_depth;
+	unsigned int default_depth;
+};
+
+static inline struct rq_qos *rq_qos_id(struct request_queue *q,
+				       enum rq_qos_id id)
+{
+	struct rq_qos *rqos;
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->id == id)
+			break;
+	}
+	return rqos;
+}
+
+static inline struct rq_qos *wbt_rq_qos(struct request_queue *q)
+{
+	return rq_qos_id(q, RQ_QOS_WBT);
+}
+
+static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q)
+{
+	return rq_qos_id(q, RQ_QOS_CGROUP);
+}
+
+static inline void rq_wait_init(struct rq_wait *rq_wait)
+{
+	atomic_set(&rq_wait->inflight, 0);
+	init_waitqueue_head(&rq_wait->wait);
+}
+
+static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+{
+	rqos->next = q->rq_qos;
+	q->rq_qos = rqos;
+}
+
+static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+{
+	struct rq_qos *cur, *prev = NULL;
+	for (cur = q->rq_qos; cur; cur = cur->next) {
+		if (cur == rqos) {
+			if (prev)
+				prev->next = rqos->next;
+			else
+				q->rq_qos = cur;
+			break;
+		}
+		prev = cur;
+	}
+}
+
+bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit);
+void rq_depth_scale_up(struct rq_depth *rqd);
+void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
+bool rq_depth_calc_max_depth(struct rq_depth *rqd);
+
+void rq_qos_cleanup(struct request_queue *, enum wbt_flags);
+void rq_qos_done(struct request_queue *, struct request *);
+void rq_qos_issue(struct request_queue *, struct request *);
+void rq_qos_requeue(struct request_queue *, struct request *);
+enum wbt_flags rq_qos_throttle(struct request_queue *, struct bio *, spinlock_t *);
+void rq_qos_exit(struct request_queue *);
+#endif
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d1de71124656..053de87d1fda 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -875,7 +875,7 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
 void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
 	q->queue_depth = depth;
-	wbt_set_queue_depth(q->rq_wb, depth);
+	wbt_set_queue_depth(q, depth);
 }
 EXPORT_SYMBOL(blk_set_queue_depth);
 
@@ -900,7 +900,7 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
 		queue_flag_clear(QUEUE_FLAG_FUA, q);
 	spin_unlock_irq(q->queue_lock);
 
-	wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
 }
 EXPORT_SYMBOL_GPL(blk_queue_write_cache);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 94987b1f69e1..49c29a5d06bb 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -422,16 +422,16 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
 
 static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
 {
-	if (!q->rq_wb)
+	if (!wbt_rq_qos(q))
 		return -EINVAL;
 
-	return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000));
+	return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
 }
 
 static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 				  size_t count)
 {
-	struct rq_wb *rwb;
+	struct rq_qos *rqos;
 	ssize_t ret;
 	s64 val;
 
@@ -441,23 +441,21 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 	if (val < -1)
 		return -EINVAL;
 
-	rwb = q->rq_wb;
-	if (!rwb) {
+	rqos = wbt_rq_qos(q);
+	if (!rqos) {
 		ret = wbt_init(q);
 		if (ret)
 			return ret;
 	}
 
-	rwb = q->rq_wb;
 	if (val == -1)
-		rwb->min_lat_nsec = wbt_default_latency_nsec(q);
+		val = wbt_default_latency_nsec(q);
 	else if (val >= 0)
-		rwb->min_lat_nsec = val * 1000ULL;
+		val *= 1000ULL;
 
-	if (rwb->enable_state == WBT_STATE_ON_DEFAULT)
-		rwb->enable_state = WBT_STATE_ON_MANUAL;
+	wbt_set_min_lat(q, val);
 
-	wbt_update_limits(rwb);
+	wbt_update_limits(q);
 	return count;
 }
 
@@ -964,7 +962,7 @@ void blk_unregister_queue(struct gendisk *disk)
 	kobject_del(&q->kobj);
 	blk_trace_remove_sysfs(disk_to_dev(disk));
 
-	wbt_exit(q);
+	rq_qos_exit(q);
 
 	mutex_lock(&q->sysfs_lock);
 	if (q->request_fn || (q->mq_ops && q->elevator))
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 4f89b28fa652..6fe20fb823e4 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -25,6 +25,7 @@
 #include <linux/swap.h>
 
 #include "blk-wbt.h"
+#include "blk-rq-qos.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/wbt.h>
@@ -78,28 +79,6 @@ static inline bool rwb_enabled(struct rq_wb *rwb)
 	return rwb && rwb->wb_normal != 0;
 }
 
-/*
- * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
- * false if 'v' + 1 would be bigger than 'below'.
- */
-static bool atomic_inc_below(atomic_t *v, int below)
-{
-	int cur = atomic_read(v);
-
-	for (;;) {
-		int old;
-
-		if (cur >= below)
-			return false;
-		old = atomic_cmpxchg(v, cur, cur + 1);
-		if (old == cur)
-			break;
-		cur = old;
-	}
-
-	return true;
-}
-
 static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
 {
 	if (rwb_enabled(rwb)) {
@@ -116,7 +95,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
  */
 static bool wb_recent_wait(struct rq_wb *rwb)
 {
-	struct bdi_writeback *wb = &rwb->queue->backing_dev_info->wb;
+	struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;
 
 	return time_before(jiffies, wb->dirty_sleep + HZ);
 }
@@ -144,8 +123,9 @@ static void rwb_wake_all(struct rq_wb *rwb)
 	}
 }
 
-void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
+static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
 {
+	struct rq_wb *rwb = RQWB(rqos);
 	struct rq_wait *rqw;
 	int inflight, limit;
 
@@ -194,10 +174,9 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
  * Called on completion of a request. Note that it's also called when
  * a request is merged, when the request gets freed.
  */
-void wbt_done(struct rq_wb *rwb, struct request *rq)
+static void wbt_done(struct rq_qos *rqos, struct request *rq)
 {
-	if (!rwb)
-		return;
+	struct rq_wb *rwb = RQWB(rqos);
 
 	if (!wbt_is_tracked(rq)) {
 		if (rwb->sync_cookie == rq) {
@@ -209,72 +188,11 @@ void wbt_done(struct rq_wb *rwb, struct request *rq)
 			wb_timestamp(rwb, &rwb->last_comp);
 	} else {
 		WARN_ON_ONCE(rq == rwb->sync_cookie);
-		__wbt_done(rwb, wbt_flags(rq));
+		__wbt_done(rqos, wbt_flags(rq));
 	}
 	wbt_clear_state(rq);
 }
 
-/*
- * Return true, if we can't increase the depth further by scaling
- */
-static bool calc_wb_limits(struct rq_wb *rwb)
-{
-	unsigned int depth;
-	bool ret = false;
-
-	if (!rwb->min_lat_nsec) {
-		rwb->wb_max = rwb->wb_normal = rwb->wb_background = 0;
-		return false;
-	}
-
-	/*
-	 * For QD=1 devices, this is a special case. It's important for those
-	 * to have one request ready when one completes, so force a depth of
-	 * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
-	 * since the device can't have more than that in flight. If we're
-	 * scaling down, then keep a setting of 1/1/1.
-	 */
-	if (rwb->queue_depth == 1) {
-		if (rwb->scale_step > 0)
-			rwb->wb_max = rwb->wb_normal = 1;
-		else {
-			rwb->wb_max = rwb->wb_normal = 2;
-			ret = true;
-		}
-		rwb->wb_background = 1;
-	} else {
-		/*
-		 * scale_step == 0 is our default state. If we have suffered
-		 * latency spikes, step will be > 0, and we shrink the
-		 * allowed write depths. If step is < 0, we're only doing
-		 * writes, and we allow a temporarily higher depth to
-		 * increase performance.
-		 */
-		depth = min_t(unsigned int, RWB_DEF_DEPTH, rwb->queue_depth);
-		if (rwb->scale_step > 0)
-			depth = 1 + ((depth - 1) >> min(31, rwb->scale_step));
-		else if (rwb->scale_step < 0) {
-			unsigned int maxd = 3 * rwb->queue_depth / 4;
-
-			depth = 1 + ((depth - 1) << -rwb->scale_step);
-			if (depth > maxd) {
-				depth = maxd;
-				ret = true;
-			}
-		}
-
-		/*
-		 * Set our max/normal/bg queue depths based on how far
-		 * we have scaled down (->scale_step).
-		 */
-		rwb->wb_max = depth;
-		rwb->wb_normal = (rwb->wb_max + 1) / 2;
-		rwb->wb_background = (rwb->wb_max + 3) / 4;
-	}
-
-	return ret;
-}
-
 static inline bool stat_sample_valid(struct blk_rq_stat *stat)
 {
 	/*
@@ -307,7 +225,8 @@ enum {
 
 static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 {
-	struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
+	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+	struct rq_depth *rqd = &rwb->rq_depth;
 	u64 thislat;
 
 	/*
@@ -351,7 +270,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 		return LAT_EXCEEDED;
 	}
 
-	if (rwb->scale_step)
+	if (rqd->scale_step)
 		trace_wbt_stat(bdi, stat);
 
 	return LAT_OK;
@@ -359,58 +278,48 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 
 static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
 {
-	struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
+	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+	struct rq_depth *rqd = &rwb->rq_depth;
 
-	trace_wbt_step(bdi, msg, rwb->scale_step, rwb->cur_win_nsec,
-			rwb->wb_background, rwb->wb_normal, rwb->wb_max);
+	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
+			rwb->wb_background, rwb->wb_normal, rqd->max_depth);
 }
 
-static void scale_up(struct rq_wb *rwb)
+static void calc_wb_limits(struct rq_wb *rwb)
 {
-	/*
-	 * Hit max in previous round, stop here
-	 */
-	if (rwb->scaled_max)
-		return;
+	if (rwb->min_lat_nsec == 0) {
+		rwb->wb_normal = rwb->wb_background = 0;
+	} else if (rwb->rq_depth.max_depth <= 2) {
+		rwb->wb_normal = rwb->rq_depth.max_depth;
+		rwb->wb_background = 1;
+	} else {
+		rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2;
+		rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4;
+	}
+}
 
-	rwb->scale_step--;
+static void scale_up(struct rq_wb *rwb)
+{
+	rq_depth_scale_up(&rwb->rq_depth);
+	calc_wb_limits(rwb);
 	rwb->unknown_cnt = 0;
-
-	rwb->scaled_max = calc_wb_limits(rwb);
-
-	rwb_wake_all(rwb);
-
-	rwb_trace_step(rwb, "step up");
+	rwb_trace_step(rwb, "scale up");
 }
 
-/*
- * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
- * had a latency violation.
- */
 static void scale_down(struct rq_wb *rwb, bool hard_throttle)
 {
-	/*
-	 * Stop scaling down when we've hit the limit. This also prevents
-	 * ->scale_step from going to crazy values, if the device can't
-	 * keep up.
-	 */
-	if (rwb->wb_max == 1)
-		return;
-
-	if (rwb->scale_step < 0 && hard_throttle)
-		rwb->scale_step = 0;
-	else
-		rwb->scale_step++;
-
-	rwb->scaled_max = false;
-	rwb->unknown_cnt = 0;
+	rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
 	calc_wb_limits(rwb);
-	rwb_trace_step(rwb, "step down");
+	rwb->unknown_cnt = 0;
+	rwb_wake_all(rwb);
+	rwb_trace_step(rwb, "scale down");
 }
 
 static void rwb_arm_timer(struct rq_wb *rwb)
 {
-	if (rwb->scale_step > 0) {
+	struct rq_depth *rqd = &rwb->rq_depth;
+
+	if (rqd->scale_step > 0) {
 		/*
 		 * We should speed this up, using some variant of a fast
 		 * integer inverse square root calculation. Since we only do
@@ -418,7 +327,7 @@ static void rwb_arm_timer(struct rq_wb *rwb)
 		 * though.
 		 */
 		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
-					int_sqrt((rwb->scale_step + 1) << 8));
+					int_sqrt((rqd->scale_step + 1) << 8));
 	} else {
 		/*
 		 * For step < 0, we don't want to increase/decrease the
@@ -433,12 +342,13 @@ static void rwb_arm_timer(struct rq_wb *rwb)
 static void wb_timer_fn(struct blk_stat_callback *cb)
 {
 	struct rq_wb *rwb = cb->data;
+	struct rq_depth *rqd = &rwb->rq_depth;
 	unsigned int inflight = wbt_inflight(rwb);
 	int status;
 
 	status = latency_exceeded(rwb, cb->stat);
 
-	trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step,
+	trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
 			inflight);
 
 	/*
@@ -469,9 +379,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
 		 * currently don't have a valid read/write sample. For that
 		 * case, slowly return to center state (step == 0).
 		 */
-		if (rwb->scale_step > 0)
+		if (rqd->scale_step > 0)
 			scale_up(rwb);
-		else if (rwb->scale_step < 0)
+		else if (rqd->scale_step < 0)
 			scale_down(rwb, false);
 		break;
 	default:
@@ -481,19 +391,50 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
 	/*
 	 * Re-arm timer, if we have IO in flight
 	 */
-	if (rwb->scale_step || inflight)
+	if (rqd->scale_step || inflight)
 		rwb_arm_timer(rwb);
 }
 
-void wbt_update_limits(struct rq_wb *rwb)
+static void __wbt_update_limits(struct rq_wb *rwb)
 {
-	rwb->scale_step = 0;
-	rwb->scaled_max = false;
+	struct rq_depth *rqd = &rwb->rq_depth;
+
+	rqd->scale_step = 0;
+	rqd->scaled_max = false;
+
+	rq_depth_calc_max_depth(rqd);
 	calc_wb_limits(rwb);
 
 	rwb_wake_all(rwb);
 }
 
+void wbt_update_limits(struct request_queue *q)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (!rqos)
+		return;
+	__wbt_update_limits(RQWB(rqos));
+}
+
+u64 wbt_get_min_lat(struct request_queue *q)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (!rqos)
+		return 0;
+	return RQWB(rqos)->min_lat_nsec;
+}
+
+void wbt_set_min_lat(struct request_queue *q, u64 val)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (!rqos)
+		return;
+	RQWB(rqos)->min_lat_nsec = val;
+	RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
+	__wbt_update_limits(RQWB(rqos));
+}
+
+
 static bool close_io(struct rq_wb *rwb)
 {
 	const unsigned long now = jiffies;
@@ -520,7 +461,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 	 * IO for a bit.
 	 */
 	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
-		limit = rwb->wb_max;
+		limit = rwb->rq_depth.max_depth;
 	else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
 		/*
 		 * If less than 100ms since we completed unrelated IO,
@@ -554,7 +495,7 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
 	    rqw->wait.head.next != &wait->entry)
 		return false;
 
-	return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw));
+	return rq_wait_inc_below(rqw, get_limit(rwb, rw));
 }
 
 /*
@@ -614,8 +555,10 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
  * in an irq held spinlock, if it holds one when calling this function.
  * If we do sleep, we'll release and re-grab it.
  */
-enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
+static enum wbt_flags wbt_wait(struct rq_qos *rqos, struct bio *bio,
+			       spinlock_t *lock)
 {
+	struct rq_wb *rwb = RQWB(rqos);
 	enum wbt_flags ret = 0;
 
 	if (!rwb_enabled(rwb))
@@ -643,8 +586,10 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
 	return ret | WBT_TRACKED;
 }
 
-void wbt_issue(struct rq_wb *rwb, struct request *rq)
+void wbt_issue(struct rq_qos *rqos, struct request *rq)
 {
+	struct rq_wb *rwb = RQWB(rqos);
+
 	if (!rwb_enabled(rwb))
 		return;
 
@@ -661,8 +606,9 @@ void wbt_issue(struct rq_wb *rwb, struct request *rq)
 	}
 }
 
-void wbt_requeue(struct rq_wb *rwb, struct request *rq)
+void wbt_requeue(struct rq_qos *rqos, struct request *rq)
 {
+	struct rq_wb *rwb = RQWB(rqos);
 	if (!rwb_enabled(rwb))
 		return;
 	if (rq == rwb->sync_cookie) {
@@ -671,39 +617,30 @@ void wbt_requeue(struct rq_wb *rwb, struct request *rq)
 	}
 }
 
-void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth)
+void wbt_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
-	if (rwb) {
-		rwb->queue_depth = depth;
-		wbt_update_limits(rwb);
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (rqos) {
+		RQWB(rqos)->rq_depth.queue_depth = depth;
+		__wbt_update_limits(RQWB(rqos));
 	}
 }
 
-void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
-{
-	if (rwb)
-		rwb->wc = write_cache_on;
-}
-
-/*
- * Disable wbt, if enabled by default.
- */
-void wbt_disable_default(struct request_queue *q)
+void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
 {
-	struct rq_wb *rwb = q->rq_wb;
-
-	if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT)
-		wbt_exit(q);
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (rqos)
+		RQWB(rqos)->wc = write_cache_on;
 }
-EXPORT_SYMBOL_GPL(wbt_disable_default);
 
 /*
  * Enable wbt if defaults are configured that way
  */
 void wbt_enable_default(struct request_queue *q)
 {
+	struct rq_qos *rqos = wbt_rq_qos(q);
 	/* Throttling already enabled? */
-	if (q->rq_wb)
+	if (rqos)
 		return;
 
 	/* Queue not registered? Maybe shutting down... */
@@ -741,6 +678,41 @@ static int wbt_data_dir(const struct request *rq)
 	return -1;
 }
 
+static void wbt_exit(struct rq_qos *rqos)
+{
+	struct rq_wb *rwb = RQWB(rqos);
+	struct request_queue *q = rqos->q;
+
+	blk_stat_remove_callback(q, rwb->cb);
+	blk_stat_free_callback(rwb->cb);
+	kfree(rwb);
+}
+
+/*
+ * Disable wbt, if enabled by default.
+ */
+void wbt_disable_default(struct request_queue *q)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	struct rq_wb *rwb;
+	if (!rqos)
+		return;
+	rwb = RQWB(rqos);
+	if (rwb->enable_state == WBT_STATE_ON_DEFAULT)
+		rwb->wb_normal = 0;
+}
+EXPORT_SYMBOL_GPL(wbt_disable_default);
+
+
+static struct rq_qos_ops wbt_rqos_ops = {
+	.throttle = wbt_wait,
+	.issue = wbt_issue,
+	.requeue = wbt_requeue,
+	.done = wbt_done,
+	.cleanup = __wbt_done,
+	.exit = wbt_exit,
+};
+
 int wbt_init(struct request_queue *q)
 {
 	struct rq_wb *rwb;
@@ -756,39 +728,29 @@ int wbt_init(struct request_queue *q)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < WBT_NUM_RWQ; i++) {
-		atomic_set(&rwb->rq_wait[i].inflight, 0);
-		init_waitqueue_head(&rwb->rq_wait[i].wait);
-	}
+	for (i = 0; i < WBT_NUM_RWQ; i++)
+		rq_wait_init(&rwb->rq_wait[i]);
 
+	rwb->rqos.id = RQ_QOS_WBT;
+	rwb->rqos.ops = &wbt_rqos_ops;
+	rwb->rqos.q = q;
 	rwb->last_comp = rwb->last_issue = jiffies;
-	rwb->queue = q;
 	rwb->win_nsec = RWB_WINDOW_NSEC;
 	rwb->enable_state = WBT_STATE_ON_DEFAULT;
-	wbt_update_limits(rwb);
+	rwb->wc = 1;
+	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
+	__wbt_update_limits(rwb);
 
 	/*
 	 * Assign rwb and add the stats callback.
 	 */
-	q->rq_wb = rwb;
+	rq_qos_add(q, &rwb->rqos);
 	blk_stat_add_callback(q, rwb->cb);
 
 	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
 
-	wbt_set_queue_depth(rwb, blk_queue_depth(q));
-	wbt_set_write_cache(rwb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+	wbt_set_queue_depth(q, blk_queue_depth(q));
+	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
 
 	return 0;
 }
-
-void wbt_exit(struct request_queue *q)
-{
-	struct rq_wb *rwb = q->rq_wb;
-
-	if (rwb) {
-		blk_stat_remove_callback(q, rwb->cb);
-		blk_stat_free_callback(rwb->cb);
-		q->rq_wb = NULL;
-		kfree(rwb);
-	}
-}
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index 300df531d0a6..53b20a58c0a2 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -9,6 +9,7 @@
 #include <linux/ktime.h>
 
 #include "blk-stat.h"
+#include "blk-rq-qos.h"
 
 enum wbt_flags {
 	WBT_TRACKED		= 1,	/* write, tracked for throttling */
@@ -35,20 +36,12 @@ enum {
 	WBT_STATE_ON_MANUAL	= 2,
 };
 
-struct rq_wait {
-	wait_queue_head_t wait;
-	atomic_t inflight;
-};
-
 struct rq_wb {
 	/*
 	 * Settings that govern how we throttle
 	 */
 	unsigned int wb_background;		/* background writeback */
 	unsigned int wb_normal;			/* normal writeback */
-	unsigned int wb_max;			/* max throughput writeback */
-	int scale_step;
-	bool scaled_max;
 
 	short enable_state;			/* WBT_STATE_* */
 
@@ -67,15 +60,20 @@ struct rq_wb {
 	void *sync_cookie;
 
 	unsigned int wc;
-	unsigned int queue_depth;
 
 	unsigned long last_issue;		/* last non-throttled issue */
 	unsigned long last_comp;		/* last non-throttled comp */
 	unsigned long min_lat_nsec;
-	struct request_queue *queue;
+	struct rq_qos rqos;
 	struct rq_wait rq_wait[WBT_NUM_RWQ];
+	struct rq_depth rq_depth;
 };
 
+static inline struct rq_wb *RQWB(struct rq_qos *rqos)
+{
+	return container_of(rqos, struct rq_wb, rqos);
+}
+
 static inline unsigned int wbt_inflight(struct rq_wb *rwb)
 {
 	unsigned int i, ret = 0;
@@ -86,6 +84,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
 	return ret;
 }
 
+
 #ifdef CONFIG_BLK_WBT
 
 static inline void wbt_track(struct request *rq, enum wbt_flags flags)
@@ -93,19 +92,16 @@ static inline void wbt_track(struct request *rq, enum wbt_flags flags)
 	rq->wbt_flags |= flags;
 }
 
-void __wbt_done(struct rq_wb *, enum wbt_flags);
-void wbt_done(struct rq_wb *, struct request *);
-enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *);
 int wbt_init(struct request_queue *);
-void wbt_exit(struct request_queue *);
-void wbt_update_limits(struct rq_wb *);
-void wbt_requeue(struct rq_wb *, struct request *);
-void wbt_issue(struct rq_wb *, struct request *);
+void wbt_update_limits(struct request_queue *);
 void wbt_disable_default(struct request_queue *);
 void wbt_enable_default(struct request_queue *);
 
-void wbt_set_queue_depth(struct rq_wb *, unsigned int);
-void wbt_set_write_cache(struct rq_wb *, bool);
+u64 wbt_get_min_lat(struct request_queue *q);
+void wbt_set_min_lat(struct request_queue *q, u64 val);
+
+void wbt_set_queue_depth(struct request_queue *, unsigned int);
+void wbt_set_write_cache(struct request_queue *, bool);
 
 u64 wbt_default_latency_nsec(struct request_queue *);
 
@@ -114,43 +110,30 @@ u64 wbt_default_latency_nsec(struct request_queue *);
 static inline void wbt_track(struct request *rq, enum wbt_flags flags)
 {
 }
-static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags)
-{
-}
-static inline void wbt_done(struct rq_wb *rwb, struct request *rq)
-{
-}
-static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio,
-				      spinlock_t *lock)
-{
-	return 0;
-}
 static inline int wbt_init(struct request_queue *q)
 {
 	return -EINVAL;
 }
-static inline void wbt_exit(struct request_queue *q)
-{
-}
-static inline void wbt_update_limits(struct rq_wb *rwb)
+static inline void wbt_update_limits(struct request_queue *q)
 {
 }
-static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq)
+static inline void wbt_disable_default(struct request_queue *q)
 {
 }
-static inline void wbt_issue(struct rq_wb *rwb, struct request *rq)
+static inline void wbt_enable_default(struct request_queue *q)
 {
 }
-static inline void wbt_disable_default(struct request_queue *q)
+static inline void wbt_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
 }
-static inline void wbt_enable_default(struct request_queue *q)
+static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
 {
 }
-static inline void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth)
+static inline u64 wbt_get_min_lat(struct request_queue *q)
 {
+	return 0;
 }
-static inline void wbt_set_write_cache(struct rq_wb *rwb, bool wc)
+static inline void wbt_set_min_lat(struct request_queue *q, u64 val)
 {
 }
 static inline u64 wbt_default_latency_nsec(struct request_queue *q)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9d05646d5059..137759862f07 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -42,7 +42,7 @@ struct bsg_job;
 struct blkcg_gq;
 struct blk_flush_queue;
 struct pr_ops;
-struct rq_wb;
+struct rq_qos;
 struct blk_queue_stats;
 struct blk_stat_callback;
 
@@ -443,7 +443,7 @@ struct request_queue {
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
 	struct blk_queue_stats	*stats;
-	struct rq_wb		*rq_wb;
+	struct rq_qos		*rq_qos;
 
 	/*
 	 * If blkcg is not used, @q->root_rl serves all requests.  If blkcg
-- 
cgit v1.2.3


From d70675121546c35feaceebf7ed9caed8716640f3 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:15:01 -0400
Subject: block: introduce blk-iolatency io controller

Current IO controllers for the block layer are less than ideal for our
use case.  The io.max controller is great at hard limiting, but it is
not work conserving.  This patch introduces io.latency.  You provide a
latency target for your group and we monitor the io in short windows to
make sure we are not exceeding those latency targets.  This makes use of
the rq-qos infrastructure and works much like the wbt stuff.  There are
a few differences from wbt

 - It's bio based, so the latency covers the whole block layer in addition to
   the actual io.
 - We will throttle all IO types that comes in here if we need to.
 - We use the mean latency over the 100ms window.  This is because writes can
   be particularly fast, which could give us a false sense of the impact of
   other workloads on our protected workload.
 - By default there's no throttling, we set the queue_depth to INT_MAX so that
   we can have as many outstanding bio's as we're allowed to.  Only at
   throttle time do we pay attention to the actual queue depth.
 - We backcharge cgroups for root cg issued IO and induce artificial
   delays in order to deal with cases like metadata only or swap heavy
   workloads.

In testing this has worked out relatively well.  Protected workloads
will throttle noisy workloads down to 1 io at time if they are doing
normal IO on their own, or induce up to a 1 second delay per syscall if
they are doing a lot of root issued IO (metadata/swap IO).

Our testing has revolved mostly around our production web servers where
we have hhvm (the web server application) in a protected group and
everything else in another group.  We see slightly higher requests per
second (RPS) on the test tier vs the control tier, and much more stable
RPS across all machines in the test tier vs the control tier.

Another test we run is a slow memory allocator in the unprotected group.
Before this would eventually push us into swap and cause the whole box
to die and not recover at all.  With these patches we see slight RPS
drops (usually 10-15%) before the memory consumer is properly killed and
things recover within seconds.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig             |  12 +
 block/Makefile            |   1 +
 block/blk-cgroup.c        |   8 +
 block/blk-iolatency.c     | 930 ++++++++++++++++++++++++++++++++++++++++++++++
 block/blk.h               |   6 +
 include/linux/blk_types.h |   2 -
 6 files changed, 957 insertions(+), 2 deletions(-)
 create mode 100644 block/blk-iolatency.c

(limited to 'include/linux')

diff --git a/block/Kconfig b/block/Kconfig
index dfe7bc770fc9..1f2469a0123c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -149,6 +149,18 @@ config BLK_WBT
 	dynamically on an algorithm loosely based on CoDel, factoring in
 	the realtime performance of the disk.
 
+config BLK_CGROUP_IOLATENCY
+	bool "Enable support for latency based cgroup IO protection"
+	depends on BLK_CGROUP=y
+	default n
+	---help---
+	Enabling this option enables the .latency interface for IO throttling.
+	The IO controller will attempt to maintain average IO latencies below
+	the configured latency target, throttling anybody with a higher latency
+	target than the victimized group.
+
+	Note, this is an experimental interface and could be changed someday.
+
 config BLK_WBT_SQ
 	bool "Single queue writeback throttling"
 	default n
diff --git a/block/Makefile b/block/Makefile
index 57d0f47ab05f..572b33f32c07 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
+obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
 obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d3310ec96c2a..7e2c19ce1a08 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1238,6 +1238,14 @@ int blkcg_init_queue(struct request_queue *q)
 	if (preloaded)
 		radix_tree_preload_end();
 
+	ret = blk_iolatency_init(q);
+	if (ret) {
+		spin_lock_irq(q->queue_lock);
+		blkg_destroy_all(q);
+		spin_unlock_irq(q->queue_lock);
+		return ret;
+	}
+
 	ret = blk_throtl_init(q);
 	if (ret) {
 		spin_lock_irq(q->queue_lock);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
new file mode 100644
index 000000000000..a35a1f580337
--- /dev/null
+++ b/block/blk-iolatency.c
@@ -0,0 +1,930 @@
+/*
+ * Block rq-qos base io controller
+ *
+ * This works similar to wbt with a few exceptions
+ *
+ * - It's bio based, so the latency covers the whole block layer in addition to
+ *   the actual io.
+ * - We will throttle all IO that comes in here if we need to.
+ * - We use the mean latency over the 100ms window.  This is because writes can
+ *   be particularly fast, which could give us a false sense of the impact of
+ *   other workloads on our protected workload.
+ * - By default there's no throttling, we set the queue_depth to INT_MAX so that
+ *   we can have as many outstanding bio's as we're allowed to.  Only at
+ *   throttle time do we pay attention to the actual queue depth.
+ *
+ * The hierarchy works like the cpu controller does, we track the latency at
+ * every configured node, and each configured node has it's own independent
+ * queue depth.  This means that we only care about our latency targets at the
+ * peer level.  Some group at the bottom of the hierarchy isn't going to affect
+ * a group at the end of some other path if we're only configred at leaf level.
+ *
+ * Consider the following
+ *
+ *                   root blkg
+ *             /                     \
+ *        fast (target=5ms)     slow (target=10ms)
+ *         /     \                  /        \
+ *       a        b          normal(15ms)   unloved
+ *
+ * "a" and "b" have no target, but their combined io under "fast" cannot exceed
+ * an average latency of 5ms.  If it does then we will throttle the "slow"
+ * group.  In the case of "normal", if it exceeds its 15ms target, we will
+ * throttle "unloved", but nobody else.
+ *
+ * In this example "fast", "slow", and "normal" will be the only groups actually
+ * accounting their io latencies.  We have to walk up the heirarchy to the root
+ * on every submit and complete so we can do the appropriate stat recording and
+ * adjust the queue depth of ourselves if needed.
+ *
+ * There are 2 ways we throttle IO.
+ *
+ * 1) Queue depth throttling.  As we throttle down we will adjust the maximum
+ * number of IO's we're allowed to have in flight.  This starts at (u64)-1 down
+ * to 1.  If the group is only ever submitting IO for itself then this is the
+ * only way we throttle.
+ *
+ * 2) Induced delay throttling.  This is for the case that a group is generating
+ * IO that has to be issued by the root cg to avoid priority inversion. So think
+ * REQ_META or REQ_SWAP.  If we are already at qd == 1 and we're getting a lot
+ * of work done for us on behalf of the root cg and are being asked to scale
+ * down more then we induce a latency at userspace return.  We accumulate the
+ * total amount of time we need to be punished by doing
+ *
+ * total_time += min_lat_nsec - actual_io_completion
+ *
+ * and then at throttle time will do
+ *
+ * throttle_time = min(total_time, NSEC_PER_SEC)
+ *
+ * This induced delay will throttle back the activity that is generating the
+ * root cg issued io's, wethere that's some metadata intensive operation or the
+ * group is using so much memory that it is pushing us into swap.
+ *
+ * Copyright (C) 2018 Josef Bacik
+ */
+#include <linux/kernel.h>
+#include <linux/blk_types.h>
+#include <linux/backing-dev.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/memcontrol.h>
+#include <linux/sched/signal.h>
+#include <trace/events/block.h>
+#include "blk-rq-qos.h"
+#include "blk-stat.h"
+
+#define DEFAULT_SCALE_COOKIE 1000000U
+
+static struct blkcg_policy blkcg_policy_iolatency;
+struct iolatency_grp;
+
+struct blk_iolatency {
+	struct rq_qos rqos;
+	struct timer_list timer;
+	atomic_t enabled;
+};
+
+static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
+{
+	return container_of(rqos, struct blk_iolatency, rqos);
+}
+
+static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
+{
+	return atomic_read(&blkiolat->enabled) > 0;
+}
+
+struct child_latency_info {
+	spinlock_t lock;
+
+	/* Last time we adjusted the scale of everybody. */
+	u64 last_scale_event;
+
+	/* The latency that we missed. */
+	u64 scale_lat;
+
+	/* Total io's from all of our children for the last summation. */
+	u64 nr_samples;
+
+	/* The guy who actually changed the latency numbers. */
+	struct iolatency_grp *scale_grp;
+
+	/* Cookie to tell if we need to scale up or down. */
+	atomic_t scale_cookie;
+};
+
+struct iolatency_grp {
+	struct blkg_policy_data pd;
+	struct blk_rq_stat __percpu *stats;
+	struct blk_iolatency *blkiolat;
+	struct rq_depth rq_depth;
+	struct rq_wait rq_wait;
+	atomic64_t window_start;
+	atomic_t scale_cookie;
+	u64 min_lat_nsec;
+	u64 cur_win_nsec;
+
+	/* total running average of our io latency. */
+	u64 total_lat_avg;
+	u64 total_lat_nr;
+
+	/* Our current number of IO's for the last summation. */
+	u64 nr_samples;
+
+	struct child_latency_info child_lat;
+};
+
+static inline struct iolatency_grp *pd_to_lat(struct blkg_policy_data *pd)
+{
+	return pd ? container_of(pd, struct iolatency_grp, pd) : NULL;
+}
+
+static inline struct iolatency_grp *blkg_to_lat(struct blkcg_gq *blkg)
+{
+	return pd_to_lat(blkg_to_pd(blkg, &blkcg_policy_iolatency));
+}
+
+static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat)
+{
+	return pd_to_blkg(&iolat->pd);
+}
+
+static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
+				       wait_queue_entry_t *wait,
+				       bool first_block)
+{
+	struct rq_wait *rqw = &iolat->rq_wait;
+
+	if (first_block && waitqueue_active(&rqw->wait) &&
+	    rqw->wait.head.next != &wait->entry)
+		return false;
+	return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth);
+}
+
+static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
+				       struct iolatency_grp *iolat,
+				       spinlock_t *lock, bool issue_as_root,
+				       bool use_memdelay)
+	__releases(lock)
+	__acquires(lock)
+{
+	struct rq_wait *rqw = &iolat->rq_wait;
+	unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);
+	DEFINE_WAIT(wait);
+	bool first_block = true;
+
+	if (use_delay)
+		blkcg_schedule_throttle(rqos->q, use_memdelay);
+
+	/*
+	 * To avoid priority inversions we want to just take a slot if we are
+	 * issuing as root.  If we're being killed off there's no point in
+	 * delaying things, we may have been killed by OOM so throttling may
+	 * make recovery take even longer, so just let the IO's through so the
+	 * task can go away.
+	 */
+	if (issue_as_root || fatal_signal_pending(current)) {
+		atomic_inc(&rqw->inflight);
+		return;
+	}
+
+	if (iolatency_may_queue(iolat, &wait, first_block))
+		return;
+
+	do {
+		prepare_to_wait_exclusive(&rqw->wait, &wait,
+					  TASK_UNINTERRUPTIBLE);
+
+		if (iolatency_may_queue(iolat, &wait, first_block))
+			break;
+		first_block = false;
+
+		if (lock) {
+			spin_unlock_irq(lock);
+			io_schedule();
+			spin_lock_irq(lock);
+		} else {
+			io_schedule();
+		}
+	} while (1);
+
+	finish_wait(&rqw->wait, &wait);
+}
+
+#define SCALE_DOWN_FACTOR 2
+#define SCALE_UP_FACTOR 4
+
+static inline unsigned long scale_amount(unsigned long qd, bool up)
+{
+	return max(up ? qd >> SCALE_UP_FACTOR : qd >> SCALE_DOWN_FACTOR, 1UL);
+}
+
+/*
+ * We scale the qd down faster than we scale up, so we need to use this helper
+ * to adjust the scale_cookie accordingly so we don't prematurely get
+ * scale_cookie at DEFAULT_SCALE_COOKIE and unthrottle too much.
+ *
+ * Each group has their own local copy of the last scale cookie they saw, so if
+ * the global scale cookie goes up or down they know which way they need to go
+ * based on their last knowledge of it.
+ */
+static void scale_cookie_change(struct blk_iolatency *blkiolat,
+				struct child_latency_info *lat_info,
+				bool up)
+{
+	unsigned long qd = blk_queue_depth(blkiolat->rqos.q);
+	unsigned long scale = scale_amount(qd, up);
+	unsigned long old = atomic_read(&lat_info->scale_cookie);
+	unsigned long max_scale = qd << 1;
+	unsigned long diff = 0;
+
+	if (old < DEFAULT_SCALE_COOKIE)
+		diff = DEFAULT_SCALE_COOKIE - old;
+
+	if (up) {
+		if (scale + old > DEFAULT_SCALE_COOKIE)
+			atomic_set(&lat_info->scale_cookie,
+				   DEFAULT_SCALE_COOKIE);
+		else if (diff > qd)
+			atomic_inc(&lat_info->scale_cookie);
+		else
+			atomic_add(scale, &lat_info->scale_cookie);
+	} else {
+		/*
+		 * We don't want to dig a hole so deep that it takes us hours to
+		 * dig out of it.  Just enough that we don't throttle/unthrottle
+		 * with jagged workloads but can still unthrottle once pressure
+		 * has sufficiently dissipated.
+		 */
+		if (diff > qd) {
+			if (diff < max_scale)
+				atomic_dec(&lat_info->scale_cookie);
+		} else {
+			atomic_sub(scale, &lat_info->scale_cookie);
+		}
+	}
+}
+
+/*
+ * Change the queue depth of the iolatency_grp.  We add/subtract 1/16th of the
+ * queue depth at a time so we don't get wild swings and hopefully dial in to
+ * fairer distribution of the overall queue depth.
+ */
+static void scale_change(struct iolatency_grp *iolat, bool up)
+{
+	unsigned long qd = blk_queue_depth(iolat->blkiolat->rqos.q);
+	unsigned long scale = scale_amount(qd, up);
+	unsigned long old = iolat->rq_depth.max_depth;
+	bool changed = false;
+
+	if (old > qd)
+		old = qd;
+
+	if (up) {
+		if (old == 1 && blkcg_unuse_delay(lat_to_blkg(iolat)))
+			return;
+
+		if (old < qd) {
+			changed = true;
+			old += scale;
+			old = min(old, qd);
+			iolat->rq_depth.max_depth = old;
+			wake_up_all(&iolat->rq_wait.wait);
+		}
+	} else if (old > 1) {
+		old >>= 1;
+		changed = true;
+		iolat->rq_depth.max_depth = max(old, 1UL);
+	}
+}
+
+/* Check our parent and see if the scale cookie has changed. */
+static void check_scale_change(struct iolatency_grp *iolat)
+{
+	struct iolatency_grp *parent;
+	struct child_latency_info *lat_info;
+	unsigned int cur_cookie;
+	unsigned int our_cookie = atomic_read(&iolat->scale_cookie);
+	u64 scale_lat;
+	unsigned int old;
+	int direction = 0;
+
+	if (lat_to_blkg(iolat)->parent == NULL)
+		return;
+
+	parent = blkg_to_lat(lat_to_blkg(iolat)->parent);
+	if (!parent)
+		return;
+
+	lat_info = &parent->child_lat;
+	cur_cookie = atomic_read(&lat_info->scale_cookie);
+	scale_lat = READ_ONCE(lat_info->scale_lat);
+
+	if (cur_cookie < our_cookie)
+		direction = -1;
+	else if (cur_cookie > our_cookie)
+		direction = 1;
+	else
+		return;
+
+	old = atomic_cmpxchg(&iolat->scale_cookie, our_cookie, cur_cookie);
+
+	/* Somebody beat us to the punch, just bail. */
+	if (old != our_cookie)
+		return;
+
+	if (direction < 0 && iolat->min_lat_nsec) {
+		u64 samples_thresh;
+
+		if (!scale_lat || iolat->min_lat_nsec <= scale_lat)
+			return;
+
+		/*
+		 * Sometimes high priority groups are their own worst enemy, so
+		 * instead of taking it out on some poor other group that did 5%
+		 * or less of the IO's for the last summation just skip this
+		 * scale down event.
+		 */
+		samples_thresh = lat_info->nr_samples * 5;
+		samples_thresh = div64_u64(samples_thresh, 100);
+		if (iolat->nr_samples <= samples_thresh)
+			return;
+	}
+
+	/* We're as low as we can go. */
+	if (iolat->rq_depth.max_depth == 1 && direction < 0) {
+		blkcg_use_delay(lat_to_blkg(iolat));
+		return;
+	}
+
+	/* We're back to the default cookie, unthrottle all the things. */
+	if (cur_cookie == DEFAULT_SCALE_COOKIE) {
+		blkcg_clear_delay(lat_to_blkg(iolat));
+		iolat->rq_depth.max_depth = INT_MAX;
+		wake_up_all(&iolat->rq_wait.wait);
+		return;
+	}
+
+	scale_change(iolat, direction > 0);
+}
+
+static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
+				     spinlock_t *lock)
+{
+	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+	struct request_queue *q = rqos->q;
+	bool issue_as_root = bio_issue_as_root_blkg(bio);
+
+	if (!blk_iolatency_enabled(blkiolat))
+		return;
+
+	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
+	bio_associate_blkcg(bio, &blkcg->css);
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg)) {
+		if (!lock)
+			spin_lock_irq(q->queue_lock);
+		blkg = blkg_lookup_create(blkcg, q);
+		if (IS_ERR(blkg))
+			blkg = NULL;
+		if (!lock)
+			spin_unlock_irq(q->queue_lock);
+	}
+	if (!blkg)
+		goto out;
+
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+	bio_associate_blkg(bio, blkg);
+out:
+	rcu_read_unlock();
+	while (blkg && blkg->parent) {
+		struct iolatency_grp *iolat = blkg_to_lat(blkg);
+		if (!iolat) {
+			blkg = blkg->parent;
+			continue;
+		}
+
+		check_scale_change(iolat);
+		__blkcg_iolatency_throttle(rqos, iolat, lock, issue_as_root,
+				     (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+		blkg = blkg->parent;
+	}
+	if (!timer_pending(&blkiolat->timer))
+		mod_timer(&blkiolat->timer, jiffies + HZ);
+}
+
+static void iolatency_record_time(struct iolatency_grp *iolat,
+				  struct bio_issue *issue, u64 now,
+				  bool issue_as_root)
+{
+	struct blk_rq_stat *rq_stat;
+	u64 start = bio_issue_time(issue);
+	u64 req_time;
+
+	if (now <= start)
+		return;
+
+	req_time = now - start;
+
+	/*
+	 * We don't want to count issue_as_root bio's in the cgroups latency
+	 * statistics as it could skew the numbers downwards.
+	 */
+	if (unlikely(issue_as_root && iolat->rq_depth.max_depth != (u64)-1)) {
+		u64 sub = iolat->min_lat_nsec;
+		if (req_time < sub)
+			blkcg_add_delay(lat_to_blkg(iolat), now, sub - req_time);
+		return;
+	}
+
+	rq_stat = get_cpu_ptr(iolat->stats);
+	blk_rq_stat_add(rq_stat, req_time);
+	put_cpu_ptr(rq_stat);
+}
+
+#define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC)
+#define BLKIOLATENCY_MIN_GOOD_SAMPLES 5
+
+static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)
+{
+	struct blkcg_gq *blkg = lat_to_blkg(iolat);
+	struct iolatency_grp *parent;
+	struct child_latency_info *lat_info;
+	struct blk_rq_stat stat;
+	unsigned long flags;
+	int cpu;
+
+	blk_rq_stat_init(&stat);
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		struct blk_rq_stat *s;
+		s = per_cpu_ptr(iolat->stats, cpu);
+		blk_rq_stat_sum(&stat, s);
+		blk_rq_stat_init(s);
+	}
+	preempt_enable();
+
+	/*
+	 * Our average exceeded our window, scale up our window so we are more
+	 * accurate, but not more than the global timer.
+	 */
+	if (stat.mean > iolat->cur_win_nsec) {
+		iolat->cur_win_nsec <<= 1;
+		iolat->cur_win_nsec =
+			max_t(u64, iolat->cur_win_nsec, NSEC_PER_SEC);
+	}
+
+	parent = blkg_to_lat(blkg->parent);
+	if (!parent)
+		return;
+
+	lat_info = &parent->child_lat;
+
+	iolat->total_lat_avg =
+		div64_u64((iolat->total_lat_avg * iolat->total_lat_nr) +
+			  stat.mean, iolat->total_lat_nr + 1);
+
+	iolat->total_lat_nr++;
+
+	/* Everything is ok and we don't need to adjust the scale. */
+	if (stat.mean <= iolat->min_lat_nsec &&
+	    atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE)
+		return;
+
+	/* Somebody beat us to the punch, just bail. */
+	spin_lock_irqsave(&lat_info->lock, flags);
+	lat_info->nr_samples -= iolat->nr_samples;
+	lat_info->nr_samples += stat.nr_samples;
+	iolat->nr_samples = stat.nr_samples;
+
+	if ((lat_info->last_scale_event >= now ||
+	    now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) &&
+	    lat_info->scale_lat <= iolat->min_lat_nsec)
+		goto out;
+
+	if (stat.mean <= iolat->min_lat_nsec &&
+	    stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) {
+		if (lat_info->scale_grp == iolat) {
+			lat_info->last_scale_event = now;
+			scale_cookie_change(iolat->blkiolat, lat_info, true);
+		}
+	} else if (stat.mean > iolat->min_lat_nsec) {
+		lat_info->last_scale_event = now;
+		if (!lat_info->scale_grp ||
+		    lat_info->scale_lat > iolat->min_lat_nsec) {
+			WRITE_ONCE(lat_info->scale_lat, iolat->min_lat_nsec);
+			lat_info->scale_grp = iolat;
+		}
+		scale_cookie_change(iolat->blkiolat, lat_info, false);
+	}
+out:
+	spin_unlock_irqrestore(&lat_info->lock, flags);
+}
+
+static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
+{
+	struct blkcg_gq *blkg;
+	struct rq_wait *rqw;
+	struct iolatency_grp *iolat;
+	u64 window_start;
+	u64 now = ktime_to_ns(ktime_get());
+	bool issue_as_root = bio_issue_as_root_blkg(bio);
+	bool enabled = false;
+
+	blkg = bio->bi_blkg;
+	if (!blkg)
+		return;
+
+	iolat = blkg_to_lat(bio->bi_blkg);
+	if (!iolat)
+		return;
+
+	enabled = blk_iolatency_enabled(iolat->blkiolat);
+	while (blkg && blkg->parent) {
+		iolat = blkg_to_lat(blkg);
+		if (!iolat) {
+			blkg = blkg->parent;
+			continue;
+		}
+		rqw = &iolat->rq_wait;
+
+		atomic_dec(&rqw->inflight);
+		if (!enabled || iolat->min_lat_nsec == 0)
+			goto next;
+		iolatency_record_time(iolat, &bio->bi_issue, now,
+				      issue_as_root);
+		window_start = atomic64_read(&iolat->window_start);
+		if (now > window_start &&
+		    (now - window_start) >= iolat->cur_win_nsec) {
+			if (atomic64_cmpxchg(&iolat->window_start,
+					window_start, now) == window_start)
+				iolatency_check_latencies(iolat, now);
+		}
+next:
+		wake_up(&rqw->wait);
+		blkg = blkg->parent;
+	}
+}
+
+static void blkcg_iolatency_cleanup(struct rq_qos *rqos, struct bio *bio)
+{
+	struct blkcg_gq *blkg;
+
+	blkg = bio->bi_blkg;
+	while (blkg && blkg->parent) {
+		struct rq_wait *rqw;
+		struct iolatency_grp *iolat;
+
+		iolat = blkg_to_lat(blkg);
+		if (!iolat)
+			goto next;
+
+		rqw = &iolat->rq_wait;
+		atomic_dec(&rqw->inflight);
+		wake_up(&rqw->wait);
+next:
+		blkg = blkg->parent;
+	}
+}
+
+static void blkcg_iolatency_exit(struct rq_qos *rqos)
+{
+	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
+
+	del_timer_sync(&blkiolat->timer);
+	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
+	kfree(blkiolat);
+}
+
+static struct rq_qos_ops blkcg_iolatency_ops = {
+	.throttle = blkcg_iolatency_throttle,
+	.cleanup = blkcg_iolatency_cleanup,
+	.done_bio = blkcg_iolatency_done_bio,
+	.exit = blkcg_iolatency_exit,
+};
+
+static void blkiolatency_timer_fn(struct timer_list *t)
+{
+	struct blk_iolatency *blkiolat = from_timer(blkiolat, t, timer);
+	struct blkcg_gq *blkg;
+	struct cgroup_subsys_state *pos_css;
+	u64 now = ktime_to_ns(ktime_get());
+
+	rcu_read_lock();
+	blkg_for_each_descendant_pre(blkg, pos_css,
+				     blkiolat->rqos.q->root_blkg) {
+		struct iolatency_grp *iolat;
+		struct child_latency_info *lat_info;
+		unsigned long flags;
+		u64 cookie;
+
+		/*
+		 * We could be exiting, don't access the pd unless we have a
+		 * ref on the blkg.
+		 */
+		if (!blkg_try_get(blkg))
+			continue;
+
+		iolat = blkg_to_lat(blkg);
+		if (!iolat)
+			continue;
+
+		lat_info = &iolat->child_lat;
+		cookie = atomic_read(&lat_info->scale_cookie);
+
+		if (cookie >= DEFAULT_SCALE_COOKIE)
+			goto next;
+
+		spin_lock_irqsave(&lat_info->lock, flags);
+		if (lat_info->last_scale_event >= now)
+			goto next_lock;
+
+		/*
+		 * We scaled down but don't have a scale_grp, scale up and carry
+		 * on.
+		 */
+		if (lat_info->scale_grp == NULL) {
+			scale_cookie_change(iolat->blkiolat, lat_info, true);
+			goto next_lock;
+		}
+
+		/*
+		 * It's been 5 seconds since our last scale event, clear the
+		 * scale grp in case the group that needed the scale down isn't
+		 * doing any IO currently.
+		 */
+		if (now - lat_info->last_scale_event >=
+		    ((u64)NSEC_PER_SEC * 5))
+			lat_info->scale_grp = NULL;
+next_lock:
+		spin_unlock_irqrestore(&lat_info->lock, flags);
+next:
+		blkg_put(blkg);
+	}
+	rcu_read_unlock();
+}
+
+int blk_iolatency_init(struct request_queue *q)
+{
+	struct blk_iolatency *blkiolat;
+	struct rq_qos *rqos;
+	int ret;
+
+	blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL);
+	if (!blkiolat)
+		return -ENOMEM;
+
+	rqos = &blkiolat->rqos;
+	rqos->id = RQ_QOS_CGROUP;
+	rqos->ops = &blkcg_iolatency_ops;
+	rqos->q = q;
+
+	rq_qos_add(q, rqos);
+
+	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
+	if (ret) {
+		rq_qos_del(q, rqos);
+		kfree(blkiolat);
+		return ret;
+	}
+
+	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
+
+	return 0;
+}
+
+static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+{
+	struct iolatency_grp *iolat = blkg_to_lat(blkg);
+	struct blk_iolatency *blkiolat = iolat->blkiolat;
+	u64 oldval = iolat->min_lat_nsec;
+
+	iolat->min_lat_nsec = val;
+	iolat->cur_win_nsec = max_t(u64, val << 4, 100 * NSEC_PER_MSEC);
+	iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec, NSEC_PER_SEC);
+
+	if (!oldval && val)
+		atomic_inc(&blkiolat->enabled);
+	if (oldval && !val)
+		atomic_dec(&blkiolat->enabled);
+}
+
+static void iolatency_clear_scaling(struct blkcg_gq *blkg)
+{
+	if (blkg->parent) {
+		struct iolatency_grp *iolat = blkg_to_lat(blkg->parent);
+		struct child_latency_info *lat_info;
+		if (!iolat)
+			return;
+
+		lat_info = &iolat->child_lat;
+		spin_lock(&lat_info->lock);
+		atomic_set(&lat_info->scale_cookie, DEFAULT_SCALE_COOKIE);
+		lat_info->last_scale_event = 0;
+		lat_info->scale_grp = NULL;
+		lat_info->scale_lat = 0;
+		spin_unlock(&lat_info->lock);
+	}
+}
+
+static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
+			     size_t nbytes, loff_t off)
+{
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
+	struct blkcg_gq *blkg;
+	struct blk_iolatency *blkiolat;
+	struct blkg_conf_ctx ctx;
+	struct iolatency_grp *iolat;
+	char *p, *tok;
+	u64 lat_val = 0;
+	u64 oldval;
+	int ret;
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
+	if (ret)
+		return ret;
+
+	iolat = blkg_to_lat(ctx.blkg);
+	blkiolat = iolat->blkiolat;
+	p = ctx.body;
+
+	ret = -EINVAL;
+	while ((tok = strsep(&p, " "))) {
+		char key[16];
+		char val[21];	/* 18446744073709551616 */
+
+		if (sscanf(tok, "%15[^=]=%20s", key, val) != 2)
+			goto out;
+
+		if (!strcmp(key, "target")) {
+			u64 v;
+
+			if (!strcmp(val, "max"))
+				lat_val = 0;
+			else if (sscanf(val, "%llu", &v) == 1)
+				lat_val = v * NSEC_PER_USEC;
+			else
+				goto out;
+		} else {
+			goto out;
+		}
+	}
+
+	/* Walk up the tree to see if our new val is lower than it should be. */
+	blkg = ctx.blkg;
+	oldval = iolat->min_lat_nsec;
+
+	iolatency_set_min_lat_nsec(blkg, lat_val);
+	if (oldval != iolat->min_lat_nsec) {
+		iolatency_clear_scaling(blkg);
+	}
+
+	ret = 0;
+out:
+	blkg_conf_finish(&ctx);
+	return ret ?: nbytes;
+}
+
+static u64 iolatency_prfill_limit(struct seq_file *sf,
+				  struct blkg_policy_data *pd, int off)
+{
+	struct iolatency_grp *iolat = pd_to_lat(pd);
+	const char *dname = blkg_dev_name(pd->blkg);
+
+	if (!dname || !iolat->min_lat_nsec)
+		return 0;
+	seq_printf(sf, "%s target=%llu\n",
+		   dname,
+		   (unsigned long long)iolat->min_lat_nsec / NSEC_PER_USEC);
+	return 0;
+}
+
+static int iolatency_print_limit(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  iolatency_prfill_limit,
+			  &blkcg_policy_iolatency, seq_cft(sf)->private, false);
+	return 0;
+}
+
+static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
+				size_t size)
+{
+	struct iolatency_grp *iolat = pd_to_lat(pd);
+	unsigned long long avg_lat = div64_u64(iolat->total_lat_avg, NSEC_PER_USEC);
+
+	if (iolat->rq_depth.max_depth == (u64)-1)
+		return scnprintf(buf, size, " depth=max avg_lat=%llu",
+				 avg_lat);
+
+	return scnprintf(buf, size, " depth=%u avg_lat=%llu",
+			 iolat->rq_depth.max_depth, avg_lat);
+}
+
+
+static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node)
+{
+	struct iolatency_grp *iolat;
+
+	iolat = kzalloc_node(sizeof(*iolat), gfp, node);
+	if (!iolat)
+		return NULL;
+	iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat),
+				       __alignof__(struct blk_rq_stat), gfp);
+	if (!iolat->stats) {
+		kfree(iolat);
+		return NULL;
+	}
+	return &iolat->pd;
+}
+
+static void iolatency_pd_init(struct blkg_policy_data *pd)
+{
+	struct iolatency_grp *iolat = pd_to_lat(pd);
+	struct blkcg_gq *blkg = lat_to_blkg(iolat);
+	struct rq_qos *rqos = blkcg_rq_qos(blkg->q);
+	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
+	u64 now = ktime_to_ns(ktime_get());
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct blk_rq_stat *stat;
+		stat = per_cpu_ptr(iolat->stats, cpu);
+		blk_rq_stat_init(stat);
+	}
+
+	rq_wait_init(&iolat->rq_wait);
+	spin_lock_init(&iolat->child_lat.lock);
+	iolat->rq_depth.queue_depth = blk_queue_depth(blkg->q);
+	iolat->rq_depth.max_depth = INT_MAX;
+	iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth;
+	iolat->blkiolat = blkiolat;
+	iolat->cur_win_nsec = 100 * NSEC_PER_MSEC;
+	atomic64_set(&iolat->window_start, now);
+
+	/*
+	 * We init things in list order, so the pd for the parent may not be
+	 * init'ed yet for whatever reason.
+	 */
+	if (blkg->parent && blkg_to_pd(blkg->parent, &blkcg_policy_iolatency)) {
+		struct iolatency_grp *parent = blkg_to_lat(blkg->parent);
+		atomic_set(&iolat->scale_cookie,
+			   atomic_read(&parent->child_lat.scale_cookie));
+	} else {
+		atomic_set(&iolat->scale_cookie, DEFAULT_SCALE_COOKIE);
+	}
+
+	atomic_set(&iolat->child_lat.scale_cookie, DEFAULT_SCALE_COOKIE);
+}
+
+static void iolatency_pd_offline(struct blkg_policy_data *pd)
+{
+	struct iolatency_grp *iolat = pd_to_lat(pd);
+	struct blkcg_gq *blkg = lat_to_blkg(iolat);
+
+	iolatency_set_min_lat_nsec(blkg, 0);
+	iolatency_clear_scaling(blkg);
+}
+
+static void iolatency_pd_free(struct blkg_policy_data *pd)
+{
+	struct iolatency_grp *iolat = pd_to_lat(pd);
+	free_percpu(iolat->stats);
+	kfree(iolat);
+}
+
+static struct cftype iolatency_files[] = {
+	{
+		.name = "latency",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = iolatency_print_limit,
+		.write = iolatency_set_limit,
+	},
+	{}
+};
+
+static struct blkcg_policy blkcg_policy_iolatency = {
+	.dfl_cftypes	= iolatency_files,
+	.pd_alloc_fn	= iolatency_pd_alloc,
+	.pd_init_fn	= iolatency_pd_init,
+	.pd_offline_fn	= iolatency_pd_offline,
+	.pd_free_fn	= iolatency_pd_free,
+	.pd_stat_fn	= iolatency_pd_stat,
+};
+
+static int __init iolatency_init(void)
+{
+	return blkcg_policy_register(&blkcg_policy_iolatency);
+}
+
+static void __exit iolatency_exit(void)
+{
+	return blkcg_policy_unregister(&blkcg_policy_iolatency);
+}
+
+module_init(iolatency_init);
+module_exit(iolatency_exit);
diff --git a/block/blk.h b/block/blk.h
index 8d23aea96ce9..69b14cd2bb22 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -412,4 +412,10 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
 
 extern void blk_drain_queue(struct request_queue *q);
 
+#ifdef CONFIG_BLK_CGROUP_IOLATENCY
+extern int blk_iolatency_init(struct request_queue *q);
+#else
+static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
+#endif
+
 #endif /* BLK_INTERNAL_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0ffc34c5cc83..e13449a379a1 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -180,9 +180,7 @@ struct bio {
 	struct io_context	*bi_ioc;
 	struct cgroup_subsys_state *bi_css;
 	struct blkcg_gq		*bi_blkg;
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	struct bio_issue	bi_issue;
-#endif
 #endif
 	union {
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
-- 
cgit v1.2.3


From d8095f94e19581057bcad35b8a725aa739e77595 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 2 Jul 2018 15:08:10 +0200
Subject: dmaengine: add support for reporting pause and resume separately

'cmd_pause' DMA channel capability means that respective DMA engine
supports both pausing and resuming given DMA channel. However, in some
cases it is important to know if DMA channel can be paused without the
need to resume it. This is a typical requirement for proper residue
reading on transfer timeout in UART drivers. There are also some DMA
engines with limited hardware, which doesn't really support resuming.

Reporting pause and resume capabilities separately allows UART drivers to
properly check for the really required capabilities and operate in DMA
mode also in systems with limited DMA hardware. On the other hand drivers,
which rely on full channel suspend/resume support, should now check for
both 'pause' and 'resume' features.

Existing clients of dma_get_slave_caps() have been checked and the only
driver which rely on proper channel resuming is soc-generic-dmaengine-pcm
driver, which has been updated to check the newly added capability.
Existing 'cmd_pause' now only indicates that DMA engine support pausing
given DMA channel.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c               | 8 ++------
 include/linux/dmaengine.h             | 5 ++++-
 sound/soc/soc-generic-dmaengine-pcm.c | 2 +-
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 08ba8473a284..84ac38dbdb65 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -500,12 +500,8 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
 	caps->max_burst = device->max_burst;
 	caps->residue_granularity = device->residue_granularity;
 	caps->descriptor_reuse = device->descriptor_reuse;
-
-	/*
-	 * Some devices implement only pause (e.g. to get residuum) but no
-	 * resume. However cmd_pause is advertised as pause AND resume.
-	 */
-	caps->cmd_pause = !!(device->device_pause && device->device_resume);
+	caps->cmd_pause = !!device->device_pause;
+	caps->cmd_resume = !!device->device_resume;
 	caps->cmd_terminate = !!device->device_terminate_all;
 
 	return 0;
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 861be5cab1df..c8c3a7a93802 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -415,7 +415,9 @@ enum dma_residue_granularity {
  *	each type, the dma controller should set BIT(<TYPE>) and same
  *	should be checked by controller as well
  * @max_burst: max burst capability per-transfer
- * @cmd_pause: true, if pause and thereby resume is supported
+ * @cmd_pause: true, if pause is supported (i.e. for reading residue or
+ *	       for resume later)
+ * @cmd_resume: true, if resume is supported
  * @cmd_terminate: true, if terminate cmd is supported
  * @residue_granularity: granularity of the reported transfer residue
  * @descriptor_reuse: if a descriptor can be reused by client and
@@ -427,6 +429,7 @@ struct dma_slave_caps {
 	u32 directions;
 	u32 max_burst;
 	bool cmd_pause;
+	bool cmd_resume;
 	bool cmd_terminate;
 	enum dma_residue_granularity residue_granularity;
 	bool descriptor_reuse;
diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c
index 56a541b9ff9e..76c46d793843 100644
--- a/sound/soc/soc-generic-dmaengine-pcm.c
+++ b/sound/soc/soc-generic-dmaengine-pcm.c
@@ -156,7 +156,7 @@ static int dmaengine_pcm_set_runtime_hwparams(struct snd_pcm_substream *substrea
 
 	ret = dma_get_slave_caps(chan, &dma_caps);
 	if (ret == 0) {
-		if (dma_caps.cmd_pause)
+		if (dma_caps.cmd_pause && dma_caps.cmd_resume)
 			hw.info |= SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME;
 		if (dma_caps.residue_granularity <= DMA_RESIDUE_GRANULARITY_SEGMENT)
 			hw.info |= SNDRV_PCM_INFO_BATCH;
-- 
cgit v1.2.3


From ffcfe25bb50f27395e15fa999f1a7eb769f55360 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 9 Jul 2018 12:19:38 -0400
Subject: net: Add support for subordinate device traffic classes

This patch is meant to provide the basic tools needed to allow us to create
subordinate device traffic classes. The general idea here is to allow
subdividing the queues of a device into queue groups accessible through an
upper device such as a macvlan.

The idea here is to enforce the idea that an upper device has to be a
single queue device, ideally with IFF_NO_QUQUE set. With that being the
case we can pretty much guarantee that the tc_to_txq mappings and XPS maps
for the upper device are unused. As such we could reuse those in order to
support subdividing the lower device and distributing those queues between
the subordinate devices.

In order to distinguish between a regular set of traffic classes and if a
device is carrying subordinate traffic classes I changed num_tc from a u8
to a s16 value and use the negative values to represent the subordinate
pool values. So starting at -1 and running to -32768 we can encode those as
pool values, and the existing values of 0 to 15 can be maintained.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/netdevice.h | 16 ++++++++-
 net/core/dev.c            | 89 +++++++++++++++++++++++++++++++++++++++++++++++
 net/core/net-sysfs.c      | 21 ++++++++++-
 3 files changed, 124 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b683971e500d..b1ff77276bc4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -575,6 +575,9 @@ struct netdev_queue {
 	 * (/sys/class/net/DEV/Q/trans_timeout)
 	 */
 	unsigned long		trans_timeout;
+
+	/* Subordinate device that the queue has been assigned to */
+	struct net_device	*sb_dev;
 /*
  * write-mostly part
  */
@@ -1991,7 +1994,7 @@ struct net_device {
 #ifdef CONFIG_DCB
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
-	u8			num_tc;
+	s16			num_tc;
 	struct netdev_tc_txq	tc_to_txq[TC_MAX_QUEUE];
 	u8			prio_tc_map[TC_BITMASK + 1];
 
@@ -2045,6 +2048,17 @@ int netdev_get_num_tc(struct net_device *dev)
 	return dev->num_tc;
 }
 
+void netdev_unbind_sb_channel(struct net_device *dev,
+			      struct net_device *sb_dev);
+int netdev_bind_sb_channel_queue(struct net_device *dev,
+				 struct net_device *sb_dev,
+				 u8 tc, u16 count, u16 offset);
+int netdev_set_sb_channel(struct net_device *dev, u16 channel);
+static inline int netdev_get_sb_channel(struct net_device *dev)
+{
+	return max_t(int, -dev->num_tc, 0);
+}
+
 static inline
 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
 					 unsigned int index)
diff --git a/net/core/dev.c b/net/core/dev.c
index 89825c1eccdc..cc1d6bba017a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2067,11 +2067,13 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 		struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
 		int i;
 
+		/* walk through the TCs and see if it falls into any of them */
 		for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
 			if ((txq - tc->offset) < tc->count)
 				return i;
 		}
 
+		/* didn't find it, just return -1 to indicate no match */
 		return -1;
 	}
 
@@ -2260,7 +2262,14 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	unsigned int nr_ids;
 
 	if (dev->num_tc) {
+		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
+		if (num_tc < 0)
+			return -EINVAL;
+
+		/* If queue belongs to subordinate dev use its map */
+		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
 		tc = netdev_txq_to_tc(dev, index);
 		if (tc < 0)
 			return -EINVAL;
@@ -2448,11 +2457,25 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
+static void netdev_unbind_all_sb_channels(struct net_device *dev)
+{
+	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+	/* Unbind any subordinate channels */
+	while (txq-- != &dev->_tx[0]) {
+		if (txq->sb_dev)
+			netdev_unbind_sb_channel(dev, txq->sb_dev);
+	}
+}
+
 void netdev_reset_tc(struct net_device *dev)
 {
 #ifdef CONFIG_XPS
 	netif_reset_xps_queues_gt(dev, 0);
 #endif
+	netdev_unbind_all_sb_channels(dev);
+
+	/* Reset TC configuration of device */
 	dev->num_tc = 0;
 	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
 	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
@@ -2481,11 +2504,77 @@ int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
 #ifdef CONFIG_XPS
 	netif_reset_xps_queues_gt(dev, 0);
 #endif
+	netdev_unbind_all_sb_channels(dev);
+
 	dev->num_tc = num_tc;
 	return 0;
 }
 EXPORT_SYMBOL(netdev_set_num_tc);
 
+void netdev_unbind_sb_channel(struct net_device *dev,
+			      struct net_device *sb_dev)
+{
+	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+#ifdef CONFIG_XPS
+	netif_reset_xps_queues_gt(sb_dev, 0);
+#endif
+	memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
+	memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
+
+	while (txq-- != &dev->_tx[0]) {
+		if (txq->sb_dev == sb_dev)
+			txq->sb_dev = NULL;
+	}
+}
+EXPORT_SYMBOL(netdev_unbind_sb_channel);
+
+int netdev_bind_sb_channel_queue(struct net_device *dev,
+				 struct net_device *sb_dev,
+				 u8 tc, u16 count, u16 offset)
+{
+	/* Make certain the sb_dev and dev are already configured */
+	if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
+		return -EINVAL;
+
+	/* We cannot hand out queues we don't have */
+	if ((offset + count) > dev->real_num_tx_queues)
+		return -EINVAL;
+
+	/* Record the mapping */
+	sb_dev->tc_to_txq[tc].count = count;
+	sb_dev->tc_to_txq[tc].offset = offset;
+
+	/* Provide a way for Tx queue to find the tc_to_txq map or
+	 * XPS map for itself.
+	 */
+	while (count--)
+		netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
+
+	return 0;
+}
+EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
+
+int netdev_set_sb_channel(struct net_device *dev, u16 channel)
+{
+	/* Do not use a multiqueue device to represent a subordinate channel */
+	if (netif_is_multiqueue(dev))
+		return -ENODEV;
+
+	/* We allow channels 1 - 32767 to be used for subordinate channels.
+	 * Channel 0 is meant to be "native" mode and used only to represent
+	 * the main root device. We allow writing 0 to reset the device back
+	 * to normal mode after being used as a subordinate channel.
+	 */
+	if (channel > S16_MAX)
+		return -EINVAL;
+
+	dev->num_tc = -channel;
+
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_sb_channel);
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index dce3ae0fbca2..ffa1d18f2c2c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1054,11 +1054,23 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
 		return -ENOENT;
 
 	index = get_netdev_queue_index(queue);
+
+	/* If queue belongs to subordinate dev use its TC mapping */
+	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
 	tc = netdev_txq_to_tc(dev, index);
 	if (tc < 0)
 		return -EINVAL;
 
-	return sprintf(buf, "%u\n", tc);
+	/* We can report the traffic class one of two ways:
+	 * Subordinate device traffic classes are reported with the traffic
+	 * class first, and then the subordinate class so for example TC0 on
+	 * subordinate device 2 will be reported as "0-2". If the queue
+	 * belongs to the root device it will be reported with just the
+	 * traffic class, so just "0" for TC 0 for example.
+	 */
+	return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) :
+				 sprintf(buf, "%u\n", tc);
 }
 
 #ifdef CONFIG_XPS
@@ -1225,7 +1237,14 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	index = get_netdev_queue_index(queue);
 
 	if (dev->num_tc) {
+		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
+		if (num_tc < 0)
+			return -EINVAL;
+
+		/* If queue belongs to subordinate dev use its map */
+		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
 		tc = netdev_txq_to_tc(dev, index);
 		if (tc < 0)
 			return -EINVAL;
-- 
cgit v1.2.3


From eadec877ce9ca46a94e9036b5a44e7941d4fc501 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 9 Jul 2018 12:19:48 -0400
Subject: net: Add support for subordinate traffic classes to netdev_pick_tx

This change makes it so that we can support the concept of subordinate
device traffic classes to the core networking code. In doing this we can
start pulling out the driver specific bits needed to support selecting a
queue based on an upper device.

The solution at is currently stands is only partially implemented. I have
the start of some XPS bits in here, but I would still need to allow for
configuration of the XPS maps on the queues reserved for the subordinate
devices. For now I am using the reference to the sb_dev XPS map as just a
way to skip the lookup of the lower device XPS map for now as that would
result in the wrong queue being picked.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 19 +++------
 drivers/net/macvlan.c                         | 10 +----
 include/linux/netdevice.h                     |  4 +-
 net/core/dev.c                                | 58 ++++++++++++++++-----------
 4 files changed, 45 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 80225af2acb1..abb176df2e7f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8208,20 +8208,17 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
 					      input, common, ring->queue_index);
 }
 
+#ifdef IXGBE_FCOE
 static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 			      void *accel_priv, select_queue_fallback_t fallback)
 {
-	struct ixgbe_fwd_adapter *fwd_adapter = accel_priv;
-#ifdef IXGBE_FCOE
 	struct ixgbe_adapter *adapter;
 	struct ixgbe_ring_feature *f;
-#endif
 	int txq;
 
-	if (fwd_adapter) {
-		u8 tc = netdev_get_num_tc(dev) ?
-			netdev_get_prio_tc_map(dev, skb->priority) : 0;
-		struct net_device *vdev = fwd_adapter->netdev;
+	if (accel_priv) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		struct net_device *vdev = accel_priv;
 
 		txq = vdev->tc_to_txq[tc].offset;
 		txq += reciprocal_scale(skb_get_hash(skb),
@@ -8230,8 +8227,6 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 		return txq;
 	}
 
-#ifdef IXGBE_FCOE
-
 	/*
 	 * only execute the code below if protocol is FCoE
 	 * or FIP and we have FCoE enabled on the adapter
@@ -8257,11 +8252,9 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 		txq -= f->indices;
 
 	return txq + f->offset;
-#else
-	return fallback(dev, skb);
-#endif
 }
 
+#endif
 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 			       struct xdp_frame *xdpf)
 {
@@ -10058,7 +10051,6 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_open		= ixgbe_open,
 	.ndo_stop		= ixgbe_close,
 	.ndo_start_xmit		= ixgbe_xmit_frame,
-	.ndo_select_queue	= ixgbe_select_queue,
 	.ndo_set_rx_mode	= ixgbe_set_rx_mode,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= ixgbe_set_mac,
@@ -10081,6 +10073,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_poll_controller	= ixgbe_netpoll,
 #endif
 #ifdef IXGBE_FCOE
+	.ndo_select_queue	= ixgbe_select_queue,
 	.ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get,
 	.ndo_fcoe_ddp_target = ixgbe_fcoe_ddp_target,
 	.ndo_fcoe_ddp_done = ixgbe_fcoe_ddp_put,
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index adde8fc45588..401e1d1ce1ec 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -514,7 +514,6 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 	const struct macvlan_dev *vlan = netdev_priv(dev);
 	const struct macvlan_port *port = vlan->port;
 	const struct macvlan_dev *dest;
-	void *accel_priv = NULL;
 
 	if (vlan->mode == MACVLAN_MODE_BRIDGE) {
 		const struct ethhdr *eth = (void *)skb->data;
@@ -533,15 +532,10 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 			return NET_XMIT_SUCCESS;
 		}
 	}
-
-	/* For packets that are non-multicast and not bridged we will pass
-	 * the necessary information so that the lowerdev can distinguish
-	 * the source of the packets via the accel_priv value.
-	 */
-	accel_priv = vlan->accel_priv;
 xmit_world:
 	skb->dev = vlan->lowerdev;
-	return dev_queue_xmit_accel(skb, accel_priv);
+	return dev_queue_xmit_accel(skb,
+				    netdev_get_sb_channel(dev) ? dev : NULL);
 }
 
 static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b1ff77276bc4..fda0bcda7a42 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2103,7 +2103,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
-				    void *accel_priv);
+				    struct net_device *sb_dev);
 
 /* returns the headroom that the master device needs to take in account
  * when forwarding to this dev
@@ -2568,7 +2568,7 @@ void dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
 int dev_queue_xmit(struct sk_buff *skb);
-int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
+int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
 int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
 int register_netdevice(struct net_device *dev);
 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
diff --git a/net/core/dev.c b/net/core/dev.c
index cc1d6bba017a..09a7cc2f3c55 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2786,24 +2786,26 @@ EXPORT_SYMBOL(netif_device_attach);
  * Returns a Tx hash based on the given packet descriptor a Tx queues' number
  * to be used as a distribution range.
  */
-static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb)
+static u16 skb_tx_hash(const struct net_device *dev,
+		       const struct net_device *sb_dev,
+		       struct sk_buff *skb)
 {
 	u32 hash;
 	u16 qoffset = 0;
 	u16 qcount = dev->real_num_tx_queues;
 
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+
+		qoffset = sb_dev->tc_to_txq[tc].offset;
+		qcount = sb_dev->tc_to_txq[tc].count;
+	}
+
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
 		while (unlikely(hash >= qcount))
 			hash -= qcount;
-		return hash;
-	}
-
-	if (dev->num_tc) {
-		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-
-		qoffset = dev->tc_to_txq[tc].offset;
-		qcount = dev->tc_to_txq[tc].count;
+		return hash + qoffset;
 	}
 
 	return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
@@ -3573,7 +3575,8 @@ static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
 }
 #endif
 
-static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
+			 struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
@@ -3587,7 +3590,7 @@ static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	if (!static_key_false(&xps_rxqs_needed))
 		goto get_cpus_map;
 
-	dev_maps = rcu_dereference(dev->xps_rxqs_map);
+	dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
 	if (dev_maps) {
 		int tci = sk_rx_queue_get(sk);
 
@@ -3598,7 +3601,7 @@ static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 
 get_cpus_map:
 	if (queue_index < 0) {
-		dev_maps = rcu_dereference(dev->xps_cpus_map);
+		dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
 		if (dev_maps) {
 			unsigned int tci = skb->sender_cpu - 1;
 
@@ -3614,17 +3617,20 @@ get_cpus_map:
 #endif
 }
 
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+static u16 ___netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+			     struct net_device *sb_dev)
 {
 	struct sock *sk = skb->sk;
 	int queue_index = sk_tx_queue_get(sk);
 
+	sb_dev = sb_dev ? : dev;
+
 	if (queue_index < 0 || skb->ooo_okay ||
 	    queue_index >= dev->real_num_tx_queues) {
-		int new_index = get_xps_queue(dev, skb);
+		int new_index = get_xps_queue(dev, sb_dev, skb);
 
 		if (new_index < 0)
-			new_index = skb_tx_hash(dev, skb);
+			new_index = skb_tx_hash(dev, sb_dev, skb);
 
 		if (queue_index != new_index && sk &&
 		    sk_fullsock(sk) &&
@@ -3637,9 +3643,15 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 	return queue_index;
 }
 
+static u16 __netdev_pick_tx(struct net_device *dev,
+			    struct sk_buff *skb)
+{
+	return ___netdev_pick_tx(dev, skb, NULL);
+}
+
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
-				    void *accel_priv)
+				    struct net_device *sb_dev)
 {
 	int queue_index = 0;
 
@@ -3654,10 +3666,10 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		if (ops->ndo_select_queue)
-			queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+			queue_index = ops->ndo_select_queue(dev, skb, sb_dev,
 							    __netdev_pick_tx);
 		else
-			queue_index = __netdev_pick_tx(dev, skb);
+			queue_index = ___netdev_pick_tx(dev, skb, sb_dev);
 
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	}
@@ -3669,7 +3681,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 /**
  *	__dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
- *	@accel_priv: private data used for L2 forwarding offload
+ *	@sb_dev: suboordinate device used for L2 forwarding offload
  *
  *	Queue a buffer for transmission to a network device. The caller must
  *	have set the device and priority and built the buffer before calling
@@ -3692,7 +3704,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 {
 	struct net_device *dev = skb->dev;
 	struct netdev_queue *txq;
@@ -3731,7 +3743,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 	else
 		skb_dst_force(skb);
 
-	txq = netdev_pick_tx(dev, skb, accel_priv);
+	txq = netdev_pick_tx(dev, skb, sb_dev);
 	q = rcu_dereference_bh(txq->qdisc);
 
 	trace_net_dev_queue(skb);
@@ -3805,9 +3817,9 @@ int dev_queue_xmit(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_queue_xmit);
 
-int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
+int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
 {
-	return __dev_queue_xmit(skb, accel_priv);
+	return __dev_queue_xmit(skb, sb_dev);
 }
 EXPORT_SYMBOL(dev_queue_xmit_accel);
 
-- 
cgit v1.2.3


From a4ea8a3dacc312c3402c78f6e4843afdda9b43a0 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 9 Jul 2018 12:19:54 -0400
Subject: net: Add generic ndo_select_queue functions

This patch adds a generic version of the ndo_select_queue functions for
either returning 0 or selecting a queue based on the processor ID. This is
generally meant to just reduce the number of functions we have to change
in the future when we have to deal with ndo_select_queue changes.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/lantiq_etop.c   | 10 +---------
 drivers/net/ethernet/ti/netcp_core.c |  9 +--------
 drivers/staging/netlogic/xlr_net.c   |  9 +--------
 include/linux/netdevice.h            |  4 ++++
 net/core/dev.c                       | 14 ++++++++++++++
 net/packet/af_packet.c               |  2 +-
 6 files changed, 22 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index afc810069440..7a637b51c7d2 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -563,14 +563,6 @@ ltq_etop_set_multicast_list(struct net_device *dev)
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static u16
-ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb,
-		      void *accel_priv, select_queue_fallback_t fallback)
-{
-	/* we are currently only using the first queue */
-	return 0;
-}
-
 static int
 ltq_etop_init(struct net_device *dev)
 {
@@ -641,7 +633,7 @@ static const struct net_device_ops ltq_eth_netdev_ops = {
 	.ndo_set_mac_address = ltq_etop_set_mac_address,
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_set_rx_mode = ltq_etop_set_multicast_list,
-	.ndo_select_queue = ltq_etop_select_queue,
+	.ndo_select_queue = dev_pick_tx_zero,
 	.ndo_init = ltq_etop_init,
 	.ndo_tx_timeout = ltq_etop_tx_timeout,
 };
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 6ebf110cd594..a1d335a3c5e4 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1889,13 +1889,6 @@ static int netcp_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid)
 	return err;
 }
 
-static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb,
-			      void *accel_priv,
-			      select_queue_fallback_t fallback)
-{
-	return 0;
-}
-
 static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			  void *type_data)
 {
@@ -1972,7 +1965,7 @@ static const struct net_device_ops netcp_netdev_ops = {
 	.ndo_vlan_rx_add_vid	= netcp_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= netcp_rx_kill_vid,
 	.ndo_tx_timeout		= netcp_ndo_tx_timeout,
-	.ndo_select_queue	= netcp_select_queue,
+	.ndo_select_queue	= dev_pick_tx_zero,
 	.ndo_setup_tc		= netcp_setup_tc,
 };
 
diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
index e461168313bf..4e6611e4c59b 100644
--- a/drivers/staging/netlogic/xlr_net.c
+++ b/drivers/staging/netlogic/xlr_net.c
@@ -290,13 +290,6 @@ static netdev_tx_t xlr_net_start_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
-static u16 xlr_net_select_queue(struct net_device *ndev, struct sk_buff *skb,
-				void *accel_priv,
-				select_queue_fallback_t fallback)
-{
-	return (u16)smp_processor_id();
-}
-
 static void xlr_hw_set_mac_addr(struct net_device *ndev)
 {
 	struct xlr_net_priv *priv = netdev_priv(ndev);
@@ -403,7 +396,7 @@ static const struct net_device_ops xlr_netdev_ops = {
 	.ndo_open = xlr_net_open,
 	.ndo_stop = xlr_net_stop,
 	.ndo_start_xmit = xlr_net_start_xmit,
-	.ndo_select_queue = xlr_net_select_queue,
+	.ndo_select_queue = dev_pick_tx_cpu_id,
 	.ndo_set_mac_address = xlr_net_set_mac_addr,
 	.ndo_set_rx_mode = xlr_set_rx_mode,
 	.ndo_get_stats64 = xlr_stats,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fda0bcda7a42..46f4c44ce3e4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2567,6 +2567,10 @@ void dev_close(struct net_device *dev);
 void dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
+u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
+		     void *accel_priv, select_queue_fallback_t fallback);
+u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
+		       void *accel_priv, select_queue_fallback_t fallback);
 int dev_queue_xmit(struct sk_buff *skb);
 int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
 int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
diff --git a/net/core/dev.c b/net/core/dev.c
index 09a7cc2f3c55..b5e538032d5e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3617,6 +3617,20 @@ get_cpus_map:
 #endif
 }
 
+u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
+		     void *accel_priv, select_queue_fallback_t fallback)
+{
+	return 0;
+}
+EXPORT_SYMBOL(dev_pick_tx_zero);
+
+u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
+		       void *accel_priv, select_queue_fallback_t fallback)
+{
+	return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
+}
+EXPORT_SYMBOL(dev_pick_tx_cpu_id);
+
 static u16 ___netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
 			     struct net_device *sb_dev)
 {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 47931ebfaef3..f37d087ae652 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -277,7 +277,7 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
 
 static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
 {
-	return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
+	return dev_pick_tx_cpu_id(dev, skb, NULL, NULL);
 }
 
 static u16 packet_pick_tx_queue(struct sk_buff *skb)
-- 
cgit v1.2.3


From 4f49dec9075aa0277b8c9c657ec31e6361f88724 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 9 Jul 2018 12:19:59 -0400
Subject: net: allow ndo_select_queue to pass netdev

This patch makes it so that instead of passing a void pointer as the
accel_priv we instead pass a net_device pointer as sb_dev. Making this
change allows us to pass the subordinate device through to the fallback
function eventually so that we can keep the actual code in the
ndo_select_queue call as focused on possible on the exception cases.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/infiniband/hw/hfi1/vnic_main.c            |  2 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c |  4 ++--
 drivers/net/bonding/bond_main.c                   |  3 ++-
 drivers/net/ethernet/amazon/ena/ena_netdev.c      |  3 ++-
 drivers/net/ethernet/broadcom/bcmsysport.c        |  2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   |  3 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h   |  3 ++-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c   |  3 ++-
 drivers/net/ethernet/hisilicon/hns/hns_enet.c     |  3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     |  7 ++++---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c        |  3 ++-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h      |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   |  3 ++-
 drivers/net/ethernet/renesas/ravb_main.c          |  3 ++-
 drivers/net/ethernet/sun/ldmvsw.c                 |  3 ++-
 drivers/net/ethernet/sun/sunvnet.c                |  3 ++-
 drivers/net/hyperv/netvsc_drv.c                   |  4 ++--
 drivers/net/net_failover.c                        |  5 +++--
 drivers/net/team/team.c                           |  3 ++-
 drivers/net/tun.c                                 |  3 ++-
 drivers/net/wireless/marvell/mwifiex/main.c       |  3 ++-
 drivers/net/xen-netback/interface.c               |  2 +-
 drivers/net/xen-netfront.c                        |  3 ++-
 drivers/staging/rtl8188eu/os_dep/os_intfs.c       |  3 ++-
 drivers/staging/rtl8723bs/os_dep/os_intfs.c       |  7 +++----
 include/linux/netdevice.h                         | 11 +++++++----
 net/core/dev.c                                    |  6 ++++--
 net/mac80211/iface.c                              |  4 ++--
 29 files changed, 66 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 5d65582fe4d9..616fc9b6fad8 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -423,7 +423,7 @@ tx_finish:
 
 static u16 hfi1_vnic_select_queue(struct net_device *netdev,
 				  struct sk_buff *skb,
-				  void *accel_priv,
+				  struct net_device *sb_dev,
 				  select_queue_fallback_t fallback)
 {
 	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
index 0c8aec62a425..61558788b3fa 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -95,7 +95,7 @@ static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb,
 }
 
 static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
-				 void *accel_priv,
+				 struct net_device *sb_dev,
 				 select_queue_fallback_t fallback)
 {
 	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
@@ -107,7 +107,7 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
 	mdata->entropy = opa_vnic_calc_entropy(skb);
 	mdata->vl = opa_vnic_get_vl(adapter, skb);
 	rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
-					       accel_priv, fallback);
+					       sb_dev, fallback);
 	skb_pull(skb, sizeof(*mdata));
 	return rc;
 }
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 63e3844c5bec..9a2ea3c1f949 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4094,7 +4094,8 @@ static inline int bond_slave_override(struct bonding *bond,
 
 
 static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	/* This helper function exists to help dev_pick_tx get the correct
 	 * destination queue.  Using a helper function skips a call to
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index f2af87d70594..e3befb1f9204 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2213,7 +2213,8 @@ static void ena_netpoll(struct net_device *netdev)
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
 static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	u16 qid;
 	/* we suspect that this is good for in--kernel network services that
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index d5fca2e5a9bc..32f548e6431d 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2107,7 +2107,7 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = {
 };
 
 static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
-				    void *accel_priv,
+				    struct net_device *sb_dev,
 				    select_queue_fallback_t fallback)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index af7b5a4d8ba0..e4e1cf907ac6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1910,7 +1910,8 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
 }
 
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback)
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index a8ce5c55bbb0..0e508e5defce 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -497,7 +497,8 @@ int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
 
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback);
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback);
 
 static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
 					struct bnx2x_fastpath *fp,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 0d91716a2566..5dc5e5604f05 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -930,7 +930,8 @@ freeout:
 }
 
 static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	int txq;
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index ef9ef703d13a..ff7a74ec8f11 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2022,7 +2022,8 @@ static void hns_nic_get_stats64(struct net_device *ndev,
 
 static u16
 hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb,
-		     void *accel_priv, select_queue_fallback_t fallback)
+		     struct net_device *sb_dev,
+		     select_queue_fallback_t fallback)
 {
 	struct ethhdr *eth_hdr = (struct ethhdr *)skb->data;
 	struct hns_nic_priv *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index abb176df2e7f..8c7a68c57afa 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8210,15 +8210,16 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
 
 #ifdef IXGBE_FCOE
 static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
-			      void *accel_priv, select_queue_fallback_t fallback)
+			      struct net_device *sb_dev,
+			      select_queue_fallback_t fallback)
 {
 	struct ixgbe_adapter *adapter;
 	struct ixgbe_ring_feature *f;
 	int txq;
 
-	if (accel_priv) {
+	if (sb_dev) {
 		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-		struct net_device *vdev = accel_priv;
+		struct net_device *vdev = sb_dev;
 
 		txq = vdev->tc_to_txq[tc].offset;
 		txq += reciprocal_scale(skb_get_hash(skb),
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 0227786308af..df2996618cd1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -688,7 +688,8 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
 }
 
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-			 void *accel_priv, select_queue_fallback_t fallback)
+			 struct net_device *sb_dev,
+			 select_queue_fallback_t fallback)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	u16 rings_p_up = priv->num_tx_rings_p_up;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index ace6545f82e6..c3228b89df46 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -699,7 +699,8 @@ void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-			 void *accel_priv, select_queue_fallback_t fallback);
+			 struct net_device *sb_dev,
+			 select_queue_fallback_t fallback);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 			       struct mlx4_en_rx_alloc *frame,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e2b7586ed7a0..e1b237ccdf56 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -865,7 +865,8 @@ struct mlx5e_profile {
 void mlx5e_build_ptys2ethtool_map(void);
 
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback);
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback);
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 			  struct mlx5e_tx_wqe *wqe, u16 pi);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index f0739dae7b56..dfcc3710b65f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -111,7 +111,8 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb
 #endif
 
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback)
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	int channel_ix = fallback(dev, skb);
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 68f122140966..4a7f54c8e7aa 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1656,7 +1656,8 @@ drop:
 }
 
 static u16 ravb_select_queue(struct net_device *ndev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	/* If skb needs TX timestamp, it is handled in network control queue */
 	return (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) ? RAVB_NC :
diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index a5dd627fe2f9..d42f47f6c632 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -101,7 +101,8 @@ static struct vnet_port *vsw_tx_port_find(struct sk_buff *skb,
 }
 
 static u16 vsw_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct vnet_port *port = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index a94f50442613..12539b357a78 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -234,7 +234,8 @@ static struct vnet_port *vnet_tx_port_find(struct sk_buff *skb,
 }
 
 static u16 vnet_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	struct vnet *vp = netdev_priv(dev);
 	struct vnet_port *port = __tx_port_find(vp, skb);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dd1d6e115145..98c0107d6ca1 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -329,7 +329,7 @@ static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
 }
 
 static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
-			       void *accel_priv,
+			       struct net_device *sb_dev,
 			       select_queue_fallback_t fallback)
 {
 	struct net_device_context *ndc = netdev_priv(ndev);
@@ -343,7 +343,7 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 
 		if (vf_ops->ndo_select_queue)
 			txq = vf_ops->ndo_select_queue(vf_netdev, skb,
-						       accel_priv, fallback);
+						       sb_dev, fallback);
 		else
 			txq = fallback(vf_netdev, skb);
 
diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
index 4f390fa557e4..78b549698b7b 100644
--- a/drivers/net/net_failover.c
+++ b/drivers/net/net_failover.c
@@ -115,7 +115,8 @@ static netdev_tx_t net_failover_start_xmit(struct sk_buff *skb,
 }
 
 static u16 net_failover_select_queue(struct net_device *dev,
-				     struct sk_buff *skb, void *accel_priv,
+				     struct sk_buff *skb,
+				     struct net_device *sb_dev,
 				     select_queue_fallback_t fallback)
 {
 	struct net_failover_info *nfo_info = netdev_priv(dev);
@@ -128,7 +129,7 @@ static u16 net_failover_select_queue(struct net_device *dev,
 
 		if (ops->ndo_select_queue)
 			txq = ops->ndo_select_queue(primary_dev, skb,
-						    accel_priv, fallback);
+						    sb_dev, fallback);
 		else
 			txq = fallback(primary_dev, skb);
 
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index b070959737ff..3a95eaae0c98 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1707,7 +1707,8 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	/*
 	 * This helper function exists to help dev_pick_tx get the correct
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a192a017cc68..76f0f4131197 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -607,7 +607,8 @@ static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
 }
 
 static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	u16 ret;
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index 510f6b8e717d..fa3e8ddfe9a9 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -1279,7 +1279,8 @@ static struct net_device_stats *mwifiex_get_stats(struct net_device *dev)
 
 static u16
 mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb,
-				void *accel_priv, select_queue_fallback_t fallback)
+				struct net_device *sb_dev,
+				select_queue_fallback_t fallback)
 {
 	skb->priority = cfg80211_classify8021d(skb, NULL);
 	return mwifiex_1d_to_wmm_queue[skb->priority];
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 78ebe494fef0..19c4c585f472 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -148,7 +148,7 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
 }
 
 static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
-			       void *accel_priv,
+			       struct net_device *sb_dev,
 			       select_queue_fallback_t fallback)
 {
 	struct xenvif *vif = netdev_priv(dev);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index a57daecf1d57..d67cd379d156 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -545,7 +545,8 @@ static int xennet_count_skb_slots(struct sk_buff *skb)
 }
 
 static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
-			       void *accel_priv, select_queue_fallback_t fallback)
+			       struct net_device *sb_dev,
+			       select_queue_fallback_t fallback)
 {
 	unsigned int num_queues = dev->real_num_tx_queues;
 	u32 hash;
diff --git a/drivers/staging/rtl8188eu/os_dep/os_intfs.c b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
index add1ba00f3e9..38e85c8a85c8 100644
--- a/drivers/staging/rtl8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
@@ -253,7 +253,8 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
 }
 
 static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct adapter	*padapter = rtw_netdev_priv(dev);
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
index ace68f023b49..181642358e3f 100644
--- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
@@ -403,10 +403,9 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
 }
 
 
-static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb
-				, void *accel_priv
-				, select_queue_fallback_t fallback
-)
+static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct adapter	*padapter = rtw_netdev_priv(dev);
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 46f4c44ce3e4..bbf062c1ca8a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -957,7 +957,8 @@ struct dev_ifalias {
  *	those the driver believes to be appropriate.
  *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
- *                         void *accel_priv, select_queue_fallback_t fallback);
+ *                         struct net_device *sb_dev,
+ *                         select_queue_fallback_t fallback);
  *	Called to decide which queue to use when device supports multiple
  *	transmit queues.
  *
@@ -1229,7 +1230,7 @@ struct net_device_ops {
 						      netdev_features_t features);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
-						    void *accel_priv,
+						    struct net_device *sb_dev,
 						    select_queue_fallback_t fallback);
 	void			(*ndo_change_rx_flags)(struct net_device *dev,
 						       int flags);
@@ -2568,9 +2569,11 @@ void dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
 u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
-		     void *accel_priv, select_queue_fallback_t fallback);
+		     struct net_device *sb_dev,
+		     select_queue_fallback_t fallback);
 u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback);
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback);
 int dev_queue_xmit(struct sk_buff *skb);
 int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
 int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
diff --git a/net/core/dev.c b/net/core/dev.c
index b5e538032d5e..a051ce27198b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3618,14 +3618,16 @@ get_cpus_map:
 }
 
 u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
-		     void *accel_priv, select_queue_fallback_t fallback)
+		     struct net_device *sb_dev,
+		     select_queue_fallback_t fallback)
 {
 	return 0;
 }
 EXPORT_SYMBOL(dev_pick_tx_zero);
 
 u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback)
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback)
 {
 	return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
 }
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 555e389b7dfa..5e6cf2cee965 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1130,7 +1130,7 @@ static void ieee80211_uninit(struct net_device *dev)
 
 static u16 ieee80211_netdev_select_queue(struct net_device *dev,
 					 struct sk_buff *skb,
-					 void *accel_priv,
+					 struct net_device *sb_dev,
 					 select_queue_fallback_t fallback)
 {
 	return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
@@ -1176,7 +1176,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
 
 static u16 ieee80211_monitor_select_queue(struct net_device *dev,
 					  struct sk_buff *skb,
-					  void *accel_priv,
+					  struct net_device *sb_dev,
 					  select_queue_fallback_t fallback)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-- 
cgit v1.2.3


From 8ec56fc3c5ee6f9700adac190e9ce5b8859a58b6 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 9 Jul 2018 12:20:04 -0400
Subject: net: allow fallback function to pass netdev

For most of these calls we can just pass NULL through to the fallback
function as the sb_dev. The only cases where we cannot are the cases where
we might be dealing with either an upper device or a driver that would
have configured things to support an sb_dev itself.

The only driver that has any significant change in this patch set should be
ixgbe as we can drop the redundant functionality that existed in both the
ndo_select_queue function and the fallback function that was passed through
to us.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c    |  2 +-
 drivers/net/ethernet/broadcom/bcmsysport.c      |  4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |  3 ++-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_enet.c   |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   |  4 ++--
 drivers/net/ethernet/mellanox/mlx4/en_tx.c      |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c |  2 +-
 drivers/net/hyperv/netvsc_drv.c                 |  2 +-
 drivers/net/net_failover.c                      |  2 +-
 drivers/net/xen-netback/interface.c             |  2 +-
 include/linux/netdevice.h                       |  3 ++-
 net/core/dev.c                                  | 12 +++---------
 net/packet/af_packet.c                          |  7 ++++---
 14 files changed, 24 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index e3befb1f9204..c673ac2df65b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2224,7 +2224,7 @@ static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
 	if (skb_rx_queue_recorded(skb))
 		qid = skb_get_rx_queue(skb);
 	else
-		qid = fallback(dev, skb);
+		qid = fallback(dev, skb, NULL);
 
 	return qid;
 }
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 32f548e6431d..eb890c4b3b2d 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2116,7 +2116,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
 	unsigned int q, port;
 
 	if (!netdev_uses_dsa(dev))
-		return fallback(dev, skb);
+		return fallback(dev, skb, NULL);
 
 	/* DSA tagging layer will have configured the correct queue */
 	q = BRCM_TAG_GET_QUEUE(queue);
@@ -2124,7 +2124,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
 	tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues];
 
 	if (unlikely(!tx_ring))
-		return fallback(dev, skb);
+		return fallback(dev, skb, NULL);
 
 	return tx_ring->index;
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index e4e1cf907ac6..5a727d4729da 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1933,7 +1933,8 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	/* select a non-FCoE queue */
-	return fallback(dev, skb) % (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
+	return fallback(dev, skb, NULL) %
+	       (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
 }
 
 void bnx2x_set_num_queues(struct bnx2x *bp)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 5dc5e5604f05..40cf8dc9f163 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -973,7 +973,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
 		return txq;
 	}
 
-	return fallback(dev, skb) % dev->real_num_tx_queues;
+	return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
 }
 
 static int closest_timer(const struct sge *s, int time)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index ff7a74ec8f11..948b3e0d18f4 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2033,7 +2033,7 @@ hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb,
 	    is_multicast_ether_addr(eth_hdr->h_dest))
 		return 0;
 	else
-		return fallback(ndev, skb);
+		return fallback(ndev, skb, NULL);
 }
 
 static const struct net_device_ops hns_nic_netdev_ops = {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 8c7a68c57afa..bd6d9ea27b4b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8237,11 +8237,11 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 	case htons(ETH_P_FIP):
 		adapter = netdev_priv(dev);
 
-		if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
+		if (!sb_dev && (adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
 			break;
 		/* fall through */
 	default:
-		return fallback(dev, skb);
+		return fallback(dev, skb, sb_dev);
 	}
 
 	f = &adapter->ring_feature[RING_F_FCOE];
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index df2996618cd1..1857ee0f0871 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -695,9 +695,9 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
 	u16 rings_p_up = priv->num_tx_rings_p_up;
 
 	if (netdev_get_num_tc(dev))
-		return fallback(dev, skb);
+		return fallback(dev, skb, NULL);
 
-	return fallback(dev, skb) % rings_p_up;
+	return fallback(dev, skb, NULL) % rings_p_up;
 }
 
 static void mlx4_bf_copy(void __iomem *dst, const void *src,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index dfcc3710b65f..9106ea45e3cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -115,7 +115,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 		       select_queue_fallback_t fallback)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	int channel_ix = fallback(dev, skb);
+	int channel_ix = fallback(dev, skb, NULL);
 	u16 num_channels;
 	int up = 0;
 
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 98c0107d6ca1..cf4f40a04194 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -345,7 +345,7 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 			txq = vf_ops->ndo_select_queue(vf_netdev, skb,
 						       sb_dev, fallback);
 		else
-			txq = fallback(vf_netdev, skb);
+			txq = fallback(vf_netdev, skb, NULL);
 
 		/* Record the queue selected by VF so that it can be
 		 * used for common case where VF has more queues than
diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
index 78b549698b7b..d00d42c845b7 100644
--- a/drivers/net/net_failover.c
+++ b/drivers/net/net_failover.c
@@ -131,7 +131,7 @@ static u16 net_failover_select_queue(struct net_device *dev,
 			txq = ops->ndo_select_queue(primary_dev, skb,
 						    sb_dev, fallback);
 		else
-			txq = fallback(primary_dev, skb);
+			txq = fallback(primary_dev, skb, NULL);
 
 		qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
 
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 19c4c585f472..92274c237200 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -155,7 +155,7 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
 	unsigned int size = vif->hash.size;
 
 	if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
-		return fallback(dev, skb) % dev->real_num_tx_queues;
+		return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
 
 	xenvif_set_skb_hash(vif, skb);
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bbf062c1ca8a..2daf2fa6554f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -793,7 +793,8 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
 }
 
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
-				       struct sk_buff *skb);
+				       struct sk_buff *skb,
+				       struct net_device *sb_dev);
 
 enum tc_setup_type {
 	TC_SETUP_QDISC_MQPRIO,
diff --git a/net/core/dev.c b/net/core/dev.c
index a051ce27198b..e18d81837a6c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3633,8 +3633,8 @@ u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(dev_pick_tx_cpu_id);
 
-static u16 ___netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
-			     struct net_device *sb_dev)
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+			    struct net_device *sb_dev)
 {
 	struct sock *sk = skb->sk;
 	int queue_index = sk_tx_queue_get(sk);
@@ -3659,12 +3659,6 @@ static u16 ___netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
 	return queue_index;
 }
 
-static u16 __netdev_pick_tx(struct net_device *dev,
-			    struct sk_buff *skb)
-{
-	return ___netdev_pick_tx(dev, skb, NULL);
-}
-
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
 				    struct net_device *sb_dev)
@@ -3685,7 +3679,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 			queue_index = ops->ndo_select_queue(dev, skb, sb_dev,
 							    __netdev_pick_tx);
 		else
-			queue_index = ___netdev_pick_tx(dev, skb, sb_dev);
+			queue_index = __netdev_pick_tx(dev, skb, sb_dev);
 
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f37d087ae652..00189a3b07f2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -275,9 +275,10 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
 	return po->xmit == packet_direct_xmit;
 }
 
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
+				  struct net_device *sb_dev)
 {
-	return dev_pick_tx_cpu_id(dev, skb, NULL, NULL);
+	return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
 }
 
 static u16 packet_pick_tx_queue(struct sk_buff *skb)
@@ -291,7 +292,7 @@ static u16 packet_pick_tx_queue(struct sk_buff *skb)
 						    __packet_pick_tx_queue);
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	} else {
-		queue_index = __packet_pick_tx_queue(dev, skb);
+		queue_index = __packet_pick_tx_queue(dev, skb, NULL);
 	}
 
 	return queue_index;
-- 
cgit v1.2.3


From 9f17dbf04ddf55ae48f5bbafea4c4920ea943215 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 9 Jul 2018 18:10:02 +0100
Subject: netfilter: fix use-after-free in NF_HOOK_LIST

nf_hook() can free the skb, so we need to remove it from the list before
 calling, and add passed skbs to a sublist afterwards.

Fixes: 17266ee93984 ("net: ipv4: listified version of ip_rcv")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 5a5e0a2ab2a3..23b48de8c2e2 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -294,12 +294,16 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	     int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct sk_buff *skb, *next;
+	struct list_head sublist;
 
+	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
-		int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
-		if (ret != 1)
-			list_del(&skb->list);
+		list_del(&skb->list);
+		if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1)
+			list_add_tail(&skb->list, &sublist);
 	}
+	/* Put passed packets back on main list */
+	list_splice(&sublist, head);
 }
 
 /* Call setsockopt() */
-- 
cgit v1.2.3


From 7f35e63dbfcb627bd30bac45702ffdf1ddde1516 Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas@ti.com>
Date: Mon, 9 Jul 2018 22:18:38 +0530
Subject: bus: ti-sysc: Add support for using ti-sysc for MCAN on dra76x

The dra76x MCAN generic interconnect module has a its own
format for the bits in the control registers.

Therefore add a new module type, new regbits and new capabilities
specific to the MCAN module.

Acked-by: Rob Herring <robh@kernel.org>
CC: Tony Lindgren <tony@atomide.com>
Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 Documentation/devicetree/bindings/bus/ti-sysc.txt |  1 +
 drivers/bus/ti-sysc.c                             | 18 ++++++++++++++++++
 include/dt-bindings/bus/ti-sysc.h                 |  2 ++
 include/linux/platform_data/ti-sysc.h             |  1 +
 4 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/bus/ti-sysc.txt b/Documentation/devicetree/bindings/bus/ti-sysc.txt
index d8ed5b780ed9..91dc2333af01 100644
--- a/Documentation/devicetree/bindings/bus/ti-sysc.txt
+++ b/Documentation/devicetree/bindings/bus/ti-sysc.txt
@@ -36,6 +36,7 @@ Required standard properties:
 		"ti,sysc-omap-aes"
 		"ti,sysc-mcasp"
 		"ti,sysc-usb-host-fs"
+		"ti,sysc-dra7-mcan"
 
 - reg		shall have register areas implemented for the interconnect
 		target module in question such as revision, sysc and syss
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 80d60f43db56..c9db5369e2ec 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -1552,6 +1552,23 @@ static const struct sysc_capabilities sysc_omap4_usb_host_fs = {
 	.regbits = &sysc_regbits_omap4_usb_host_fs,
 };
 
+static const struct sysc_regbits sysc_regbits_dra7_mcan = {
+	.dmadisable_shift = -ENODEV,
+	.midle_shift = -ENODEV,
+	.sidle_shift = -ENODEV,
+	.clkact_shift = -ENODEV,
+	.enwkup_shift = 4,
+	.srst_shift = 0,
+	.emufree_shift = -ENODEV,
+	.autoidle_shift = -ENODEV,
+};
+
+static const struct sysc_capabilities sysc_dra7_mcan = {
+	.type = TI_SYSC_DRA7_MCAN,
+	.sysc_mask = SYSC_DRA7_MCAN_ENAWAKEUP | SYSC_OMAP4_SOFTRESET,
+	.regbits = &sysc_regbits_dra7_mcan,
+};
+
 static int sysc_init_pdata(struct sysc *ddata)
 {
 	struct ti_sysc_platform_data *pdata = dev_get_platdata(ddata->dev);
@@ -1743,6 +1760,7 @@ static const struct of_device_id sysc_match[] = {
 	{ .compatible = "ti,sysc-mcasp", .data = &sysc_omap4_mcasp, },
 	{ .compatible = "ti,sysc-usb-host-fs",
 	  .data = &sysc_omap4_usb_host_fs, },
+	{ .compatible = "ti,sysc-dra7-mcan", .data = &sysc_dra7_mcan, },
 	{  },
 };
 MODULE_DEVICE_TABLE(of, sysc_match);
diff --git a/include/dt-bindings/bus/ti-sysc.h b/include/dt-bindings/bus/ti-sysc.h
index 2c005376ac0e..7138384e2ef9 100644
--- a/include/dt-bindings/bus/ti-sysc.h
+++ b/include/dt-bindings/bus/ti-sysc.h
@@ -15,6 +15,8 @@
 /* SmartReflex sysc found on 36xx and later */
 #define SYSC_OMAP3_SR_ENAWAKEUP		(1 << 26)
 
+#define SYSC_DRA7_MCAN_ENAWAKEUP	(1 << 4)
+
 /* SYSCONFIG STANDBYMODE/MIDLEMODE/SIDLEMODE supported by hardware */
 #define SYSC_IDLE_FORCE			0
 #define SYSC_IDLE_NO			1
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index 990aad477458..2efa3470a451 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -14,6 +14,7 @@ enum ti_sysc_module_type {
 	TI_SYSC_OMAP4_SR,
 	TI_SYSC_OMAP4_MCASP,
 	TI_SYSC_OMAP4_USB_HOST_FS,
+	TI_SYSC_DRA7_MCAN,
 };
 
 struct ti_sysc_cookie {
-- 
cgit v1.2.3


From 74655749a58405e259eaaba66bfc391fdbe1e34e Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Mon, 9 Jul 2018 13:03:16 +0530
Subject: ARM: OMAP2+: sleep33/43xx: Make sleep actions configurable

Add an argument to the sleep33xx and sleep43xx code to allow us to set
flags to determine which portions of the code get called in order to use
the same code for multiple power saving modes. This patch allows us to
decide whether or not we flush and disable caches, save EMIF context,
put the memory into self refresh and disable the EMIF, and/or invoke
the wkup_m3 when entering into WFI.

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/pm33xx-core.c    | 10 ++++---
 arch/arm/mach-omap2/sleep33xx.S      | 47 ++++++++++++++++++++++++++++++
 arch/arm/mach-omap2/sleep43xx.S      | 56 +++++++++++++++++++++++++++++++++---
 drivers/soc/ti/pm33xx.c              | 15 +++++++++-
 include/linux/platform_data/pm33xx.h | 26 ++++++++++++++++-
 5 files changed, 144 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/pm33xx-core.c b/arch/arm/mach-omap2/pm33xx-core.c
index 9b3755a2e2ec..e363b9717fa5 100644
--- a/arch/arm/mach-omap2/pm33xx-core.c
+++ b/arch/arm/mach-omap2/pm33xx-core.c
@@ -106,12 +106,13 @@ static void amx3_post_suspend_common(void)
 		pr_err("PM: GFX domain did not transition: %x\n", status);
 }
 
-static int am33xx_suspend(unsigned int state, int (*fn)(unsigned long))
+static int am33xx_suspend(unsigned int state, int (*fn)(unsigned long),
+			  unsigned long args)
 {
 	int ret = 0;
 
 	amx3_pre_suspend_common();
-	ret = cpu_suspend(0, fn);
+	ret = cpu_suspend(args, fn);
 	amx3_post_suspend_common();
 
 	/*
@@ -128,13 +129,14 @@ static int am33xx_suspend(unsigned int state, int (*fn)(unsigned long))
 	return ret;
 }
 
-static int am43xx_suspend(unsigned int state, int (*fn)(unsigned long))
+static int am43xx_suspend(unsigned int state, int (*fn)(unsigned long),
+			  unsigned long args)
 {
 	int ret = 0;
 
 	amx3_pre_suspend_common();
 	scu_power_mode(scu_base, SCU_PM_POWEROFF);
-	ret = cpu_suspend(0, fn);
+	ret = cpu_suspend(args, fn);
 	scu_power_mode(scu_base, SCU_PM_NORMAL);
 	amx3_post_suspend_common();
 
diff --git a/arch/arm/mach-omap2/sleep33xx.S b/arch/arm/mach-omap2/sleep33xx.S
index 322b3bb868b4..8d0d53b671f9 100644
--- a/arch/arm/mach-omap2/sleep33xx.S
+++ b/arch/arm/mach-omap2/sleep33xx.S
@@ -8,6 +8,7 @@
 
 #include <generated/ti-pm-asm-offsets.h>
 #include <linux/linkage.h>
+#include <linux/platform_data/pm33xx.h>
 #include <linux/ti-emif-sram.h>
 #include <asm/assembler.h>
 #include <asm/memory.h>
@@ -19,12 +20,25 @@
 #define AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE			0x0003
 #define AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE			0x0002
 
+/* replicated define because linux/bitops.h cannot be included in assembly */
+#define BIT(nr)			(1 << (nr))
+
 	.arm
 	.align 3
 
 ENTRY(am33xx_do_wfi)
 	stmfd	sp!, {r4 - r11, lr}	@ save registers on stack
 
+	/* Save wfi_flags arg to data space */
+	mov	r4, r0
+	adr	r3, am33xx_pm_ro_sram_data
+	ldr	r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET]
+	str	r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET]
+
+	/* Only flush cache is we know we are losing MPU context */
+	tst	r4, #WFI_FLAG_FLUSH_CACHE
+	beq	cache_skip_flush
+
 	/*
 	 * Flush all data from the L1 and L2 data cache before disabling
 	 * SCTLR.C bit.
@@ -48,14 +62,33 @@ ENTRY(am33xx_do_wfi)
 	ldr	r1, kernel_flush
 	blx	r1
 
+	adr	r3, am33xx_pm_ro_sram_data
+	ldr	r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET]
+	ldr	r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET]
+
+cache_skip_flush:
+	/* Check if we want self refresh */
+	tst	r4, #WFI_FLAG_SELF_REFRESH
+	beq	emif_skip_enter_sr
+
 	adr	r9, am33xx_emif_sram_table
 
 	ldr	r3, [r9, #EMIF_PM_ENTER_SR_OFFSET]
 	blx	r3
 
+emif_skip_enter_sr:
+	/* Only necessary if PER is losing context */
+	tst	r4, #WFI_FLAG_SAVE_EMIF
+	beq	emif_skip_save
+
 	ldr	r3, [r9, #EMIF_PM_SAVE_CONTEXT_OFFSET]
 	blx	r3
 
+emif_skip_save:
+	/* Only can disable EMIF if we have entered self refresh */
+	tst     r4, #WFI_FLAG_SELF_REFRESH
+	beq     emif_skip_disable
+
 	/* Disable EMIF */
 	ldr     r1, virt_emif_clkctrl
 	ldr     r2, [r1]
@@ -69,6 +102,10 @@ wait_emif_disable:
 	cmp	r2, r3
 	bne	wait_emif_disable
 
+emif_skip_disable:
+	tst	r4, #WFI_FLAG_WAKE_M3
+	beq	wkup_m3_skip
+
 	/*
 	 * For the MPU WFI to be registered as an interrupt
 	 * to WKUP_M3, MPU_CLKCTRL.MODULEMODE needs to be set
@@ -79,6 +116,7 @@ wait_emif_disable:
 	bic	r2, r2, #AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE
 	str	r2, [r1]
 
+wkup_m3_skip:
 	/*
 	 * Execute an ISB instruction to ensure that all of the
 	 * CP15 register changes have been committed.
@@ -132,10 +170,18 @@ wait_emif_enable:
 	cmp	r2, r3
 	bne	wait_emif_enable
 
+	/* Only necessary if PER is losing context */
+	tst	r4, #WFI_FLAG_SELF_REFRESH
+	beq	emif_skip_exit_sr_abt
 
+	adr	r9, am33xx_emif_sram_table
 	ldr	r1, [r9, #EMIF_PM_ABORT_SR_OFFSET]
 	blx	r1
 
+emif_skip_exit_sr_abt:
+	tst	r4, #WFI_FLAG_FLUSH_CACHE
+	beq	cache_skip_restore
+
 	/*
 	 * Set SCTLR.C bit to allow data cache allocation
 	 */
@@ -144,6 +190,7 @@ wait_emif_enable:
 	mcr	p15, 0, r0, c1, c0, 0
 	isb
 
+cache_skip_restore:
 	/* Let the suspend code know about the abort */
 	mov	r0, #1
 	ldmfd	sp!, {r4 - r11, pc}	@ restore regs and return
diff --git a/arch/arm/mach-omap2/sleep43xx.S b/arch/arm/mach-omap2/sleep43xx.S
index 8903814a6677..cd7e95f48a73 100644
--- a/arch/arm/mach-omap2/sleep43xx.S
+++ b/arch/arm/mach-omap2/sleep43xx.S
@@ -9,7 +9,7 @@
 #include <generated/ti-pm-asm-offsets.h>
 #include <linux/linkage.h>
 #include <linux/ti-emif-sram.h>
-
+#include <linux/platform_data/pm33xx.h>
 #include <asm/assembler.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/memory.h>
@@ -22,6 +22,9 @@
 #include "prm33xx.h"
 #include "prcm43xx.h"
 
+/* replicated define because linux/bitops.h cannot be included in assembly */
+#define BIT(nr)			(1 << (nr))
+
 #define AM33XX_CM_CLKCTRL_MODULESTATE_DISABLED		0x00030000
 #define AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE		0x0003
 #define AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE		0x0002
@@ -51,6 +54,12 @@
 ENTRY(am43xx_do_wfi)
 	stmfd	sp!, {r4 - r11, lr}	@ save registers on stack
 
+	/* Save wfi_flags arg to data space */
+	mov	r4, r0
+	adr	r3, am43xx_pm_ro_sram_data
+	ldr	r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET]
+	str	r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET]
+
 #ifdef CONFIG_CACHE_L2X0
 	/* Retrieve l2 cache virt address BEFORE we shut off EMIF */
 	ldr	r1, get_l2cache_base
@@ -58,6 +67,10 @@ ENTRY(am43xx_do_wfi)
 	mov	r8, r0
 #endif
 
+	/* Only flush cache is we know we are losing MPU context */
+	tst	r4, #WFI_FLAG_FLUSH_CACHE
+	beq	cache_skip_flush
+
 	/*
 	 * Flush all data from the L1 and L2 data cache before disabling
 	 * SCTLR.C bit.
@@ -128,13 +141,33 @@ sync:
 	bne	sync
 #endif
 
+	/* Restore wfi_flags */
+	adr	r3, am43xx_pm_ro_sram_data
+	ldr	r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET]
+	ldr	r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET]
+
+cache_skip_flush:
+	/* Check if we want self refresh */
+	tst	r4, #WFI_FLAG_SELF_REFRESH
+	beq	emif_skip_enter_sr
+
 	adr     r9, am43xx_emif_sram_table
 
 	ldr     r3, [r9, #EMIF_PM_ENTER_SR_OFFSET]
 	blx     r3
 
+emif_skip_enter_sr:
+	/* Only necessary if PER is losing context */
+	tst	r4, #WFI_FLAG_SAVE_EMIF
+	beq	emif_skip_save
+
 	ldr     r3, [r9, #EMIF_PM_SAVE_CONTEXT_OFFSET]
-	blx     r3
+	blx	r3
+
+emif_skip_save:
+	/* Only can disable EMIF if we have entered self refresh */
+	tst	r4, #WFI_FLAG_SELF_REFRESH
+	beq	emif_skip_disable
 
 	/* Disable EMIF */
 	ldr	r1, am43xx_virt_emif_clkctrl
@@ -148,6 +181,10 @@ wait_emif_disable:
 	cmp	r2, r3
 	bne	wait_emif_disable
 
+emif_skip_disable:
+	tst	r4, #WFI_FLAG_WAKE_M3
+	beq	wkup_m3_skip
+
 	/*
 	 * For the MPU WFI to be registered as an interrupt
 	 * to WKUP_M3, MPU_CLKCTRL.MODULEMODE needs to be set
@@ -165,6 +202,7 @@ wait_emif_disable:
 	mov	r2, #AM43XX_CM_CLKSTCTRL_CLKTRCTRL_SW_SLEEP
 	str	r2, [r1]
 
+wkup_m3_skip:
 	/*
 	 * Execute a barrier instruction to ensure that all cache,
 	 * TLB and branch predictor maintenance operations issued
@@ -218,6 +256,9 @@ wait_emif_enable:
 	cmp	r2, r3
 	bne	wait_emif_enable
 
+	tst	r4, #WFI_FLAG_FLUSH_CACHE
+	beq	cache_skip_restore
+
 	/*
 	 * Set SCTLR.C bit to allow data cache allocation
 	 */
@@ -226,9 +267,16 @@ wait_emif_enable:
 	mcr	p15, 0, r0, c1, c0, 0
 	isb
 
-	ldr     r1, [r9, #EMIF_PM_ABORT_SR_OFFSET]
-	blx     r1
+cache_skip_restore:
+	/* Only necessary if PER is losing context */
+	tst	r4, #WFI_FLAG_SELF_REFRESH
+	beq	emif_skip_exit_sr_abt
+
+	adr	r9, am43xx_emif_sram_table
+	ldr	r1, [r9, #EMIF_PM_ABORT_SR_OFFSET]
+	blx	r1
 
+emif_skip_exit_sr_abt:
 	/* Let the suspend code know about the abort */
 	mov	r0, #1
 	ldmfd	sp!, {r4 - r11, pc}	@ restore regs and return
diff --git a/drivers/soc/ti/pm33xx.c b/drivers/soc/ti/pm33xx.c
index 652739c7f718..0011c8f5ecd7 100644
--- a/drivers/soc/ti/pm33xx.c
+++ b/drivers/soc/ti/pm33xx.c
@@ -41,6 +41,8 @@ static struct am33xx_pm_sram_addr *pm_sram;
 static struct device *pm33xx_dev;
 static struct wkup_m3_ipc *m3_ipc;
 
+static unsigned long suspend_wfi_flags;
+
 static u32 sram_suspend_address(unsigned long addr)
 {
 	return ((unsigned long)am33xx_do_wfi_sram +
@@ -53,7 +55,7 @@ static int am33xx_pm_suspend(suspend_state_t suspend_state)
 	int i, ret = 0;
 
 	ret = pm_ops->soc_suspend((unsigned long)suspend_state,
-				  am33xx_do_wfi_sram);
+				  am33xx_do_wfi_sram, suspend_wfi_flags);
 
 	if (ret) {
 		dev_err(pm33xx_dev, "PM: Kernel suspend failure\n");
@@ -310,6 +312,17 @@ static int am33xx_pm_probe(struct platform_device *pdev)
 	suspend_set_ops(&am33xx_pm_ops);
 #endif /* CONFIG_SUSPEND */
 
+	/*
+	 * For a system suspend we must flush the caches, we want
+	 * the DDR in self-refresh, we want to save the context
+	 * of the EMIF, and we want the wkup_m3 to handle low-power
+	 * transition.
+	 */
+	suspend_wfi_flags |= WFI_FLAG_FLUSH_CACHE;
+	suspend_wfi_flags |= WFI_FLAG_SELF_REFRESH;
+	suspend_wfi_flags |= WFI_FLAG_SAVE_EMIF;
+	suspend_wfi_flags |= WFI_FLAG_WAKE_M3;
+
 	ret = pm_ops->init();
 	if (ret) {
 		dev_err(dev, "Unable to call core pm init!\n");
diff --git a/include/linux/platform_data/pm33xx.h b/include/linux/platform_data/pm33xx.h
index f9bed2a0af9d..d231265c135c 100644
--- a/include/linux/platform_data/pm33xx.h
+++ b/include/linux/platform_data/pm33xx.h
@@ -12,6 +12,29 @@
 #include <linux/kbuild.h>
 #include <linux/types.h>
 
+/*
+ * WFI Flags for sleep code control
+ *
+ * These flags allow PM code to exclude certain operations from happening
+ * in the low level ASM code found in sleep33xx.S and sleep43xx.S
+ *
+ * WFI_FLAG_FLUSH_CACHE: Flush the ARM caches and disable caching. Only
+ *			 needed when MPU will lose context.
+ * WFI_FLAG_SELF_REFRESH: Let EMIF place DDR memory into self-refresh and
+ *			  disable EMIF.
+ * WFI_FLAG_SAVE_EMIF: Save context of all EMIF registers and restore in
+ *		       resume path. Only needed if PER domain loses context
+ *		       and must also have WFI_FLAG_SELF_REFRESH set.
+ * WFI_FLAG_WAKE_M3: Disable MPU clock or clockdomain to cause wkup_m3 to
+ *		     execute when WFI instruction executes.
+ * WFI_FLAG_RTC_ONLY: Configure the RTC to enter RTC+DDR mode.
+ */
+#define WFI_FLAG_FLUSH_CACHE		BIT(0)
+#define WFI_FLAG_SELF_REFRESH		BIT(1)
+#define WFI_FLAG_SAVE_EMIF		BIT(2)
+#define WFI_FLAG_WAKE_M3		BIT(3)
+#define WFI_FLAG_RTC_ONLY		BIT(4)
+
 #ifndef __ASSEMBLER__
 struct am33xx_pm_sram_addr {
 	void (*do_wfi)(void);
@@ -23,7 +46,8 @@ struct am33xx_pm_sram_addr {
 
 struct am33xx_pm_platform_data {
 	int	(*init)(void);
-	int	(*soc_suspend)(unsigned int state, int (*fn)(unsigned long));
+	int	(*soc_suspend)(unsigned int state, int (*fn)(unsigned long),
+			       unsigned long args);
 	struct  am33xx_pm_sram_addr *(*get_sram_addrs)(void);
 };
 
-- 
cgit v1.2.3


From 8c5a916f4c8815196cc8a86b9582ca89422aac25 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Mon, 9 Jul 2018 13:03:17 +0530
Subject: ARM: OMAP2+: sleep33/43xx: Add RTC-Mode support

Add support for RTC mode to low level suspend code. This includes
providing the rtc base address for the assembly code to configuring the
PMIC_PWR_EN line late in suspend to enter RTC+DDR mode.

Note: This patch also fold in left out space parameter for
am33xx_emif_sram_table and am43xx_emif_sram_table

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/pm-asm-offsets.c |  2 ++
 arch/arm/mach-omap2/pm33xx-core.c    | 10 +++++++
 arch/arm/mach-omap2/sleep33xx.S      |  5 ++--
 arch/arm/mach-omap2/sleep43xx.S      | 54 ++++++++++++++++++++++++++++++++++--
 drivers/soc/ti/pm33xx.c              |  1 +
 include/linux/platform_data/pm33xx.h |  3 ++
 6 files changed, 71 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/pm-asm-offsets.c b/arch/arm/mach-omap2/pm-asm-offsets.c
index b9846b19e5e2..d8ae8a85b14b 100644
--- a/arch/arm/mach-omap2/pm-asm-offsets.c
+++ b/arch/arm/mach-omap2/pm-asm-offsets.c
@@ -27,6 +27,8 @@ int main(void)
 	       offsetof(struct am33xx_pm_ro_sram_data, amx3_pm_sram_data_virt));
 	DEFINE(AMX3_PM_RO_SRAM_DATA_PHYS_OFFSET,
 	       offsetof(struct am33xx_pm_ro_sram_data, amx3_pm_sram_data_phys));
+	DEFINE(AMX3_PM_RTC_BASE_VIRT_OFFSET,
+	       offsetof(struct am33xx_pm_ro_sram_data, rtc_base_virt));
 	DEFINE(AMX3_PM_RO_SRAM_DATA_SIZE,
 	       sizeof(struct am33xx_pm_ro_sram_data));
 
diff --git a/arch/arm/mach-omap2/pm33xx-core.c b/arch/arm/mach-omap2/pm33xx-core.c
index e363b9717fa5..f4971e4a86b2 100644
--- a/arch/arm/mach-omap2/pm33xx-core.c
+++ b/arch/arm/mach-omap2/pm33xx-core.c
@@ -26,6 +26,7 @@
 static struct powerdomain *cefuse_pwrdm, *gfx_pwrdm, *per_pwrdm, *mpu_pwrdm;
 static struct clockdomain *gfx_l4ls_clkdm;
 static void __iomem *scu_base;
+static struct omap_hwmod *rtc_oh;
 
 static int __init am43xx_map_scu(void)
 {
@@ -153,16 +154,25 @@ static struct am33xx_pm_sram_addr *amx3_get_sram_addrs(void)
 		return NULL;
 }
 
+void __iomem *am43xx_get_rtc_base_addr(void)
+{
+	rtc_oh = omap_hwmod_lookup("rtc");
+
+	return omap_hwmod_get_mpu_rt_va(rtc_oh);
+}
+
 static struct am33xx_pm_platform_data am33xx_ops = {
 	.init = am33xx_suspend_init,
 	.soc_suspend = am33xx_suspend,
 	.get_sram_addrs = amx3_get_sram_addrs,
+	.get_rtc_base_addr = am43xx_get_rtc_base_addr,
 };
 
 static struct am33xx_pm_platform_data am43xx_ops = {
 	.init = am43xx_suspend_init,
 	.soc_suspend = am43xx_suspend,
 	.get_sram_addrs = amx3_get_sram_addrs,
+	.get_rtc_base_addr = am43xx_get_rtc_base_addr,
 };
 
 static struct am33xx_pm_platform_data *am33xx_pm_get_pdata(void)
diff --git a/arch/arm/mach-omap2/sleep33xx.S b/arch/arm/mach-omap2/sleep33xx.S
index 8d0d53b671f9..47a816468cdb 100644
--- a/arch/arm/mach-omap2/sleep33xx.S
+++ b/arch/arm/mach-omap2/sleep33xx.S
@@ -228,8 +228,6 @@ ENDPROC(am33xx_resume_from_deep_sleep)
  * Local variables
  */
 	.align
-resume_addr:
-	.word	cpu_resume - PAGE_OFFSET + 0x80000000
 kernel_flush:
 	.word   v7_flush_dcache_all
 virt_mpu_clkctrl:
@@ -252,6 +250,9 @@ ENTRY(am33xx_pm_sram)
 	.word am33xx_emif_sram_table
 	.word am33xx_pm_ro_sram_data
 
+resume_addr:
+.word  cpu_resume - PAGE_OFFSET + 0x80000000
+
 .align 3
 ENTRY(am33xx_pm_ro_sram_data)
 	.space AMX3_PM_RO_SRAM_DATA_SIZE
diff --git a/arch/arm/mach-omap2/sleep43xx.S b/arch/arm/mach-omap2/sleep43xx.S
index cd7e95f48a73..5b9343b58fc7 100644
--- a/arch/arm/mach-omap2/sleep43xx.S
+++ b/arch/arm/mach-omap2/sleep43xx.S
@@ -48,6 +48,13 @@
 					AM43XX_CM_PER_EMIF_CLKCTRL_OFFSET)
 #define AM43XX_PRM_EMIF_CTRL_OFFSET			0x0030
 
+#define RTC_SECONDS_REG					0x0
+#define RTC_PMIC_REG					0x98
+#define RTC_PMIC_POWER_EN				BIT(16)
+#define RTC_PMIC_EXT_WAKEUP_STS				BIT(12)
+#define RTC_PMIC_EXT_WAKEUP_POL				BIT(4)
+#define RTC_PMIC_EXT_WAKEUP_EN				BIT(0)
+
 	.arm
 	.align 3
 
@@ -147,6 +154,20 @@ sync:
 	ldr	r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET]
 
 cache_skip_flush:
+	/*
+	 * If we are trying to enter RTC+DDR mode we must perform
+	 * a read from the rtc address space to ensure translation
+	 * presence in the TLB to avoid page table walk after DDR
+	 * is unavailable.
+	 */
+	tst	r4, #WFI_FLAG_RTC_ONLY
+	beq	skip_rtc_va_refresh
+
+	adr	r3, am43xx_pm_ro_sram_data
+	ldr	r1, [r3, #AMX3_PM_RTC_BASE_VIRT_OFFSET]
+	ldr	r0, [r1]
+
+skip_rtc_va_refresh:
 	/* Check if we want self refresh */
 	tst	r4, #WFI_FLAG_SELF_REFRESH
 	beq	emif_skip_enter_sr
@@ -182,6 +203,34 @@ wait_emif_disable:
 	bne	wait_emif_disable
 
 emif_skip_disable:
+	tst	r4, #WFI_FLAG_RTC_ONLY
+	beq	skip_rtc_only
+
+	adr	r3, am43xx_pm_ro_sram_data
+	ldr	r1, [r3, #AMX3_PM_RTC_BASE_VIRT_OFFSET]
+
+	ldr	r0, [r1, #RTC_PMIC_REG]
+	orr	r0, r0, #RTC_PMIC_POWER_EN
+	orr	r0, r0, #RTC_PMIC_EXT_WAKEUP_STS
+	orr	r0, r0, #RTC_PMIC_EXT_WAKEUP_EN
+	orr	r0, r0, #RTC_PMIC_EXT_WAKEUP_POL
+	str	r0, [r1, #RTC_PMIC_REG]
+	ldr	r0, [r1, #RTC_PMIC_REG]
+	/* Wait for 2 seconds to lose power */
+	mov	r3, #2
+	ldr	r2, [r1, #RTC_SECONDS_REG]
+rtc_loop:
+	ldr	r0, [r1, #RTC_SECONDS_REG]
+	cmp	r0, r2
+	beq	rtc_loop
+	mov	r2, r0
+	subs	r3, r3, #1
+	bne	rtc_loop
+
+	b	re_enable_emif
+
+skip_rtc_only:
+
 	tst	r4, #WFI_FLAG_WAKE_M3
 	beq	wkup_m3_skip
 
@@ -247,6 +296,7 @@ wkup_m3_skip:
 	mov	r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE
 	str	r2, [r1]
 
+re_enable_emif:
 	/* Re-enable EMIF */
 	ldr	r1, am43xx_virt_emif_clkctrl
 	mov	r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE
@@ -381,8 +431,6 @@ ENDPROC(am43xx_resume_from_deep_sleep)
  * Local variables
  */
 	.align
-resume_addr:
-	.word	cpu_resume - PAGE_OFFSET + 0x80000000
 kernel_flush:
 	.word   v7_flush_dcache_all
 ddr_start:
@@ -429,6 +477,8 @@ ENTRY(am43xx_pm_sram)
 	.word am43xx_emif_sram_table
 	.word am43xx_pm_ro_sram_data
 
+resume_addr:
+	.word   cpu_resume - PAGE_OFFSET + 0x80000000
 .align 3
 
 ENTRY(am43xx_pm_ro_sram_data)
diff --git a/drivers/soc/ti/pm33xx.c b/drivers/soc/ti/pm33xx.c
index 0011c8f5ecd7..d0dab323651f 100644
--- a/drivers/soc/ti/pm33xx.c
+++ b/drivers/soc/ti/pm33xx.c
@@ -229,6 +229,7 @@ static int am33xx_push_sram_idle(void)
 	ro_sram_data.amx3_pm_sram_data_virt = ocmcram_location_data;
 	ro_sram_data.amx3_pm_sram_data_phys =
 		gen_pool_virt_to_phys(sram_pool_data, ocmcram_location_data);
+	ro_sram_data.rtc_base_virt = pm_ops->get_rtc_base_addr();
 
 	/* Save physical address to calculate resume offset during pm init */
 	am33xx_do_wfi_sram_phys = gen_pool_virt_to_phys(sram_pool,
diff --git a/include/linux/platform_data/pm33xx.h b/include/linux/platform_data/pm33xx.h
index d231265c135c..fbf5ed73c7cc 100644
--- a/include/linux/platform_data/pm33xx.h
+++ b/include/linux/platform_data/pm33xx.h
@@ -42,6 +42,7 @@ struct am33xx_pm_sram_addr {
 	unsigned long *resume_offset;
 	unsigned long *emif_sram_table;
 	unsigned long *ro_sram_data;
+	unsigned long resume_address;
 };
 
 struct am33xx_pm_platform_data {
@@ -49,6 +50,7 @@ struct am33xx_pm_platform_data {
 	int	(*soc_suspend)(unsigned int state, int (*fn)(unsigned long),
 			       unsigned long args);
 	struct  am33xx_pm_sram_addr *(*get_sram_addrs)(void);
+	void __iomem *(*get_rtc_base_addr)(void);
 };
 
 struct am33xx_pm_sram_data {
@@ -60,6 +62,7 @@ struct am33xx_pm_sram_data {
 struct am33xx_pm_ro_sram_data {
 	u32 amx3_pm_sram_data_virt;
 	u32 amx3_pm_sram_data_phys;
+	void __iomem *rtc_base_virt;
 } __packed __aligned(8);
 
 #endif /* __ASSEMBLER__ */
-- 
cgit v1.2.3


From 7f69ae7fadd9dbebe563bfc28b993a2bc9c8acf9 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 28 Jun 2018 11:57:48 +0200
Subject: ARM: davinci: unduplicate aemif support

All users now register platform devices using the ti-aemif driver.
Remove the handcrafted aemif API.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/Makefile                  |   2 +-
 arch/arm/mach-davinci/aemif.c                   | 218 ------------------------
 include/linux/platform_data/mtd-davinci-aemif.h |   1 -
 3 files changed, 1 insertion(+), 220 deletions(-)
 delete mode 100644 arch/arm/mach-davinci/aemif.c

(limited to 'include/linux')

diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index 8725d8bea567..93d271b4d84b 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -6,7 +6,7 @@
 
 # Common objects
 obj-y 					:= time.o serial.o usb.o \
-					   common.o sram.o aemif.o
+					   common.o sram.o
 
 obj-$(CONFIG_DAVINCI_MUX)		+= mux.o
 
diff --git a/arch/arm/mach-davinci/aemif.c b/arch/arm/mach-davinci/aemif.c
deleted file mode 100644
index e4ab3f3a2a1f..000000000000
--- a/arch/arm/mach-davinci/aemif.c
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * AEMIF support for DaVinci SoCs
- *
- * Copyright (C) 2010 Texas Instruments Incorporated. http://www.ti.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/module.h>
-#include <linux/time.h>
-
-#include <linux/platform_data/mtd-davinci-aemif.h>
-#include <linux/platform_data/mtd-davinci.h>
-
-/* Timing value configuration */
-
-#define TA(x)		((x) << 2)
-#define RHOLD(x)	((x) << 4)
-#define RSTROBE(x)	((x) << 7)
-#define RSETUP(x)	((x) << 13)
-#define WHOLD(x)	((x) << 17)
-#define WSTROBE(x)	((x) << 20)
-#define WSETUP(x)	((x) << 26)
-
-#define TA_MAX		0x3
-#define RHOLD_MAX	0x7
-#define RSTROBE_MAX	0x3f
-#define RSETUP_MAX	0xf
-#define WHOLD_MAX	0x7
-#define WSTROBE_MAX	0x3f
-#define WSETUP_MAX	0xf
-
-#define TIMING_MASK	(TA(TA_MAX) | \
-				RHOLD(RHOLD_MAX) | \
-				RSTROBE(RSTROBE_MAX) |	\
-				RSETUP(RSETUP_MAX) | \
-				WHOLD(WHOLD_MAX) | \
-				WSTROBE(WSTROBE_MAX) | \
-				WSETUP(WSETUP_MAX))
-
-static inline unsigned int davinci_aemif_readl(void __iomem *base, int offset)
-{
-	return readl_relaxed(base + offset);
-}
-
-static inline void davinci_aemif_writel(void __iomem *base,
-					int offset, unsigned long value)
-{
-	writel_relaxed(value, base + offset);
-}
-
-/*
- * aemif_calc_rate - calculate timing data.
- * @wanted: The cycle time needed in nanoseconds.
- * @clk: The input clock rate in kHz.
- * @max: The maximum divider value that can be programmed.
- *
- * On success, returns the calculated timing value minus 1 for easy
- * programming into AEMIF timing registers, else negative errno.
- */
-static int aemif_calc_rate(int wanted, unsigned long clk, int max)
-{
-	int result;
-
-	result = DIV_ROUND_UP((wanted * clk), NSEC_PER_MSEC) - 1;
-
-	pr_debug("%s: result %d from %ld, %d\n", __func__, result, clk, wanted);
-
-	/* It is generally OK to have a more relaxed timing than requested... */
-	if (result < 0)
-		result = 0;
-
-	/* ... But configuring tighter timings is not an option. */
-	else if (result > max)
-		result = -EINVAL;
-
-	return result;
-}
-
-/**
- * davinci_aemif_setup_timing - setup timing values for a given AEMIF interface
- * @t: timing values to be progammed
- * @base: The virtual base address of the AEMIF interface
- * @cs: chip-select to program the timing values for
- * @clkrate: the AEMIF clkrate
- *
- * This function programs the given timing values (in real clock) into the
- * AEMIF registers taking the AEMIF clock into account.
- *
- * This function does not use any locking while programming the AEMIF
- * because it is expected that there is only one user of a given
- * chip-select.
- *
- * Returns 0 on success, else negative errno.
- */
-static int davinci_aemif_setup_timing(struct davinci_aemif_timing *t,
-					void __iomem *base, unsigned cs,
-					unsigned long clkrate)
-{
-	unsigned set, val;
-	int ta, rhold, rstrobe, rsetup, whold, wstrobe, wsetup;
-	unsigned offset = A1CR_OFFSET + cs * 4;
-
-	if (!t)
-		return 0;	/* Nothing to do */
-
-	clkrate /= 1000;	/* turn clock into kHz for ease of use */
-
-	ta	= aemif_calc_rate(t->ta, clkrate, TA_MAX);
-	rhold	= aemif_calc_rate(t->rhold, clkrate, RHOLD_MAX);
-	rstrobe	= aemif_calc_rate(t->rstrobe, clkrate, RSTROBE_MAX);
-	rsetup	= aemif_calc_rate(t->rsetup, clkrate, RSETUP_MAX);
-	whold	= aemif_calc_rate(t->whold, clkrate, WHOLD_MAX);
-	wstrobe	= aemif_calc_rate(t->wstrobe, clkrate, WSTROBE_MAX);
-	wsetup	= aemif_calc_rate(t->wsetup, clkrate, WSETUP_MAX);
-
-	if (ta < 0 || rhold < 0 || rstrobe < 0 || rsetup < 0 ||
-			whold < 0 || wstrobe < 0 || wsetup < 0) {
-		pr_err("%s: cannot get suitable timings\n", __func__);
-		return -EINVAL;
-	}
-
-	set = TA(ta) | RHOLD(rhold) | RSTROBE(rstrobe) | RSETUP(rsetup) |
-		WHOLD(whold) | WSTROBE(wstrobe) | WSETUP(wsetup);
-
-	val = __raw_readl(base + offset);
-	val &= ~TIMING_MASK;
-	val |= set;
-	__raw_writel(val, base + offset);
-
-	return 0;
-}
-
-/**
- * davinci_aemif_setup - setup AEMIF interface by davinci_nand_pdata
- * @pdev - link to platform device to setup settings for
- *
- * This function does not use any locking while programming the AEMIF
- * because it is expected that there is only one user of a given
- * chip-select.
- *
- * Returns 0 on success, else negative errno.
- */
-int davinci_aemif_setup(struct platform_device *pdev)
-{
-	struct davinci_nand_pdata *pdata = dev_get_platdata(&pdev->dev);
-	uint32_t val;
-	unsigned long clkrate;
-	struct resource	*res;
-	void __iomem *base;
-	struct clk *clk;
-	int ret = 0;
-
-	clk = clk_get(&pdev->dev, "aemif");
-	if (IS_ERR(clk)) {
-		ret = PTR_ERR(clk);
-		dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret);
-		return ret;
-	}
-
-	ret = clk_prepare_enable(clk);
-	if (ret < 0) {
-		dev_dbg(&pdev->dev, "unable to enable AEMIF clock, err %d\n",
-			ret);
-		goto err_put;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!res) {
-		dev_err(&pdev->dev, "cannot get IORESOURCE_MEM\n");
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	base = ioremap(res->start, resource_size(res));
-	if (!base) {
-		dev_err(&pdev->dev, "ioremap failed for resource %pR\n", res);
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	/*
-	 * Setup Async configuration register in case we did not boot
-	 * from NAND and so bootloader did not bother to set it up.
-	 */
-	val = davinci_aemif_readl(base, A1CR_OFFSET + pdata->core_chipsel * 4);
-	/*
-	 * Extended Wait is not valid and Select Strobe mode is not
-	 * used
-	 */
-	val &= ~(ACR_ASIZE_MASK | ACR_EW_MASK | ACR_SS_MASK);
-	if (pdata->options & NAND_BUSWIDTH_16)
-		val |= 0x1;
-
-	davinci_aemif_writel(base, A1CR_OFFSET + pdata->core_chipsel * 4, val);
-
-	clkrate = clk_get_rate(clk);
-
-	if (pdata->timing)
-		ret = davinci_aemif_setup_timing(pdata->timing, base,
-						 pdata->core_chipsel, clkrate);
-
-	if (ret < 0)
-		dev_dbg(&pdev->dev, "NAND timing values setup fail\n");
-
-	iounmap(base);
-err:
-	clk_disable_unprepare(clk);
-err_put:
-	clk_put(clk);
-	return ret;
-}
diff --git a/include/linux/platform_data/mtd-davinci-aemif.h b/include/linux/platform_data/mtd-davinci-aemif.h
index 97948ac2bb9b..a403dd51dacc 100644
--- a/include/linux/platform_data/mtd-davinci-aemif.h
+++ b/include/linux/platform_data/mtd-davinci-aemif.h
@@ -33,5 +33,4 @@ struct davinci_aemif_timing {
 	u8	ta;
 };
 
-int davinci_aemif_setup(struct platform_device *pdev);
 #endif
-- 
cgit v1.2.3


From 25b4e70dcce92168eab4d8113817bb4dd130ebd2 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 9 Jul 2018 09:41:48 -0600
Subject: driver core: allow stopping deferred probe after init

Deferred probe will currently wait forever on dependent devices to probe,
but sometimes a driver will never exist. It's also not always critical for
a driver to exist. Platforms can rely on default configuration from the
bootloader or reset defaults for things such as pinctrl and power domains.
This is often the case with initial platform support until various drivers
get enabled. There's at least 2 scenarios where deferred probe can render
a platform broken. Both involve using a DT which has more devices and
dependencies than the kernel supports. The 1st case is a driver may be
disabled in the kernel config. The 2nd case is the kernel version may
simply not have the dependent driver. This can happen if using a newer DT
(provided by firmware perhaps) with a stable kernel version. Deferred
probe issues can be difficult to debug especially if the console has
dependencies or userspace fails to boot to a shell.

There are also cases like IOMMUs where only built-in drivers are
supported, so deferring probe after initcalls is not needed. The IOMMU
subsystem implemented its own mechanism to handle this using OF_DECLARE
linker sections.

This commit adds makes ending deferred probe conditional on initcalls
being completed or a debug timeout. Subsystems or drivers may opt-in by
calling driver_deferred_probe_check_init_done() instead of
unconditionally returning -EPROBE_DEFER. They may use additional
information from DT or kernel's config to decide whether to continue to
defer probe or not.

The timeout mechanism is intended for debug purposes and WARNs loudly.
The remaining deferred probe pending list will also be dumped after the
timeout. Not that this timeout won't work for the console which needs
to be enabled before userspace starts. However, if the console's
dependencies are resolved, then the kernel log will be printed (as
opposed to no output).

Cc: Alexander Graf <agraf@suse.de>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  9 ++++
 drivers/base/dd.c                               | 59 +++++++++++++++++++++++++
 include/linux/device.h                          |  2 +
 3 files changed, 70 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index efc7aa7a0670..e83ef4648ea4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -804,6 +804,15 @@
 			Defaults to the default architecture's huge page size
 			if not specified.
 
+	deferred_probe_timeout=
+			[KNL] Debugging option to set a timeout in seconds for
+			deferred probe to give up waiting on dependencies to
+			probe. Only specific dependencies (subsystems or
+			drivers) that have opted in will be ignored. A timeout of 0
+			will timeout at the end of initcalls. This option will also
+			dump out devices still on the deferred probe list after
+			retrying.
+
 	dhash_entries=	[KNL]
 			Set number of hash buckets for dentry cache.
 
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index e85705e84407..fb62f1be40d3 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -55,6 +55,7 @@ static LIST_HEAD(deferred_probe_pending_list);
 static LIST_HEAD(deferred_probe_active_list);
 static atomic_t deferred_trigger_count = ATOMIC_INIT(0);
 static struct dentry *deferred_devices;
+static bool initcalls_done;
 
 /*
  * In some cases, like suspend to RAM or hibernation, It might be reasonable
@@ -219,6 +220,51 @@ static int deferred_devs_show(struct seq_file *s, void *data)
 }
 DEFINE_SHOW_ATTRIBUTE(deferred_devs);
 
+static int deferred_probe_timeout = -1;
+static int __init deferred_probe_timeout_setup(char *str)
+{
+	deferred_probe_timeout = simple_strtol(str, NULL, 10);
+	return 1;
+}
+__setup("deferred_probe_timeout=", deferred_probe_timeout_setup);
+
+/**
+ * driver_deferred_probe_check_state() - Check deferred probe state
+ * @dev: device to check
+ *
+ * Returns -ENODEV if init is done and all built-in drivers have had a chance
+ * to probe (i.e. initcalls are done), -ETIMEDOUT if deferred probe debug
+ * timeout has expired, or -EPROBE_DEFER if none of those conditions are met.
+ *
+ * Drivers or subsystems can opt-in to calling this function instead of directly
+ * returning -EPROBE_DEFER.
+ */
+int driver_deferred_probe_check_state(struct device *dev)
+{
+	if (initcalls_done) {
+		if (!deferred_probe_timeout) {
+			dev_WARN(dev, "deferred probe timeout, ignoring dependency");
+			return -ETIMEDOUT;
+		}
+		dev_warn(dev, "ignoring dependency for device, assuming no driver");
+		return -ENODEV;
+	}
+	return -EPROBE_DEFER;
+}
+
+static void deferred_probe_timeout_work_func(struct work_struct *work)
+{
+	struct device_private *private, *p;
+
+	deferred_probe_timeout = 0;
+	driver_deferred_probe_trigger();
+	flush_work(&deferred_probe_work);
+
+	list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe)
+		dev_info(private->device, "deferred probe pending");
+}
+static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func);
+
 /**
  * deferred_probe_initcall() - Enable probing of deferred devices
  *
@@ -235,6 +281,19 @@ static int deferred_probe_initcall(void)
 	driver_deferred_probe_trigger();
 	/* Sort as many dependencies as possible before exiting initcalls */
 	flush_work(&deferred_probe_work);
+	initcalls_done = true;
+
+	/*
+	 * Trigger deferred probe again, this time we won't defer anything
+	 * that is optional
+	 */
+	driver_deferred_probe_trigger();
+	flush_work(&deferred_probe_work);
+
+	if (deferred_probe_timeout > 0) {
+		schedule_delayed_work(&deferred_probe_timeout_work,
+			deferred_probe_timeout * HZ);
+	}
 	return 0;
 }
 late_initcall(deferred_probe_initcall);
diff --git a/include/linux/device.h b/include/linux/device.h
index 575c5a35ece5..d2acc78d279b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -339,6 +339,8 @@ struct device *driver_find_device(struct device_driver *drv,
 				  struct device *start, void *data,
 				  int (*match)(struct device *dev, void *data));
 
+int driver_deferred_probe_check_state(struct device *dev);
+
 /**
  * struct subsys_interface - interfaces to device functions
  * @name:       name of the device function
-- 
cgit v1.2.3


From ac6bbf0cdf4206c517ac9789814c23e372ebce4d Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 9 Jul 2018 09:41:52 -0600
Subject: iommu: Remove IOMMU_OF_DECLARE

Now that we use the driver core to stop deferred probe for missing
drivers, IOMMU_OF_DECLARE can be removed.

This is slightly less optimal than having a list of built-in drivers in
that we'll now defer probe twice before giving up. This shouldn't have a
significant impact on boot times as past discussions about deferred
probe have given no evidence of deferred probe having a substantial
impact.

Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Kukjin Kim <kgene@kernel.org>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Heiko Stuebner <heiko@sntech.de>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: iommu@lists.linux-foundation.org
Cc: linux-samsung-soc@vger.kernel.org
Cc: linux-arm-msm@vger.kernel.org
Cc: linux-rockchip@lists.infradead.org
Cc: devicetree@vger.kernel.org
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/arm-smmu-v3.c       |  2 --
 drivers/iommu/arm-smmu.c          |  7 -------
 drivers/iommu/exynos-iommu.c      |  2 --
 drivers/iommu/ipmmu-vmsa.c        |  3 ---
 drivers/iommu/msm_iommu.c         |  2 --
 drivers/iommu/of_iommu.c          | 19 +------------------
 drivers/iommu/qcom_iommu.c        |  2 --
 drivers/iommu/rockchip-iommu.c    |  2 --
 include/asm-generic/vmlinux.lds.h |  2 --
 include/linux/of_iommu.h          |  4 ----
 10 files changed, 1 insertion(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 1d647104bccc..22bdabd3d8e0 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2915,8 +2915,6 @@ static struct platform_driver arm_smmu_driver = {
 };
 module_platform_driver(arm_smmu_driver);
 
-IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3");
-
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f7a96bcf94a6..c73cfce1ccc0 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -2211,13 +2211,6 @@ static struct platform_driver arm_smmu_driver = {
 };
 module_platform_driver(arm_smmu_driver);
 
-IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1");
-IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2");
-IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400");
-IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401");
-IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500");
-IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2");
-
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 85879cfec52f..b128cb4372d3 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1390,5 +1390,3 @@ err_reg_driver:
 	return ret;
 }
 core_initcall(exynos_iommu_init);
-
-IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu");
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 40ae6e87cb88..f026aa16d5f1 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1108,9 +1108,6 @@ static void __exit ipmmu_exit(void)
 subsys_initcall(ipmmu_init);
 module_exit(ipmmu_exit);
 
-IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa");
-IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795");
-
 MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
 MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 0d3350463a3f..27377742600d 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -877,7 +877,5 @@ static void __exit msm_iommu_driver_exit(void)
 subsys_initcall(msm_iommu_driver_init);
 module_exit(msm_iommu_driver_exit);
 
-IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu");
-
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 78ddf47dd67a..f7787e757244 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -27,9 +27,6 @@
 
 #define NO_IOMMU	1
 
-static const struct of_device_id __iommu_of_table_sentinel
-	__used __section(__iommu_of_table_end);
-
 /**
  * of_get_dma_window - Parse *dma-window property and returns 0 if found.
  *
@@ -98,19 +95,6 @@ int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
 }
 EXPORT_SYMBOL_GPL(of_get_dma_window);
 
-static bool of_iommu_driver_present(struct device_node *np)
-{
-	/*
-	 * If the IOMMU still isn't ready by the time we reach init, assume
-	 * it never will be. We don't want to defer indefinitely, nor attempt
-	 * to dereference __iommu_of_table after it's been freed.
-	 */
-	if (system_state >= SYSTEM_RUNNING)
-		return false;
-
-	return of_match_node(&__iommu_of_table, np);
-}
-
 static int of_iommu_xlate(struct device *dev,
 			  struct of_phandle_args *iommu_spec)
 {
@@ -120,8 +104,7 @@ static int of_iommu_xlate(struct device *dev,
 
 	ops = iommu_ops_from_fwnode(fwnode);
 	if ((ops && !ops->of_xlate) ||
-	    !of_device_is_available(iommu_spec->np) ||
-	    (!ops && !of_iommu_driver_present(iommu_spec->np)))
+	    !of_device_is_available(iommu_spec->np))
 		return NO_IOMMU;
 
 	err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops);
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index fe88a4880d3a..b48aee82d14b 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -945,7 +945,5 @@ static void __exit qcom_iommu_exit(void)
 module_init(qcom_iommu_init);
 module_exit(qcom_iommu_exit);
 
-IOMMU_OF_DECLARE(qcom_iommu_dev, "qcom,msm-iommu-v1");
-
 MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 054cd2c8e9c8..de8d3bf91b23 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1284,8 +1284,6 @@ static int __init rk_iommu_init(void)
 }
 subsys_initcall(rk_iommu_init);
 
-IOMMU_OF_DECLARE(rk_iommu_of, "rockchip,iommu");
-
 MODULE_DESCRIPTION("IOMMU API for Rockchip");
 MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
 MODULE_ALIAS("platform:rockchip-iommu");
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index e373e2e10f6a..f173b5f30dbe 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -218,7 +218,6 @@
 #define TIMER_OF_TABLES()	OF_TABLE(CONFIG_TIMER_OF, timer)
 #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip)
 #define CLK_OF_TABLES()		OF_TABLE(CONFIG_COMMON_CLK, clk)
-#define IOMMU_OF_TABLES()	OF_TABLE(CONFIG_OF_IOMMU, iommu)
 #define RESERVEDMEM_OF_TABLES()	OF_TABLE(CONFIG_OF_RESERVED_MEM, reservedmem)
 #define CPU_METHOD_OF_TABLES()	OF_TABLE(CONFIG_SMP, cpu_method)
 #define CPUIDLE_METHOD_OF_TABLES() OF_TABLE(CONFIG_CPU_IDLE, cpuidle_method)
@@ -601,7 +600,6 @@
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\
 	TIMER_OF_TABLES()						\
-	IOMMU_OF_TABLES()						\
 	CPU_METHOD_OF_TABLES()						\
 	CPUIDLE_METHOD_OF_TABLES()					\
 	KERNEL_DTB()							\
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 4fa654e4b5a9..f3d40dd7bb66 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -32,8 +32,4 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 #endif	/* CONFIG_OF_IOMMU */
 
-extern struct of_device_id __iommu_of_table;
-
-#define IOMMU_OF_DECLARE(name, compat)	OF_DECLARE_1(iommu, name, compat, NULL)
-
 #endif /* __OF_IOMMU_H */
-- 
cgit v1.2.3


From aef92a8bed25d03b8f03ce499a56e8e8e5e2c05e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 10 Jul 2018 13:42:10 +0200
Subject: watchdog/softlockup: Fix the SOFTLOCKUP_DETECTOR=n build

I got confused by all the various CONFIG options here about and
conflated CONFIG_LOCKUP_DETECTOR and CONFIG_SOFTLOCKUP_DETECTOR.

This results in a build failure for:

   CONFIG_LOCKUP_DETECTOR=y && CONFIG_SOFTLOCKUP_DETECTOR=n

As reported by Abdul.

Reported-and-tested-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-next <linux-next@vger.kernel.org>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: mpe <mpe@ellerman.id.au>
Cc: sachinp <sachinp@linux.vnet.ibm.com>
Cc: stephen Rothwell <sfr@canb.auug.org.au>
Fixes: 9cf57731b63e ("watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work")
Link: http://lkml.kernel.org/r/20180710114210.GI2476@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 80664bbeca43..08f9247e9827 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -33,15 +33,10 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif /* !CONFIG_SMP */
 
-extern int lockup_detector_online_cpu(unsigned int cpu);
-extern int lockup_detector_offline_cpu(unsigned int cpu);
-
 #else /* CONFIG_LOCKUP_DETECTOR */
 static inline void lockup_detector_init(void) { }
 static inline void lockup_detector_soft_poweroff(void) { }
 static inline void lockup_detector_cleanup(void) { }
-#define lockup_detector_online_cpu	NULL
-#define lockup_detector_offline_cpu	NULL
 #endif /* !CONFIG_LOCKUP_DETECTOR */
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
@@ -50,12 +45,18 @@ extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern unsigned int  softlockup_panic;
-#else
+
+extern int lockup_detector_online_cpu(unsigned int cpu);
+extern int lockup_detector_offline_cpu(unsigned int cpu);
+#else /* CONFIG_SOFTLOCKUP_DETECTOR */
 static inline void touch_softlockup_watchdog_sched(void) { }
 static inline void touch_softlockup_watchdog(void) { }
 static inline void touch_softlockup_watchdog_sync(void) { }
 static inline void touch_all_softlockup_watchdogs(void) { }
-#endif
+
+#define lockup_detector_online_cpu	NULL
+#define lockup_detector_offline_cpu	NULL
+#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
 
 #ifdef CONFIG_DETECT_HUNG_TASK
 void reset_hung_task_detector(void);
-- 
cgit v1.2.3


From 8d3e994241e6bcc7ead2b918c4f15b7683afa90a Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 10 Jul 2018 09:57:58 +0100
Subject: arm_pmu: Clean up maximum period handling

Each PMU defines their max_period of the counter as the maximum
value that can be counted. Since all the PMU backends support
32bit counters by default, let us remove the redundant field.

No functional changes.

Cc: Will Deacon <will.deacon@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event_v6.c     |  2 --
 arch/arm/kernel/perf_event_v7.c     |  1 -
 arch/arm/kernel/perf_event_xscale.c |  2 --
 arch/arm64/kernel/perf_event.c      |  1 -
 drivers/perf/arm_pmu.c              | 16 ++++++++++++----
 include/linux/perf/arm_pmu.h        |  1 -
 6 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index be42c4f66a40..f64a6bfebcec 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -495,7 +495,6 @@ static void armv6pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop		= armv6pmu_stop;
 	cpu_pmu->map_event	= armv6_map_event;
 	cpu_pmu->num_events	= 3;
-	cpu_pmu->max_period	= (1LLU << 32) - 1;
 }
 
 static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
@@ -546,7 +545,6 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop		= armv6pmu_stop;
 	cpu_pmu->map_event	= armv6mpcore_map_event;
 	cpu_pmu->num_events	= 3;
-	cpu_pmu->max_period	= (1LLU << 32) - 1;
 
 	return 0;
 }
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 5a5116794440..2cf1ca2925c8 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1170,7 +1170,6 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->start		= armv7pmu_start;
 	cpu_pmu->stop		= armv7pmu_stop;
 	cpu_pmu->reset		= armv7pmu_reset;
-	cpu_pmu->max_period	= (1LLU << 32) - 1;
 };
 
 static void armv7_read_num_pmnc_events(void *info)
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 88d1a76f5367..c4f029458b52 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -374,7 +374,6 @@ static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop		= xscale1pmu_stop;
 	cpu_pmu->map_event	= xscale_map_event;
 	cpu_pmu->num_events	= 3;
-	cpu_pmu->max_period	= (1LLU << 32) - 1;
 
 	return 0;
 }
@@ -743,7 +742,6 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop		= xscale2pmu_stop;
 	cpu_pmu->map_event	= xscale_map_event;
 	cpu_pmu->num_events	= 5;
-	cpu_pmu->max_period	= (1LLU << 32) - 1;
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 33147aacdafd..678ecffd3724 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -960,7 +960,6 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->start			= armv8pmu_start,
 	cpu_pmu->stop			= armv8pmu_stop,
 	cpu_pmu->reset			= armv8pmu_reset,
-	cpu_pmu->max_period		= (1LLU << 32) - 1,
 	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
 
 	return 0;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index a6347d487635..6ddc00da5373 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -28,6 +28,11 @@
 static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
 static DEFINE_PER_CPU(int, cpu_irq);
 
+static inline u64 arm_pmu_max_period(void)
+{
+	return (1ULL << 32) - 1;
+}
+
 static int
 armpmu_map_cache_event(const unsigned (*cache_map)
 				      [PERF_COUNT_HW_CACHE_MAX]
@@ -114,8 +119,10 @@ int armpmu_event_set_period(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
+	u64 max_period;
 	int ret = 0;
 
+	max_period = arm_pmu_max_period();
 	if (unlikely(left <= -period)) {
 		left = period;
 		local64_set(&hwc->period_left, left);
@@ -136,8 +143,8 @@ int armpmu_event_set_period(struct perf_event *event)
 	 * effect we are reducing max_period to account for
 	 * interrupt latency (and we are being very conservative).
 	 */
-	if (left > (armpmu->max_period >> 1))
-		left = armpmu->max_period >> 1;
+	if (left > (max_period >> 1))
+		left = (max_period >> 1);
 
 	local64_set(&hwc->prev_count, (u64)-left);
 
@@ -153,6 +160,7 @@ u64 armpmu_event_update(struct perf_event *event)
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	u64 delta, prev_raw_count, new_raw_count;
+	u64 max_period = arm_pmu_max_period();
 
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
@@ -162,7 +170,7 @@ again:
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
-	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
+	delta = (new_raw_count - prev_raw_count) & max_period;
 
 	local64_add(delta, &event->count);
 	local64_sub(delta, &hwc->period_left);
@@ -402,7 +410,7 @@ __hw_perf_event_init(struct perf_event *event)
 		 * is far less likely to overtake the previous one unless
 		 * you have some serious IRQ latency issues.
 		 */
-		hwc->sample_period  = armpmu->max_period >> 1;
+		hwc->sample_period  = arm_pmu_max_period() >> 1;
 		hwc->last_period    = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
 	}
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index ad5444491975..12c30a22fc8d 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -94,7 +94,6 @@ struct arm_pmu {
 	void		(*reset)(void *);
 	int		(*map_event)(struct perf_event *event);
 	int		num_events;
-	u64		max_period;
 	bool		secure_access; /* 32-bit ARM only */
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
 	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
-- 
cgit v1.2.3


From 3a95200d3f89afd8b67f39d88d36cc7ec96ce385 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 10 Jul 2018 09:57:59 +0100
Subject: arm_pmu: Change API to support 64bit counter values

Convert the {read/write}_counter APIs to handle 64bit values
to enable supporting chained event counters. The backends still
use 32bit values and we pass them 32bit values only. So in effect
there are no functional changes.

Cc: Will Deacon <will.deacon@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event_v6.c     | 4 ++--
 arch/arm/kernel/perf_event_v7.c     | 4 ++--
 arch/arm/kernel/perf_event_xscale.c | 8 ++++----
 arch/arm64/kernel/perf_event.c      | 9 ++++-----
 include/linux/perf/arm_pmu.h        | 4 ++--
 5 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index f64a6bfebcec..0729f9841ef4 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -233,7 +233,7 @@ armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
 	return ret;
 }
 
-static inline u32 armv6pmu_read_counter(struct perf_event *event)
+static inline u64 armv6pmu_read_counter(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
@@ -251,7 +251,7 @@ static inline u32 armv6pmu_read_counter(struct perf_event *event)
 	return value;
 }
 
-static inline void armv6pmu_write_counter(struct perf_event *event, u32 value)
+static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 2cf1ca2925c8..973043dd6187 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -743,7 +743,7 @@ static inline void armv7_pmnc_select_counter(int idx)
 	isb();
 }
 
-static inline u32 armv7pmu_read_counter(struct perf_event *event)
+static inline u64 armv7pmu_read_counter(struct perf_event *event)
 {
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
@@ -763,7 +763,7 @@ static inline u32 armv7pmu_read_counter(struct perf_event *event)
 	return value;
 }
 
-static inline void armv7pmu_write_counter(struct perf_event *event, u32 value)
+static inline void armv7pmu_write_counter(struct perf_event *event, u64 value)
 {
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index c4f029458b52..942230fe0426 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -316,7 +316,7 @@ static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static inline u32 xscale1pmu_read_counter(struct perf_event *event)
+static inline u64 xscale1pmu_read_counter(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
@@ -337,7 +337,7 @@ static inline u32 xscale1pmu_read_counter(struct perf_event *event)
 	return val;
 }
 
-static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val)
+static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
@@ -678,7 +678,7 @@ static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static inline u32 xscale2pmu_read_counter(struct perf_event *event)
+static inline u64 xscale2pmu_read_counter(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
@@ -705,7 +705,7 @@ static inline u32 xscale2pmu_read_counter(struct perf_event *event)
 	return val;
 }
 
-static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val)
+static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 678ecffd3724..66a2ffdca6dd 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -512,7 +512,7 @@ static inline int armv8pmu_select_counter(int idx)
 	return idx;
 }
 
-static inline u32 armv8pmu_read_counter(struct perf_event *event)
+static inline u64 armv8pmu_read_counter(struct perf_event *event)
 {
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
@@ -530,7 +530,7 @@ static inline u32 armv8pmu_read_counter(struct perf_event *event)
 	return value;
 }
 
-static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
+static inline void armv8pmu_write_counter(struct perf_event *event, u64 value)
 {
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
@@ -545,9 +545,8 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
 		 * count using the lower 32bits and we want an interrupt when
 		 * it overflows.
 		 */
-		u64 value64 = 0xffffffff00000000ULL | value;
-
-		write_sysreg(value64, pmccntr_el0);
+		value |= 0xffffffff00000000ULL;
+		write_sysreg(value, pmccntr_el0);
 	} else if (armv8pmu_select_counter(idx) == idx)
 		write_sysreg(value, pmxevcntr_el0);
 }
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 12c30a22fc8d..f7126a21df30 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -87,8 +87,8 @@ struct arm_pmu {
 					 struct perf_event *event);
 	int		(*set_event_filter)(struct hw_perf_event *evt,
 					    struct perf_event_attr *attr);
-	u32		(*read_counter)(struct perf_event *event);
-	void		(*write_counter)(struct perf_event *event, u32 val);
+	u64		(*read_counter)(struct perf_event *event);
+	void		(*write_counter)(struct perf_event *event, u64 val);
 	void		(*start)(struct arm_pmu *);
 	void		(*stop)(struct arm_pmu *);
 	void		(*reset)(void *);
-- 
cgit v1.2.3


From e2da97d328d4951d25f6634eda7213f7257417b6 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 10 Jul 2018 09:58:00 +0100
Subject: arm_pmu: Add support for 64bit event counters

Each PMU has a set of 32bit event counters. But in some
special cases, the events could be counted using counters
which are effectively 64bit wide.

e.g, Arm V8 PMUv3 has a 64 bit cycle counter which can count
only the CPU cycles. Also, the PMU can chain the event counters
to effectively count as a 64bit counter.

Add support for tracking the events that uses 64bit counters.
This only affects the periods set for each counter in the core
driver.

Cc: Will Deacon <will.deacon@arm.com>
Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/perf/arm_pmu.c       | 16 ++++++++++------
 include/linux/perf/arm_pmu.h |  6 ++++++
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 6ddc00da5373..8cad6b535a2c 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -28,9 +28,12 @@
 static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
 static DEFINE_PER_CPU(int, cpu_irq);
 
-static inline u64 arm_pmu_max_period(void)
+static inline u64 arm_pmu_event_max_period(struct perf_event *event)
 {
-	return (1ULL << 32) - 1;
+	if (event->hw.flags & ARMPMU_EVT_64BIT)
+		return GENMASK_ULL(63, 0);
+	else
+		return GENMASK_ULL(31, 0);
 }
 
 static int
@@ -122,7 +125,7 @@ int armpmu_event_set_period(struct perf_event *event)
 	u64 max_period;
 	int ret = 0;
 
-	max_period = arm_pmu_max_period();
+	max_period = arm_pmu_event_max_period(event);
 	if (unlikely(left <= -period)) {
 		left = period;
 		local64_set(&hwc->period_left, left);
@@ -148,7 +151,7 @@ int armpmu_event_set_period(struct perf_event *event)
 
 	local64_set(&hwc->prev_count, (u64)-left);
 
-	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
+	armpmu->write_counter(event, (u64)(-left) & max_period);
 
 	perf_event_update_userpage(event);
 
@@ -160,7 +163,7 @@ u64 armpmu_event_update(struct perf_event *event)
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	u64 delta, prev_raw_count, new_raw_count;
-	u64 max_period = arm_pmu_max_period();
+	u64 max_period = arm_pmu_event_max_period(event);
 
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
@@ -368,6 +371,7 @@ __hw_perf_event_init(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	int mapping;
 
+	hwc->flags = 0;
 	mapping = armpmu->map_event(event);
 
 	if (mapping < 0) {
@@ -410,7 +414,7 @@ __hw_perf_event_init(struct perf_event *event)
 		 * is far less likely to overtake the previous one unless
 		 * you have some serious IRQ latency issues.
 		 */
-		hwc->sample_period  = arm_pmu_max_period() >> 1;
+		hwc->sample_period  = arm_pmu_event_max_period(event) >> 1;
 		hwc->last_period    = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
 	}
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index f7126a21df30..10f92e1d8e7b 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -25,6 +25,12 @@
  */
 #define ARMPMU_MAX_HWEVENTS		32
 
+/*
+ * ARM PMU hw_event flags
+ */
+/* Event uses a 64bit counter */
+#define ARMPMU_EVT_64BIT		1
+
 #define HW_OP_UNSUPPORTED		0xFFFF
 #define C(_x)				PERF_COUNT_HW_CACHE_##_x
 #define CACHE_OP_UNSUPPORTED		0xFFFF
-- 
cgit v1.2.3


From 14b470b56840dbb093abd71f214e9d63770c87b8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 6 Jul 2018 22:19:07 +0200
Subject: scsi: target: sbitmap: add seq_file forward declaration

The target core runs into a warning in the linux/sbitmap.h
file in some configurations:

In file included from include/target/target_core_base.h:7,
                 from drivers/target/target_core_fabric_lib.c:41:
include/linux/sbitmap.h:331:46: error: 'struct seq_file' declared inside parameter list will not be visible outside of this definition or declaration [-Werror]
 void sbitmap_show(struct sbitmap *sb, struct seq_file *m);
                                              ^~~~~~~~

In general, headers should not depend on others being included first,
so this fixes it with a forward declaration for that struct name, but
we probably want to merge the patch through the scsi tree to help
bisection.

Fixes: 10e9cbb6b531 ("scsi: target: Convert target drivers to use sbitmap")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/sbitmap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index e6539536dea9..804a50983ec5 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -23,6 +23,8 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 
+struct seq_file;
+
 /**
  * struct sbitmap_word - Word in a &struct sbitmap.
  */
-- 
cgit v1.2.3


From b60a60405fb95a688eb2ef4ef20f5fcaa7b64f68 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@toke.dk>
Date: Fri, 6 Jul 2018 17:37:19 +0200
Subject: netfilter: Add nf_ct_get_tuple_skb global lookup function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds a global netfilter function to extract a conntrack tuple from an
skb. The function uses a new function added to nf_ct_hook, which will try
to get the tuple from skb->_nfct, and do a full lookup if that fails. This
makes it possible to use the lookup function before the skb has passed
through the conntrack init hooks (e.g., in an ingress qdisc). The tuple is
copied to the caller to avoid issues with reference counting.

The function returns false if conntrack is not loaded, allowing it to be
used without incurring a module dependency on conntrack. This is used by
the NAT mode in sch_cake.

Cc: netfilter-devel@vger.kernel.org
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h         | 11 +++++++++++
 net/netfilter/core.c              | 15 +++++++++++++++
 net/netfilter/nf_conntrack_core.c | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 23b48de8c2e2..07efffd0c759 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -414,8 +414,17 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 
 extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
 void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+struct nf_conntrack_tuple;
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+			 const struct sk_buff *skb);
 #else
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
+struct nf_conntrack_tuple;
+static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+				       const struct sk_buff *skb)
+{
+	return false;
+}
 #endif
 
 struct nf_conn;
@@ -424,6 +433,8 @@ enum ip_conntrack_info;
 struct nf_ct_hook {
 	int (*update)(struct net *net, struct sk_buff *skb);
 	void (*destroy)(struct nf_conntrack *);
+	bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
+			      const struct sk_buff *);
 };
 extern struct nf_ct_hook __rcu *nf_ct_hook;
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 168af54db975..dc240cb47ddf 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -603,6 +603,21 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
 }
 EXPORT_SYMBOL(nf_conntrack_destroy);
 
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+			 const struct sk_buff *skb)
+{
+	struct nf_ct_hook *ct_hook;
+	bool ret = false;
+
+	rcu_read_lock();
+	ct_hook = rcu_dereference(nf_ct_hook);
+	if (ct_hook)
+		ret = ct_hook->get_tuple_skb(dst_tuple, skb);
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(nf_ct_get_tuple_skb);
+
 /* Built-in default zone used e.g. by modules. */
 const struct nf_conntrack_zone nf_ct_zone_dflt = {
 	.id	= NF_CT_DEFAULT_ZONE_ID,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3465da2a98bd..85ab2fd6a665 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1683,6 +1683,41 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 	return 0;
 }
 
+static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+				       const struct sk_buff *skb)
+{
+	const struct nf_conntrack_tuple *src_tuple;
+	const struct nf_conntrack_tuple_hash *hash;
+	struct nf_conntrack_tuple srctuple;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
+		memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+		return true;
+	}
+
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+			       NFPROTO_IPV4, dev_net(skb->dev),
+			       &srctuple))
+		return false;
+
+	hash = nf_conntrack_find_get(dev_net(skb->dev),
+				     &nf_ct_zone_dflt,
+				     &srctuple);
+	if (!hash)
+		return false;
+
+	ct = nf_ct_tuplehash_to_ctrack(hash);
+	src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
+	memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+	nf_ct_put(ct);
+
+	return true;
+}
+
 /* Bring out ya dead! */
 static struct nf_conn *
 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
@@ -2204,6 +2239,7 @@ err_cachep:
 static struct nf_ct_hook nf_conntrack_hook = {
 	.update		= nf_conntrack_update,
 	.destroy	= destroy_conntrack,
+	.get_tuple_skb  = nf_conntrack_get_tuple_skb,
 };
 
 void nf_conntrack_init_end(void)
-- 
cgit v1.2.3


From 19f391eb05b8b005f2907ddc8f284487b446abf3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 11:19:32 -0400
Subject: turn filp_clone_open() into inline wrapper for dentry_open()

it's exactly the same thing as
	dentry_open(&file->f_path, file->f_flags, file->f_cred)

... and rename it to file_clone_open(), while we are at it.
'filp' naming convention is bogus; sure, it's "file pointer",
but we generally don't do that kind of Hungarian notation.
Some of the instances have too many callers to touch, but this
one has only two, so let's sanitize it while we can...

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/gpu/drm/drm_lease.c |  2 +-
 fs/binfmt_misc.c            |  2 +-
 fs/open.c                   | 20 --------------------
 include/linux/fs.h          |  5 ++++-
 4 files changed, 6 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index d638c0fb3418..b54fb78a283c 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -553,7 +553,7 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
 
 	/* Clone the lessor file to create a new file for us */
 	DRM_DEBUG_LEASE("Allocating lease file\n");
-	lessee_file = filp_clone_open(lessor_file);
+	lessee_file = file_clone_open(lessor_file);
 	if (IS_ERR(lessee_file)) {
 		ret = PTR_ERR(lessee_file);
 		goto out_lessee;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 4b5fff31ef27..aa4a7a23ff99 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -205,7 +205,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		goto error;
 
 	if (fmt->flags & MISC_FMT_OPEN_FILE) {
-		interp_file = filp_clone_open(fmt->interp_file);
+		interp_file = file_clone_open(fmt->interp_file);
 		if (!IS_ERR(interp_file))
 			deny_write_access(interp_file);
 	} else {
diff --git a/fs/open.c b/fs/open.c
index d0e955b558ad..76c56966e297 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1063,26 +1063,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 }
 EXPORT_SYMBOL(file_open_root);
 
-struct file *filp_clone_open(struct file *oldfile)
-{
-	struct file *file;
-	int retval;
-
-	file = get_empty_filp();
-	if (IS_ERR(file))
-		return file;
-
-	file->f_flags = oldfile->f_flags;
-	retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred);
-	if (retval) {
-		put_filp(file);
-		return ERR_PTR(retval);
-	}
-
-	return file;
-}
-EXPORT_SYMBOL(filp_clone_open);
-
 long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
 {
 	struct open_flags op;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa9b4c169ed2..c4ca4c9c1130 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2422,7 +2422,10 @@ extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
-extern struct file *filp_clone_open(struct file *);
+static inline struct file *file_clone_open(struct file *file)
+{
+	return dentry_open(&file->f_path, file->f_flags, file->f_cred);
+}
 extern int filp_close(struct file *, fl_owner_t id);
 
 extern struct filename *getname_flags(const char __user *, int, int *);
-- 
cgit v1.2.3


From b4e7a7a88b5d060650094b8d3454bc521d669f6a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 11:17:54 -0400
Subject: drm_mode_create_lease_ioctl(): fix open-coded filp_clone_open()

Failure of ->open() should *not* be followed by fput().  Fixed by
using filp_clone_open(), which gets the cleanups right.

Cc: stable@vger.kernel.org
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/gpu/drm/drm_lease.c | 16 +---------------
 fs/internal.h               |  1 -
 include/linux/fs.h          |  1 +
 3 files changed, 2 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index 50c73c0a20b9..d638c0fb3418 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -553,24 +553,13 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
 
 	/* Clone the lessor file to create a new file for us */
 	DRM_DEBUG_LEASE("Allocating lease file\n");
-	path_get(&lessor_file->f_path);
-	lessee_file = alloc_file(&lessor_file->f_path,
-				 lessor_file->f_mode,
-				 fops_get(lessor_file->f_inode->i_fop));
-
+	lessee_file = filp_clone_open(lessor_file);
 	if (IS_ERR(lessee_file)) {
 		ret = PTR_ERR(lessee_file);
 		goto out_lessee;
 	}
 
-	/* Initialize the new file for DRM */
-	DRM_DEBUG_LEASE("Initializing the file with %p\n", lessee_file->f_op->open);
-	ret = lessee_file->f_op->open(lessee_file->f_inode, lessee_file);
-	if (ret)
-		goto out_lessee_file;
-
 	lessee_priv = lessee_file->private_data;
-
 	/* Change the file to a master one */
 	drm_master_put(&lessee_priv->master);
 	lessee_priv->master = lessee;
@@ -588,9 +577,6 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
 	DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n");
 	return 0;
 
-out_lessee_file:
-	fput(lessee_file);
-
 out_lessee:
 	drm_master_put(&lessee);
 
diff --git a/fs/internal.h b/fs/internal.h
index 980d005b21b4..5645b4ebf494 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -127,7 +127,6 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
 
 extern int open_check_o_direct(struct file *f);
 extern int vfs_open(const struct path *, struct file *, const struct cred *);
-extern struct file *filp_clone_open(struct file *);
 
 /*
  * inode.c
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..aa9b4c169ed2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2422,6 +2422,7 @@ extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
+extern struct file *filp_clone_open(struct file *);
 extern int filp_close(struct file *, fl_owner_t id);
 
 extern struct filename *getname_flags(const char __user *, int, int *);
-- 
cgit v1.2.3


From fae68031f7fbc8b6db58d87830ba7ed1d696fbb1 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Fri, 6 Jul 2018 07:35:50 +0200
Subject: w1: core: match sub-nodes of bus masters in devicetree

Once a new slave device is detected, match it against all sub-nodes of the
master bus controller. If a match is found, set the slave device's of_node
pointer.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/w1/w1.c    | 3 +++
 include/linux/w1.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index caef0e0fd817..890c038c25f8 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -26,6 +26,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/hwmon.h>
+#include <linux/of.h>
 
 #include <linux/atomic.h>
 
@@ -686,6 +687,8 @@ static int __w1_attach_slave_device(struct w1_slave *sl)
 	sl->dev.bus = &w1_bus_type;
 	sl->dev.release = &w1_slave_release;
 	sl->dev.groups = w1_slave_groups;
+	sl->dev.of_node = of_find_matching_node(sl->master->dev.of_node,
+						sl->family->of_match_table);
 
 	dev_set_name(&sl->dev, "%02x-%012llx",
 		 (unsigned int) sl->reg_num.family,
diff --git a/include/linux/w1.h b/include/linux/w1.h
index 694101f744c7..3111585c371f 100644
--- a/include/linux/w1.h
+++ b/include/linux/w1.h
@@ -274,6 +274,8 @@ struct w1_family {
 
 	struct w1_family_ops	*fops;
 
+	const struct of_device_id *of_match_table;
+
 	atomic_t		refcnt;
 };
 
-- 
cgit v1.2.3


From 23ebda2fc715534d383d59ae6740d4e3ebd43798 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 11 Jul 2018 17:21:05 +0200
Subject: libata: remove ata_sff_data_xfer_noirq()

ata_sff_data_xfer_noirq() is invoked via the ->sff_data_xfer hook. The
latter is invoked by ata_pio_sector(), atapi_send_cdb() and
__atapi_pio_bytes() which in turn is invoked by ata_sff_hsm_move().
The latter function requires that the "ap->lock" lock is held which
needs to be taken with disabled interrupts.

There is no need have to have ata_sff_data_xfer_noirq() which invokes
ata_sff_data_xfer32() with disabled interrupts because at this point the
interrupts are already disabled.
Remove the function and its references to it and replace all callers
with ata_sff_data_xfer32().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 Documentation/driver-api/libata.rst |  3 +--
 drivers/ata/libata-sff.c            | 30 ------------------------------
 drivers/ata/pata_cmd640.c           |  2 +-
 drivers/ata/pata_icside.c           |  2 +-
 drivers/ata/pata_imx.c              |  2 +-
 drivers/ata/pata_legacy.c           |  6 +++---
 drivers/ata/pata_palmld.c           |  2 +-
 drivers/ata/pata_pcmcia.c           |  2 +-
 drivers/ata/pata_platform.c         |  2 +-
 drivers/ata/pata_via.c              |  2 +-
 include/linux/libata.h              |  2 --
 11 files changed, 11 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst
index 4adc056f7635..70e180e6b93d 100644
--- a/Documentation/driver-api/libata.rst
+++ b/Documentation/driver-api/libata.rst
@@ -118,8 +118,7 @@ PIO data read/write
 All bmdma-style drivers must implement this hook. This is the low-level
 operation that actually copies the data bytes during a PIO data
 transfer. Typically the driver will choose one of
-:c:func:`ata_sff_data_xfer_noirq`, :c:func:`ata_sff_data_xfer`, or
-:c:func:`ata_sff_data_xfer32`.
+:c:func:`ata_sff_data_xfer`, or :c:func:`ata_sff_data_xfer32`.
 
 ATA command execute
 ~~~~~~~~~~~~~~~~~~~
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index cc2f2e35f4c2..c5ea0fc635e5 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -657,36 +657,6 @@ unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf,
 }
 EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
 
-/**
- *	ata_sff_data_xfer_noirq - Transfer data by PIO
- *	@qc: queued command
- *	@buf: data buffer
- *	@buflen: buffer length
- *	@rw: read/write
- *
- *	Transfer data from/to the device data register by PIO. Do the
- *	transfer with interrupts disabled.
- *
- *	LOCKING:
- *	Inherited from caller.
- *
- *	RETURNS:
- *	Bytes consumed.
- */
-unsigned int ata_sff_data_xfer_noirq(struct ata_queued_cmd *qc, unsigned char *buf,
-				     unsigned int buflen, int rw)
-{
-	unsigned long flags;
-	unsigned int consumed;
-
-	local_irq_save(flags);
-	consumed = ata_sff_data_xfer32(qc, buf, buflen, rw);
-	local_irq_restore(flags);
-
-	return consumed;
-}
-EXPORT_SYMBOL_GPL(ata_sff_data_xfer_noirq);
-
 /**
  *	ata_pio_sector - Transfer a sector of data.
  *	@qc: Command on going
diff --git a/drivers/ata/pata_cmd640.c b/drivers/ata/pata_cmd640.c
index c47caa807fa9..e3532eda7b05 100644
--- a/drivers/ata/pata_cmd640.c
+++ b/drivers/ata/pata_cmd640.c
@@ -178,7 +178,7 @@ static struct scsi_host_template cmd640_sht = {
 static struct ata_port_operations cmd640_port_ops = {
 	.inherits	= &ata_sff_port_ops,
 	/* In theory xfer_noirq is not needed once we kill the prefetcher */
-	.sff_data_xfer	= ata_sff_data_xfer_noirq,
+	.sff_data_xfer	= ata_sff_data_xfer32,
 	.sff_irq_check	= cmd640_sff_irq_check,
 	.qc_issue	= cmd640_qc_issue,
 	.cable_detect	= ata_cable_40wire,
diff --git a/drivers/ata/pata_icside.c b/drivers/ata/pata_icside.c
index 188f2f2eb21f..c272f2cbb47c 100644
--- a/drivers/ata/pata_icside.c
+++ b/drivers/ata/pata_icside.c
@@ -324,7 +324,7 @@ static struct ata_port_operations pata_icside_port_ops = {
 	.inherits		= &ata_bmdma_port_ops,
 	/* no need to build any PRD tables for DMA */
 	.qc_prep		= ata_noop_qc_prep,
-	.sff_data_xfer		= ata_sff_data_xfer_noirq,
+	.sff_data_xfer		= ata_sff_data_xfer32,
 	.bmdma_setup		= pata_icside_bmdma_setup,
 	.bmdma_start		= pata_icside_bmdma_start,
 	.bmdma_stop		= pata_icside_bmdma_stop,
diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c
index d4caa23f5a88..108101325efd 100644
--- a/drivers/ata/pata_imx.c
+++ b/drivers/ata/pata_imx.c
@@ -102,7 +102,7 @@ static struct scsi_host_template pata_imx_sht = {
 
 static struct ata_port_operations pata_imx_port_ops = {
 	.inherits		= &ata_sff_port_ops,
-	.sff_data_xfer		= ata_sff_data_xfer_noirq,
+	.sff_data_xfer		= ata_sff_data_xfer32,
 	.cable_detect		= ata_cable_unknown,
 	.set_piomode		= pata_imx_set_piomode,
 };
diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index 53828b6c3044..8ea4b8431fc8 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -246,12 +246,12 @@ static const struct ata_port_operations legacy_base_port_ops = {
 
 static struct ata_port_operations simple_port_ops = {
 	.inherits	= &legacy_base_port_ops,
-	.sff_data_xfer	= ata_sff_data_xfer_noirq,
+	.sff_data_xfer	= ata_sff_data_xfer32,
 };
 
 static struct ata_port_operations legacy_port_ops = {
 	.inherits	= &legacy_base_port_ops,
-	.sff_data_xfer	= ata_sff_data_xfer_noirq,
+	.sff_data_xfer	= ata_sff_data_xfer32,
 	.set_mode	= legacy_set_mode,
 };
 
@@ -341,7 +341,7 @@ static unsigned int pdc_data_xfer_vlb(struct ata_queued_cmd *qc,
 		}
 		local_irq_restore(flags);
 	} else
-		buflen = ata_sff_data_xfer_noirq(qc, buf, buflen, rw);
+		buflen = ata_sff_data_xfer32(qc, buf, buflen, rw);
 
 	return buflen;
 }
diff --git a/drivers/ata/pata_palmld.c b/drivers/ata/pata_palmld.c
index 8c0d7d736b7a..d071ab6864a8 100644
--- a/drivers/ata/pata_palmld.c
+++ b/drivers/ata/pata_palmld.c
@@ -44,7 +44,7 @@ static struct scsi_host_template palmld_sht = {
 
 static struct ata_port_operations palmld_port_ops = {
 	.inherits		= &ata_sff_port_ops,
-	.sff_data_xfer		= ata_sff_data_xfer_noirq,
+	.sff_data_xfer		= ata_sff_data_xfer32,
 	.cable_detect		= ata_cable_40wire,
 };
 
diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index a541eacc5e95..9b0e6c72e3f9 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -151,7 +151,7 @@ static struct scsi_host_template pcmcia_sht = {
 
 static struct ata_port_operations pcmcia_port_ops = {
 	.inherits	= &ata_sff_port_ops,
-	.sff_data_xfer	= ata_sff_data_xfer_noirq,
+	.sff_data_xfer	= ata_sff_data_xfer32,
 	.cable_detect	= ata_cable_40wire,
 	.set_mode	= pcmcia_set_mode,
 };
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index c503ded87bb8..d6f8f5406442 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -49,7 +49,7 @@ static struct scsi_host_template pata_platform_sht = {
 
 static struct ata_port_operations pata_platform_port_ops = {
 	.inherits		= &ata_sff_port_ops,
-	.sff_data_xfer		= ata_sff_data_xfer_noirq,
+	.sff_data_xfer		= ata_sff_data_xfer32,
 	.cable_detect		= ata_cable_unknown,
 	.set_mode		= pata_platform_set_mode,
 };
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 1ca6bcab369f..fd19f1ce83aa 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -471,7 +471,7 @@ static struct ata_port_operations via_port_ops = {
 
 static struct ata_port_operations via_port_ops_noirq = {
 	.inherits	= &via_port_ops,
-	.sff_data_xfer	= ata_sff_data_xfer_noirq,
+	.sff_data_xfer	= ata_sff_data_xfer32,
 };
 
 /**
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8b8946dd63b9..aa8583655a18 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1832,8 +1832,6 @@ extern unsigned int ata_sff_data_xfer(struct ata_queued_cmd *qc,
 			unsigned char *buf, unsigned int buflen, int rw);
 extern unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc,
 			unsigned char *buf, unsigned int buflen, int rw);
-extern unsigned int ata_sff_data_xfer_noirq(struct ata_queued_cmd *qc,
-			unsigned char *buf, unsigned int buflen, int rw);
 extern void ata_sff_irq_on(struct ata_port *ap);
 extern void ata_sff_irq_clear(struct ata_port *ap);
 extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
-- 
cgit v1.2.3


From 9dc55f1389f9569acf9659e58dd836a9c70df217 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:05 -0700
Subject: iomap: add support for sub-pagesize buffered I/O without buffer heads

After already supporting a simple implementation of buffered writes for
the blocksize == PAGE_SIZE case in the last commit this adds full support
even for smaller block sizes.   There are three bits of per-block
information in the buffer_head structure that really matter for the iomap
read and write path:

 - uptodate status (BH_uptodate)
 - marked as currently under read I/O (BH_Async_Read)
 - marked as currently under write I/O (BH_Async_Write)

Instead of having new per-block structures this now adds a per-page
structure called struct iomap_page to track this information in a slightly
different form:

 - a bitmap for the per-block uptodate status.  For worst case of a 64k
   page size system this bitmap needs to contain 128 bits.  For the
   typical 4k page size case it only needs 8 bits, although we still
   need a full unsigned long due to the way the atomic bitmap API works.
 - two atomic_t counters are used to track the outstanding read and write
   counts

There is quite a bit of boilerplate code as the buffered I/O path uses
various helper methods, but the actual code is very straight forward.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 280 ++++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/iomap.h |  31 ++++++
 2 files changed, 290 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/iomap.c b/fs/iomap.c
index 13cdcf33e6c0..07501a647d13 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -17,6 +17,7 @@
 #include <linux/iomap.h>
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
 #include <linux/mm.h>
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
@@ -104,6 +105,138 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static struct iomap_page *
+iomap_page_create(struct inode *inode, struct page *page)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (iop || i_blocksize(inode) == PAGE_SIZE)
+		return iop;
+
+	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
+	atomic_set(&iop->read_count, 0);
+	atomic_set(&iop->write_count, 0);
+	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
+	set_page_private(page, (unsigned long)iop);
+	SetPagePrivate(page);
+	return iop;
+}
+
+static void
+iomap_page_release(struct page *page)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (!iop)
+		return;
+	WARN_ON_ONCE(atomic_read(&iop->read_count));
+	WARN_ON_ONCE(atomic_read(&iop->write_count));
+	ClearPagePrivate(page);
+	set_page_private(page, 0);
+	kfree(iop);
+}
+
+/*
+ * Calculate the range inside the page that we actually need to read.
+ */
+static void
+iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
+		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
+{
+	unsigned block_bits = inode->i_blkbits;
+	unsigned block_size = (1 << block_bits);
+	unsigned poff = *pos & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	unsigned first = poff >> block_bits;
+	unsigned last = (poff + plen - 1) >> block_bits;
+	unsigned end = (i_size_read(inode) & (PAGE_SIZE - 1)) >> block_bits;
+
+	/*
+	 * If the block size is smaller than the page size we need to check the
+	 * per-block uptodate status and adjust the offset and length if needed
+	 * to avoid reading in already uptodate ranges.
+	 */
+	if (iop) {
+		unsigned int i;
+
+		/* move forward for each leading block marked uptodate */
+		for (i = first; i <= last; i++) {
+			if (!test_bit(i, iop->uptodate))
+				break;
+			*pos += block_size;
+			poff += block_size;
+			plen -= block_size;
+			first++;
+		}
+
+		/* truncate len if we find any trailing uptodate block(s) */
+		for ( ; i <= last; i++) {
+			if (test_bit(i, iop->uptodate)) {
+				plen -= (last - i + 1) * block_size;
+				last = i - 1;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * If the extent spans the block that contains the i_size we need to
+	 * handle both halves separately so that we properly zero data in the
+	 * page cache for blocks that are entirely outside of i_size.
+	 */
+	if (first <= end && last > end)
+		plen -= (last - end) * block_size;
+
+	*offp = poff;
+	*lenp = plen;
+}
+
+static void
+iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+	struct inode *inode = page->mapping->host;
+	unsigned first = off >> inode->i_blkbits;
+	unsigned last = (off + len - 1) >> inode->i_blkbits;
+	unsigned int i;
+	bool uptodate = true;
+
+	if (iop) {
+		for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
+			if (i >= first && i <= last)
+				set_bit(i, iop->uptodate);
+			else if (!test_bit(i, iop->uptodate))
+				uptodate = false;
+		}
+	}
+
+	if (uptodate && !PageError(page))
+		SetPageUptodate(page);
+}
+
+static void
+iomap_read_finish(struct iomap_page *iop, struct page *page)
+{
+	if (!iop || atomic_dec_and_test(&iop->read_count))
+		unlock_page(page);
+}
+
+static void
+iomap_read_page_end_io(struct bio_vec *bvec, int error)
+{
+	struct page *page = bvec->bv_page;
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (unlikely(error)) {
+		ClearPageUptodate(page);
+		SetPageError(page);
+	} else {
+		iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
+	}
+
+	iomap_read_finish(iop, page);
+}
+
 static void
 iomap_read_inline_data(struct inode *inode, struct page *page,
 		struct iomap *iomap)
@@ -132,7 +265,7 @@ iomap_read_end_io(struct bio *bio)
 	int i;
 
 	bio_for_each_segment_all(bvec, bio, i)
-		page_endio(bvec->bv_page, false, error);
+		iomap_read_page_end_io(bvec, error);
 	bio_put(bio);
 }
 
@@ -150,9 +283,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 {
 	struct iomap_readpage_ctx *ctx = data;
 	struct page *page = ctx->cur_page;
-	unsigned poff = pos & (PAGE_SIZE - 1);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	struct iomap_page *iop = iomap_page_create(inode, page);
 	bool is_contig = false;
+	loff_t orig_pos = pos;
+	unsigned poff, plen;
 	sector_t sector;
 
 	if (iomap->type == IOMAP_INLINE) {
@@ -161,13 +295,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		return PAGE_SIZE;
 	}
 
-	/* we don't support blocksize < PAGE_SIZE quite yet. */
-	WARN_ON_ONCE(pos != page_offset(page));
-	WARN_ON_ONCE(plen != PAGE_SIZE);
+	/* zero post-eof blocks as the page may be mapped */
+	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
+	if (plen == 0)
+		goto done;
 
 	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
 		zero_user(page, poff, plen);
-		SetPageUptodate(page);
+		iomap_set_range_uptodate(page, poff, plen);
 		goto done;
 	}
 
@@ -183,6 +318,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		is_contig = true;
 	}
 
+	/*
+	 * If we start a new segment we need to increase the read count, and we
+	 * need to do so before submitting any previous full bio to make sure
+	 * that we don't prematurely unlock the page.
+	 */
+	if (iop)
+		atomic_inc(&iop->read_count);
+
 	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
 		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
 		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -203,7 +346,13 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 	__bio_add_page(ctx->bio, page, plen, poff);
 done:
-	return plen;
+	/*
+	 * Move the caller beyond our range so that it keeps making progress.
+	 * For that we have to include any leading non-uptodate ranges, but
+	 * we can skip trailing ones as they will be handled in the next
+	 * iteration.
+	 */
+	return pos - orig_pos + plen;
 }
 
 int
@@ -214,8 +363,6 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
 	unsigned poff;
 	loff_t ret;
 
-	WARN_ON_ONCE(page_has_buffers(page));
-
 	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
 		ret = iomap_apply(inode, page_offset(page) + poff,
 				PAGE_SIZE - poff, 0, ops, &ctx,
@@ -341,6 +488,84 @@ done:
 }
 EXPORT_SYMBOL_GPL(iomap_readpages);
 
+int
+iomap_is_partially_uptodate(struct page *page, unsigned long from,
+		unsigned long count)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+	struct inode *inode = page->mapping->host;
+	unsigned first = from >> inode->i_blkbits;
+	unsigned last = (from + count - 1) >> inode->i_blkbits;
+	unsigned i;
+
+	if (iop) {
+		for (i = first; i <= last; i++)
+			if (!test_bit(i, iop->uptodate))
+				return 0;
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
+
+int
+iomap_releasepage(struct page *page, gfp_t gfp_mask)
+{
+	/*
+	 * mm accommodates an old ext3 case where clean pages might not have had
+	 * the dirty bit cleared. Thus, it can send actual dirty pages to
+	 * ->releasepage() via shrink_active_list(), skip those here.
+	 */
+	if (PageDirty(page) || PageWriteback(page))
+		return 0;
+	iomap_page_release(page);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(iomap_releasepage);
+
+void
+iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
+{
+	/*
+	 * If we are invalidating the entire page, clear the dirty state from it
+	 * and release it to avoid unnecessary buildup of the LRU.
+	 */
+	if (offset == 0 && len == PAGE_SIZE) {
+		WARN_ON_ONCE(PageWriteback(page));
+		cancel_dirty_page(page);
+		iomap_page_release(page);
+	}
+}
+EXPORT_SYMBOL_GPL(iomap_invalidatepage);
+
+#ifdef CONFIG_MIGRATION
+int
+iomap_migrate_page(struct address_space *mapping, struct page *newpage,
+		struct page *page, enum migrate_mode mode)
+{
+	int ret;
+
+	ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+	if (ret != MIGRATEPAGE_SUCCESS)
+		return ret;
+
+	if (page_has_private(page)) {
+		ClearPagePrivate(page);
+		set_page_private(newpage, page_private(page));
+		set_page_private(page, 0);
+		SetPagePrivate(newpage);
+	}
+
+	if (mode != MIGRATE_SYNC_NO_COPY)
+		migrate_page_copy(newpage, page);
+	else
+		migrate_page_states(newpage, page);
+	return MIGRATEPAGE_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(iomap_migrate_page);
+#endif /* CONFIG_MIGRATION */
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
@@ -364,6 +589,7 @@ iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
 
 	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
 		zero_user_segments(page, poff, from, to, poff + plen);
+		iomap_set_range_uptodate(page, poff, plen);
 		return 0;
 	}
 
@@ -379,21 +605,33 @@ static int
 __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
 		struct page *page, struct iomap *iomap)
 {
+	struct iomap_page *iop = iomap_page_create(inode, page);
 	loff_t block_size = i_blocksize(inode);
 	loff_t block_start = pos & ~(block_size - 1);
 	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
-	unsigned poff = block_start & (PAGE_SIZE - 1);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
-	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
-
-	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	unsigned from = pos & (PAGE_SIZE - 1), to = from + len, poff, plen;
+	int status = 0;
 
 	if (PageUptodate(page))
 		return 0;
-	if (from <= poff && to >= poff + plen)
-		return 0;
-	return iomap_read_page_sync(inode, block_start, page,
-			poff, plen, from, to, iomap);
+
+	do {
+		iomap_adjust_read_range(inode, iop, &block_start,
+				block_end - block_start, &poff, &plen);
+		if (plen == 0)
+			break;
+
+		if ((from > poff && from < poff + plen) ||
+		    (to > poff && to < poff + plen)) {
+			status = iomap_read_page_sync(inode, block_start, page,
+					poff, plen, from, to, iomap);
+			if (status)
+				break;
+		}
+
+	} while ((block_start += plen) < block_end);
+
+	return status;
 }
 
 static int
@@ -476,7 +714,7 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 	if (unlikely(copied < len && !PageUptodate(page))) {
 		copied = 0;
 	} else {
-		SetPageUptodate(page);
+		iomap_set_range_uptodate(page, pos & (PAGE_SIZE - 1), len);
 		iomap_set_page_dirty(page);
 	}
 	return __generic_write_end(inode, pos, copied, page);
@@ -812,7 +1050,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 		block_commit_write(page, 0, length);
 	} else {
 		WARN_ON_ONCE(!PageUptodate(page));
-		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+		iomap_page_create(inode, page);
 	}
 
 	return length;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 5eb9ca8d7ce5..3555d54bf79a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -2,6 +2,9 @@
 #ifndef LINUX_IOMAP_H
 #define LINUX_IOMAP_H 1
 
+#include <linux/atomic.h>
+#include <linux/bitmap.h>
+#include <linux/mm.h>
 #include <linux/types.h>
 
 struct address_space;
@@ -98,12 +101,40 @@ struct iomap_ops {
 			ssize_t written, unsigned flags, struct iomap *iomap);
 };
 
+/*
+ * Structure allocate for each page when block size < PAGE_SIZE to track
+ * sub-page uptodate status and I/O completions.
+ */
+struct iomap_page {
+	atomic_t		read_count;
+	atomic_t		write_count;
+	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
+};
+
+static inline struct iomap_page *to_iomap_page(struct page *page)
+{
+	if (page_has_private(page))
+		return (struct iomap_page *)page_private(page);
+	return NULL;
+}
+
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
 int iomap_readpages(struct address_space *mapping, struct list_head *pages,
 		unsigned nr_pages, const struct iomap_ops *ops);
 int iomap_set_page_dirty(struct page *page);
+int iomap_is_partially_uptodate(struct page *page, unsigned long from,
+		unsigned long count);
+int iomap_releasepage(struct page *page, gfp_t gfp_mask);
+void iomap_invalidatepage(struct page *page, unsigned int offset,
+		unsigned int len);
+#ifdef CONFIG_MIGRATION
+int iomap_migrate_page(struct address_space *mapping, struct page *newpage,
+		struct page *page, enum migrate_mode mode);
+#else
+#define iomap_migrate_page NULL
+#endif
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-- 
cgit v1.2.3


From 3443b00e07eed5798605ba6524de37e1d3f1a4bf Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 10 Jul 2018 10:02:57 +0300
Subject: team: Publish team_port_get_rcu()

A follow-up patch adds a new entry point, team_port_dev_txable(). Making
it an ordinary exported function would mean that any module that may
need the service in one of the supported configurations also
unconditionally needs to pull in the team module, whether or not the
user actually intends to create team interfaces.

To prevent that, team_port_dev_txable() is defined in if_team.h, and
therefore all dependencies of that function also need to be
publicly-visible.

Therefore move team_port_get_rcu() from team.c to if_team.h.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 5 -----
 include/linux/if_team.h | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 3a95eaae0c98..6a047d30e8c6 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -41,11 +41,6 @@
 
 #define team_port_exists(dev) (dev->priv_flags & IFF_TEAM_PORT)
 
-static struct team_port *team_port_get_rcu(const struct net_device *dev)
-{
-	return rcu_dereference(dev->rx_handler_data);
-}
-
 static struct team_port *team_port_get_rtnl(const struct net_device *dev)
 {
 	struct team_port *port = rtnl_dereference(dev->rx_handler_data);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index d95cae09dea0..0d07c6655cce 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -74,6 +74,11 @@ struct team_port {
 	long mode_priv[0];
 };
 
+static inline struct team_port *team_port_get_rcu(const struct net_device *dev)
+{
+	return rcu_dereference(dev->rx_handler_data);
+}
+
 static inline bool team_port_enabled(struct team_port *port)
 {
 	return port->index != -1;
-- 
cgit v1.2.3


From eeed992b776c54af6108187c87ac60d028e69d37 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 10 Jul 2018 10:02:58 +0300
Subject: net: Add lag.h, net_lag_port_dev_txable()

LAG devices (team or bond) recognize for each one of their slave devices
whether LAG traffic is going to be sent through that device. Bond calls
such devices "active", team calls them "txable". When this state
changes, a NETDEV_CHANGELOWERSTATE notification is distributed, together
with a netdev_notifier_changelowerstate_info structure that for LAG
devices includes a tx_enabled flag that refers to the new state. The
notification thus makes it possible to react to the changes in txability
in drivers.

However there's no way to query txability from the outside on demand.
That is problematic namely for mlxsw, which when resolving ERSPAN packet
path, may encounter a LAG device, and needs to determine which of the
slaves it should choose.

To that end, introduce a new function, net_lag_port_dev_txable(), which
determines whether a given slave device is "active" or
"txable" (depending on the flavor of the LAG device). That function then
dispatches to per-LAG-flavor helpers, bond_is_active_slave_dev() resp.
team_port_dev_txable().

Because there currently is no good place where net_lag_port_dev_txable()
should be added, introduce a new header file, lag.h, which should from
now on hold any logic common to both team and bond. (But keep
netif_is_lag_master() together with the rest of netif_is_*_master()
functions).

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_team.h | 13 +++++++++++++
 include/net/bonding.h   | 13 +++++++++++++
 include/net/lag.h       | 17 +++++++++++++++++
 3 files changed, 43 insertions(+)
 create mode 100644 include/net/lag.h

(limited to 'include/linux')

diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 0d07c6655cce..ac42da56f7a2 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -89,6 +89,19 @@ static inline bool team_port_txable(struct team_port *port)
 	return port->linkup && team_port_enabled(port);
 }
 
+static inline bool team_port_dev_txable(const struct net_device *port_dev)
+{
+	struct team_port *port;
+	bool txable;
+
+	rcu_read_lock();
+	port = team_port_get_rcu(port_dev);
+	txable = port ? team_port_txable(port) : false;
+	rcu_read_unlock();
+
+	return txable;
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static inline void team_netpoll_send_skb(struct team_port *port,
 					 struct sk_buff *skb)
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 808f1d167349..a2d058170ea3 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -411,6 +411,19 @@ static inline bool bond_slave_can_tx(struct slave *slave)
 	       bond_is_active_slave(slave);
 }
 
+static inline bool bond_is_active_slave_dev(const struct net_device *slave_dev)
+{
+	struct slave *slave;
+	bool active;
+
+	rcu_read_lock();
+	slave = bond_slave_get_rcu(slave_dev);
+	active = bond_is_active_slave(slave);
+	rcu_read_unlock();
+
+	return active;
+}
+
 static inline void bond_hw_addr_copy(u8 *dst, const u8 *src, unsigned int len)
 {
 	if (len == ETH_ALEN) {
diff --git a/include/net/lag.h b/include/net/lag.h
new file mode 100644
index 000000000000..95b880e6fdde
--- /dev/null
+++ b/include/net/lag.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_IF_LAG_H
+#define _LINUX_IF_LAG_H
+
+#include <linux/netdevice.h>
+#include <linux/if_team.h>
+#include <net/bonding.h>
+
+static inline bool net_lag_port_dev_txable(const struct net_device *port_dev)
+{
+	if (netif_is_team_port(port_dev))
+		return team_port_dev_txable(port_dev);
+	else
+		return bond_is_active_slave_dev(port_dev);
+}
+
+#endif /* _LINUX_IF_LAG_H */
-- 
cgit v1.2.3


From bf1c77b4644f46d2986b7ca5e43e012f0fc8984b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 11 Jul 2018 14:56:50 +0100
Subject: kernel: add ksys_personality()

Using this helper allows us to avoid the in-kernel call to the
sys_personality() syscall. The ksys_ prefix denotes that this function
is meant as a drop-in replacement for the syscall. In particular, it
uses the same calling convention as sys_personality().

Since ksys_personality is trivial, it is implemented directly in
<linux/syscalls.h>, as we do for ksys_close() and friends.

This helper is necessary to enable conversion of arm64's syscall
handling to use pt_regs wrappers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Martin <dave.martin@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/syscalls.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a368a68cb667..abfe12d8a9c5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -80,6 +80,7 @@ union bpf_attr;
 #include <linux/unistd.h>
 #include <linux/quota.h>
 #include <linux/key.h>
+#include <linux/personality.h>
 #include <trace/syscall.h>
 
 #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
@@ -1281,4 +1282,14 @@ static inline long ksys_truncate(const char __user *pathname, loff_t length)
 	return do_sys_truncate(pathname, length);
 }
 
+static inline unsigned int ksys_personality(unsigned int personality)
+{
+	unsigned int old = current->personality;
+
+	if (personality != 0xffffffff)
+		set_personality(personality);
+
+	return old;
+}
+
 #endif
-- 
cgit v1.2.3


From 9b54bf9d6a5b30e2cc22b18793e9a4158c5b4882 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 11 Jul 2018 14:56:51 +0100
Subject: kernel: add kcompat_sys_{f,}statfs64()

Using this helper allows us to avoid the in-kernel calls to the
compat_sys_{f,}statfs64() sycalls, as are necessary for parameter
mangling in arm64's compat handling.

Following the example of ksys_* functions, kcompat_sys_* functions are
intended to be a drop-in replacement for their compat_sys_*
counterparts, with the same calling convention.

This is necessary to enable conversion of arm64's syscall handling to
use pt_regs wrappers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 fs/statfs.c            | 14 ++++++++++++--
 include/linux/compat.h | 11 +++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/statfs.c b/fs/statfs.c
index 5b2a24f0f263..f0216629621d 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -335,7 +335,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
 	return 0;
 }
 
-COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, struct compat_statfs64 __user * buf)
 {
 	struct kstatfs tmp;
 	int error;
@@ -349,7 +349,12 @@ COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, s
 	return error;
 }
 
-COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+{
+	return kcompat_sys_statfs64(pathname, sz, buf);
+}
+
+int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user * buf)
 {
 	struct kstatfs tmp;
 	int error;
@@ -363,6 +368,11 @@ COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct co
 	return error;
 }
 
+COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+{
+	return kcompat_sys_fstatfs64(fd, sz, buf);
+}
+
 /*
  * This is a copy of sys_ustat, just dealing with a structure layout.
  * Given how simple this syscall is that apporach is more maintainable
diff --git a/include/linux/compat.h b/include/linux/compat.h
index c68acc47da57..43f4ed44c5d5 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -1028,6 +1028,17 @@ static inline struct compat_timeval ns_to_compat_timeval(s64 nsec)
 	return ctv;
 }
 
+/*
+ * Kernel code should not call compat syscalls (i.e., compat_sys_xyzyyz())
+ * directly.  Instead, use one of the functions which work equivalently, such
+ * as the kcompat_sys_xyzyyz() functions prototyped below.
+ */
+
+int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz,
+		     struct compat_statfs64 __user * buf);
+int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
+			  struct compat_statfs64 __user * buf);
+
 #else /* !CONFIG_COMPAT */
 
 #define is_compat_task() (0)
-- 
cgit v1.2.3


From c9c554f21490bbc96cc554f80024d27d09670480 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 11 Jul 2018 14:19:04 -0400
Subject: alloc_file(): switch to passing O_... flags instead of FMODE_... mode

... so that it could set both ->f_flags and ->f_mode, without callers
having to set ->f_flags manually.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/misc/cxl/api.c          |  3 +--
 drivers/scsi/cxlflash/ocxl_hw.c |  3 +--
 fs/aio.c                        |  8 ++------
 fs/anon_inodes.c                |  3 +--
 fs/file_table.c                 | 17 +++++++++--------
 fs/hugetlbfs/inode.c            |  3 +--
 fs/pipe.c                       |  8 ++++----
 include/linux/file.h            |  2 +-
 ipc/shm.c                       |  8 ++++----
 mm/memfd.c                      |  2 +-
 mm/shmem.c                      |  3 +--
 net/socket.c                    |  3 +--
 12 files changed, 27 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 6b16946f9b05..0b5cb6cf91a0 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -102,12 +102,11 @@ static struct file *cxl_getfile(const char *name,
 	path.mnt = mntget(cxl_vfs_mount);
 	d_instantiate(path.dentry, inode);
 
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
+	file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops);
 	if (IS_ERR(file)) {
 		path_put(&path);
 		goto err_fs;
 	}
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
 	file->private_data = priv;
 
 	return file;
diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c
index 497a68389461..99bb393a8a34 100644
--- a/drivers/scsi/cxlflash/ocxl_hw.c
+++ b/drivers/scsi/cxlflash/ocxl_hw.c
@@ -129,7 +129,7 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name,
 	path.mnt = mntget(ocxlflash_vfs_mount);
 	d_instantiate(path.dentry, inode);
 
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
+	file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops);
 	if (IS_ERR(file)) {
 		rc = PTR_ERR(file);
 		dev_err(dev, "%s: alloc_file failed rc=%d\n",
@@ -138,7 +138,6 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name,
 		goto err3;
 	}
 
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
 	file->private_data = priv;
 out:
 	return file;
diff --git a/fs/aio.c b/fs/aio.c
index e1d20124ec0e..9eea53887d6c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -234,13 +234,9 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 	path.mnt = mntget(aio_mnt);
 
 	d_instantiate(path.dentry, inode);
-	file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
-	if (IS_ERR(file)) {
+	file = alloc_file(&path, O_RDWR, &aio_ring_fops);
+	if (IS_ERR(file))
 		path_put(&path);
-		return file;
-	}
-
-	file->f_flags = O_RDWR;
 	return file;
 }
 
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 3168ee4e77f4..6b235ab1df6c 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -102,12 +102,11 @@ struct file *anon_inode_getfile(const char *name,
 
 	d_instantiate(path.dentry, anon_inode_inode);
 
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
+	file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops);
 	if (IS_ERR(file))
 		goto err_dput;
 	file->f_mapping = anon_inode_inode->i_mapping;
 
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
 	file->private_data = priv;
 
 	return file;
diff --git a/fs/file_table.c b/fs/file_table.c
index eee7cf629e52..086c3f5ec31a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -153,10 +153,10 @@ over:
  * alloc_file - allocate and initialize a 'struct file'
  *
  * @path: the (dentry, vfsmount) pair for the new file
- * @mode: the mode with which the new file will be opened
+ * @flags: O_... flags with which the new file will be opened
  * @fop: the 'struct file_operations' for the new file
  */
-struct file *alloc_file(const struct path *path, fmode_t mode,
+struct file *alloc_file(const struct path *path, int flags,
 		const struct file_operations *fop)
 {
 	struct file *file;
@@ -165,19 +165,20 @@ struct file *alloc_file(const struct path *path, fmode_t mode,
 	if (IS_ERR(file))
 		return file;
 
+	file->f_mode = OPEN_FMODE(flags);
+	file->f_flags = flags;
 	file->f_path = *path;
 	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
 	file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
-	if ((mode & FMODE_READ) &&
+	if ((file->f_mode & FMODE_READ) &&
 	     likely(fop->read || fop->read_iter))
-		mode |= FMODE_CAN_READ;
-	if ((mode & FMODE_WRITE) &&
+		file->f_mode |= FMODE_CAN_READ;
+	if ((file->f_mode & FMODE_WRITE) &&
 	     likely(fop->write || fop->write_iter))
-		mode |= FMODE_CAN_WRITE;
-	file->f_mode = mode;
+		file->f_mode |= FMODE_CAN_WRITE;
 	file->f_op = fop;
-	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(path->dentry->d_inode);
 	return file;
 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d508c7844681..71aed47422e2 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1375,8 +1375,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	inode->i_size = size;
 	clear_nlink(inode);
 
-	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
-			&hugetlbfs_file_operations);
+	file = alloc_file(&path, O_RDWR, &hugetlbfs_file_operations);
 	if (IS_ERR(file))
 		goto out_dentry; /* inode is already attached */
 
diff --git a/fs/pipe.c b/fs/pipe.c
index 9405e455f5b1..1909422e5a78 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -760,16 +760,17 @@ int create_pipe_files(struct file **res, int flags)
 
 	d_instantiate(path.dentry, inode);
 
-	f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
+	f = alloc_file(&path, O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
+			&pipefifo_fops);
 	if (IS_ERR(f)) {
 		err = PTR_ERR(f);
 		goto err_dentry;
 	}
 
-	f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
 	f->private_data = inode->i_pipe;
 
-	res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
+	res[0] = alloc_file(&path, O_RDONLY | (flags & O_NONBLOCK),
+			&pipefifo_fops);
 	if (IS_ERR(res[0])) {
 		put_pipe_info(inode, inode->i_pipe);
 		fput(f);
@@ -778,7 +779,6 @@ int create_pipe_files(struct file **res, int flags)
 
 	path_get(&path);
 	res[0]->private_data = inode->i_pipe;
-	res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 	res[1] = f;
 	return 0;
 
diff --git a/include/linux/file.h b/include/linux/file.h
index 279720db984a..6d34a1262b31 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -18,7 +18,7 @@ struct file_operations;
 struct vfsmount;
 struct dentry;
 struct path;
-extern struct file *alloc_file(const struct path *, fmode_t mode,
+extern struct file *alloc_file(const struct path *, int flags,
 	const struct file_operations *fop);
 
 static inline void fput_light(struct file *file, int fput_needed)
diff --git a/ipc/shm.c b/ipc/shm.c
index 051a3e1fb8df..c702abd578a7 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1362,7 +1362,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 	struct ipc_namespace *ns;
 	struct shm_file_data *sfd;
 	struct path path;
-	fmode_t f_mode;
+	int f_flags;
 	unsigned long populate = 0;
 
 	err = -EINVAL;
@@ -1395,11 +1395,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 	if (shmflg & SHM_RDONLY) {
 		prot = PROT_READ;
 		acc_mode = S_IRUGO;
-		f_mode = FMODE_READ;
+		f_flags = O_RDONLY;
 	} else {
 		prot = PROT_READ | PROT_WRITE;
 		acc_mode = S_IRUGO | S_IWUGO;
-		f_mode = FMODE_READ | FMODE_WRITE;
+		f_flags = O_RDWR;
 	}
 	if (shmflg & SHM_EXEC) {
 		prot |= PROT_EXEC;
@@ -1449,7 +1449,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 		goto out_nattch;
 	}
 
-	file = alloc_file(&path, f_mode,
+	file = alloc_file(&path, f_flags,
 			  is_file_hugepages(shp->shm_file) ?
 				&shm_file_operations_huge :
 				&shm_file_operations);
diff --git a/mm/memfd.c b/mm/memfd.c
index 27069518e3c5..2bb5e257080e 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -326,7 +326,7 @@ SYSCALL_DEFINE2(memfd_create,
 		goto err_fd;
 	}
 	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
-	file->f_flags |= O_RDWR | O_LARGEFILE;
+	file->f_flags |= O_LARGEFILE;
 
 	if (flags & MFD_ALLOW_SEALING) {
 		file_seals = memfd_file_seals_ptr(file);
diff --git a/mm/shmem.c b/mm/shmem.c
index 2cab84403055..84844e52bf24 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3942,8 +3942,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
 	if (IS_ERR(res))
 		goto put_path;
 
-	res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
-		  &shmem_file_operations);
+	res = alloc_file(&path, O_RDWR, &shmem_file_operations);
 	if (IS_ERR(res))
 		goto put_path;
 
diff --git a/net/socket.c b/net/socket.c
index 8a109012608a..2cdbe8f71b7f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -411,7 +411,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 
 	d_instantiate(path.dentry, SOCK_INODE(sock));
 
-	file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
+	file = alloc_file(&path, O_RDWR | (flags & O_NONBLOCK),
 		  &socket_file_ops);
 	if (IS_ERR(file)) {
 		/* drop dentry, keep inode for a bit */
@@ -423,7 +423,6 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 	}
 
 	sock->file = file;
-	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 	file->private_data = sock;
 	return file;
 }
-- 
cgit v1.2.3


From e3f20ae21079ecac282df65d83865c5771f4bca0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 10 Jul 2018 13:25:29 -0400
Subject: security_file_open(): lose cred argument

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c                | 2 +-
 include/linux/security.h | 5 ++---
 security/security.c      | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/open.c b/fs/open.c
index 0a9f00b7f3d5..4c65edefa487 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -776,7 +776,7 @@ static int do_dentry_open(struct file *f,
 		goto cleanup_all;
 	}
 
-	error = security_file_open(f, f->f_cred);
+	error = security_file_open(f);
 	if (error)
 		goto cleanup_all;
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 63030c85ee19..88d30fc975e7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -309,7 +309,7 @@ void security_file_set_fowner(struct file *file);
 int security_file_send_sigiotask(struct task_struct *tsk,
 				 struct fown_struct *fown, int sig);
 int security_file_receive(struct file *file);
-int security_file_open(struct file *file, const struct cred *cred);
+int security_file_open(struct file *file);
 int security_task_alloc(struct task_struct *task, unsigned long clone_flags);
 void security_task_free(struct task_struct *task);
 int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
@@ -858,8 +858,7 @@ static inline int security_file_receive(struct file *file)
 	return 0;
 }
 
-static inline int security_file_open(struct file *file,
-				     const struct cred *cred)
+static inline int security_file_open(struct file *file)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 68f46d849abe..235b35f58a65 100644
--- a/security/security.c
+++ b/security/security.c
@@ -970,11 +970,11 @@ int security_file_receive(struct file *file)
 	return call_int_hook(file_receive, 0, file);
 }
 
-int security_file_open(struct file *file, const struct cred *cred)
+int security_file_open(struct file *file)
 {
 	int ret;
 
-	ret = call_int_hook(file_open, 0, file, cred);
+	ret = call_int_hook(file_open, 0, file, file->f_cred);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From 9481769208b5e39b871ae4e89f5328c776ec38dc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 10 Jul 2018 14:13:18 -0400
Subject: ->file_open(): lose cred argument

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/lsm_hooks.h  | 2 +-
 security/apparmor/lsm.c    | 4 ++--
 security/security.c        | 2 +-
 security/selinux/hooks.c   | 4 ++--
 security/smack/smack_lsm.c | 6 +++---
 security/tomoyo/tomoyo.c   | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 8f1131c8dd54..a8ee106b865d 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1569,7 +1569,7 @@ union security_list_options {
 	int (*file_send_sigiotask)(struct task_struct *tsk,
 					struct fown_struct *fown, int sig);
 	int (*file_receive)(struct file *file);
-	int (*file_open)(struct file *file, const struct cred *cred);
+	int (*file_open)(struct file *file);
 
 	int (*task_alloc)(struct task_struct *task, unsigned long clone_flags);
 	void (*task_free)(struct task_struct *task);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 74f17376202b..8b8b70620bbe 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -395,7 +395,7 @@ static int apparmor_inode_getattr(const struct path *path)
 	return common_perm_cond(OP_GETATTR, path, AA_MAY_GETATTR);
 }
 
-static int apparmor_file_open(struct file *file, const struct cred *cred)
+static int apparmor_file_open(struct file *file)
 {
 	struct aa_file_ctx *fctx = file_ctx(file);
 	struct aa_label *label;
@@ -414,7 +414,7 @@ static int apparmor_file_open(struct file *file, const struct cred *cred)
 		return 0;
 	}
 
-	label = aa_get_newest_cred_label(cred);
+	label = aa_get_newest_cred_label(file->f_cred);
 	if (!unconfined(label)) {
 		struct inode *inode = file_inode(file);
 		struct path_cond cond = { inode->i_uid, inode->i_mode };
diff --git a/security/security.c b/security/security.c
index 235b35f58a65..5dce67070cdf 100644
--- a/security/security.c
+++ b/security/security.c
@@ -974,7 +974,7 @@ int security_file_open(struct file *file)
 {
 	int ret;
 
-	ret = call_int_hook(file_open, 0, file, file->f_cred);
+	ret = call_int_hook(file_open, 0, file);
 	if (ret)
 		return ret;
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2b5ee5fbd652..18006be15713 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3862,7 +3862,7 @@ static int selinux_file_receive(struct file *file)
 	return file_has_perm(cred, file, file_to_av(file));
 }
 
-static int selinux_file_open(struct file *file, const struct cred *cred)
+static int selinux_file_open(struct file *file)
 {
 	struct file_security_struct *fsec;
 	struct inode_security_struct *isec;
@@ -3886,7 +3886,7 @@ static int selinux_file_open(struct file *file, const struct cred *cred)
 	 * new inode label or new policy.
 	 * This check is not redundant - do not remove.
 	 */
-	return file_path_has_perm(cred, file, open_file_to_av(file));
+	return file_path_has_perm(file->f_cred, file, open_file_to_av(file));
 }
 
 /* task security operations */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 7ad226018f51..e7b6c012431d 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1927,9 +1927,9 @@ static int smack_file_receive(struct file *file)
  *
  * Returns 0
  */
-static int smack_file_open(struct file *file, const struct cred *cred)
+static int smack_file_open(struct file *file)
 {
-	struct task_smack *tsp = cred->security;
+	struct task_smack *tsp = file->f_cred->security;
 	struct inode *inode = file_inode(file);
 	struct smk_audit_info ad;
 	int rc;
@@ -1937,7 +1937,7 @@ static int smack_file_open(struct file *file, const struct cred *cred)
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
 	smk_ad_setfield_u_fs_path(&ad, file->f_path);
 	rc = smk_tskacc(tsp, smk_of_inode(inode), MAY_READ, &ad);
-	rc = smk_bu_credfile(cred, file, MAY_READ, rc);
+	rc = smk_bu_credfile(file->f_cred, file, MAY_READ, rc);
 
 	return rc;
 }
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 213b8c593668..9f932e2d6852 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -320,7 +320,7 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
  *
  * Returns 0 on success, negative value otherwise.
  */
-static int tomoyo_file_open(struct file *f, const struct cred *cred)
+static int tomoyo_file_open(struct file *f)
 {
 	int flags = f->f_flags;
 	/* Don't check read permission here if called from do_execve(). */
-- 
cgit v1.2.3


From f5d11409e61dadf1f9af91b22bbedc28a60a2e2c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 02:35:08 -0400
Subject: introduce FMODE_OPENED

basically, "is that instance set up enough for regular fput(), or
do we want put_filp() for that one".

NOTE: the only alloc_file() caller that could be followed by put_filp()
is in arch/ia64/kernel/perfmon.c, which is (Kconfig-level) broken.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c    | 1 +
 fs/open.c          | 3 ++-
 include/linux/fs.h | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index 705f486f7007..d664d10acfeb 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -176,6 +176,7 @@ struct file *alloc_file(const struct path *path, int flags,
 	if ((file->f_mode & FMODE_WRITE) &&
 	     likely(fop->write || fop->write_iter))
 		file->f_mode |= FMODE_CAN_WRITE;
+	file->f_mode |= FMODE_OPENED;
 	file->f_op = fop;
 	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(path->dentry->d_inode);
diff --git a/fs/open.c b/fs/open.c
index 4c65edefa487..f3c6cb6a57b9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -749,7 +749,7 @@ static int do_dentry_open(struct file *f,
 	f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
 
 	if (unlikely(f->f_flags & O_PATH)) {
-		f->f_mode = FMODE_PATH;
+		f->f_mode = FMODE_PATH | FMODE_OPENED;
 		f->f_op = &empty_fops;
 		return 0;
 	}
@@ -793,6 +793,7 @@ static int do_dentry_open(struct file *f,
 		if (error)
 			goto cleanup_all;
 	}
+	f->f_mode |= FMODE_OPENED;
 	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(inode);
 	if ((f->f_mode & FMODE_READ) &&
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c4ca4c9c1130..05f34726e29c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -148,6 +148,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* Has write method(s) */
 #define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
 
+#define FMODE_OPENED		((__force fmode_t)0x80000)
+
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
-- 
cgit v1.2.3


From 4d27f3266f14e4d1d13125ce32cb49a40f3122c3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 11:14:39 -0400
Subject: fold put_filp() into fput()

Just check FMODE_OPENED in __fput() and be done with that...

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 15 +++++----------
 fs/namei.c           |  4 ++--
 fs/open.c            | 11 +++--------
 include/linux/file.h |  1 -
 4 files changed, 10 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index d664d10acfeb..9b70ed2bbc4e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -192,6 +192,9 @@ static void __fput(struct file *file)
 	struct vfsmount *mnt = file->f_path.mnt;
 	struct inode *inode = file->f_inode;
 
+	if (unlikely(!(file->f_mode & FMODE_OPENED)))
+		goto out;
+
 	might_sleep();
 
 	fsnotify_close(file);
@@ -221,12 +224,10 @@ static void __fput(struct file *file)
 		put_write_access(inode);
 		__mnt_drop_write(mnt);
 	}
-	file->f_path.dentry = NULL;
-	file->f_path.mnt = NULL;
-	file->f_inode = NULL;
-	file_free(file);
 	dput(dentry);
 	mntput(mnt);
+out:
+	file_free(file);
 }
 
 static LLIST_HEAD(delayed_fput_list);
@@ -301,12 +302,6 @@ void __fput_sync(struct file *file)
 
 EXPORT_SYMBOL(fput);
 
-void put_filp(struct file *file)
-{
-	if (atomic_long_dec_and_test(&file->f_count))
-		file_free(file);
-}
-
 void __init files_init(void)
 {
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
diff --git a/fs/namei.c b/fs/namei.c
index 3cf02804d5ff..503c4b968415 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3531,7 +3531,7 @@ static struct file *path_openat(struct nameidata *nd,
 
 	s = path_init(nd, flags);
 	if (IS_ERR(s)) {
-		put_filp(file);
+		fput(file);
 		return ERR_CAST(s);
 	}
 	while (!(error = link_path_walk(s, nd)) &&
@@ -3547,7 +3547,7 @@ static struct file *path_openat(struct nameidata *nd,
 out2:
 	if (!(opened & FILE_OPENED)) {
 		BUG_ON(!error);
-		put_filp(file);
+		fput(file);
 	}
 	if (unlikely(error)) {
 		if (error == -EOPENSTALE) {
diff --git a/fs/open.c b/fs/open.c
index f3c6cb6a57b9..3d09b823f12b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -921,15 +921,10 @@ struct file *dentry_open(const struct path *path, int flags,
 	f = alloc_empty_file(flags, cred);
 	if (!IS_ERR(f)) {
 		error = vfs_open(path, f);
-		if (!error) {
-			/* from now on we need fput() to dispose of f */
+		if (!error)
 			error = open_check_o_direct(f);
-			if (error) {
-				fput(f);
-				f = ERR_PTR(error);
-			}
-		} else { 
-			put_filp(f);
+		if (error) {
+			fput(f);
 			f = ERR_PTR(error);
 		}
 	}
diff --git a/include/linux/file.h b/include/linux/file.h
index 6d34a1262b31..aed45d69811e 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -78,7 +78,6 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
 extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
 extern void set_close_on_exec(unsigned int fd, int flag);
 extern bool get_close_on_exec(unsigned int fd);
-extern void put_filp(struct file *);
 extern int get_unused_fd_flags(unsigned flags);
 extern void put_unused_fd(unsigned int fd);
 
-- 
cgit v1.2.3


From 73a09dd94377e4b186b300bd5461920710c7c3d5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:22:02 -0400
Subject: introduce FMODE_CREATED and switch to it

Parallel to FILE_CREATED, goes into ->f_mode instead of *opened.
NFS is a bit of a wart here - it doesn't have file at the point
where FILE_CREATED used to be set, so we need to propagate it
there (for now).  IMA is another one (here and everywhere)...

Note that this needs do_dentry_open() to leave old bits in ->f_mode
alone - we want it to preserve FMODE_CREATED if it had been already
set (no other bit can be there).

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      |  2 +-
 fs/9p/vfs_inode_dotl.c |  2 +-
 fs/ceph/file.c         |  2 +-
 fs/cifs/dir.c          |  2 +-
 fs/fuse/dir.c          |  2 +-
 fs/gfs2/inode.c        |  2 +-
 fs/namei.c             | 15 ++++++++-------
 fs/nfs/dir.c           |  5 ++++-
 fs/nfs/nfs4proc.c      |  2 +-
 include/linux/fs.h     |  1 +
 10 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 42e102e2e74a..566929792480 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -925,7 +925,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
 		v9fs_cache_inode_set_cookie(d_inode(dentry), file);
 
-	*opened |= FILE_CREATED;
+	file->f_mode |= FMODE_CREATED;
 out:
 	dput(res);
 	return err;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 7f6ae21a27b3..ee65db5c7eb0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -358,7 +358,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	file->private_data = ofid;
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
 		v9fs_cache_inode_set_cookie(inode, file);
-	*opened |= FILE_CREATED;
+	file->f_mode |= FMODE_CREATED;
 out:
 	v9fs_put_acl(dacl, pacl);
 	dput(res);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ad0bed99b1d5..38a63fff7903 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -507,7 +507,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		dout("atomic_open finish_open on dn %p\n", dn);
 		if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
 			ceph_init_inode_acls(d_inode(dentry), &acls);
-			*opened |= FILE_CREATED;
+			file->f_mode |= FMODE_CREATED;
 		}
 		err = finish_open(file, dentry, ceph_open, opened);
 	}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ddae52bd1993..21d7e393900e 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -539,7 +539,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	}
 
 	if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
-		*opened |= FILE_CREATED;
+		file->f_mode |= FMODE_CREATED;
 
 	rc = finish_open(file, direntry, generic_file_open, opened);
 	if (rc) {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 56231b31f806..d4bdcf51e6cb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -508,7 +508,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 		goto no_open;
 
 	/* Only creates */
-	*opened |= FILE_CREATED;
+	file->f_mode |= FMODE_CREATED;
 
 	if (fc->no_create)
 		goto mknod;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 67c588edf8d8..4aba00a6004b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -767,7 +767,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	mark_inode_dirty(inode);
 	d_instantiate(dentry, inode);
 	if (file) {
-		*opened |= FILE_CREATED;
+		file->f_mode |= FMODE_CREATED;
 		error = finish_open(file, dentry, gfs2_open_common, opened);
 	}
 	gfs2_glock_dq_uninit(ghs);
diff --git a/fs/namei.c b/fs/namei.c
index 8a1ae074c1c1..4bd7cc0d7522 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3061,7 +3061,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 		 * permission here.
 		 */
 		int acc_mode = op->acc_mode;
-		if (*opened & FILE_CREATED) {
+		if (file->f_mode & FMODE_CREATED) {
 			WARN_ON(!(open_flag & O_CREAT));
 			fsnotify_create(dir, dentry);
 			acc_mode = 0;
@@ -3077,7 +3077,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 				dput(dentry);
 				dentry = file->f_path.dentry;
 			}
-			if (*opened & FILE_CREATED)
+			if (file->f_mode & FMODE_CREATED)
 				fsnotify_create(dir, dentry);
 			if (unlikely(d_is_negative(dentry))) {
 				error = -ENOENT;
@@ -3126,7 +3126,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
 	if (unlikely(IS_DEADDIR(dir_inode)))
 		return -ENOENT;
 
-	*opened &= ~FILE_CREATED;
+	file->f_mode &= ~FMODE_CREATED;
 	dentry = d_lookup(dir, &nd->last);
 	for (;;) {
 		if (!dentry) {
@@ -3211,7 +3211,7 @@ no_open:
 
 	/* Negative dentry, just create the file */
 	if (!dentry->d_inode && (open_flag & O_CREAT)) {
-		*opened |= FILE_CREATED;
+		file->f_mode |= FMODE_CREATED;
 		audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
 		if (!dir_inode->i_op->create) {
 			error = -EACCES;
@@ -3318,7 +3318,7 @@ static int do_last(struct nameidata *nd,
 		if (error)
 			goto out;
 
-		if ((*opened & FILE_CREATED) ||
+		if ((file->f_mode & FMODE_CREATED) ||
 		    !S_ISREG(file_inode(file)->i_mode))
 			will_truncate = false;
 
@@ -3326,7 +3326,7 @@ static int do_last(struct nameidata *nd,
 		goto opened;
 	}
 
-	if (*opened & FILE_CREATED) {
+	if (file->f_mode & FMODE_CREATED) {
 		/* Don't check for write permission, don't truncate */
 		open_flag &= ~O_TRUNC;
 		will_truncate = false;
@@ -3400,7 +3400,8 @@ finish_open_created:
 	if (error)
 		goto out;
 opened:
-	error = ima_file_check(file, op->acc_mode, *opened);
+	error = ima_file_check(file, op->acc_mode,
+				file->f_mode & FMODE_CREATED ? FILE_CREATED : 0);
 	if (!error && will_truncate)
 		error = handle_truncate(file);
 out:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7a9c14426855..0ac50983fc4e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1461,6 +1461,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	struct inode *inode;
 	unsigned int lookup_flags = 0;
 	bool switched = false;
+	int created = 0;
 	int err;
 
 	/* Expect a negative dentry */
@@ -1521,7 +1522,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
-	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
+	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
+	if (created)
+		file->f_mode |= FMODE_CREATED;
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ed45090e4df6..2c4df0ffbca1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2951,7 +2951,7 @@ static int _nfs4_do_open(struct inode *dir,
 		}
 	}
 	if (opened && opendata->file_created)
-		*opened |= FILE_CREATED;
+		*opened = 1;
 
 	if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
 		*ctx_th = opendata->f_attr.mdsthreshold;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 05f34726e29c..ca668c7e48a7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -149,6 +149,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
 
 #define FMODE_OPENED		((__force fmode_t)0x80000)
+#define FMODE_CREATED		((__force fmode_t)0x100000)
 
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
-- 
cgit v1.2.3


From 6035a27b25ab9dadc8c3d5c5df5eae3fca62fc95 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:40:10 -0400
Subject: IMA: don't propagate opened through the entire thing

just check ->f_mode in ima_appraise_measurement()

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                            |  3 +--
 fs/nfsd/vfs.c                         |  2 +-
 include/linux/ima.h                   |  4 ++--
 security/integrity/ima/ima.h          |  4 ++--
 security/integrity/ima/ima_appraise.c |  4 ++--
 security/integrity/ima/ima_main.c     | 16 ++++++++--------
 6 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 4bd7cc0d7522..d2aeb282ed05 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3400,8 +3400,7 @@ finish_open_created:
 	if (error)
 		goto out;
 opened:
-	error = ima_file_check(file, op->acc_mode,
-				file->f_mode & FMODE_CREATED ? FILE_CREATED : 0);
+	error = ima_file_check(file, op->acc_mode);
 	if (!error && will_truncate)
 		error = handle_truncate(file);
 out:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b0555d7d8200..55a099e47ba2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -763,7 +763,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 		goto out_nfserr;
 	}
 
-	host_err = ima_file_check(file, may_flags, 0);
+	host_err = ima_file_check(file, may_flags);
 	if (host_err) {
 		fput(file);
 		goto out_nfserr;
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e4647e0eb60..d9ba3fc363b7 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -16,7 +16,7 @@ struct linux_binprm;
 
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
-extern int ima_file_check(struct file *file, int mask, int opened);
+extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern int ima_read_file(struct file *file, enum kernel_read_file_id id);
@@ -34,7 +34,7 @@ static inline int ima_bprm_check(struct linux_binprm *bprm)
 	return 0;
 }
 
-static inline int ima_file_check(struct file *file, int mask, int opened)
+static inline int ima_file_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 354bb5716ce3..e4c1a236976c 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -238,7 +238,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 			     struct integrity_iint_cache *iint,
 			     struct file *file, const unsigned char *filename,
 			     struct evm_ima_xattr_data *xattr_value,
-			     int xattr_len, int opened);
+			     int xattr_len);
 int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func);
 void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
 enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
@@ -254,7 +254,7 @@ static inline int ima_appraise_measurement(enum ima_hooks func,
 					   struct file *file,
 					   const unsigned char *filename,
 					   struct evm_ima_xattr_data *xattr_value,
-					   int xattr_len, int opened)
+					   int xattr_len)
 {
 	return INTEGRITY_UNKNOWN;
 }
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 8bd7a0733e51..deec1804a00a 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -212,7 +212,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 			     struct integrity_iint_cache *iint,
 			     struct file *file, const unsigned char *filename,
 			     struct evm_ima_xattr_data *xattr_value,
-			     int xattr_len, int opened)
+			     int xattr_len)
 {
 	static const char op[] = "appraise_data";
 	const char *cause = "unknown";
@@ -231,7 +231,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 		cause = iint->flags & IMA_DIGSIG_REQUIRED ?
 				"IMA-signature-required" : "missing-hash";
 		status = INTEGRITY_NOLABEL;
-		if (opened & FILE_CREATED)
+		if (file->f_mode & FMODE_CREATED)
 			iint->flags |= IMA_NEW_FILE;
 		if ((iint->flags & IMA_NEW_FILE) &&
 		    (!(iint->flags & IMA_DIGSIG_REQUIRED) ||
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index dca44cf7838e..b286f37712d5 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -168,7 +168,7 @@ void ima_file_free(struct file *file)
 
 static int process_measurement(struct file *file, const struct cred *cred,
 			       u32 secid, char *buf, loff_t size, int mask,
-			       enum ima_hooks func, int opened)
+			       enum ima_hooks func)
 {
 	struct inode *inode = file_inode(file);
 	struct integrity_iint_cache *iint = NULL;
@@ -294,7 +294,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
 	if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) {
 		inode_lock(inode);
 		rc = ima_appraise_measurement(func, iint, file, pathname,
-					      xattr_value, xattr_len, opened);
+					      xattr_value, xattr_len);
 		inode_unlock(inode);
 	}
 	if (action & IMA_AUDIT)
@@ -338,7 +338,7 @@ int ima_file_mmap(struct file *file, unsigned long prot)
 	if (file && (prot & PROT_EXEC)) {
 		security_task_getsecid(current, &secid);
 		return process_measurement(file, current_cred(), secid, NULL,
-					   0, MAY_EXEC, MMAP_CHECK, 0);
+					   0, MAY_EXEC, MMAP_CHECK);
 	}
 
 	return 0;
@@ -364,13 +364,13 @@ int ima_bprm_check(struct linux_binprm *bprm)
 
 	security_task_getsecid(current, &secid);
 	ret = process_measurement(bprm->file, current_cred(), secid, NULL, 0,
-				  MAY_EXEC, BPRM_CHECK, 0);
+				  MAY_EXEC, BPRM_CHECK);
 	if (ret)
 		return ret;
 
 	security_cred_getsecid(bprm->cred, &secid);
 	return process_measurement(bprm->file, bprm->cred, secid, NULL, 0,
-				   MAY_EXEC, CREDS_CHECK, 0);
+				   MAY_EXEC, CREDS_CHECK);
 }
 
 /**
@@ -383,14 +383,14 @@ int ima_bprm_check(struct linux_binprm *bprm)
  * On success return 0.  On integrity appraisal error, assuming the file
  * is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
  */
-int ima_file_check(struct file *file, int mask, int opened)
+int ima_file_check(struct file *file, int mask)
 {
 	u32 secid;
 
 	security_task_getsecid(current, &secid);
 	return process_measurement(file, current_cred(), secid, NULL, 0,
 				   mask & (MAY_READ | MAY_WRITE | MAY_EXEC |
-					   MAY_APPEND), FILE_CHECK, opened);
+					   MAY_APPEND), FILE_CHECK);
 }
 EXPORT_SYMBOL_GPL(ima_file_check);
 
@@ -493,7 +493,7 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size,
 	func = read_idmap[read_id] ?: FILE_CHECK;
 	security_task_getsecid(current, &secid);
 	return process_measurement(file, current_cred(), secid, buf, size,
-				   MAY_READ, func, 0);
+				   MAY_READ, func);
 }
 
 static int __init init_ima(void)
-- 
cgit v1.2.3


From be12af3ef5e61ebc44d065e121424ac605d7bb8e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 11:44:56 -0400
Subject: getting rid of 'opened' argument of ->atomic_open() - part 1

'opened' argument of finish_open() is unused.  Kill it.

Signed-off-by Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      | 2 +-
 fs/9p/vfs_inode_dotl.c | 2 +-
 fs/ceph/file.c         | 2 +-
 fs/cifs/dir.c          | 2 +-
 fs/fuse/dir.c          | 2 +-
 fs/gfs2/inode.c        | 6 +++---
 fs/namei.c             | 2 +-
 fs/nfs/dir.c           | 2 +-
 fs/open.c              | 3 +--
 include/linux/fs.h     | 3 +--
 10 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 566929792480..7b6ff3275d9c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -917,7 +917,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		v9inode->writeback_fid = (void *) inode_fid;
 	}
 	mutex_unlock(&v9inode->v_mutex);
-	err = finish_open(file, dentry, generic_file_open, opened);
+	err = finish_open(file, dentry, generic_file_open);
 	if (err)
 		goto error;
 
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index ee65db5c7eb0..c6939b7cb18c 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -352,7 +352,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	}
 	mutex_unlock(&v9inode->v_mutex);
 	/* Since we are opening a file, assign the open fid to the file */
-	err = finish_open(file, dentry, generic_file_open, opened);
+	err = finish_open(file, dentry, generic_file_open);
 	if (err)
 		goto err_clunk_old_fid;
 	file->private_data = ofid;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 38a63fff7903..38b28cb2fac1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -509,7 +509,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 			ceph_init_inode_acls(d_inode(dentry), &acls);
 			file->f_mode |= FMODE_CREATED;
 		}
-		err = finish_open(file, dentry, ceph_open, opened);
+		err = finish_open(file, dentry, ceph_open);
 	}
 out_req:
 	if (!req->r_err && req->r_target_inode)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 21d7e393900e..891bfd62e67a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -541,7 +541,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 		file->f_mode |= FMODE_CREATED;
 
-	rc = finish_open(file, direntry, generic_file_open, opened);
+	rc = finish_open(file, direntry, generic_file_open);
 	if (rc) {
 		if (server->ops->close)
 			server->ops->close(xid, tcon, &fid);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d4bdcf51e6cb..a5b1f5ff8cb7 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -469,7 +469,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	d_instantiate(entry, inode);
 	fuse_change_entry_timeout(entry, &outentry);
 	fuse_invalidate_attr(dir);
-	err = finish_open(file, entry, generic_file_open, opened);
+	err = finish_open(file, entry, generic_file_open);
 	if (err) {
 		fuse_sync_release(ff, flags);
 	} else {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 4aba00a6004b..59f695e96d63 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -626,7 +626,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		error = 0;
 		if (file) {
 			if (S_ISREG(inode->i_mode))
-				error = finish_open(file, dentry, gfs2_open_common, opened);
+				error = finish_open(file, dentry, gfs2_open_common);
 			else
 				error = finish_no_open(file, NULL);
 		}
@@ -768,7 +768,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	d_instantiate(dentry, inode);
 	if (file) {
 		file->f_mode |= FMODE_CREATED;
-		error = finish_open(file, dentry, gfs2_open_common, opened);
+		error = finish_open(file, dentry, gfs2_open_common);
 	}
 	gfs2_glock_dq_uninit(ghs);
 	gfs2_glock_dq_uninit(ghs + 1);
@@ -866,7 +866,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
 		return d;
 	}
 	if (file && S_ISREG(inode->i_mode))
-		error = finish_open(file, dentry, gfs2_open_common, opened);
+		error = finish_open(file, dentry, gfs2_open_common);
 
 	gfs2_glock_dq_uninit(&gh);
 	if (error) {
diff --git a/fs/namei.c b/fs/namei.c
index d2aeb282ed05..117b118853f2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3475,7 +3475,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
 	if (error)
 		goto out2;
 	file->f_path.mnt = path.mnt;
-	error = finish_open(file, child, NULL, opened);
+	error = finish_open(file, child, NULL);
 out2:
 	mnt_drop_write(path.mnt);
 out:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 0ac50983fc4e..22176a3818d5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1439,7 +1439,7 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
 {
 	int err;
 
-	err = finish_open(file, dentry, do_open, opened);
+	err = finish_open(file, dentry, do_open);
 	if (err)
 		goto out;
 	if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
diff --git a/fs/open.c b/fs/open.c
index d2030a3c5c52..dbaac9efc7fc 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -843,8 +843,7 @@ cleanup_file:
  * Returns zero on success or -errno if the open failed.
  */
 int finish_open(struct file *file, struct dentry *dentry,
-		int (*open)(struct inode *, struct file *),
-		int *opened)
+		int (*open)(struct inode *, struct file *))
 {
 	BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ca668c7e48a7..70be3e4c26ac 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2441,8 +2441,7 @@ enum {
 	FILE_OPENED = 2
 };
 extern int finish_open(struct file *file, struct dentry *dentry,
-			int (*open)(struct inode *, struct file *),
-			int *opened);
+			int (*open)(struct inode *, struct file *));
 extern int finish_no_open(struct file *file, struct dentry *dentry);
 
 /* fs/ioctl.c */
-- 
cgit v1.2.3


From 44907d79002466049fdbb8ef15730d185e0808b4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:32:02 -0400
Subject: get rid of 'opened' argument of ->atomic_open() - part 3

now it can be done...

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      | 3 +--
 fs/9p/vfs_inode_dotl.c | 3 +--
 fs/bad_inode.c         | 2 +-
 fs/ceph/file.c         | 3 +--
 fs/ceph/super.h        | 3 +--
 fs/cifs/cifsfs.h       | 3 +--
 fs/cifs/dir.c          | 3 +--
 fs/fuse/dir.c          | 2 +-
 fs/gfs2/inode.c        | 3 +--
 fs/namei.c             | 3 +--
 fs/nfs/dir.c           | 2 +-
 fs/nfs/nfs4_fs.h       | 2 +-
 include/linux/fs.h     | 2 +-
 13 files changed, 13 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7b6ff3275d9c..85ff859d3af5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -859,8 +859,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 static int
 v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
-		     struct file *file, unsigned flags, umode_t mode,
-		     int *opened)
+		     struct file *file, unsigned flags, umode_t mode)
 {
 	int err;
 	u32 perm;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index c6939b7cb18c..4823e1c46999 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -241,8 +241,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 
 static int
 v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
-			  struct file *file, unsigned flags, umode_t omode,
-			  int *opened)
+			  struct file *file, unsigned flags, umode_t omode)
 {
 	int err = 0;
 	kgid_t gid;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 125e8bbd22a2..8035d2a44561 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -134,7 +134,7 @@ static int bad_inode_update_time(struct inode *inode, struct timespec64 *time,
 
 static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry,
 				 struct file *file, unsigned int open_flag,
-				 umode_t create_mode, int *opened)
+				 umode_t create_mode)
 {
 	return -EIO;
 }
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 38b28cb2fac1..e2679e8a2535 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -429,8 +429,7 @@ out:
  * file or symlink, return 1 so the VFS can retry.
  */
 int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
-		     struct file *file, unsigned flags, umode_t mode,
-		     int *opened)
+		     struct file *file, unsigned flags, umode_t mode)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a7077a0c989f..971328b99ede 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1025,8 +1025,7 @@ extern const struct file_operations ceph_file_fops;
 extern int ceph_renew_caps(struct inode *inode);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
-			    struct file *file, unsigned flags, umode_t mode,
-			    int *opened);
+			    struct file *file, unsigned flags, umode_t mode);
 extern int ceph_release(struct inode *inode, struct file *filp);
 extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
 				  char *data, size_t len);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 5f0231803431..f3a78efc3109 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -65,8 +65,7 @@ extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, umode_t,
 		       bool excl);
 extern int cifs_atomic_open(struct inode *, struct dentry *,
-			    struct file *, unsigned, umode_t,
-			    int *);
+			    struct file *, unsigned, umode_t);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
 				  unsigned int);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 891bfd62e67a..3713d22b95a7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -465,8 +465,7 @@ out_err:
 
 int
 cifs_atomic_open(struct inode *inode, struct dentry *direntry,
-		 struct file *file, unsigned oflags, umode_t mode,
-		 int *opened)
+		 struct file *file, unsigned oflags, umode_t mode)
 {
 	int rc;
 	unsigned int xid;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b8d7e9d423c8..c979329311c8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -489,7 +489,7 @@ out_err:
 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 			    struct file *file, unsigned flags,
-			    umode_t mode, int *opened)
+			    umode_t mode)
 {
 	int err;
 	struct fuse_conn *fc = get_fuse_conn(dir);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 15e2a8a3b917..648f0ca1ad57 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1228,14 +1228,13 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
  * @file: The proposed new struct file
  * @flags: open flags
  * @mode: File mode
- * @opened: Flag to say whether the file has been opened or not
  *
  * Returns: error code or 0 for success
  */
 
 static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
 			    struct file *file, unsigned flags,
-			    umode_t mode, int *opened)
+			    umode_t mode)
 {
 	struct dentry *d;
 	bool excl = !!(flags & O_EXCL);
diff --git a/fs/namei.c b/fs/namei.c
index 117b118853f2..1da272bf8ed3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3052,8 +3052,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 	file->f_path.dentry = DENTRY_NOT_SET;
 	file->f_path.mnt = nd->path.mnt;
 	error = dir->i_op->atomic_open(dir, dentry, file,
-				       open_to_namei_flags(open_flag),
-				       mode, opened);
+				       open_to_namei_flags(open_flag), mode);
 	d_lookup_done(dentry);
 	if (!error) {
 		/*
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 71ae3cc3e53a..f447b1a24350 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1451,7 +1451,7 @@ out:
 
 int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		    struct file *file, unsigned open_flags,
-		    umode_t mode, int *opened)
+		    umode_t mode)
 {
 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 	struct nfs_open_context *ctx;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 137e18abb7e7..51beb6e38c90 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -258,7 +258,7 @@ extern const struct dentry_operations nfs4_dentry_operations;
 
 /* dir.c */
 int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
-		    unsigned, umode_t, int *);
+		    unsigned, umode_t);
 
 /* super.c */
 extern struct file_system_type nfs4_fs_type;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70be3e4c26ac..c25896b30e9f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1781,7 +1781,7 @@ struct inode_operations {
 	int (*update_time)(struct inode *, struct timespec64 *, int);
 	int (*atomic_open)(struct inode *, struct dentry *,
 			   struct file *, unsigned open_flag,
-			   umode_t create_mode, int *opened);
+			   umode_t create_mode);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
 } ____cacheline_aligned;
-- 
cgit v1.2.3


From dbae8f2ca2f0586f4b80201c78ff0aed2a012ab5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 17:51:47 -0400
Subject: kill FILE_{CREATED,OPENED}

no users left

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c25896b30e9f..bd904c496878 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2436,10 +2436,6 @@ extern struct filename *getname(const char __user *);
 extern struct filename *getname_kernel(const char *);
 extern void putname(struct filename *name);
 
-enum {
-	FILE_CREATED = 1,
-	FILE_OPENED = 2
-};
 extern int finish_open(struct file *file, struct dentry *dentry,
 			int (*open)(struct inode *, struct file *));
 extern int finish_no_open(struct file *file, struct dentry *dentry);
-- 
cgit v1.2.3


From d93aa9d82aea80b80f225dbf9c7986df444d8106 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2018 09:40:05 -0400
Subject: new wrapper: alloc_file_pseudo()

takes inode, vfsmount, name, O_... flags and file_operations and
either returns a new struct file (in which case inode reference we
held is consumed) or returns ERR_PTR(), in which case no refcounts
are altered.

converted aio_private_file() and sock_alloc_file() to it

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c             | 20 ++++----------------
 fs/file_table.c      | 27 +++++++++++++++++++++++++++
 include/linux/file.h |  3 +++
 net/socket.c         | 28 +++++-----------------------
 4 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index 9eea53887d6c..c3a8bac16374 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -215,9 +215,7 @@ static const struct address_space_operations aio_ctx_aops;
 
 static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 {
-	struct qstr this = QSTR_INIT("[aio]", 5);
 	struct file *file;
-	struct path path;
 	struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
@@ -226,27 +224,17 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 	inode->i_mapping->private_data = ctx;
 	inode->i_size = PAGE_SIZE * nr_pages;
 
-	path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
-	if (!path.dentry) {
-		iput(inode);
-		return ERR_PTR(-ENOMEM);
-	}
-	path.mnt = mntget(aio_mnt);
-
-	d_instantiate(path.dentry, inode);
-	file = alloc_file(&path, O_RDWR, &aio_ring_fops);
+	file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
+				O_RDWR, &aio_ring_fops);
 	if (IS_ERR(file))
-		path_put(&path);
+		iput(inode);
 	return file;
 }
 
 static struct dentry *aio_mount(struct file_system_type *fs_type,
 				int flags, const char *dev_name, void *data)
 {
-	static const struct dentry_operations ops = {
-		.d_dname	= simple_dname,
-	};
-	struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
+	struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL,
 					   AIO_RING_MAGIC);
 
 	if (!IS_ERR(root))
diff --git a/fs/file_table.c b/fs/file_table.c
index 9b70ed2bbc4e..6b3723909342 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -184,6 +184,33 @@ struct file *alloc_file(const struct path *path, int flags,
 }
 EXPORT_SYMBOL(alloc_file);
 
+struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
+				const char *name, int flags,
+				const struct file_operations *fops)
+{
+	static const struct dentry_operations anon_ops = {
+		.d_dname = simple_dname
+	};
+	struct qstr this = QSTR_INIT(name, strlen(name));
+	struct path path;
+	struct file *file;
+
+	path.dentry = d_alloc_pseudo(mnt->mnt_sb, &this);
+	if (!path.dentry)
+		return ERR_PTR(-ENOMEM);
+	if (!mnt->mnt_sb->s_d_op)
+		d_set_d_op(path.dentry, &anon_ops);
+	path.mnt = mntget(mnt);
+	d_instantiate(path.dentry, inode);
+	file = alloc_file(&path, flags, fops);
+	if (IS_ERR(file)) {
+		ihold(inode);
+		path_put(&path);
+	}
+	return file;
+}
+EXPORT_SYMBOL(alloc_file_pseudo);
+
 /* the real guts of fput() - releasing the last reference to file
  */
 static void __fput(struct file *file)
diff --git a/include/linux/file.h b/include/linux/file.h
index aed45d69811e..5b25388f2f79 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -17,9 +17,12 @@ extern void fput(struct file *);
 struct file_operations;
 struct vfsmount;
 struct dentry;
+struct inode;
 struct path;
 extern struct file *alloc_file(const struct path *, int flags,
 	const struct file_operations *fop);
+extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
+	const char *, int flags, const struct file_operations *);
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
diff --git a/net/socket.c b/net/socket.c
index 2cdbe8f71b7f..4cf3568caf9f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -391,33 +391,15 @@ static struct file_system_type sock_fs_type = {
 
 struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 {
-	struct qstr name = { .name = "" };
-	struct path path;
 	struct file *file;
 
-	if (dname) {
-		name.name = dname;
-		name.len = strlen(name.name);
-	} else if (sock->sk) {
-		name.name = sock->sk->sk_prot_creator->name;
-		name.len = strlen(name.name);
-	}
-	path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
-	if (unlikely(!path.dentry)) {
-		sock_release(sock);
-		return ERR_PTR(-ENOMEM);
-	}
-	path.mnt = mntget(sock_mnt);
-
-	d_instantiate(path.dentry, SOCK_INODE(sock));
+	if (!dname)
+		dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
 
-	file = alloc_file(&path, O_RDWR | (flags & O_NONBLOCK),
-		  &socket_file_ops);
+	file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
+				O_RDWR | (flags & O_NONBLOCK),
+				&socket_file_ops);
 	if (IS_ERR(file)) {
-		/* drop dentry, keep inode for a bit */
-		ihold(d_inode(path.dentry));
-		path_put(&path);
-		/* ... and now kill it properly */
 		sock_release(sock);
 		return file;
 	}
-- 
cgit v1.2.3


From 183266f26f45a47958afb5c9aa1b3d4651e2eb8c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 17 Jun 2018 14:15:10 -0400
Subject: new helper: alloc_file_clone()

alloc_file_clone(old_file, mode, ops): create a new struct file with
->f_path equal to that of old_file.  pipe converted.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 11 +++++++++++
 fs/pipe.c            |  6 ++----
 include/linux/file.h |  2 ++
 3 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index 6b3723909342..78b067ddb386 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -211,6 +211,17 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
 }
 EXPORT_SYMBOL(alloc_file_pseudo);
 
+struct file *alloc_file_clone(struct file *base, int flags,
+				const struct file_operations *fops)
+{
+	struct file *f = alloc_file(&base->f_path, flags, fops);
+	if (!IS_ERR(f)) {
+		path_get(&f->f_path);
+		f->f_mapping = base->f_mapping;
+	}
+	return f;
+}
+
 /* the real guts of fput() - releasing the last reference to file
  */
 static void __fput(struct file *file)
diff --git a/fs/pipe.c b/fs/pipe.c
index 9701bd3458d1..5f50070774bc 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -761,15 +761,13 @@ int create_pipe_files(struct file **res, int flags)
 
 	f->private_data = inode->i_pipe;
 
-	res[0] = alloc_file(&f->f_path, O_RDONLY | (flags & O_NONBLOCK),
-			&pipefifo_fops);
+	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
+				  &pipefifo_fops);
 	if (IS_ERR(res[0])) {
 		put_pipe_info(inode, inode->i_pipe);
 		fput(f);
 		return PTR_ERR(res[0]);
 	}
-
-	path_get(&f->f_path);
 	res[0]->private_data = inode->i_pipe;
 	res[1] = f;
 	return 0;
diff --git a/include/linux/file.h b/include/linux/file.h
index 5b25388f2f79..60914843c737 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -23,6 +23,8 @@ extern struct file *alloc_file(const struct path *, int flags,
 	const struct file_operations *fop);
 extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
 	const char *, int flags, const struct file_operations *);
+extern struct file *alloc_file_clone(struct file *, int flags,
+	const struct file_operations *);
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
-- 
cgit v1.2.3


From ee1904ba44bd4a242b453e8fe179b374906da173 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 17 Jun 2018 14:21:27 -0400
Subject: make alloc_file() static

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 3 +--
 include/linux/file.h | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index 78b067ddb386..d6eccd04d703 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -157,7 +157,7 @@ over:
  * @flags: O_... flags with which the new file will be opened
  * @fop: the 'struct file_operations' for the new file
  */
-struct file *alloc_file(const struct path *path, int flags,
+static struct file *alloc_file(const struct path *path, int flags,
 		const struct file_operations *fop)
 {
 	struct file *file;
@@ -182,7 +182,6 @@ struct file *alloc_file(const struct path *path, int flags,
 		i_readcount_inc(path->dentry->d_inode);
 	return file;
 }
-EXPORT_SYMBOL(alloc_file);
 
 struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
 				const char *name, int flags,
diff --git a/include/linux/file.h b/include/linux/file.h
index 60914843c737..6b2fb032416c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -19,8 +19,6 @@ struct vfsmount;
 struct dentry;
 struct inode;
 struct path;
-extern struct file *alloc_file(const struct path *, int flags,
-	const struct file_operations *fop);
 extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
 	const char *, int flags, const struct file_operations *);
 extern struct file *alloc_file_clone(struct file *, int flags,
-- 
cgit v1.2.3


From 0238d2b4a4c7bb1b2268e1ef789629878e16ab02 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Thu, 12 Jul 2018 09:57:00 +0800
Subject: serial: 8250: introduce get_divisor() and set_divisor() hook

Add these two hooks so that they can be overridden with driver specific
implementations.

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c |  4 ++++
 drivers/tty/serial/8250/8250_port.c | 27 +++++++++++++++++++++++----
 include/linux/serial_core.h         |  7 +++++++
 3 files changed, 34 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 9342fc2ee7df..a0bb77290747 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -1023,6 +1023,10 @@ int serial8250_register_8250_port(struct uart_8250_port *up)
 			uart->port.get_mctrl = up->port.get_mctrl;
 		if (up->port.set_mctrl)
 			uart->port.set_mctrl = up->port.set_mctrl;
+		if (up->port.get_divisor)
+			uart->port.get_divisor = up->port.get_divisor;
+		if (up->port.set_divisor)
+			uart->port.set_divisor = up->port.set_divisor;
 		if (up->port.startup)
 			uart->port.startup = up->port.startup;
 		if (up->port.shutdown)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 709fe6b4265c..ce0dc17f18ee 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2498,9 +2498,9 @@ static unsigned int npcm_get_divisor(struct uart_8250_port *up,
 	return DIV_ROUND_CLOSEST(port->uartclk, 16 * baud + 2) - 2;
 }
 
-static unsigned int serial8250_get_divisor(struct uart_port *port,
-					   unsigned int baud,
-					   unsigned int *frac)
+static unsigned int serial8250_do_get_divisor(struct uart_port *port,
+					      unsigned int baud,
+					      unsigned int *frac)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 	unsigned int quot;
@@ -2532,6 +2532,16 @@ static unsigned int serial8250_get_divisor(struct uart_port *port,
 	return quot;
 }
 
+static unsigned int serial8250_get_divisor(struct uart_port *port,
+					   unsigned int baud,
+					   unsigned int *frac)
+{
+	if (port->get_divisor)
+		return port->get_divisor(port, baud, frac);
+
+	return serial8250_do_get_divisor(port, baud, frac);
+}
+
 static unsigned char serial8250_compute_lcr(struct uart_8250_port *up,
 					    tcflag_t c_cflag)
 {
@@ -2570,7 +2580,7 @@ static unsigned char serial8250_compute_lcr(struct uart_8250_port *up,
 	return cval;
 }
 
-static void serial8250_set_divisor(struct uart_port *port, unsigned int baud,
+static void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
 			    unsigned int quot, unsigned int quot_frac)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
@@ -2603,6 +2613,15 @@ static void serial8250_set_divisor(struct uart_port *port, unsigned int baud,
 	}
 }
 
+static void serial8250_set_divisor(struct uart_port *port, unsigned int baud,
+				   unsigned int quot, unsigned int quot_frac)
+{
+	if (port->set_divisor)
+		port->set_divisor(port, baud, quot, quot_frac);
+	else
+		serial8250_do_set_divisor(port, baud, quot, quot_frac);
+}
+
 static unsigned int serial8250_get_baud_rate(struct uart_port *port,
 					     struct ktermios *termios,
 					     struct ktermios *old)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 06ea4eeb09ab..406edae44ca3 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -127,6 +127,13 @@ struct uart_port {
 					     struct ktermios *);
 	unsigned int		(*get_mctrl)(struct uart_port *);
 	void			(*set_mctrl)(struct uart_port *, unsigned int);
+	unsigned int		(*get_divisor)(struct uart_port *,
+					       unsigned int baud,
+					       unsigned int *frac);
+	void			(*set_divisor)(struct uart_port *,
+					       unsigned int baud,
+					       unsigned int quot,
+					       unsigned int quot_frac);
 	int			(*startup)(struct uart_port *port);
 	void			(*shutdown)(struct uart_port *port);
 	void			(*throttle)(struct uart_port *port);
-- 
cgit v1.2.3


From 6226e5f31a786b58f33781852c897ac32a325ce6 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Thu, 12 Jul 2018 09:58:14 +0800
Subject: serial: 8250: export serial8250_do_set_divisor()

Some drivers could call serial8250_do_set_divisor() to complete its
own set_divisor routine. Export this symbol for code reusing.

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 5 +++--
 include/linux/serial_8250.h         | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index ce0dc17f18ee..945f8dc2d50f 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2580,8 +2580,8 @@ static unsigned char serial8250_compute_lcr(struct uart_8250_port *up,
 	return cval;
 }
 
-static void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
-			    unsigned int quot, unsigned int quot_frac)
+void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
+			       unsigned int quot, unsigned int quot_frac)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 
@@ -2612,6 +2612,7 @@ static void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
 		serial_port_out(port, 0x2, quot_frac);
 	}
 }
+EXPORT_SYMBOL_GPL(serial8250_do_set_divisor);
 
 static void serial8250_set_divisor(struct uart_port *port, unsigned int baud,
 				   unsigned int quot, unsigned int quot_frac)
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 76b9db71e489..18e21427bce4 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -160,6 +160,9 @@ extern void serial8250_do_shutdown(struct uart_port *port);
 extern void serial8250_do_pm(struct uart_port *port, unsigned int state,
 			     unsigned int oldstate);
 extern void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl);
+extern void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
+				      unsigned int quot,
+				      unsigned int quot_frac);
 extern int fsl8250_handle_irq(struct uart_port *port);
 int serial8250_handle_irq(struct uart_port *port, unsigned int iir);
 unsigned char serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr);
-- 
cgit v1.2.3


From 2abc77af89e17582db9039293c8ac881c8c96d79 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 12 Jul 2018 11:18:42 -0400
Subject: new helper: open_with_fake_path()

open a file by given inode, faking ->f_path.  Use with shitloads
of caution - at the very least you'd damn better make sure that
some dentry alias of that inode is pinned down by the path in
question.  Again, this is no general-purpose interface and I hope
it will eventually go away.  Right now overlayfs wants something
like that, but nothing else should.

Any out-of-tree code with bright idea of using this one *will*
eventually get hurt, with zero notice and great delight on my part.
I refuse to use EXPORT_SYMBOL_GPL(), especially in situations when
it's really EXPORT_SYMBOL_DONT_USE_IT(), but don't take that export
as "you are welcome to use it".

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c          | 18 ++++++++++++++++++
 include/linux/fs.h |  2 ++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/fs/open.c b/fs/open.c
index ee893240d199..dd15711eb658 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -925,6 +925,24 @@ struct file *dentry_open(const struct path *path, int flags,
 }
 EXPORT_SYMBOL(dentry_open);
 
+struct file *open_with_fake_path(const struct path *path, int flags,
+				struct inode *inode, const struct cred *cred)
+{
+	struct file *f = alloc_empty_file(flags, cred);
+	if (!IS_ERR(f)) {
+		int error;
+
+		f->f_path = *path;
+		error = do_dentry_open(f, inode, NULL);
+		if (error) {
+			fput(f);
+			f = ERR_PTR(error);
+		}
+	}
+	return f;
+}
+EXPORT_SYMBOL(open_with_fake_path);
+
 static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
 {
 	int lookup_flags = 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 05f34726e29c..4ff7b7012186 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2424,6 +2424,8 @@ extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
+extern struct file * open_with_fake_path(const struct path *, int,
+					 struct inode*, const struct cred *);
 static inline struct file *file_clone_open(struct file *file)
 {
 	return dentry_open(&file->f_path, file->f_flags, file->f_cred);
-- 
cgit v1.2.3


From a8802d97e73346bc81609df9dfba7d3306f40d87 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 11 Jul 2018 11:16:41 -0700
Subject: ktime: Provide typesafe ktime_to_ns()

Using ktime_to_ns() is nice to help backports to stable kernels.

Having a typesafe function instead of a macro avoid stupid typos
and waste of time tracking these typos.

Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lkml.kernel.org/r/20180711181641.10369-1-edumazet@google.com
---
 include/linux/ktime.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 5b9fddbaac41..b2bb44f87f5a 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -93,8 +93,11 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
 /* Map the ktime_t to timeval conversion to ns_to_timeval function */
 #define ktime_to_timeval(kt)		ns_to_timeval((kt))
 
-/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
-#define ktime_to_ns(kt)			(kt)
+/* Convert ktime_t to nanoseconds */
+static inline s64 ktime_to_ns(const ktime_t kt)
+{
+	return kt;
+}
 
 /**
  * ktime_compare - Compares two ktime_t variables for less, greater or equal
-- 
cgit v1.2.3


From cca9bab1b72cd2296097c75f59ef11ef80461279 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Jul 2018 12:16:12 +0200
Subject: tcp: use monotonic timestamps for PAWS

Using get_seconds() for timestamps is deprecated since it can lead
to overflows on 32-bit systems. While the interface generally doesn't
overflow until year 2106, the specific implementation of the TCP PAWS
algorithm breaks in 2038 when the intermediate signed 32-bit timestamps
overflow.

A related problem is that the local timestamps in CLOCK_REALTIME form
lead to unexpected behavior when settimeofday is called to set the system
clock backwards or forwards by more than 24 days.

While the first problem could be solved by using an overflow-safe method
of comparing the timestamps, a nicer solution is to use a monotonic
clocksource with ktime_get_seconds() that simply doesn't overflow (at
least not until 136 years after boot) and that doesn't change during
settimeofday().

To make 32-bit and 64-bit architectures behave the same way here, and
also save a few bytes in the tcp_options_received structure, I'm changing
the type to a 32-bit integer, which is now safe on all architectures.

Finally, the ts_recent_stamp field also (confusingly) gets used to store
a jiffies value in tcp_synq_overflow()/tcp_synq_no_recent_overflow().
This is currently safe, but changing the type to 32-bit requires
some small changes there to keep it working.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chtls/chtls_cm.c |  2 +-
 include/linux/tcp.h                     |  4 ++--
 include/net/tcp.h                       | 17 ++++++++++-------
 net/ipv4/tcp_input.c                    |  2 +-
 net/ipv4/tcp_ipv4.c                     |  3 ++-
 net/ipv4/tcp_minisocks.c                |  8 ++++----
 6 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index 2bb6f0380758..0997e166ea57 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -1673,7 +1673,7 @@ static void chtls_timewait(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->rcv_nxt++;
-	tp->rx_opt.ts_recent_stamp = get_seconds();
+	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 	tp->srtt_us = 0;
 	tcp_time_wait(sk, TCP_TIME_WAIT, 0);
 }
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3dbea6610304..58a8d7d71354 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -89,7 +89,7 @@ struct tcp_sack_block {
 
 struct tcp_options_received {
 /*	PAWS/RTTM data	*/
-	long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
+	int	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
 	u32	ts_recent;	/* Time stamp to echo next		*/
 	u32	rcv_tsval;	/* Time stamp value             	*/
 	u32	rcv_tsecr;	/* Time stamp echo reply        	*/
@@ -426,7 +426,7 @@ struct tcp_timewait_sock {
 	/* The time we sent the last out-of-window ACK: */
 	u32			  tw_last_oow_ack_time;
 
-	long			  tw_ts_recent_stamp;
+	int			  tw_ts_recent_stamp;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	  *tw_md5_key;
 #endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f6cb20e6e524..582304955087 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -472,19 +472,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
  */
 static inline void tcp_synq_overflow(const struct sock *sk)
 {
-	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
-	unsigned long now = jiffies;
+	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int now = jiffies;
 
-	if (time_after(now, last_overflow + HZ))
+	if (time_after32(now, last_overflow + HZ))
 		tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
 }
 
 /* syncookies: no recent synqueue overflow on this listening socket? */
 static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
 {
-	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int now = jiffies;
 
-	return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
+	return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
 }
 
 static inline u32 tcp_cookie_time(void)
@@ -1375,7 +1376,8 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
 {
 	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
 		return true;
-	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
+	if (unlikely(!time_before32(ktime_get_seconds(),
+				    rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)))
 		return true;
 	/*
 	 * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
@@ -1405,7 +1407,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
 
 	   However, we can relax time bounds for RST segments to MSL.
 	 */
-	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
+	if (rst && !time_before32(ktime_get_seconds(),
+				  rx_opt->ts_recent_stamp + TCP_PAWS_MSL))
 		return false;
 	return true;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 814ea43dd12f..d3b6390ecf23 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3462,7 +3462,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 static void tcp_store_ts_recent(struct tcp_sock *tp)
 {
 	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-	tp->rx_opt.ts_recent_stamp = get_seconds();
+	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 }
 
 static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bea17f1e8302..dc415c66a33a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -155,7 +155,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 	   and use initial timestamp retrieved from peer table.
 	 */
 	if (tcptw->tw_ts_recent_stamp &&
-	    (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+	    (!twp || (reuse && time_after32(ktime_get_seconds(),
+					    tcptw->tw_ts_recent_stamp)))) {
 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 		if (tp->write_seq == 0)
 			tp->write_seq = 1;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index dac5893a52b4..75ef332a7caf 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -144,7 +144,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		tw->tw_substate	  = TCP_TIME_WAIT;
 		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (tmp_opt.saw_tstamp) {
-			tcptw->tw_ts_recent_stamp = get_seconds();
+			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
 		}
 
@@ -189,7 +189,7 @@ kill:
 
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
-			tcptw->tw_ts_recent_stamp = get_seconds();
+			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
 		}
 
 		inet_twsk_put(tw);
@@ -537,7 +537,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 
 	if (newtp->rx_opt.tstamp_ok) {
 		newtp->rx_opt.ts_recent = req->ts_recent;
-		newtp->rx_opt.ts_recent_stamp = get_seconds();
+		newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 		newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
 	} else {
 		newtp->rx_opt.ts_recent_stamp = 0;
@@ -603,7 +603,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			 * it can be estimated (approximately)
 			 * from another data.
 			 */
-			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
+			tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
 			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
 		}
 	}
-- 
cgit v1.2.3


From 3f3a89e1d7c31558c070692241e3d6146d2cf1bf Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Wed, 20 Jun 2018 07:18:03 +0200
Subject: i2c: remove i2c_lock_adapter and use i2c_lock_bus directly

The i2c_lock_adapter name is ambiguous since it is unclear if it
refers to the root adapter or the adapter you name in the argument.
The natural interpretation is the adapter you name in the argument,
but there are historical reasons for that not being the case; it
in fact locks the root adapter. Just remove the function and force
users to spell out the I2C_LOCK_ROOT_ADAPTER name to indicate what
is really going on. Also remove i2c_unlock_adapter, of course.

This patch was generated with

git grep -l 'i2c_\(un\)\?lock_adapter' \
| xargs sed -i 's/i2c_\(un\)\?lock_adapter(\([^)]*\))/'\
'i2c_\1lock_bus(\2, I2C_LOCK_ROOT_ADAPTER)/g'

followed by white-space touch-up.

Signed-off-by: Peter Rosin <peda@axentia.se>
Acked-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-brcmstb.c   |  8 ++++----
 drivers/i2c/busses/i2c-davinci.c   |  4 ++--
 drivers/i2c/busses/i2c-gpio.c      | 40 +++++++++++++++++++-------------------
 drivers/i2c/busses/i2c-s3c2410.c   |  4 ++--
 drivers/i2c/busses/i2c-sprd.c      |  8 ++++----
 drivers/i2c/i2c-core-slave.c       |  8 ++++----
 drivers/iio/temperature/mlx90614.c |  4 ++--
 include/linux/i2c.h                | 12 ------------
 8 files changed, 38 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index 78792b4d6437..826d32049996 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -689,9 +689,9 @@ static int brcmstb_i2c_suspend(struct device *dev)
 {
 	struct brcmstb_i2c_dev *i2c_dev = dev_get_drvdata(dev);
 
-	i2c_lock_adapter(&i2c_dev->adapter);
+	i2c_lock_bus(&i2c_dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 	i2c_dev->is_suspended = true;
-	i2c_unlock_adapter(&i2c_dev->adapter);
+	i2c_unlock_bus(&i2c_dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	return 0;
 }
@@ -700,10 +700,10 @@ static int brcmstb_i2c_resume(struct device *dev)
 {
 	struct brcmstb_i2c_dev *i2c_dev = dev_get_drvdata(dev);
 
-	i2c_lock_adapter(&i2c_dev->adapter);
+	i2c_lock_bus(&i2c_dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 	brcmstb_i2c_set_bsc_reg_defaults(i2c_dev);
 	i2c_dev->is_suspended = false;
-	i2c_unlock_adapter(&i2c_dev->adapter);
+	i2c_unlock_bus(&i2c_dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	return 0;
 }
diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c
index 75d6ab177055..d945a2654c2f 100644
--- a/drivers/i2c/busses/i2c-davinci.c
+++ b/drivers/i2c/busses/i2c-davinci.c
@@ -714,14 +714,14 @@ static int i2c_davinci_cpufreq_transition(struct notifier_block *nb,
 
 	dev = container_of(nb, struct davinci_i2c_dev, freq_transition);
 
-	i2c_lock_adapter(&dev->adapter);
+	i2c_lock_bus(&dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 	if (val == CPUFREQ_PRECHANGE) {
 		davinci_i2c_reset_ctrl(dev, 0);
 	} else if (val == CPUFREQ_POSTCHANGE) {
 		i2c_davinci_calc_clk_dividers(dev);
 		davinci_i2c_reset_ctrl(dev, 1);
 	}
-	i2c_unlock_adapter(&dev->adapter);
+	i2c_unlock_bus(&dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	return 0;
 }
diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c
index 66f85bbf3591..20b3dd756fc7 100644
--- a/drivers/i2c/busses/i2c-gpio.c
+++ b/drivers/i2c/busses/i2c-gpio.c
@@ -78,24 +78,24 @@ static struct dentry *i2c_gpio_debug_dir;
 #define getscl(bd)	((bd)->getscl((bd)->data))
 
 #define WIRE_ATTRIBUTE(wire) \
-static int fops_##wire##_get(void *data, u64 *val)	\
-{							\
-	struct i2c_gpio_private_data *priv = data;	\
-							\
-	i2c_lock_adapter(&priv->adap);			\
-	*val = get##wire(&priv->bit_data);		\
-	i2c_unlock_adapter(&priv->adap);		\
-	return 0;					\
-}							\
-static int fops_##wire##_set(void *data, u64 val)	\
-{							\
-	struct i2c_gpio_private_data *priv = data;	\
-							\
-	i2c_lock_adapter(&priv->adap);			\
-	set##wire(&priv->bit_data, val);		\
-	i2c_unlock_adapter(&priv->adap);		\
-	return 0;					\
-}							\
+static int fops_##wire##_get(void *data, u64 *val)		\
+{								\
+	struct i2c_gpio_private_data *priv = data;		\
+								\
+	i2c_lock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);	\
+	*val = get##wire(&priv->bit_data);			\
+	i2c_unlock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);	\
+	return 0;						\
+}								\
+static int fops_##wire##_set(void *data, u64 val)		\
+{								\
+	struct i2c_gpio_private_data *priv = data;		\
+								\
+	i2c_lock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);	\
+	set##wire(&priv->bit_data, val);			\
+	i2c_unlock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);	\
+	return 0;						\
+}								\
 DEFINE_DEBUGFS_ATTRIBUTE(fops_##wire, fops_##wire##_get, fops_##wire##_set, "%llu\n")
 
 WIRE_ATTRIBUTE(scl);
@@ -113,7 +113,7 @@ static int fops_incomplete_transfer_set(void *data, u64 addr)
 	/* ADDR (7 bit) + RD (1 bit) + SDA hi (1 bit) */
 	pattern = (addr << 2) | 3;
 
-	i2c_lock_adapter(&priv->adap);
+	i2c_lock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);
 
 	/* START condition */
 	setsda(bit_data, 0);
@@ -129,7 +129,7 @@ static int fops_incomplete_transfer_set(void *data, u64 addr)
 		udelay(bit_data->udelay);
 	}
 
-	i2c_unlock_adapter(&priv->adap);
+	i2c_unlock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);
 
 	return 0;
 }
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 9fe2b6951895..2f2e28d60ef5 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -919,9 +919,9 @@ static int s3c24xx_i2c_cpufreq_transition(struct notifier_block *nb,
 
 	if ((val == CPUFREQ_POSTCHANGE && delta_f < 0) ||
 	    (val == CPUFREQ_PRECHANGE && delta_f > 0)) {
-		i2c_lock_adapter(&i2c->adap);
+		i2c_lock_bus(&i2c->adap, I2C_LOCK_ROOT_ADAPTER);
 		ret = s3c24xx_i2c_clockrate(i2c, &got);
-		i2c_unlock_adapter(&i2c->adap);
+		i2c_unlock_bus(&i2c->adap, I2C_LOCK_ROOT_ADAPTER);
 
 		if (ret < 0)
 			dev_err(i2c->dev, "cannot find frequency (%d)\n", ret);
diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c
index 4053259bccb8..a94e724f51dc 100644
--- a/drivers/i2c/busses/i2c-sprd.c
+++ b/drivers/i2c/busses/i2c-sprd.c
@@ -590,9 +590,9 @@ static int __maybe_unused sprd_i2c_suspend_noirq(struct device *pdev)
 {
 	struct sprd_i2c *i2c_dev = dev_get_drvdata(pdev);
 
-	i2c_lock_adapter(&i2c_dev->adap);
+	i2c_lock_bus(&i2c_dev->adap, I2C_LOCK_ROOT_ADAPTER);
 	i2c_dev->is_suspended = true;
-	i2c_unlock_adapter(&i2c_dev->adap);
+	i2c_unlock_bus(&i2c_dev->adap, I2C_LOCK_ROOT_ADAPTER);
 
 	return pm_runtime_force_suspend(pdev);
 }
@@ -601,9 +601,9 @@ static int __maybe_unused sprd_i2c_resume_noirq(struct device *pdev)
 {
 	struct sprd_i2c *i2c_dev = dev_get_drvdata(pdev);
 
-	i2c_lock_adapter(&i2c_dev->adap);
+	i2c_lock_bus(&i2c_dev->adap, I2C_LOCK_ROOT_ADAPTER);
 	i2c_dev->is_suspended = false;
-	i2c_unlock_adapter(&i2c_dev->adap);
+	i2c_unlock_bus(&i2c_dev->adap, I2C_LOCK_ROOT_ADAPTER);
 
 	return pm_runtime_force_resume(pdev);
 }
diff --git a/drivers/i2c/i2c-core-slave.c b/drivers/i2c/i2c-core-slave.c
index 4a78c65e9971..47a9f70a24a9 100644
--- a/drivers/i2c/i2c-core-slave.c
+++ b/drivers/i2c/i2c-core-slave.c
@@ -47,9 +47,9 @@ int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb)
 
 	client->slave_cb = slave_cb;
 
-	i2c_lock_adapter(client->adapter);
+	i2c_lock_bus(client->adapter, I2C_LOCK_ROOT_ADAPTER);
 	ret = client->adapter->algo->reg_slave(client);
-	i2c_unlock_adapter(client->adapter);
+	i2c_unlock_bus(client->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	if (ret) {
 		client->slave_cb = NULL;
@@ -69,9 +69,9 @@ int i2c_slave_unregister(struct i2c_client *client)
 		return -EOPNOTSUPP;
 	}
 
-	i2c_lock_adapter(client->adapter);
+	i2c_lock_bus(client->adapter, I2C_LOCK_ROOT_ADAPTER);
 	ret = client->adapter->algo->unreg_slave(client);
-	i2c_unlock_adapter(client->adapter);
+	i2c_unlock_bus(client->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	if (ret == 0)
 		client->slave_cb = NULL;
diff --git a/drivers/iio/temperature/mlx90614.c b/drivers/iio/temperature/mlx90614.c
index d619e8634a00..13a4cec64ea8 100644
--- a/drivers/iio/temperature/mlx90614.c
+++ b/drivers/iio/temperature/mlx90614.c
@@ -433,11 +433,11 @@ static int mlx90614_wakeup(struct mlx90614_data *data)
 
 	dev_dbg(&data->client->dev, "Requesting wake-up");
 
-	i2c_lock_adapter(data->client->adapter);
+	i2c_lock_bus(data->client->adapter, I2C_LOCK_ROOT_ADAPTER);
 	gpiod_direction_output(data->wakeup_gpio, 0);
 	msleep(MLX90614_TIMING_WAKEUP);
 	gpiod_direction_input(data->wakeup_gpio);
-	i2c_unlock_adapter(data->client->adapter);
+	i2c_unlock_bus(data->client->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	data->ready_timestamp = jiffies +
 			msecs_to_jiffies(MLX90614_TIMING_STARTUP);
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 254cd34eeae2..795e3a860afe 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -754,18 +754,6 @@ i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags)
 	adapter->lock_ops->unlock_bus(adapter, flags);
 }
 
-static inline void
-i2c_lock_adapter(struct i2c_adapter *adapter)
-{
-	i2c_lock_bus(adapter, I2C_LOCK_ROOT_ADAPTER);
-}
-
-static inline void
-i2c_unlock_adapter(struct i2c_adapter *adapter)
-{
-	i2c_unlock_bus(adapter, I2C_LOCK_ROOT_ADAPTER);
-}
-
 /*flags for the client struct: */
 #define I2C_CLIENT_PEC		0x04	/* Use Packet Error Checking */
 #define I2C_CLIENT_TEN		0x10	/* we have a ten bit chip address */
-- 
cgit v1.2.3


From 8b7008620b8452728cadead460a36f64ed78c460 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Wed, 11 Jul 2018 14:39:42 +0200
Subject: net: Don't copy pfmemalloc flag in __copy_skb_header()

The pfmemalloc flag indicates that the skb was allocated from
the PFMEMALLOC reserves, and the flag is currently copied on skb
copy and clone.

However, an skb copied from an skb flagged with pfmemalloc
wasn't necessarily allocated from PFMEMALLOC reserves, and on
the other hand an skb allocated that way might be copied from an
skb that wasn't.

So we should not copy the flag on skb copy, and rather decide
whether to allow an skb to be associated with sockets unrelated
to page reclaim depending only on how it was allocated.

Move the pfmemalloc flag before headers_start[0] using an
existing 1-bit hole, so that __copy_skb_header() doesn't copy
it.

When cloning, we'll now take care of this flag explicitly,
contravening to the warning comment of __skb_clone().

While at it, restore the newline usage introduced by commit
b19372273164 ("net: reorganize sk_buff for faster
__copy_skb_header()") to visually separate bytes used in
bitfields after headers_start[0], that was gone after commit
a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage
area"), and describe the pfmemalloc flag in the kernel-doc
structure comment.

This doesn't change the size of sk_buff or cacheline boundaries,
but consolidates the 15 bits hole before tc_index into a 2 bytes
hole before csum, that could now be filled more easily.

Reported-by: Patrick Talbert <ptalbert@redhat.com>
Fixes: c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 10 +++++-----
 net/core/skbuff.c      |  2 ++
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 164cdedf6012..610a201126ee 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -630,6 +630,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@xmit_more: More SKBs are pending for this queue
+ *	@pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -735,7 +736,7 @@ struct sk_buff {
 				peeked:1,
 				head_frag:1,
 				xmit_more:1,
-				__unused:1; /* one bit hole */
+				pfmemalloc:1;
 
 	/* fields enclosed in headers_start/headers_end are copied
 	 * using a single memcpy() in __copy_skb_header()
@@ -754,31 +755,30 @@ struct sk_buff {
 
 	__u8			__pkt_type_offset[0];
 	__u8			pkt_type:3;
-	__u8			pfmemalloc:1;
 	__u8			ignore_df:1;
-
 	__u8			nf_trace:1;
 	__u8			ip_summed:2;
 	__u8			ooo_okay:1;
+
 	__u8			l4_hash:1;
 	__u8			sw_hash:1;
 	__u8			wifi_acked_valid:1;
 	__u8			wifi_acked:1;
-
 	__u8			no_fcs:1;
 	/* Indicates the inner headers are valid in the skbuff. */
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
+
 	__u8			csum_complete_sw:1;
 	__u8			csum_level:2;
 	__u8			csum_not_inet:1;
-
 	__u8			dst_pending_confirm:1;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
 	__u8			ipvs_property:1;
+
 	__u8			inner_protocol_type:1;
 	__u8			remcsum_offload:1;
 #ifdef CONFIG_NET_SWITCHDEV
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index eba8dae22c25..4df3164bb5fc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -858,6 +858,8 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	n->cloned = 1;
 	n->nohdr = 0;
 	n->peeked = 0;
+	if (skb->pfmemalloc)
+		n->pfmemalloc = 1;
 	n->destructor = NULL;
 	C(tail);
 	C(end);
-- 
cgit v1.2.3


From 3949fa9bac090ad217534c30bc3b6572289abf21 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 May 2018 15:29:10 -0700
Subject: rcu: Make rcu_read_unlock_special() static

Because rcu_read_unlock_special() is no longer used outside of
kernel/rcu/tree_plugin.h, this commit makes it static.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 1 -
 kernel/rcu/tree_plugin.h | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 65163aa0bb04..67ec077c7ee5 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -64,7 +64,6 @@ void rcu_barrier_tasks(void);
 
 void __rcu_read_lock(void);
 void __rcu_read_unlock(void);
-void rcu_read_unlock_special(struct task_struct *t);
 void synchronize_rcu(void);
 
 /*
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 613372246a07..54a251640f53 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -127,6 +127,7 @@ static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
 
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 			       bool wake);
+static void rcu_read_unlock_special(struct task_struct *t);
 
 /*
  * Tell them what RCU they are running.
@@ -461,7 +462,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-void rcu_read_unlock_special(struct task_struct *t)
+static void rcu_read_unlock_special(struct task_struct *t)
 {
 	bool empty_exp;
 	bool empty_norm;
-- 
cgit v1.2.3


From 07f27570dcd148a5f4de7dc3513c1d1cd069b362 Mon Sep 17 00:00:00 2001
From: Byungchul Park <byungchul.park@lge.com>
Date: Fri, 11 May 2018 17:30:34 +0900
Subject: rcu: Improve rcu_note_voluntary_context_switch() reporting

We expect a quiescent state of TASKS_RCU when cond_resched_tasks_rcu_qs()
is called, no matter whether it actually be scheduled or not. However,
it currently doesn't report the quiescent state when the task enters
into __schedule() as it's called with preempt = true. So make it report
the quiescent state unconditionally when cond_resched_tasks_rcu_qs() is
called.

And in TINY_RCU, even though the quiescent state of rcu_bh also should
be reported when the tick interrupt comes from user, it doesn't. So make
it reported.

Lastly in TREE_RCU, rcu_note_voluntary_context_switch() should be
reported when the tick interrupt comes from not only user but also idle,
as an extended quiescent state.

Signed-off-by: Byungchul Park <byungchul.park@lge.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Simplify rcutiny portion given no RCU-tasks for !PREEMPT. ]
---
 include/linux/rcupdate.h | 4 ++--
 kernel/rcu/tiny.c        | 4 +---
 kernel/rcu/tree.c        | 4 ++--
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 67ec077c7ee5..5cab15e7ec83 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -194,8 +194,8 @@ static inline void exit_tasks_rcu_finish(void) { }
  */
 #define cond_resched_tasks_rcu_qs() \
 do { \
-	if (!cond_resched()) \
-		rcu_note_voluntary_context_switch_lite(current); \
+	rcu_note_voluntary_context_switch_lite(current); \
+	cond_resched(); \
 } while (0)
 
 /*
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index a64eee0db39e..befc9321a89c 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -122,10 +122,8 @@ void rcu_check_callbacks(int user)
 {
 	if (user)
 		rcu_sched_qs();
-	else if (!in_softirq())
+	if (user || !in_softirq())
 		rcu_bh_qs();
-	if (user)
-		rcu_note_voluntary_context_switch(current);
 }
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d3333ee2c6f5..19beabe73629 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2645,6 +2645,7 @@ void rcu_check_callbacks(int user)
 
 		rcu_sched_qs();
 		rcu_bh_qs();
+		rcu_note_voluntary_context_switch(current);
 
 	} else if (!in_softirq()) {
 
@@ -2660,8 +2661,7 @@ void rcu_check_callbacks(int user)
 	rcu_preempt_check_callbacks();
 	if (rcu_pending())
 		invoke_rcu_core();
-	if (user)
-		rcu_note_voluntary_context_switch(current);
+
 	trace_rcu_utilization(TPS("End scheduler-tick"));
 }
 
-- 
cgit v1.2.3


From 1445e9175bead409bb9930f3c745246a09f22cf6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 7 May 2018 06:35:46 -0300
Subject: rcu: rcupdate.h: Get rid of Sphinx warnings at rcu_pointer_handoff()

The code example at rcupdate.h currently produce lots of warnings:

	./include/linux/rcupdate.h:572: WARNING: Unexpected indentation.
	./include/linux/rcupdate.h:576: WARNING: Unexpected indentation.
	./include/linux/rcupdate.h:580: WARNING: Block quote ends without a blank line; unexpected unindent.
	./include/linux/rcupdate.h:582: WARNING: Block quote ends without a blank line; unexpected unindent.
	./include/linux/rcupdate.h:582: WARNING: Inline literal start-string without end-string.

This commit therefore changes it to a code-block.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5cab15e7ec83..dacc90358b33 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -566,8 +566,8 @@ static inline void rcu_preempt_sleep_check(void) { }
  * This is simply an identity function, but it documents where a pointer
  * is handed off from RCU to some other synchronization mechanism, for
  * example, reference counting or locking.  In C11, it would map to
- * kill_dependency().  It could be used as follows:
- * ``
+ * kill_dependency().  It could be used as follows::
+ *
  *	rcu_read_lock();
  *	p = rcu_dereference(gp);
  *	long_lived = is_long_lived(p);
@@ -578,7 +578,6 @@ static inline void rcu_preempt_sleep_check(void) { }
  *			p = rcu_pointer_handoff(p);
  *	}
  *	rcu_read_unlock();
- *``
  */
 #define rcu_pointer_handoff(p) (p)
 
-- 
cgit v1.2.3


From 6f56f714db067056c80f5d71510118f82872e34c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 14 May 2018 13:52:27 -0700
Subject: rcu: Improve RCU-tasks naming and comments

The naming and comments associated with some RCU-tasks code make
the faulty assumption that context switches due to cond_resched()
are voluntary.  As several people pointed out, this is not the case.
This commit therefore updates function names and comments to better
reflect current reality.

Reported-by: Byungchul Park <byungchul.park@lge.com>
Reported-by: Joel Fernandes <joel@joelfernandes.org>
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 12 ++++++------
 include/linux/rcutiny.h  |  2 +-
 kernel/rcu/tree.c        |  2 +-
 kernel/rcu/update.c      | 27 ++++++++++++++-------------
 4 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index dacc90358b33..75e5b393cf44 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -158,11 +158,11 @@ static inline void rcu_init_nohz(void) { }
 	} while (0)
 
 /*
- * Note a voluntary context switch for RCU-tasks benefit.  This is a
- * macro rather than an inline function to avoid #include hell.
+ * Note a quasi-voluntary context switch for RCU-tasks's benefit.
+ * This is a macro rather than an inline function to avoid #include hell.
  */
 #ifdef CONFIG_TASKS_RCU
-#define rcu_note_voluntary_context_switch_lite(t) \
+#define rcu_tasks_qs(t) \
 	do { \
 		if (READ_ONCE((t)->rcu_tasks_holdout)) \
 			WRITE_ONCE((t)->rcu_tasks_holdout, false); \
@@ -170,14 +170,14 @@ static inline void rcu_init_nohz(void) { }
 #define rcu_note_voluntary_context_switch(t) \
 	do { \
 		rcu_all_qs(); \
-		rcu_note_voluntary_context_switch_lite(t); \
+		rcu_tasks_qs(t); \
 	} while (0)
 void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
 void synchronize_rcu_tasks(void);
 void exit_tasks_rcu_start(void);
 void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU */
-#define rcu_note_voluntary_context_switch_lite(t)	do { } while (0)
+#define rcu_tasks_qs(t)	do { } while (0)
 #define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
 #define call_rcu_tasks call_rcu_sched
 #define synchronize_rcu_tasks synchronize_sched
@@ -194,7 +194,7 @@ static inline void exit_tasks_rcu_finish(void) { }
  */
 #define cond_resched_tasks_rcu_qs() \
 do { \
-	rcu_note_voluntary_context_switch_lite(current); \
+	rcu_tasks_qs(current); \
 	cond_resched(); \
 } while (0)
 
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 7b3c82e8a625..8d9a0ea8f0b5 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -93,7 +93,7 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 #define rcu_note_context_switch(preempt) \
 	do { \
 		rcu_sched_qs(); \
-		rcu_note_voluntary_context_switch_lite(current); \
+		rcu_tasks_qs(current); \
 	} while (0)
 
 static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 6f2922168216..ccc061acf887 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -457,7 +457,7 @@ void rcu_note_context_switch(bool preempt)
 		rcu_momentary_dyntick_idle();
 	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
 	if (!preempt)
-		rcu_note_voluntary_context_switch_lite(current);
+		rcu_tasks_qs(current);
 out:
 	trace_rcu_utilization(TPS("End context switch"));
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4c230a60ece4..5783bdf86e5a 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -507,14 +507,15 @@ early_initcall(check_cpu_stall_init);
 #ifdef CONFIG_TASKS_RCU
 
 /*
- * Simple variant of RCU whose quiescent states are voluntary context switch,
- * user-space execution, and idle.  As such, grace periods can take one good
- * long time.  There are no read-side primitives similar to rcu_read_lock()
- * and rcu_read_unlock() because this implementation is intended to get
- * the system into a safe state for some of the manipulations involved in
- * tracing and the like.  Finally, this implementation does not support
- * high call_rcu_tasks() rates from multiple CPUs.  If this is required,
- * per-CPU callback lists will be needed.
+ * Simple variant of RCU whose quiescent states are voluntary context
+ * switch, cond_resched_rcu_qs(), user-space execution, and idle.
+ * As such, grace periods can take one good long time.  There are no
+ * read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
+ * because this implementation is intended to get the system into a safe
+ * state for some of the manipulations involved in tracing and the like.
+ * Finally, this implementation does not support high call_rcu_tasks()
+ * rates from multiple CPUs.  If this is required, per-CPU callback lists
+ * will be needed.
  */
 
 /* Global list of callbacks and associated lock. */
@@ -542,11 +543,11 @@ static struct task_struct *rcu_tasks_kthread_ptr;
  * period elapses, in other words after all currently executing RCU
  * read-side critical sections have completed. call_rcu_tasks() assumes
  * that the read-side critical sections end at a voluntary context
- * switch (not a preemption!), entry into idle, or transition to usermode
- * execution.  As such, there are no read-side primitives analogous to
- * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
- * to determine that all tasks have passed through a safe state, not so
- * much for data-strcuture synchronization.
+ * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle,
+ * or transition to usermode execution.  As such, there are no read-side
+ * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
+ * this primitive is intended to determine that all tasks have passed
+ * through a safe state, not so much for data-strcuture synchronization.
  *
  * See the description of call_rcu() for more detailed information on
  * memory ordering guarantees.
-- 
cgit v1.2.3


From b7b6f94cf6e6d961f78064315b6f5de5d9c6414b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 14:22:40 +1000
Subject: rculist: Improve documentation for list_for_each_entry_from_rcu()

Unfortunately the patch for adding list_for_each_entry_from_rcu()
wasn't the final patch after all review.  It is functionally
correct but the documentation was incomplete.

This patch adds this missing documentation which includes an update to
the documentation for list_for_each_entry_continue_rcu() to match the
documentation for the new list_for_each_entry_from_rcu(), and adds
list_for_each_entry_from_rcu() and the already existing
hlist_for_each_entry_from_rcu() to section 7 of whatisRCU.txt.

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/RCU/whatisRCU.txt |  2 ++
 include/linux/rculist.h         | 19 ++++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 94288f1b8759..c2a7facf7ff9 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -820,11 +820,13 @@ RCU list traversal:
 	list_next_rcu
 	list_for_each_entry_rcu
 	list_for_each_entry_continue_rcu
+	list_for_each_entry_from_rcu
 	hlist_first_rcu
 	hlist_next_rcu
 	hlist_pprev_rcu
 	hlist_for_each_entry_rcu
 	hlist_for_each_entry_rcu_bh
+	hlist_for_each_entry_from_rcu
 	hlist_for_each_entry_continue_rcu
 	hlist_for_each_entry_continue_rcu_bh
 	hlist_nulls_first_rcu
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 36df6ccbc874..4786c2235b98 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -396,7 +396,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  * @member:	the name of the list_head within the struct.
  *
  * Continue to iterate over list of given type, continuing after
- * the current position.
+ * the current position which must have been in the list when the RCU read
+ * lock was taken.
+ * This would typically require either that you obtained the node from a
+ * previous walk of the list in the same RCU read-side critical section, or
+ * that you held some sort of non-RCU reference (such as a reference count)
+ * to keep the node alive *and* in the list.
+ *
+ * This iterator is similar to list_for_each_entry_from_rcu() except
+ * this starts after the given position and that one starts at the given
+ * position.
  */
 #define list_for_each_entry_continue_rcu(pos, head, member) 		\
 	for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \
@@ -411,6 +420,14 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  *
  * Iterate over the tail of a list starting from a given position,
  * which must have been in the list when the RCU read lock was taken.
+ * This would typically require either that you obtained the node from a
+ * previous walk of the list in the same RCU read-side critical section, or
+ * that you held some sort of non-RCU reference (such as a reference count)
+ * to keep the node alive *and* in the list.
+ *
+ * This iterator is similar to list_for_each_entry_continue_rcu() except
+ * this starts from the given position and that one starts from the position
+ * after the given position.
  */
 #define list_for_each_entry_from_rcu(pos, head, member)			\
 	for (; &(pos)->member != (head);					\
-- 
cgit v1.2.3


From 3025520ec424df8b0fd5cdc319ad6b83406d9954 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 22 May 2018 11:38:47 -0700
Subject: rcutorture: Use per-CPU random state for rcu_torture_timer()

Currently, the rcu_torture_timer() function uses a single global
torture_random_state structure protected by a single global lock.
This conflicts to some extent with performance and scalability,
but even more with the goal of consolidating read-side testing
with rcu_torture_reader().  This commit therefore creates a per-CPU
torture_random_state structure for use by rcu_torture_timer() and
eliminates the lock.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Make rcu_torture_timer_rand static, per 0day Test Robot report. ]
---
 include/linux/torture.h |  2 ++
 kernel/rcu/rcutorture.c | 10 +++++-----
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index a55e80817dae..61dfd93b6ee4 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -64,6 +64,8 @@ struct torture_random_state {
 	long trs_count;
 };
 #define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 }
+#define DEFINE_TORTURE_RANDOM_PERCPU(name) \
+	DEFINE_PER_CPU(struct torture_random_state, name)
 unsigned long torture_random(struct torture_random_state *trsp);
 
 /* Task shuffler, which causes CPUs to occasionally go idle. */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 2452e4a29923..d5a5465d2507 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1143,6 +1143,8 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
 	return true;
 }
 
+static DEFINE_TORTURE_RANDOM_PERCPU(rcu_torture_timer_rand);
+
 /*
  * RCU torture reader from timer handler.  Dereferences rcu_torture_current,
  * incrementing the corresponding element of the pipeline array.  The
@@ -1154,12 +1156,12 @@ static void rcu_torture_timer(struct timer_list *unused)
 	int idx;
 	unsigned long started;
 	unsigned long completed;
-	static DEFINE_TORTURE_RANDOM(rand);
-	static DEFINE_SPINLOCK(rand_lock);
 	struct rcu_torture *p;
 	int pipe_count;
+	struct torture_random_state *trsp;
 	unsigned long long ts;
 
+	trsp = this_cpu_ptr(&rcu_torture_timer_rand);
 	atomic_long_inc(&n_rcu_torture_timers);
 	idx = cur_ops->readlock();
 	started = cur_ops->get_gp_seq();
@@ -1176,9 +1178,7 @@ static void rcu_torture_timer(struct timer_list *unused)
 	}
 	if (p->rtort_mbtest == 0)
 		atomic_inc(&n_rcu_torture_mberror);
-	spin_lock(&rand_lock);
-	cur_ops->read_delay(&rand);
-	spin_unlock(&rand_lock);
+	cur_ops->read_delay(trsp);
 	preempt_disable();
 	pipe_count = p->rtort_pipe_count;
 	if (pipe_count > RCU_TORTURE_PIPE_LEN) {
-- 
cgit v1.2.3


From 523f9eb1ef25aab4aaf9aeb5356160e8039411ef Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:15 +0300
Subject: net/mlx4_core: Add health buffer address capability

Health buffer address is a 32 bit PCI address offset provided by
the FW. This offset is used for reading FW health debug data
located on the shared CR space. Cr space is accessible in both
driver and FW and allows for different queries and configurations.
Health buffer size is always 64B of readable data followed by a
lock which is used to block volatile CR space access.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c   | 5 ++++-
 drivers/net/ethernet/mellanox/mlx4/fw.h   | 1 +
 drivers/net/ethernet/mellanox/mlx4/main.c | 1 +
 include/linux/mlx4/device.h               | 1 +
 4 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 46dcbfbe4c5e..babcfd9c0571 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -825,7 +825,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET	0xcc
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET	0xd0
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET	0xd2
-
+#define QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET	0xe4
 
 	dev_cap->flags2 = 0;
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -1082,6 +1082,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev_cap->rl_caps.min_unit = size >> 14;
 	}
 
+	MLX4_GET(dev_cap->health_buffer_addrs, outbox,
+		 QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET);
+
 	MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
 	if (field32 & (1 << 16))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index cd6399c76bfd..650ae08c71de 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -128,6 +128,7 @@ struct mlx4_dev_cap {
 	u32 dmfs_high_rate_qpn_base;
 	u32 dmfs_high_rate_qpn_range;
 	struct mlx4_rate_limit_caps rl_caps;
+	u32 health_buffer_addrs;
 	struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
 	bool wol_port[MLX4_MAX_PORTS + 1];
 };
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index c42eddfcd2b5..806d441b3701 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -523,6 +523,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
 	dev->caps.wol_port[1]          = dev_cap->wol_port[1];
 	dev->caps.wol_port[2]          = dev_cap->wol_port[2];
+	dev->caps.health_buffer_addrs  = dev_cap->health_buffer_addrs;
 
 	/* Save uar page shift */
 	if (!mlx4_is_slave(dev)) {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 122e7e9d3091..e3bfe76aea98 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -630,6 +630,7 @@ struct mlx4_caps {
 	u32			vf_caps;
 	bool			wol_port[MLX4_MAX_PORTS + 1];
 	struct mlx4_rate_limit_caps rl_caps;
+	u32			health_buffer_addrs;
 };
 
 struct mlx4_buf_list {
-- 
cgit v1.2.3


From bedc989b0c98285b277ff8a08ff9514e580913f4 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:16 +0300
Subject: net/mlx4_core: Add Crdump FW snapshot support

Crdump allows the driver to create a snapshot of the FW PCI
crspace and health buffer during a critical FW issue.
In case of a FW command timeout, FW getting stuck or a non zero
value on the catastrophic buffer, a snapshot will be taken.

The snapshot is exposed using devlink, cr-space, fw-health
address regions are registered on init and snapshots are attached
once a new snapshot is collected by the driver.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/Makefile |   2 +-
 drivers/net/ethernet/mellanox/mlx4/catas.c  |   6 +-
 drivers/net/ethernet/mellanox/mlx4/crdump.c | 231 ++++++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/main.c   |  10 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h   |   4 +
 include/linux/mlx4/device.h                 |   6 +
 6 files changed, 255 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx4/crdump.c

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/Makefile b/drivers/net/ethernet/mellanox/mlx4/Makefile
index 16b10d01fcf4..3f400770fcd8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx4/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_MLX4_CORE)		+= mlx4_core.o
 
 mlx4_core-y :=	alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \
 		main.o mcg.o mr.o pd.o port.o profile.o qp.o reset.o sense.o \
-		srq.o resource_tracker.o
+		srq.o resource_tracker.o crdump.o
 
 obj-$(CONFIG_MLX4_EN)               += mlx4_en.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 8afe4b5fb09b..c81d15bf259c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -178,10 +178,12 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
 
 	dev = persist->dev;
 	mlx4_err(dev, "device is going to be reset\n");
-	if (mlx4_is_slave(dev))
+	if (mlx4_is_slave(dev)) {
 		err = mlx4_reset_slave(dev);
-	else
+	} else {
+		mlx4_crdump_collect(dev);
 		err = mlx4_reset_master(dev);
+	}
 
 	if (!err) {
 		mlx4_err(dev, "device was reset successfully\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
new file mode 100644
index 000000000000..4d5524dffec4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4.h"
+
+#define BAD_ACCESS			0xBADACCE5
+#define HEALTH_BUFFER_SIZE		0x40
+#define CR_ENABLE_BIT			swab32(BIT(6))
+#define CR_ENABLE_BIT_OFFSET		0xF3F04
+#define MAX_NUM_OF_DUMPS_TO_STORE	(8)
+
+static const char *region_cr_space_str = "cr-space";
+static const char *region_fw_health_str = "fw-health";
+
+/* Set to true in case cr enable bit was set to true before crdump */
+static bool crdump_enbale_bit_set;
+
+static void crdump_enable_crspace_access(struct mlx4_dev *dev,
+					 u8 __iomem *cr_space)
+{
+	/* Get current enable bit value */
+	crdump_enbale_bit_set =
+		readl(cr_space + CR_ENABLE_BIT_OFFSET) & CR_ENABLE_BIT;
+
+	/* Enable FW CR filter (set bit6 to 0) */
+	if (crdump_enbale_bit_set)
+		writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) & ~CR_ENABLE_BIT,
+		       cr_space + CR_ENABLE_BIT_OFFSET);
+
+	/* Enable block volatile crspace accesses */
+	writel(swab32(1), cr_space + dev->caps.health_buffer_addrs +
+	       HEALTH_BUFFER_SIZE);
+}
+
+static void crdump_disable_crspace_access(struct mlx4_dev *dev,
+					  u8 __iomem *cr_space)
+{
+	/* Disable block volatile crspace accesses */
+	writel(0, cr_space + dev->caps.health_buffer_addrs +
+	       HEALTH_BUFFER_SIZE);
+
+	/* Restore FW CR filter value (set bit6 to original value) */
+	if (crdump_enbale_bit_set)
+		writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) | CR_ENABLE_BIT,
+		       cr_space + CR_ENABLE_BIT_OFFSET);
+}
+
+static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev,
+					u8 __iomem *cr_space,
+					u32 id)
+{
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+	struct pci_dev *pdev = dev->persist->pdev;
+	unsigned long cr_res_size;
+	u8 *crspace_data;
+	int offset;
+	int err;
+
+	if (!crdump->region_crspace) {
+		mlx4_err(dev, "crdump: cr-space region is NULL\n");
+		return;
+	}
+
+	/* Try to collect CR space */
+	cr_res_size = pci_resource_len(pdev, 0);
+	crspace_data = kvmalloc(cr_res_size, GFP_KERNEL);
+	if (crspace_data) {
+		for (offset = 0; offset < cr_res_size; offset += 4)
+			*(u32 *)(crspace_data + offset) =
+					readl(cr_space + offset);
+
+		err = devlink_region_snapshot_create(crdump->region_crspace,
+						     cr_res_size, crspace_data,
+						     id, &kvfree);
+		if (err) {
+			kvfree(crspace_data);
+			mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+				  region_cr_space_str, id, err);
+		} else {
+			mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
+				  id, region_cr_space_str);
+		}
+	} else {
+		mlx4_err(dev, "crdump: Failed to allocate crspace buffer\n");
+	}
+}
+
+static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
+					  u8 __iomem *cr_space,
+					  u32 id)
+{
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+	u8 *health_data;
+	int offset;
+	int err;
+
+	if (!crdump->region_fw_health) {
+		mlx4_err(dev, "crdump: fw-health region is NULL\n");
+		return;
+	}
+
+	/* Try to collect health buffer */
+	health_data = kvmalloc(HEALTH_BUFFER_SIZE, GFP_KERNEL);
+	if (health_data) {
+		u8 __iomem *health_buf_start =
+				cr_space + dev->caps.health_buffer_addrs;
+
+		for (offset = 0; offset < HEALTH_BUFFER_SIZE; offset += 4)
+			*(u32 *)(health_data + offset) =
+					readl(health_buf_start + offset);
+
+		err = devlink_region_snapshot_create(crdump->region_fw_health,
+						     HEALTH_BUFFER_SIZE,
+						     health_data,
+						     id, &kvfree);
+		if (err) {
+			kvfree(health_data);
+			mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+				  region_fw_health_str, id, err);
+		} else {
+			mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
+				  id, region_fw_health_str);
+		}
+	} else {
+		mlx4_err(dev, "crdump: Failed to allocate health buffer\n");
+	}
+}
+
+int mlx4_crdump_collect(struct mlx4_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+	struct pci_dev *pdev = dev->persist->pdev;
+	unsigned long cr_res_size;
+	u8 __iomem *cr_space;
+	u32 id;
+
+	if (!dev->caps.health_buffer_addrs) {
+		mlx4_info(dev, "crdump: FW doesn't support health buffer access, skipping\n");
+		return 0;
+	}
+
+	cr_res_size = pci_resource_len(pdev, 0);
+
+	cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size);
+	if (!cr_space) {
+		mlx4_err(dev, "crdump: Failed to map pci cr region\n");
+		return -ENODEV;
+	}
+
+	crdump_enable_crspace_access(dev, cr_space);
+
+	/* Get the available snapshot ID for the dumps */
+	id = devlink_region_shapshot_id_get(devlink);
+
+	/* Try to capture dumps */
+	mlx4_crdump_collect_crspace(dev, cr_space, id);
+	mlx4_crdump_collect_fw_health(dev, cr_space, id);
+
+	crdump_disable_crspace_access(dev, cr_space);
+
+	iounmap(cr_space);
+	return 0;
+}
+
+int mlx4_crdump_init(struct mlx4_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+	struct pci_dev *pdev = dev->persist->pdev;
+
+	/* Create cr-space region */
+	crdump->region_crspace =
+		devlink_region_create(devlink,
+				      region_cr_space_str,
+				      MAX_NUM_OF_DUMPS_TO_STORE,
+				      pci_resource_len(pdev, 0));
+	if (IS_ERR(crdump->region_crspace))
+		mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
+			  region_cr_space_str,
+			  PTR_ERR(crdump->region_crspace));
+
+	/* Create fw-health region */
+	crdump->region_fw_health =
+		devlink_region_create(devlink,
+				      region_fw_health_str,
+				      MAX_NUM_OF_DUMPS_TO_STORE,
+				      HEALTH_BUFFER_SIZE);
+	if (IS_ERR(crdump->region_fw_health))
+		mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
+			  region_fw_health_str,
+			  PTR_ERR(crdump->region_fw_health));
+
+	return 0;
+}
+
+void mlx4_crdump_end(struct mlx4_dev *dev)
+{
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+
+	devlink_region_destroy(crdump->region_fw_health);
+	devlink_region_destroy(crdump->region_crspace);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 806d441b3701..46b021409b8b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3807,10 +3807,14 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
 		}
 	}
 
-	err = mlx4_catas_init(&priv->dev);
+	err = mlx4_crdump_init(&priv->dev);
 	if (err)
 		goto err_release_regions;
 
+	err = mlx4_catas_init(&priv->dev);
+	if (err)
+		goto err_crdump;
+
 	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
 	if (err)
 		goto err_catas;
@@ -3820,6 +3824,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
 err_catas:
 	mlx4_catas_end(&priv->dev);
 
+err_crdump:
+	mlx4_crdump_end(&priv->dev);
+
 err_release_regions:
 	pci_release_regions(pdev);
 
@@ -4081,6 +4088,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	else
 		mlx4_info(dev, "%s: interface is down\n", __func__);
 	mlx4_catas_end(dev);
+	mlx4_crdump_end(dev);
 	if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
 		mlx4_warn(dev, "Disabling SR-IOV\n");
 		pci_disable_sriov(pdev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 2ebaa3bee42f..6e016092a6f1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1042,6 +1042,8 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev);
 void mlx4_stop_catas_poll(struct mlx4_dev *dev);
 int mlx4_catas_init(struct mlx4_dev *dev);
 void mlx4_catas_end(struct mlx4_dev *dev);
+int mlx4_crdump_init(struct mlx4_dev *dev);
+void mlx4_crdump_end(struct mlx4_dev *dev);
 int mlx4_restart_one(struct pci_dev *pdev, bool reload,
 		     struct devlink *devlink);
 int mlx4_register_device(struct mlx4_dev *dev);
@@ -1228,6 +1230,8 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
 void mlx4_enter_error_state(struct mlx4_dev_persistent *persist);
 int mlx4_comm_internal_err(u32 slave_read);
 
+int mlx4_crdump_collect(struct mlx4_dev *dev);
+
 int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
 		    enum mlx4_port_type *type);
 void mlx4_do_sense_ports(struct mlx4_dev *dev,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e3bfe76aea98..300b944e6e1e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -852,6 +852,11 @@ struct mlx4_vf_dev {
 	u8			n_ports;
 };
 
+struct mlx4_fw_crdump {
+	struct devlink_region *region_crspace;
+	struct devlink_region *region_fw_health;
+};
+
 enum mlx4_pci_status {
 	MLX4_PCI_STATUS_DISABLED,
 	MLX4_PCI_STATUS_ENABLED,
@@ -872,6 +877,7 @@ struct mlx4_dev_persistent {
 	u8	interface_state;
 	struct mutex		pci_status_mutex; /* sync pci state */
 	enum mlx4_pci_status	pci_status;
+	struct mlx4_fw_crdump	crdump;
 };
 
 struct mlx4_dev {
-- 
cgit v1.2.3


From 3c641ba4a852cf4e90e3d7f29c5df08e24213c5d Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:18 +0300
Subject: net/mlx4_core: Use devlink region_snapshot parameter

This parameter enables capturing region snapshot of the crspace
during critical errors. The default value of this parameter is
disabled, it can be enabled using devlink param commands.
It is possible to configure during runtime and also driver init.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/crdump.c |  8 ++++++
 drivers/net/ethernet/mellanox/mlx4/main.c   | 41 +++++++++++++++++++++++++++++
 include/linux/mlx4/device.h                 |  1 +
 3 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
index 4d5524dffec4..88316c743820 100644
--- a/drivers/net/ethernet/mellanox/mlx4/crdump.c
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c
@@ -158,6 +158,7 @@ static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
 int mlx4_crdump_collect(struct mlx4_dev *dev)
 {
 	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
 	struct pci_dev *pdev = dev->persist->pdev;
 	unsigned long cr_res_size;
 	u8 __iomem *cr_space;
@@ -168,6 +169,11 @@ int mlx4_crdump_collect(struct mlx4_dev *dev)
 		return 0;
 	}
 
+	if (!crdump->snapshot_enable) {
+		mlx4_info(dev, "crdump: devlink snapshot disabled, skipping\n");
+		return 0;
+	}
+
 	cr_res_size = pci_resource_len(pdev, 0);
 
 	cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size);
@@ -197,6 +203,8 @@ int mlx4_crdump_init(struct mlx4_dev *dev)
 	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
 	struct pci_dev *pdev = dev->persist->pdev;
 
+	crdump->snapshot_enable = false;
+
 	/* Create cr-space region */
 	crdump->region_crspace =
 		devlink_region_create(devlink,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 46b021409b8b..2d979a652b7b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -191,6 +191,26 @@ static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id,
 	return 0;
 }
 
+static int mlx4_devlink_crdump_snapshot_get(struct devlink *devlink, u32 id,
+					    struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+
+	ctx->val.vbool = dev->persist->crdump.snapshot_enable;
+	return 0;
+}
+
+static int mlx4_devlink_crdump_snapshot_set(struct devlink *devlink, u32 id,
+					    struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+
+	dev->persist->crdump.snapshot_enable = ctx->val.vbool;
+	return 0;
+}
+
 static int
 mlx4_devlink_max_macs_validate(struct devlink *devlink, u32 id,
 			       union devlink_param_value val,
@@ -224,6 +244,11 @@ static const struct devlink_param mlx4_devlink_params[] = {
 	DEVLINK_PARAM_GENERIC(MAX_MACS,
 			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 			      NULL, NULL, mlx4_devlink_max_macs_validate),
+	DEVLINK_PARAM_GENERIC(REGION_SNAPSHOT,
+			      BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
+			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+			      mlx4_devlink_crdump_snapshot_get,
+			      mlx4_devlink_crdump_snapshot_set, NULL),
 	DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
 			     "enable_64b_cqe_eqe", DEVLINK_PARAM_TYPE_BOOL,
 			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
@@ -270,6 +295,11 @@ static void mlx4_devlink_set_params_init_values(struct devlink *devlink)
 	mlx4_devlink_set_init_value(devlink,
 				    MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
 				    value);
+
+	value.vbool = false;
+	mlx4_devlink_set_init_value(devlink,
+				    DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+				    value);
 }
 
 static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
@@ -3862,6 +3892,9 @@ static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
 
 static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
 {
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
 	union devlink_param_value saved_value;
 	int err;
 
@@ -3889,6 +3922,14 @@ static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
 						 &saved_value);
 	if (!err)
 		enable_4k_uar = saved_value.vbool;
+	err = devlink_param_driverinit_value_get(devlink,
+						 DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+						 &saved_value);
+	if (!err && crdump->snapshot_enable != saved_value.vbool) {
+		crdump->snapshot_enable = saved_value.vbool;
+		devlink_param_value_changed(devlink,
+					    DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT);
+	}
 }
 
 static int mlx4_devlink_reload(struct devlink *devlink,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 300b944e6e1e..dca6ab4eaa99 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -853,6 +853,7 @@ struct mlx4_vf_dev {
 };
 
 struct mlx4_fw_crdump {
+	bool snapshot_enable;
 	struct devlink_region *region_crspace;
 	struct devlink_region *region_fw_health;
 };
-- 
cgit v1.2.3


From 57e94c8b974db2d83c60e1139c89a70806abbea0 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 4 Jul 2018 17:08:18 +0200
Subject: mfd: cros-ec: Increase maximum mkbp event size

Having a 16 byte mkbp event size makes it possible to send CEC
messages from the EC to the AP directly inside the mkbp event
instead of first doing a notification and then a read.

Signed-off-by: Stefan Adolfsson <sadolfsson@chromium.org>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Tested-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/platform/chrome/cros_ec_proto.c | 40 +++++++++++++++++++++++++--------
 include/linux/mfd/cros_ec.h             |  2 +-
 include/linux/mfd/cros_ec_commands.h    | 16 +++++++++++++
 3 files changed, 48 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index 8350ca2311c7..398393ab5df8 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -506,10 +506,31 @@ int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
 }
 EXPORT_SYMBOL(cros_ec_cmd_xfer_status);
 
+static int get_next_event_xfer(struct cros_ec_device *ec_dev,
+			       struct cros_ec_command *msg,
+			       int version, uint32_t size)
+{
+	int ret;
+
+	msg->version = version;
+	msg->command = EC_CMD_GET_NEXT_EVENT;
+	msg->insize = size;
+	msg->outsize = 0;
+
+	ret = cros_ec_cmd_xfer(ec_dev, msg);
+	if (ret > 0) {
+		ec_dev->event_size = ret - 1;
+		memcpy(&ec_dev->event_data, msg->data, ec_dev->event_size);
+	}
+
+	return ret;
+}
+
 static int get_next_event(struct cros_ec_device *ec_dev)
 {
 	u8 buffer[sizeof(struct cros_ec_command) + sizeof(ec_dev->event_data)];
 	struct cros_ec_command *msg = (struct cros_ec_command *)&buffer;
+	static int cmd_version = 1;
 	int ret;
 
 	if (ec_dev->suspended) {
@@ -517,18 +538,19 @@ static int get_next_event(struct cros_ec_device *ec_dev)
 		return -EHOSTDOWN;
 	}
 
-	msg->version = 0;
-	msg->command = EC_CMD_GET_NEXT_EVENT;
-	msg->insize = sizeof(ec_dev->event_data);
-	msg->outsize = 0;
+	if (cmd_version == 1) {
+		ret = get_next_event_xfer(ec_dev, msg, cmd_version,
+				sizeof(struct ec_response_get_next_event_v1));
+		if (ret < 0 || msg->result != EC_RES_INVALID_VERSION)
+			return ret;
 
-	ret = cros_ec_cmd_xfer(ec_dev, msg);
-	if (ret > 0) {
-		ec_dev->event_size = ret - 1;
-		memcpy(&ec_dev->event_data, msg->data,
-		       sizeof(ec_dev->event_data));
+		/* Fallback to version 0 for future send attempts */
+		cmd_version = 0;
 	}
 
+	ret = get_next_event_xfer(ec_dev, msg, cmd_version,
+				  sizeof(struct ec_response_get_next_event));
+
 	return ret;
 }
 
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 32421dfeb996..20949dde35cd 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -147,7 +147,7 @@ struct cros_ec_device {
 	bool mkbp_event_supported;
 	struct blocking_notifier_head event_notifier;
 
-	struct ec_response_get_next_event event_data;
+	struct ec_response_get_next_event_v1 event_data;
 	int event_size;
 	u32 host_event_wake_mask;
 };
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index f2edd9969b40..c4f0caa642be 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -2093,12 +2093,28 @@ union ec_response_get_next_data {
 	uint32_t   sysrq;
 } __packed;
 
+union ec_response_get_next_data_v1 {
+	uint8_t key_matrix[16];
+	uint32_t host_event;
+	uint32_t buttons;
+	uint32_t switches;
+	uint32_t sysrq;
+	uint32_t cec_events;
+	uint8_t cec_message[16];
+} __packed;
+
 struct ec_response_get_next_event {
 	uint8_t event_type;
 	/* Followed by event data if any */
 	union ec_response_get_next_data data;
 } __packed;
 
+struct ec_response_get_next_event_v1 {
+	uint8_t event_type;
+	/* Followed by event data if any */
+	union ec_response_get_next_data_v1 data;
+} __packed;
+
 /* Bit indices for buttons and switches.*/
 /* Buttons */
 #define EC_MKBP_POWER_BUTTON	0
-- 
cgit v1.2.3


From f47674e5263dfe0f89a640fc8b7dc54f9b79d5db Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 4 Jul 2018 17:08:19 +0200
Subject: mfd: cros-ec: Introduce CEC commands and events definitions.

The EC can expose a CEC bus, this patch adds the CEC related definitions
needed by the cros-ec-cec driver.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Tested-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec_commands.h | 81 ++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index c4f0caa642be..5079bc2d4378 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -804,6 +804,8 @@ enum ec_feature_code {
 	EC_FEATURE_MOTION_SENSE_FIFO = 24,
 	/* EC has RTC feature that can be controlled by host commands */
 	EC_FEATURE_RTC = 27,
+	/* EC supports CEC commands */
+	EC_FEATURE_CEC = 35,
 };
 
 #define EC_FEATURE_MASK_0(event_code) (1UL << (event_code % 32))
@@ -2078,6 +2080,12 @@ enum ec_mkbp_event {
 	/* EC sent a sysrq command */
 	EC_MKBP_EVENT_SYSRQ = 6,
 
+	/* Notify the AP that something happened on CEC */
+	EC_MKBP_EVENT_CEC_EVENT = 8,
+
+	/* Send an incoming CEC message to the AP */
+	EC_MKBP_EVENT_CEC_MESSAGE = 9,
+
 	/* Number of MKBP events */
 	EC_MKBP_EVENT_COUNT,
 };
@@ -2845,6 +2853,79 @@ struct ec_params_reboot_ec {
 #define EC_ACPI_MEM_VERSION_CURRENT 1
 
 
+/*****************************************************************************/
+/*
+ * HDMI CEC commands
+ *
+ * These commands are for sending and receiving message via HDMI CEC
+ */
+#define EC_MAX_CEC_MSG_LEN 16
+
+/* CEC message from the AP to be written on the CEC bus */
+#define EC_CMD_CEC_WRITE_MSG 0x00B8
+
+/**
+ * struct ec_params_cec_write - Message to write to the CEC bus
+ * @msg: message content to write to the CEC bus
+ */
+struct ec_params_cec_write {
+	uint8_t msg[EC_MAX_CEC_MSG_LEN];
+} __packed;
+
+/* Set various CEC parameters */
+#define EC_CMD_CEC_SET 0x00BA
+
+/**
+ * struct ec_params_cec_set - CEC parameters set
+ * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS
+ * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC
+ *	or 1 to enable CEC functionality, in case cmd is CEC_CMD_LOGICAL_ADDRESS,
+ *	this field encodes the requested logical address between 0 and 15
+ *	or 0xff to unregister
+ */
+struct ec_params_cec_set {
+	uint8_t cmd; /* enum cec_command */
+	uint8_t val;
+} __packed;
+
+/* Read various CEC parameters */
+#define EC_CMD_CEC_GET 0x00BB
+
+/**
+ * struct ec_params_cec_get - CEC parameters get
+ * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS
+ */
+struct ec_params_cec_get {
+	uint8_t cmd; /* enum cec_command */
+} __packed;
+
+/**
+ * struct ec_response_cec_get - CEC parameters get response
+ * @val: in case cmd was CEC_CMD_ENABLE, this field will 0 if CEC is
+ *	disabled or 1 if CEC functionality is enabled,
+ *	in case cmd was CEC_CMD_LOGICAL_ADDRESS, this will encode the
+ *	configured logical address between 0 and 15 or 0xff if unregistered
+ */
+struct ec_response_cec_get {
+	uint8_t val;
+} __packed;
+
+/* CEC parameters command */
+enum ec_cec_command {
+	/* CEC reading, writing and events enable */
+	CEC_CMD_ENABLE,
+	/* CEC logical address  */
+	CEC_CMD_LOGICAL_ADDRESS,
+};
+
+/* Events from CEC to AP */
+enum mkbp_cec_event {
+	/* Outgoing message was acknowledged by a follower */
+	EC_MKBP_CEC_SEND_OK			= BIT(0),
+	/* Outgoing message was not acknowledged */
+	EC_MKBP_CEC_SEND_FAILED			= BIT(1),
+};
+
 /*****************************************************************************/
 /*
  * Special commands
-- 
cgit v1.2.3


From 03133347b4452ef9b1f1456b92f5fafa467c0655 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Date: Mon, 30 Apr 2018 18:33:24 +0200
Subject: KVM: s390: a utility function for migration

Introduce a utility function that will be used later on for storage
attributes migration, and use it in kvm_main.c to replace existing code
that does the same thing.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Message-Id: <1525106005-13931-2-git-send-email-imbrenda@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 include/linux/kvm_host.h | 7 +++++++
 virt/kvm/kvm_main.c      | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4ee7bc548a83..5f138b40e433 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -309,6 +309,13 @@ static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memsl
 	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 }
 
+static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+	unsigned long len = kvm_dirty_bitmap_bytes(memslot);
+
+	return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
+}
+
 struct kvm_s390_adapter_int {
 	u64 ind_addr;
 	u64 summary_addr;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b47507faab5..f519eb8d06b1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1169,7 +1169,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
 
 	n = kvm_dirty_bitmap_bytes(memslot);
 
-	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
 	memset(dirty_bitmap_buffer, 0, n);
 
 	spin_lock(&kvm->mmu_lock);
-- 
cgit v1.2.3


From afed7bcf9487bb28e2e2b016a195085c07416c0b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 11 Jul 2018 10:36:07 +0100
Subject: locking/refcount: Always allow checked forms

In many cases, it would be useful to be able to use the full
sanity-checked refcount helpers regardless of CONFIG_REFCOUNT_FULL,
as this would help to avoid duplicate warnings where callers try to
sanity-check refcount manipulation.

This patch refactors things such that the full refcount helpers were
always built, as refcount_${op}_checked(), such that they can be used
regardless of CONFIG_REFCOUNT_FULL. This will allow code which *always*
wants a checked refcount to opt-in, avoiding the need to duplicate the
logic for warnings.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180711093607.1644-1-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/refcount.h | 27 +++++++++++++++++-------
 lib/refcount.c           | 53 +++++++++++++++++++++++-------------------------
 2 files changed, 45 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index c36addd27dd5..53c5eca24d83 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -43,17 +43,30 @@ static inline unsigned int refcount_read(const refcount_t *r)
 	return atomic_read(&r->refs);
 }
 
+extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r);
+extern void refcount_add_checked(unsigned int i, refcount_t *r);
+
+extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r);
+extern void refcount_inc_checked(refcount_t *r);
+
+extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r);
+
+extern __must_check bool refcount_dec_and_test_checked(refcount_t *r);
+extern void refcount_dec_checked(refcount_t *r);
+
 #ifdef CONFIG_REFCOUNT_FULL
-extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
-extern void refcount_add(unsigned int i, refcount_t *r);
 
-extern __must_check bool refcount_inc_not_zero(refcount_t *r);
-extern void refcount_inc(refcount_t *r);
+#define refcount_add_not_zero	refcount_add_not_zero_checked
+#define refcount_add		refcount_add_checked
+
+#define refcount_inc_not_zero	refcount_inc_not_zero_checked
+#define refcount_inc		refcount_inc_checked
+
+#define refcount_sub_and_test	refcount_sub_and_test_checked
 
-extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r);
+#define refcount_dec_and_test	refcount_dec_and_test_checked
+#define refcount_dec		refcount_dec_checked
 
-extern __must_check bool refcount_dec_and_test(refcount_t *r);
-extern void refcount_dec(refcount_t *r);
 #else
 # ifdef CONFIG_ARCH_HAS_REFCOUNT
 #  include <asm/refcount.h>
diff --git a/lib/refcount.c b/lib/refcount.c
index 4bd842f20749..5c4aaefc0682 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -40,10 +40,8 @@
 #include <linux/spinlock.h>
 #include <linux/bug.h>
 
-#ifdef CONFIG_REFCOUNT_FULL
-
 /**
- * refcount_add_not_zero - add a value to a refcount unless it is 0
+ * refcount_add_not_zero_checked - add a value to a refcount unless it is 0
  * @i: the value to add to the refcount
  * @r: the refcount
  *
@@ -60,7 +58,7 @@
  *
  * Return: false if the passed refcount is 0, true otherwise
  */
-bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
@@ -81,10 +79,10 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 
 	return true;
 }
-EXPORT_SYMBOL(refcount_add_not_zero);
+EXPORT_SYMBOL(refcount_add_not_zero_checked);
 
 /**
- * refcount_add - add a value to a refcount
+ * refcount_add_checked - add a value to a refcount
  * @i: the value to add to the refcount
  * @r: the refcount
  *
@@ -99,14 +97,14 @@ EXPORT_SYMBOL(refcount_add_not_zero);
  * cases, refcount_inc(), or one of its variants, should instead be used to
  * increment a reference count.
  */
-void refcount_add(unsigned int i, refcount_t *r)
+void refcount_add_checked(unsigned int i, refcount_t *r)
 {
-	WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
+	WARN_ONCE(!refcount_add_not_zero_checked(i, r), "refcount_t: addition on 0; use-after-free.\n");
 }
-EXPORT_SYMBOL(refcount_add);
+EXPORT_SYMBOL(refcount_add_checked);
 
 /**
- * refcount_inc_not_zero - increment a refcount unless it is 0
+ * refcount_inc_not_zero_checked - increment a refcount unless it is 0
  * @r: the refcount to increment
  *
  * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN.
@@ -117,7 +115,7 @@ EXPORT_SYMBOL(refcount_add);
  *
  * Return: true if the increment was successful, false otherwise
  */
-bool refcount_inc_not_zero(refcount_t *r)
+bool refcount_inc_not_zero_checked(refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
@@ -136,10 +134,10 @@ bool refcount_inc_not_zero(refcount_t *r)
 
 	return true;
 }
-EXPORT_SYMBOL(refcount_inc_not_zero);
+EXPORT_SYMBOL(refcount_inc_not_zero_checked);
 
 /**
- * refcount_inc - increment a refcount
+ * refcount_inc_checked - increment a refcount
  * @r: the refcount to increment
  *
  * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN.
@@ -150,14 +148,14 @@ EXPORT_SYMBOL(refcount_inc_not_zero);
  * Will WARN if the refcount is 0, as this represents a possible use-after-free
  * condition.
  */
-void refcount_inc(refcount_t *r)
+void refcount_inc_checked(refcount_t *r)
 {
-	WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+	WARN_ONCE(!refcount_inc_not_zero_checked(r), "refcount_t: increment on 0; use-after-free.\n");
 }
-EXPORT_SYMBOL(refcount_inc);
+EXPORT_SYMBOL(refcount_inc_checked);
 
 /**
- * refcount_sub_and_test - subtract from a refcount and test if it is 0
+ * refcount_sub_and_test_checked - subtract from a refcount and test if it is 0
  * @i: amount to subtract from the refcount
  * @r: the refcount
  *
@@ -176,7 +174,7 @@ EXPORT_SYMBOL(refcount_inc);
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */
-bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
@@ -194,10 +192,10 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 
 	return !new;
 }
-EXPORT_SYMBOL(refcount_sub_and_test);
+EXPORT_SYMBOL(refcount_sub_and_test_checked);
 
 /**
- * refcount_dec_and_test - decrement a refcount and test if it is 0
+ * refcount_dec_and_test_checked - decrement a refcount and test if it is 0
  * @r: the refcount
  *
  * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
@@ -209,14 +207,14 @@ EXPORT_SYMBOL(refcount_sub_and_test);
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */
-bool refcount_dec_and_test(refcount_t *r)
+bool refcount_dec_and_test_checked(refcount_t *r)
 {
-	return refcount_sub_and_test(1, r);
+	return refcount_sub_and_test_checked(1, r);
 }
-EXPORT_SYMBOL(refcount_dec_and_test);
+EXPORT_SYMBOL(refcount_dec_and_test_checked);
 
 /**
- * refcount_dec - decrement a refcount
+ * refcount_dec_checked - decrement a refcount
  * @r: the refcount
  *
  * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
@@ -225,12 +223,11 @@ EXPORT_SYMBOL(refcount_dec_and_test);
  * Provides release memory ordering, such that prior loads and stores are done
  * before.
  */
-void refcount_dec(refcount_t *r)
+void refcount_dec_checked(refcount_t *r)
 {
-	WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
+	WARN_ONCE(refcount_dec_and_test_checked(r), "refcount_t: decrement hit 0; leaking memory.\n");
 }
-EXPORT_SYMBOL(refcount_dec);
-#endif /* CONFIG_REFCOUNT_FULL */
+EXPORT_SYMBOL(refcount_dec_checked);
 
 /**
  * refcount_dec_if_one - decrement a refcount if it is 1
-- 
cgit v1.2.3


From 05814a10370b3252fe2b0898b6adac3cdd531096 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Fri, 13 Jul 2018 17:07:26 +0300
Subject: block: remove blkdev_entry_to_request() macro

Remove blkdev_entry_to_request() macro, which remained unused through
the observable history, also note that it repeats list_entry_rq() macro
verbatim.

Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 137759862f07..1939ed95f936 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1436,8 +1436,6 @@ enum blk_default_limits {
 	BLK_SEG_BOUNDARY_MASK	= 0xFFFFFFFFUL,
 };
 
-#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
-
 static inline unsigned long queue_segment_boundary(struct request_queue *q)
 {
 	return q->limits.seg_boundary_mask;
-- 
cgit v1.2.3


From 8e1b706b6e819bed215c0db16345568864660393 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 13 Jul 2018 16:23:23 +0200
Subject: cpu/hotplug: Expose SMT control init function

The L1TF mitigation will gain a commend line parameter which allows to set
a combination of hypervisor mitigation and SMT control.

Expose cpu_smt_disable() so the command line parser can tweak SMT settings.

[ tglx: Split out of larger patch and made it preserve an already existing
  	force off state ]

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142323.039715135@linutronix.de
---
 include/linux/cpu.h |  2 ++
 kernel/cpu.c        | 16 +++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7532cbf27b1d..c9b23ad27b38 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -177,8 +177,10 @@ enum cpuhp_smt_control {
 
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
+extern void cpu_smt_disable(bool force);
 #else
 # define cpu_smt_control		(CPU_SMT_ENABLED)
+static inline void cpu_smt_disable(bool force) { }
 #endif
 
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d79e24df2420..8453e31f2d1a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -347,13 +347,23 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 EXPORT_SYMBOL_GPL(cpu_smt_control);
 
-static int __init smt_cmdline_disable(char *str)
+void __init cpu_smt_disable(bool force)
 {
-	cpu_smt_control = CPU_SMT_DISABLED;
-	if (str && !strcmp(str, "force")) {
+	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
+		cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+		return;
+
+	if (force) {
 		pr_info("SMT: Force disabled\n");
 		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
+	} else {
+		cpu_smt_control = CPU_SMT_DISABLED;
 	}
+}
+
+static int __init smt_cmdline_disable(char *str)
+{
+	cpu_smt_disable(str && !strcmp(str, "force"));
 	return 0;
 }
 early_param("nosmt", smt_cmdline_disable);
-- 
cgit v1.2.3


From fee0aede6f4739c87179eca76136f83210953b86 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:24 +0200
Subject: cpu/hotplug: Set CPU_SMT_NOT_SUPPORTED early

The CPU_SMT_NOT_SUPPORTED state is set (if the processor does not support
SMT) when the sysfs SMT control file is initialized.

That was fine so far as this was only required to make the output of the
control file correct and to prevent writes in that case.

With the upcoming l1tf command line parameter, this needs to be set up
before the L1TF mitigation selection and command line parsing happens.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142323.121795971@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c |  6 ++++++
 include/linux/cpu.h        |  2 ++
 kernel/cpu.c               | 13 ++++++++++---
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7df7df9a2753..4be69a7bef0b 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -58,6 +58,12 @@ void __init check_bugs(void)
 {
 	identify_boot_cpu();
 
+	/*
+	 * identify_boot_cpu() initialized SMT support information, let the
+	 * core code know.
+	 */
+	cpu_smt_check_topology();
+
 	if (!IS_ENABLED(CONFIG_SMP)) {
 		pr_info("CPU: ");
 		print_cpu_info(&boot_cpu_data);
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index c9b23ad27b38..ccdf3a67ce56 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -178,9 +178,11 @@ enum cpuhp_smt_control {
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
+extern void cpu_smt_check_topology(void);
 #else
 # define cpu_smt_control		(CPU_SMT_ENABLED)
 static inline void cpu_smt_disable(bool force) { }
+static inline void cpu_smt_check_topology(void) { }
 #endif
 
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8453e31f2d1a..37eec872042b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -361,6 +361,16 @@ void __init cpu_smt_disable(bool force)
 	}
 }
 
+/*
+ * The decision whether SMT is supported can only be done after the full
+ * CPU identification. Called from architecture code.
+ */
+void __init cpu_smt_check_topology(void)
+{
+	if (!topology_smt_supported())
+		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+}
+
 static int __init smt_cmdline_disable(char *str)
 {
 	cpu_smt_disable(str && !strcmp(str, "force"));
@@ -2115,9 +2125,6 @@ static const struct attribute_group cpuhp_smt_attr_group = {
 
 static int __init cpu_smt_state_init(void)
 {
-	if (!topology_smt_supported())
-		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-
 	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
 				  &cpuhp_smt_attr_group);
 }
-- 
cgit v1.2.3


From 6b8675897338f874c41612655a85d8e10cdb23d8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 11 Jul 2018 20:36:39 -0700
Subject: xdp: don't make drivers report attachment mode

prog_attached of struct netdev_bpf should have been superseded
by simply setting prog_id long time ago, but we kept it around
to allow offloading drivers to communicate attachment mode (drv
vs hw).  Subsequently drivers were also allowed to report back
attachment flags (prog_flags), and since nowadays only programs
attached will XDP_FLAGS_HW_MODE can get offloaded, we can tell
the attachment mode from the flags driver reports.  Remove
prog_attached member.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c       | 1 -
 drivers/net/ethernet/cavium/thunder/nicvf_main.c    | 1 -
 drivers/net/ethernet/intel/i40e/i40e_main.c         | 1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c       | 1 -
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c   | 1 -
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c      | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c   | 1 -
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 ---
 drivers/net/ethernet/qlogic/qede/qede_filter.c      | 1 -
 drivers/net/netdevsim/bpf.c                         | 1 -
 drivers/net/tun.c                                   | 1 -
 drivers/net/virtio_net.c                            | 1 -
 include/linux/netdevice.h                           | 5 -----
 net/core/dev.c                                      | 7 +++----
 net/core/rtnetlink.c                                | 8 ++++++--
 15 files changed, 9 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 1f0e872d0667..0584d07c8c33 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -219,7 +219,6 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		rc = bnxt_xdp_set(bp, xdp->prog);
 		break;
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!bp->xdp_prog;
 		xdp->prog_id = bp->xdp_prog ? bp->xdp_prog->aux->id : 0;
 		rc = 0;
 		break;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 135766c4296b..768f584f8392 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1848,7 +1848,6 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG:
 		return nicvf_xdp_setup(nic, xdp->prog);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!nic->xdp_prog;
 		xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0;
 		return 0;
 	default:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 426b0ccb1fc6..51762428b40e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11841,7 +11841,6 @@ static int i40e_xdp(struct net_device *dev,
 	case XDP_SETUP_PROG:
 		return i40e_xdp_setup(vsi, xdp->prog);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = i40e_enabled_xdp_vsi(vsi);
 		xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
 		return 0;
 	default:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index a8e21becb619..3862fea1c923 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9966,7 +9966,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG:
 		return ixgbe_xdp_setup(dev, xdp->prog);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!(adapter->xdp_prog);
 		xdp->prog_id = adapter->xdp_prog ?
 			adapter->xdp_prog->aux->id : 0;
 		return 0;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 59416eddd840..d86446d202d5 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -4462,7 +4462,6 @@ static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG:
 		return ixgbevf_xdp_setup(dev, xdp->prog);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!(adapter->xdp_prog);
 		xdp->prog_id = adapter->xdp_prog ?
 			       adapter->xdp_prog->aux->id : 0;
 		return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 65eb06e017e4..6785661d1a72 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2926,7 +2926,6 @@ static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		return mlx4_xdp_set(dev, xdp->prog);
 	case XDP_QUERY_PROG:
 		xdp->prog_id = mlx4_xdp_query(dev);
-		xdp->prog_attached = !!xdp->prog_id;
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bbd2fd0b2e06..e4a9a0768a81 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4192,7 +4192,6 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		return mlx5e_xdp_set(dev, xdp->prog);
 	case XDP_QUERY_PROG:
 		xdp->prog_id = mlx5e_xdp_query(dev);
-		xdp->prog_attached = !!xdp->prog_id;
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 7df5ca37bfb8..d20714598613 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3459,9 +3459,6 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 		return nfp_net_xdp_setup(nn, xdp->prog, xdp->flags,
 					 xdp->extack);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!nn->xdp_prog;
-		if (nn->dp.bpf_offload_xdp)
-			xdp->prog_attached = XDP_ATTACHED_HW;
 		xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
 		xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0;
 		return 0;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index b823bfe2ea4d..f9a327c821eb 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -1116,7 +1116,6 @@ int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG:
 		return qede_xdp_set(edev, xdp->prog);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!edev->xdp_prog;
 		xdp->prog_id = edev->xdp_prog ? edev->xdp_prog->aux->id : 0;
 		return 0;
 	default:
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 75c25306d234..712e6f918065 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -567,7 +567,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 		nsim_bpf_destroy_prog(bpf->offload.prog);
 		return 0;
 	case XDP_QUERY_PROG:
-		bpf->prog_attached = ns->xdp_prog_mode;
 		bpf->prog_id = ns->xdp_prog ? ns->xdp_prog->aux->id : 0;
 		bpf->prog_flags = ns->xdp_prog ? ns->xdp_flags : 0;
 		return 0;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a192a017cc68..49a50219d0da 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1268,7 +1268,6 @@ static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		return tun_xdp_set(dev, xdp->prog, xdp->extack);
 	case XDP_QUERY_PROG:
 		xdp->prog_id = tun_xdp_query(dev);
-		xdp->prog_attached = !!xdp->prog_id;
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 53085c63277b..2ff08bc103a9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2343,7 +2343,6 @@ static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
 	case XDP_QUERY_PROG:
 		xdp->prog_id = virtnet_xdp_query(dev);
-		xdp->prog_attached = !!xdp->prog_id;
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b683971e500d..69a664789b33 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -819,10 +819,6 @@ enum bpf_netdev_command {
 	 */
 	XDP_SETUP_PROG,
 	XDP_SETUP_PROG_HW,
-	/* Check if a bpf program is set on the device.  The callee should
-	 * set @prog_attached to one of XDP_ATTACHED_* values, note that "true"
-	 * is equivalent to XDP_ATTACHED_DRV.
-	 */
 	XDP_QUERY_PROG,
 	/* BPF program for offload callbacks, invoked at program load time. */
 	BPF_OFFLOAD_VERIFIER_PREP,
@@ -849,7 +845,6 @@ struct netdev_bpf {
 		};
 		/* XDP_QUERY_PROG */
 		struct {
-			u8 prog_attached;
 			u32 prog_id;
 			/* flags with which program was installed */
 			u32 prog_flags;
diff --git a/net/core/dev.c b/net/core/dev.c
index 89825c1eccdc..9fa3b3705a8e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4926,7 +4926,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 		break;
 
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!old;
 		xdp->prog_id = old ? old->aux->id : 0;
 		break;
 
@@ -7593,13 +7592,13 @@ void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
 	WARN_ON(bpf_op(dev, xdp) < 0);
 }
 
-static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
+static bool __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
 {
 	struct netdev_bpf xdp;
 
 	__dev_xdp_query(dev, bpf_op, &xdp);
 
-	return xdp.prog_attached;
+	return xdp.prog_id;
 }
 
 static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
@@ -7634,7 +7633,7 @@ static void dev_xdp_uninstall(struct net_device *dev)
 		return;
 
 	__dev_xdp_query(dev, ndo_bpf, &xdp);
-	if (xdp.prog_attached == XDP_ATTACHED_NONE)
+	if (!xdp.prog_id)
 		return;
 
 	/* Program removal should always succeed */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b40242459907..02ebc056a688 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1372,9 +1372,13 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
 		return XDP_ATTACHED_NONE;
 
 	__dev_xdp_query(dev, ops->ndo_bpf, &xdp);
-	*prog_id = xdp.prog_id;
+	if (!xdp.prog_id)
+		return XDP_ATTACHED_NONE;
 
-	return xdp.prog_attached;
+	*prog_id = xdp.prog_id;
+	if (xdp.prog_flags & XDP_FLAGS_HW_MODE)
+		return XDP_ATTACHED_HW;
+	return XDP_ATTACHED_DRV;
 }
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
-- 
cgit v1.2.3


From a25717d2b604347d9af8da81deea7b08e8c94220 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 11 Jul 2018 20:36:41 -0700
Subject: xdp: support simultaneous driver and hw XDP attachment

Split the query of HW-attached program from the software one.
Introduce new .ndo_bpf command to query HW-attached program.
This will allow drivers to install different programs in HW
and SW at the same time.  Netlink can now also carry multiple
programs on dump (in which case mode will be set to
XDP_ATTACHED_MULTI and user has to check per-attachment point
attributes, IFLA_XDP_PROG_ID will not be present).  We reuse
IFLA_XDP_PROG_ID skb space for second mode, so rtnl_xdp_size()
doesn't need to be updated.

Note that the installation side is still not there, since all
drivers currently reject installing more than one program at
the time.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    |  6 ++
 drivers/net/netdevsim/bpf.c                        |  6 ++
 include/linux/netdevice.h                          |  7 +-
 include/uapi/linux/if_link.h                       |  1 +
 net/core/dev.c                                     | 45 ++++++-----
 net/core/rtnetlink.c                               | 93 ++++++++++++----------
 6 files changed, 96 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 4bb589dbffbc..bb1e72e8dbc2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3453,6 +3453,12 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG_HW:
 		return nfp_net_xdp_setup(nn, xdp);
 	case XDP_QUERY_PROG:
+		if (nn->dp.bpf_offload_xdp)
+			return 0;
+		return xdp_attachment_query(&nn->xdp, xdp);
+	case XDP_QUERY_PROG_HW:
+		if (!nn->dp.bpf_offload_xdp)
+			return 0;
 		return xdp_attachment_query(&nn->xdp, xdp);
 	default:
 		return nfp_app_bpf(nn->app, nn, xdp);
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index c485d97b5df4..5544c9b51173 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -561,6 +561,12 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 		nsim_bpf_destroy_prog(bpf->offload.prog);
 		return 0;
 	case XDP_QUERY_PROG:
+		if (ns->xdp_prog_mode != XDP_ATTACHED_DRV)
+			return 0;
+		return xdp_attachment_query(&ns->xdp, bpf);
+	case XDP_QUERY_PROG_HW:
+		if (ns->xdp_prog_mode != XDP_ATTACHED_HW)
+			return 0;
 		return xdp_attachment_query(&ns->xdp, bpf);
 	case XDP_SETUP_PROG:
 		err = nsim_setup_prog_checks(ns, bpf);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 69a664789b33..2422c0e88f5c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -820,6 +820,7 @@ enum bpf_netdev_command {
 	XDP_SETUP_PROG,
 	XDP_SETUP_PROG_HW,
 	XDP_QUERY_PROG,
+	XDP_QUERY_PROG_HW,
 	/* BPF program for offload callbacks, invoked at program load time. */
 	BPF_OFFLOAD_VERIFIER_PREP,
 	BPF_OFFLOAD_TRANSLATE,
@@ -843,7 +844,7 @@ struct netdev_bpf {
 			struct bpf_prog *prog;
 			struct netlink_ext_ack *extack;
 		};
-		/* XDP_QUERY_PROG */
+		/* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */
 		struct {
 			u32 prog_id;
 			/* flags with which program was installed */
@@ -3533,8 +3534,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags);
-void __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
-		     struct netdev_bpf *xdp);
+u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
+		    enum bpf_netdev_command cmd);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index bc86c2b105ec..8759cfb8aa2e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -920,6 +920,7 @@ enum {
 	XDP_ATTACHED_DRV,
 	XDP_ATTACHED_SKB,
 	XDP_ATTACHED_HW,
+	XDP_ATTACHED_MULTI,
 };
 
 enum {
diff --git a/net/core/dev.c b/net/core/dev.c
index 9fa3b3705a8e..993cdc3cd086 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7582,21 +7582,19 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
-void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
-		     struct netdev_bpf *xdp)
+u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
+		    enum bpf_netdev_command cmd)
 {
-	memset(xdp, 0, sizeof(*xdp));
-	xdp->command = XDP_QUERY_PROG;
+	struct netdev_bpf xdp;
 
-	/* Query must always succeed. */
-	WARN_ON(bpf_op(dev, xdp) < 0);
-}
+	if (!bpf_op)
+		return 0;
 
-static bool __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
-{
-	struct netdev_bpf xdp;
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = cmd;
 
-	__dev_xdp_query(dev, bpf_op, &xdp);
+	/* Query must always succeed. */
+	WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
 
 	return xdp.prog_id;
 }
@@ -7632,12 +7630,19 @@ static void dev_xdp_uninstall(struct net_device *dev)
 	if (!ndo_bpf)
 		return;
 
-	__dev_xdp_query(dev, ndo_bpf, &xdp);
-	if (!xdp.prog_id)
-		return;
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_QUERY_PROG;
+	WARN_ON(ndo_bpf(dev, &xdp));
+	if (xdp.prog_id)
+		WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
+					NULL));
 
-	/* Program removal should always succeed */
-	WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL));
+	/* Remove HW offload */
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_QUERY_PROG_HW;
+	if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
+		WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
+					NULL));
 }
 
 /**
@@ -7653,12 +7658,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	enum bpf_netdev_command query;
 	struct bpf_prog *prog = NULL;
 	bpf_op_t bpf_op, bpf_chk;
 	int err;
 
 	ASSERT_RTNL();
 
+	query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+
 	bpf_op = bpf_chk = ops->ndo_bpf;
 	if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
 		return -EOPNOTSUPP;
@@ -7668,10 +7676,11 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		bpf_chk = generic_xdp_install;
 
 	if (fd >= 0) {
-		if (bpf_chk && __dev_xdp_attached(dev, bpf_chk))
+		if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) ||
+		    __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW))
 			return -EEXIST;
 		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-		    __dev_xdp_attached(dev, bpf_op))
+		    __dev_xdp_query(dev, bpf_op, query))
 			return -EBUSY;
 
 		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02ebc056a688..c9929ef17539 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -964,7 +964,7 @@ static size_t rtnl_xdp_size(void)
 {
 	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
 			  nla_total_size(1) +	/* XDP_ATTACHED */
-			  nla_total_size(4) +	/* XDP_PROG_ID */
+			  nla_total_size(4) +	/* XDP_PROG_ID (or 1st mode) */
 			  nla_total_size(4);	/* XDP_<mode>_PROG_ID */
 
 	return xdp_size;
@@ -1354,37 +1354,57 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
+static u32 rtnl_xdp_prog_skb(struct net_device *dev)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
 	const struct bpf_prog *generic_xdp_prog;
-	struct netdev_bpf xdp;
 
 	ASSERT_RTNL();
 
-	*prog_id = 0;
 	generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
-	if (generic_xdp_prog) {
-		*prog_id = generic_xdp_prog->aux->id;
-		return XDP_ATTACHED_SKB;
-	}
-	if (!ops->ndo_bpf)
-		return XDP_ATTACHED_NONE;
+	if (!generic_xdp_prog)
+		return 0;
+	return generic_xdp_prog->aux->id;
+}
+
+static u32 rtnl_xdp_prog_drv(struct net_device *dev)
+{
+	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
+}
+
+static u32 rtnl_xdp_prog_hw(struct net_device *dev)
+{
+	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
+			       XDP_QUERY_PROG_HW);
+}
+
+static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
+			       u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
+			       u32 (*get_prog_id)(struct net_device *dev))
+{
+	u32 curr_id;
+	int err;
+
+	curr_id = get_prog_id(dev);
+	if (!curr_id)
+		return 0;
+
+	*prog_id = curr_id;
+	err = nla_put_u32(skb, attr, curr_id);
+	if (err)
+		return err;
 
-	__dev_xdp_query(dev, ops->ndo_bpf, &xdp);
-	if (!xdp.prog_id)
-		return XDP_ATTACHED_NONE;
+	if (*mode != XDP_ATTACHED_NONE)
+		*mode = XDP_ATTACHED_MULTI;
+	else
+		*mode = tgt_mode;
 
-	*prog_id = xdp.prog_id;
-	if (xdp.prog_flags & XDP_FLAGS_HW_MODE)
-		return XDP_ATTACHED_HW;
-	return XDP_ATTACHED_DRV;
+	return 0;
 }
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 {
-	u32 prog_attr, prog_id;
 	struct nlattr *xdp;
+	u32 prog_id;
 	int err;
 	u8 mode;
 
@@ -1392,35 +1412,26 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 	if (!xdp)
 		return -EMSGSIZE;
 
-	mode = rtnl_xdp_attached_mode(dev, &prog_id);
+	prog_id = 0;
+	mode = XDP_ATTACHED_NONE;
+	if (rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB,
+				IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb))
+		goto err_cancel;
+	if (rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV,
+				IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv))
+		goto err_cancel;
+	if (rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW,
+				IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw))
+		goto err_cancel;
+
 	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode);
 	if (err)
 		goto err_cancel;
 
-	if (prog_id) {
+	if (prog_id && mode != XDP_ATTACHED_MULTI) {
 		err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id);
 		if (err)
 			goto err_cancel;
-
-		switch (mode) {
-		case XDP_ATTACHED_DRV:
-			prog_attr = IFLA_XDP_DRV_PROG_ID;
-			break;
-		case XDP_ATTACHED_SKB:
-			prog_attr = IFLA_XDP_SKB_PROG_ID;
-			break;
-		case XDP_ATTACHED_HW:
-			prog_attr = IFLA_XDP_HW_PROG_ID;
-			break;
-		case XDP_ATTACHED_NONE:
-		default:
-			err = -EINVAL;
-			goto err_cancel;
-		}
-
-		err = nla_put_u32(skb, prog_attr, prog_id);
-		if (err)
-			goto err_cancel;
 	}
 
 	nla_nest_end(skb, xdp);
-- 
cgit v1.2.3


From c921c2077b32081617789a645120148bc8b60c98 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 13 Jul 2018 12:16:43 +0300
Subject: net: ipmr: add support for passing full packet on wrong vif

This patch adds support for IGMPMSG_WRVIFWHOLE which is used to pass
full packet and real vif id when the incoming interface is wrong.
While the RP and FHR are setting up state we need to be sending the
registers encapsulated with all the data inside otherwise we lose it.
The RP then decapsulates it and forwards it to the interested parties.
Currently with WRONGVIF we can only be sending empty register packets
and will lose that data.
This behaviour can be enabled by using MRT_PIM with
val == IGMPMSG_WRVIFWHOLE. This doesn't prevent IGMPMSG_WRONGVIF from
happening, it happens in addition to it, also it is controlled by the same
throttling parameters as WRONGVIF (i.e. 1 packet per 3 seconds currently).
Both messages are generated to keep backwards compatibily and avoid
breaking someone who was enabling MRT_PIM with val == 4, since any
positive val is accepted and treated the same.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h |  1 +
 include/uapi/linux/mroute.h |  2 ++
 net/ipv4/ipmr.c             | 21 ++++++++++++++++-----
 3 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index fd436cdd4725..6675b9f81979 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -254,6 +254,7 @@ struct mr_table {
 	atomic_t		cache_resolve_queue_len;
 	bool			mroute_do_assert;
 	bool			mroute_do_pim;
+	bool			mroute_do_wrvifwhole;
 	int			mroute_reg_vif_num;
 };
 
diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index 10f9ff9426a2..5d37a9ccce63 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -120,6 +120,7 @@ enum {
 	IPMRA_TABLE_MROUTE_DO_ASSERT,
 	IPMRA_TABLE_MROUTE_DO_PIM,
 	IPMRA_TABLE_VIFS,
+	IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
 	__IPMRA_TABLE_MAX
 };
 #define IPMRA_TABLE_MAX (__IPMRA_TABLE_MAX - 1)
@@ -173,5 +174,6 @@ enum {
 #define IGMPMSG_NOCACHE		1		/* Kern cache fill request to mrouted */
 #define IGMPMSG_WRONGVIF	2		/* For PIM assert processing (unused) */
 #define IGMPMSG_WHOLEPKT	3		/* For PIM Register processing */
+#define IGMPMSG_WRVIFWHOLE	4		/* For PIM Register and assert processing */
 
 #endif /* _UAPI__LINUX_MROUTE_H */
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 82f914122f1b..5660adcf7a04 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1052,7 +1052,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 	struct sk_buff *skb;
 	int ret;
 
-	if (assert == IGMPMSG_WHOLEPKT)
+	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 	else
 		skb = alloc_skb(128, GFP_ATOMIC);
@@ -1060,7 +1060,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 	if (!skb)
 		return -ENOBUFS;
 
-	if (assert == IGMPMSG_WHOLEPKT) {
+	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
 		/* Ugly, but we have no choice with this interface.
 		 * Duplicate old header, fix ihl, length etc.
 		 * And all this only to mangle msg->im_msgtype and
@@ -1071,9 +1071,12 @@ static int ipmr_cache_report(struct mr_table *mrt,
 		skb_reset_transport_header(skb);
 		msg = (struct igmpmsg *)skb_network_header(skb);
 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
-		msg->im_msgtype = IGMPMSG_WHOLEPKT;
+		msg->im_msgtype = assert;
 		msg->im_mbz = 0;
-		msg->im_vif = mrt->mroute_reg_vif_num;
+		if (assert == IGMPMSG_WRVIFWHOLE)
+			msg->im_vif = vifi;
+		else
+			msg->im_vif = mrt->mroute_reg_vif_num;
 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 					     sizeof(struct iphdr));
@@ -1372,6 +1375,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 	struct mr_table *mrt;
 	struct vifctl vif;
 	struct mfcctl mfc;
+	bool do_wrvifwhole;
 	u32 uval;
 
 	/* There's one exception to the lock - MRT_DONE which needs to unlock */
@@ -1502,10 +1506,12 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 			break;
 		}
 
+		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
 		val = !!val;
 		if (val != mrt->mroute_do_pim) {
 			mrt->mroute_do_pim = val;
 			mrt->mroute_do_assert = val;
+			mrt->mroute_do_wrvifwhole = do_wrvifwhole;
 		}
 		break;
 	case MRT_TABLE:
@@ -1983,6 +1989,9 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 			       MFC_ASSERT_THRESH)) {
 			c->_c.mfc_un.res.last_assert = jiffies;
 			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
+			if (mrt->mroute_do_wrvifwhole)
+				ipmr_cache_report(mrt, skb, true_vifi,
+						  IGMPMSG_WRVIFWHOLE);
 		}
 		goto dont_forward;
 	}
@@ -2659,7 +2668,9 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
 			mrt->mroute_reg_vif_num) ||
 	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
 		       mrt->mroute_do_assert) ||
-	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim))
+	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) ||
+	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
+		       mrt->mroute_do_wrvifwhole))
 		return false;
 
 	return true;
-- 
cgit v1.2.3


From 25b4a2b909dc9fdd141ee4239cf78d1617a9d91d Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Wed, 11 Jul 2018 13:40:13 -0600
Subject: coresight: Remove function coresight_vpid_to_pid()

Now that we prevent users from using contextID tracing when PID namespaces
are involved there is no client for function coresight_vpid_to_pid().  As
such simply remove it.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Kim Phillips <kim.phillips@arm.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/coresight.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index c265e0468414..e5421b83e4e6 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -267,24 +267,4 @@ static inline struct coresight_platform_data *of_get_coresight_platform_data(
 	struct device *dev, const struct device_node *node) { return NULL; }
 #endif
 
-#ifdef CONFIG_PID_NS
-static inline unsigned long
-coresight_vpid_to_pid(unsigned long vpid)
-{
-	struct task_struct *task = NULL;
-	unsigned long pid = 0;
-
-	rcu_read_lock();
-	task = find_task_by_vpid(vpid);
-	if (task)
-		pid = task_pid_nr(task);
-	rcu_read_unlock();
-
-	return pid;
-}
-#else
-static inline unsigned long
-coresight_vpid_to_pid(unsigned long vpid) { return vpid; }
-#endif
-
 #endif
-- 
cgit v1.2.3


From 575694978247b7cc1588ec233bde3fbf66db9e50 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Wed, 11 Jul 2018 13:40:27 -0600
Subject: coresight: Cleanup platform description data

Nobody uses the "clk" field in struct coresight_platform_data.
Remove it.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/coresight.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index e5421b83e4e6..69a5c9f82469 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -87,7 +87,6 @@ struct coresight_dev_subtype {
  * @child_ports:child component port number the current component is
 		connected  to.
  * @nr_outport:	number of output ports for this component.
- * @clk:	The clock this component is associated to.
  */
 struct coresight_platform_data {
 	int cpu;
@@ -97,7 +96,6 @@ struct coresight_platform_data {
 	const char **child_names;
 	int *child_ports;
 	int nr_outport;
-	struct clk *clk;
 };
 
 /**
-- 
cgit v1.2.3


From 00b78e8b7b003c686a8570df7d231787f514226c Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Wed, 11 Jul 2018 13:40:29 -0600
Subject: coresight: Cleanup device subtype struct

Clean up our struct a little bit by using a union instead of
a struct for tracking the subtype of a device.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/coresight.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 69a5c9f82469..7c188c2a5b3c 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -63,17 +63,20 @@ enum coresight_dev_subtype_source {
 };
 
 /**
- * struct coresight_dev_subtype - further characterisation of a type
+ * union coresight_dev_subtype - further characterisation of a type
  * @sink_subtype:	type of sink this component is, as defined
-			by @coresight_dev_subtype_sink.
+ *			by @coresight_dev_subtype_sink.
  * @link_subtype:	type of link this component is, as defined
-			by @coresight_dev_subtype_link.
+ *			by @coresight_dev_subtype_link.
  * @source_subtype:	type of source this component is, as defined
-			by @coresight_dev_subtype_source.
+ *			by @coresight_dev_subtype_source.
  */
-struct coresight_dev_subtype {
-	enum coresight_dev_subtype_sink sink_subtype;
-	enum coresight_dev_subtype_link link_subtype;
+union coresight_dev_subtype {
+	/* We have some devices which acts as LINK and SINK */
+	struct {
+		enum coresight_dev_subtype_sink sink_subtype;
+		enum coresight_dev_subtype_link link_subtype;
+	};
 	enum coresight_dev_subtype_source source_subtype;
 };
 
@@ -111,7 +114,7 @@ struct coresight_platform_data {
  */
 struct coresight_desc {
 	enum coresight_dev_type type;
-	struct coresight_dev_subtype subtype;
+	union coresight_dev_subtype subtype;
 	const struct coresight_ops *ops;
 	struct coresight_platform_data *pdata;
 	struct device *dev;
@@ -155,7 +158,7 @@ struct coresight_device {
 	int nr_inport;
 	int nr_outport;
 	enum coresight_dev_type type;
-	struct coresight_dev_subtype subtype;
+	union coresight_dev_subtype subtype;
 	const struct coresight_ops *ops;
 	struct device dev;
 	atomic_t *refcnt;
-- 
cgit v1.2.3


From 8a091d847cecd1a3656172cb79b53a19e90b8c4f Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Wed, 11 Jul 2018 13:40:30 -0600
Subject: coresight: Add helper device type

Add a new coresight device type, which do not belong to any
of the existing types, i.e, source, sink, link etc. A helper
device could be connected to a coresight device, which could
augment the functionality of the coresight device.

This is intended to cover Coresight Address Translation Unit (CATU)
devices, which provide improved Scatter Gather mechanism for TMC
ETR. The idea is that the helper device could be controlled by
the driver of the device it is attached to (in this case ETR),
transparent to the generic coresight driver (and paths).

The operations include enable(), disable(), both of which could
accept a device specific "data" which the driving device and
the helper device could share. Since they don't appear in the
coresight "path" tracked by software, we have to ensure that
they are powered up/down whenever the master device is turned
on.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/coresight.c | 43 +++++++++++++++++++++++++++++++--
 include/linux/coresight.h               | 24 ++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 7f2b478a048f..3e07fd335f8c 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -425,6 +425,42 @@ struct coresight_device *coresight_get_enabled_sink(bool deactivate)
 	return dev ? to_coresight_device(dev) : NULL;
 }
 
+/*
+ * coresight_grab_device - Power up this device and any of the helper
+ * devices connected to it for trace operation. Since the helper devices
+ * don't appear on the trace path, they should be handled along with the
+ * the master device.
+ */
+static void coresight_grab_device(struct coresight_device *csdev)
+{
+	int i;
+
+	for (i = 0; i < csdev->nr_outport; i++) {
+		struct coresight_device *child = csdev->conns[i].child_dev;
+
+		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
+			pm_runtime_get_sync(child->dev.parent);
+	}
+	pm_runtime_get_sync(csdev->dev.parent);
+}
+
+/*
+ * coresight_drop_device - Release this device and any of the helper
+ * devices connected to it.
+ */
+static void coresight_drop_device(struct coresight_device *csdev)
+{
+	int i;
+
+	pm_runtime_put(csdev->dev.parent);
+	for (i = 0; i < csdev->nr_outport; i++) {
+		struct coresight_device *child = csdev->conns[i].child_dev;
+
+		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
+			pm_runtime_put(child->dev.parent);
+	}
+}
+
 /**
  * _coresight_build_path - recursively build a path from a @csdev to a sink.
  * @csdev:	The device to start from.
@@ -473,9 +509,9 @@ out:
 	if (!node)
 		return -ENOMEM;
 
+	coresight_grab_device(csdev);
 	node->csdev = csdev;
 	list_add(&node->link, path);
-	pm_runtime_get_sync(csdev->dev.parent);
 
 	return 0;
 }
@@ -519,7 +555,7 @@ void coresight_release_path(struct list_head *path)
 	list_for_each_entry_safe(nd, next, path, link) {
 		csdev = nd->csdev;
 
-		pm_runtime_put_sync(csdev->dev.parent);
+		coresight_drop_device(csdev);
 		list_del(&nd->link);
 		kfree(nd);
 	}
@@ -770,6 +806,9 @@ static struct device_type coresight_dev_type[] = {
 		.name = "source",
 		.groups = coresight_source_groups,
 	},
+	{
+		.name = "helper",
+	},
 };
 
 static void coresight_device_release(struct device *dev)
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 7c188c2a5b3c..3d40a2b135f1 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -40,6 +40,7 @@ enum coresight_dev_type {
 	CORESIGHT_DEV_TYPE_LINK,
 	CORESIGHT_DEV_TYPE_LINKSINK,
 	CORESIGHT_DEV_TYPE_SOURCE,
+	CORESIGHT_DEV_TYPE_HELPER,
 };
 
 enum coresight_dev_subtype_sink {
@@ -62,6 +63,10 @@ enum coresight_dev_subtype_source {
 	CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE,
 };
 
+enum coresight_dev_subtype_helper {
+	CORESIGHT_DEV_SUBTYPE_HELPER_NONE,
+};
+
 /**
  * union coresight_dev_subtype - further characterisation of a type
  * @sink_subtype:	type of sink this component is, as defined
@@ -70,6 +75,8 @@ enum coresight_dev_subtype_source {
  *			by @coresight_dev_subtype_link.
  * @source_subtype:	type of source this component is, as defined
  *			by @coresight_dev_subtype_source.
+ * @helper_subtype:	type of helper this component is, as defined
+ *			by @coresight_dev_subtype_helper.
  */
 union coresight_dev_subtype {
 	/* We have some devices which acts as LINK and SINK */
@@ -78,6 +85,7 @@ union coresight_dev_subtype {
 		enum coresight_dev_subtype_link link_subtype;
 	};
 	enum coresight_dev_subtype_source source_subtype;
+	enum coresight_dev_subtype_helper helper_subtype;
 };
 
 /**
@@ -172,6 +180,7 @@ struct coresight_device {
 #define source_ops(csdev)	csdev->ops->source_ops
 #define sink_ops(csdev)		csdev->ops->sink_ops
 #define link_ops(csdev)		csdev->ops->link_ops
+#define helper_ops(csdev)	csdev->ops->helper_ops
 
 /**
  * struct coresight_ops_sink - basic operations for a sink
@@ -231,10 +240,25 @@ struct coresight_ops_source {
 			struct perf_event *event);
 };
 
+/**
+ * struct coresight_ops_helper - Operations for a helper device.
+ *
+ * All operations could pass in a device specific data, which could
+ * help the helper device to determine what to do.
+ *
+ * @enable	: Enable the device
+ * @disable	: Disable the device
+ */
+struct coresight_ops_helper {
+	int (*enable)(struct coresight_device *csdev, void *data);
+	int (*disable)(struct coresight_device *csdev, void *data);
+};
+
 struct coresight_ops {
 	const struct coresight_ops_sink *sink_ops;
 	const struct coresight_ops_link *link_ops;
 	const struct coresight_ops_source *source_ops;
+	const struct coresight_ops_helper *helper_ops;
 };
 
 #ifdef CONFIG_CORESIGHT
-- 
cgit v1.2.3


From fcacb5c154baaeaee3d89b2a2b7cf6e4ce43f5f5 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Wed, 11 Jul 2018 13:40:31 -0600
Subject: coresight: Introduce support for Coresight Address Translation Unit

Add the initial support for Coresight Address Translation Unit, which
augments the TMC in Coresight SoC-600 by providing an improved Scatter
Gather mechanism. CATU is always connected to a single TMC-ETR and
converts the AXI address with a translated address (from a given SG
table with specific format). The CATU should be programmed in pass
through mode and enabled even if the ETR doesn't use the translation
by CATU.

This patch provides mechanism to enable/disable the CATU always in the
pass through mode.

We reuse the existing ports mechanism to link the TMC-ETR to the
connected CATU.

i.e, TMC-ETR:output_port0 -> CATU:input_port0

Reference manual for CATU component is avilable in version r2p0 of :
"Arm Coresight System-on-Chip SoC-600 Technical Reference Manual".

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/Kconfig             |  11 ++
 drivers/hwtracing/coresight/Makefile            |   1 +
 drivers/hwtracing/coresight/coresight-catu.c    | 214 ++++++++++++++++++++++++
 drivers/hwtracing/coresight/coresight-catu.h    |  84 ++++++++++
 drivers/hwtracing/coresight/coresight-tmc-etr.c |  52 ++++++
 include/linux/coresight.h                       |   1 +
 6 files changed, 363 insertions(+)
 create mode 100644 drivers/hwtracing/coresight/coresight-catu.c
 create mode 100644 drivers/hwtracing/coresight/coresight-catu.h

(limited to 'include/linux')

diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig
index ef9cb3c164e1..ad34380cac49 100644
--- a/drivers/hwtracing/coresight/Kconfig
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -31,6 +31,17 @@ config CORESIGHT_LINK_AND_SINK_TMC
 	  complies with the generic implementation of the component without
 	  special enhancement or added features.
 
+config CORESIGHT_CATU
+	bool "Coresight Address Translation Unit (CATU) driver"
+	depends on CORESIGHT_LINK_AND_SINK_TMC
+	help
+	   Enable support for the Coresight Address Translation Unit (CATU).
+	   CATU supports a scatter gather table of 4K pages, with forward/backward
+	   lookup. CATU helps TMC ETR to use a large physically non-contiguous trace
+	   buffer by translating the addresses used by ETR to the physical address
+	   by looking up the provided table. CATU can also be used in pass-through
+	   mode where the address is not translated.
+
 config CORESIGHT_SINK_TPIU
 	bool "Coresight generic TPIU driver"
 	depends on CORESIGHT_LINKS_AND_SINKS
diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile
index 61db9dd0d571..41870ded51a3 100644
--- a/drivers/hwtracing/coresight/Makefile
+++ b/drivers/hwtracing/coresight/Makefile
@@ -18,3 +18,4 @@ obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o \
 obj-$(CONFIG_CORESIGHT_DYNAMIC_REPLICATOR) += coresight-dynamic-replicator.o
 obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o
 obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o
+obj-$(CONFIG_CORESIGHT_CATU) += coresight-catu.o
diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c
new file mode 100644
index 000000000000..9d0cb1f6f16b
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Arm Limited. All rights reserved.
+ *
+ * Coresight Address Translation Unit support
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "coresight-catu.h"
+#include "coresight-priv.h"
+
+#define csdev_to_catu_drvdata(csdev)	\
+	dev_get_drvdata(csdev->dev.parent)
+
+coresight_simple_reg32(struct catu_drvdata, devid, CORESIGHT_DEVID);
+coresight_simple_reg32(struct catu_drvdata, control, CATU_CONTROL);
+coresight_simple_reg32(struct catu_drvdata, status, CATU_STATUS);
+coresight_simple_reg32(struct catu_drvdata, mode, CATU_MODE);
+coresight_simple_reg32(struct catu_drvdata, axictrl, CATU_AXICTRL);
+coresight_simple_reg32(struct catu_drvdata, irqen, CATU_IRQEN);
+coresight_simple_reg64(struct catu_drvdata, sladdr,
+		       CATU_SLADDRLO, CATU_SLADDRHI);
+coresight_simple_reg64(struct catu_drvdata, inaddr,
+		       CATU_INADDRLO, CATU_INADDRHI);
+
+static struct attribute *catu_mgmt_attrs[] = {
+	&dev_attr_devid.attr,
+	&dev_attr_control.attr,
+	&dev_attr_status.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_axictrl.attr,
+	&dev_attr_irqen.attr,
+	&dev_attr_sladdr.attr,
+	&dev_attr_inaddr.attr,
+	NULL,
+};
+
+static const struct attribute_group catu_mgmt_group = {
+	.attrs = catu_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *catu_groups[] = {
+	&catu_mgmt_group,
+	NULL,
+};
+
+
+static inline int catu_wait_for_ready(struct catu_drvdata *drvdata)
+{
+	return coresight_timeout(drvdata->base,
+				 CATU_STATUS, CATU_STATUS_READY, 1);
+}
+
+static int catu_enable_hw(struct catu_drvdata *drvdata, void *__unused)
+{
+	u32 control;
+
+	if (catu_wait_for_ready(drvdata))
+		dev_warn(drvdata->dev, "Timeout while waiting for READY\n");
+
+	control = catu_read_control(drvdata);
+	if (control & BIT(CATU_CONTROL_ENABLE)) {
+		dev_warn(drvdata->dev, "CATU is already enabled\n");
+		return -EBUSY;
+	}
+
+	control |= BIT(CATU_CONTROL_ENABLE);
+	catu_write_mode(drvdata, CATU_MODE_PASS_THROUGH);
+	catu_write_control(drvdata, control);
+	dev_dbg(drvdata->dev, "Enabled in Pass through mode\n");
+	return 0;
+}
+
+static int catu_enable(struct coresight_device *csdev, void *data)
+{
+	int rc;
+	struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev);
+
+	CS_UNLOCK(catu_drvdata->base);
+	rc = catu_enable_hw(catu_drvdata, data);
+	CS_LOCK(catu_drvdata->base);
+	return rc;
+}
+
+static int catu_disable_hw(struct catu_drvdata *drvdata)
+{
+	int rc = 0;
+
+	catu_write_control(drvdata, 0);
+	if (catu_wait_for_ready(drvdata)) {
+		dev_info(drvdata->dev, "Timeout while waiting for READY\n");
+		rc = -EAGAIN;
+	}
+
+	dev_dbg(drvdata->dev, "Disabled\n");
+	return rc;
+}
+
+static int catu_disable(struct coresight_device *csdev, void *__unused)
+{
+	int rc;
+	struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev);
+
+	CS_UNLOCK(catu_drvdata->base);
+	rc = catu_disable_hw(catu_drvdata);
+	CS_LOCK(catu_drvdata->base);
+	return rc;
+}
+
+const struct coresight_ops_helper catu_helper_ops = {
+	.enable = catu_enable,
+	.disable = catu_disable,
+};
+
+const struct coresight_ops catu_ops = {
+	.helper_ops = &catu_helper_ops,
+};
+
+static int catu_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret = 0;
+	u32 dma_mask;
+	struct catu_drvdata *drvdata;
+	struct coresight_desc catu_desc;
+	struct coresight_platform_data *pdata = NULL;
+	struct device *dev = &adev->dev;
+	struct device_node *np = dev->of_node;
+	void __iomem *base;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata)) {
+			ret = PTR_ERR(pdata);
+			goto out;
+		}
+		dev->platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	drvdata->dev = dev;
+	dev_set_drvdata(dev, drvdata);
+	base = devm_ioremap_resource(dev, &adev->res);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
+		goto out;
+	}
+
+	/* Setup dma mask for the device */
+	dma_mask = readl_relaxed(base + CORESIGHT_DEVID) & 0x3f;
+	switch (dma_mask) {
+	case 32:
+	case 40:
+	case 44:
+	case 48:
+	case 52:
+	case 56:
+	case 64:
+		break;
+	default:
+		/* Default to the 40bits as supported by TMC-ETR */
+		dma_mask = 40;
+	}
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_mask));
+	if (ret)
+		goto out;
+
+	drvdata->base = base;
+	catu_desc.pdata = pdata;
+	catu_desc.dev = dev;
+	catu_desc.groups = catu_groups;
+	catu_desc.type = CORESIGHT_DEV_TYPE_HELPER;
+	catu_desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CATU;
+	catu_desc.ops = &catu_ops;
+	drvdata->csdev = coresight_register(&catu_desc);
+	if (IS_ERR(drvdata->csdev))
+		ret = PTR_ERR(drvdata->csdev);
+out:
+	pm_runtime_put(&adev->dev);
+	return ret;
+}
+
+static struct amba_id catu_ids[] = {
+	{
+		.id	= 0x000bb9ee,
+		.mask	= 0x000fffff,
+	},
+	{},
+};
+
+static struct amba_driver catu_driver = {
+	.drv = {
+		.name			= "coresight-catu",
+		.owner			= THIS_MODULE,
+		.suppress_bind_attrs	= true,
+	},
+	.probe				= catu_probe,
+	.id_table			= catu_ids,
+};
+
+builtin_amba_driver(catu_driver);
diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h
new file mode 100644
index 000000000000..4f221fccffca
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-catu.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Arm Limited. All rights reserved.
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ */
+
+#ifndef _CORESIGHT_CATU_H
+#define _CORESIGHT_CATU_H
+
+#include "coresight-priv.h"
+
+/* Register offset from base */
+#define CATU_CONTROL		0x000
+#define CATU_MODE		0x004
+#define CATU_AXICTRL		0x008
+#define CATU_IRQEN		0x00c
+#define CATU_SLADDRLO		0x020
+#define CATU_SLADDRHI		0x024
+#define CATU_INADDRLO		0x028
+#define CATU_INADDRHI		0x02c
+#define CATU_STATUS		0x100
+#define CATU_DEVARCH		0xfbc
+
+#define CATU_CONTROL_ENABLE	0
+
+#define CATU_MODE_PASS_THROUGH	0U
+#define CATU_MODE_TRANSLATE	1U
+
+#define CATU_STATUS_READY	8
+#define CATU_STATUS_ADRERR	0
+#define CATU_STATUS_AXIERR	4
+
+#define CATU_IRQEN_ON		0x1
+#define CATU_IRQEN_OFF		0x0
+
+struct catu_drvdata {
+	struct device *dev;
+	void __iomem *base;
+	struct coresight_device *csdev;
+	int irq;
+};
+
+#define CATU_REG32(name, offset)					\
+static inline u32							\
+catu_read_##name(struct catu_drvdata *drvdata)				\
+{									\
+	return coresight_read_reg_pair(drvdata->base, offset, -1);	\
+}									\
+static inline void							\
+catu_write_##name(struct catu_drvdata *drvdata, u32 val)		\
+{									\
+	coresight_write_reg_pair(drvdata->base, val, offset, -1);	\
+}
+
+#define CATU_REG_PAIR(name, lo_off, hi_off)				\
+static inline u64							\
+catu_read_##name(struct catu_drvdata *drvdata)				\
+{									\
+	return coresight_read_reg_pair(drvdata->base, lo_off, hi_off);	\
+}									\
+static inline void							\
+catu_write_##name(struct catu_drvdata *drvdata, u64 val)		\
+{									\
+	coresight_write_reg_pair(drvdata->base, val, lo_off, hi_off);	\
+}
+
+CATU_REG32(control, CATU_CONTROL);
+CATU_REG32(mode, CATU_MODE);
+CATU_REG_PAIR(sladdr, CATU_SLADDRLO, CATU_SLADDRHI)
+CATU_REG_PAIR(inaddr, CATU_INADDRLO, CATU_INADDRHI)
+
+static inline bool coresight_is_catu_device(struct coresight_device *csdev)
+{
+	if (!IS_ENABLED(CONFIG_CORESIGHT_CATU))
+		return false;
+	if (csdev->type != CORESIGHT_DEV_TYPE_HELPER)
+		return false;
+	if (csdev->subtype.helper_subtype != CORESIGHT_DEV_SUBTYPE_HELPER_CATU)
+		return false;
+	return true;
+}
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 8af451230121..e37923acb725 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -9,6 +9,7 @@
 #include <linux/iommu.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include "coresight-catu.h"
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
 
@@ -701,6 +702,48 @@ static const struct etr_buf_operations etr_sg_buf_ops = {
 	.get_data = tmc_etr_get_data_sg_buf,
 };
 
+/*
+ * TMC ETR could be connected to a CATU device, which can provide address
+ * translation service. This is represented by the Output port of the TMC
+ * (ETR) connected to the input port of the CATU.
+ *
+ * Returns	: coresight_device ptr for the CATU device if a CATU is found.
+ *		: NULL otherwise.
+ */
+static inline struct coresight_device *
+tmc_etr_get_catu_device(struct tmc_drvdata *drvdata)
+{
+	int i;
+	struct coresight_device *tmp, *etr = drvdata->csdev;
+
+	if (!IS_ENABLED(CONFIG_CORESIGHT_CATU))
+		return NULL;
+
+	for (i = 0; i < etr->nr_outport; i++) {
+		tmp = etr->conns[i].child_dev;
+		if (tmp && coresight_is_catu_device(tmp))
+			return tmp;
+	}
+
+	return NULL;
+}
+
+static inline void tmc_etr_enable_catu(struct tmc_drvdata *drvdata)
+{
+	struct coresight_device *catu = tmc_etr_get_catu_device(drvdata);
+
+	if (catu && helper_ops(catu)->enable)
+		helper_ops(catu)->enable(catu, NULL);
+}
+
+static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata)
+{
+	struct coresight_device *catu = tmc_etr_get_catu_device(drvdata);
+
+	if (catu && helper_ops(catu)->disable)
+		helper_ops(catu)->disable(catu, NULL);
+}
+
 static const struct etr_buf_operations *etr_buf_ops[] = {
 	[ETR_MODE_FLAT] = &etr_flat_buf_ops,
 	[ETR_MODE_ETR_SG] = &etr_sg_buf_ops,
@@ -844,6 +887,12 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
 	u32 axictl, sts;
 	struct etr_buf *etr_buf = drvdata->etr_buf;
 
+	/*
+	 * If this ETR is connected to a CATU, enable it before we turn
+	 * this on
+	 */
+	tmc_etr_enable_catu(drvdata);
+
 	CS_UNLOCK(drvdata->base);
 
 	/* Wait for TMCSReady bit to be set */
@@ -952,6 +1001,9 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
 	tmc_disable_hw(drvdata);
 
 	CS_LOCK(drvdata->base);
+
+	/* Disable CATU device if this ETR is connected to one */
+	tmc_etr_disable_catu(drvdata);
 }
 
 static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 3d40a2b135f1..d828a6efe0b1 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -65,6 +65,7 @@ enum coresight_dev_subtype_source {
 
 enum coresight_dev_subtype_helper {
 	CORESIGHT_DEV_SUBTYPE_HELPER_NONE,
+	CORESIGHT_DEV_SUBTYPE_HELPER_CATU,
 };
 
 /**
-- 
cgit v1.2.3


From 571d78bd458a831cf51dff2afa1dda3309bd82b2 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:09 +0800
Subject: fpga: mgr: add region_id to fpga_image_info

This patch adds region_id to fpga_image_info data structure, it
allows driver to pass region id information to fpga-mgr via
fpga_image_info for fpga reconfiguration function.

Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Moritz Fischer <mdf@kernel.org>
Acked-by: Alan Tull <atull@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fpga/fpga-mgr.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index eec7c2478b0d..3eb6b9d60d65 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -77,6 +77,7 @@ enum fpga_mgr_states {
  * @sgt: scatter/gather table containing FPGA image
  * @buf: contiguous buffer containing FPGA image
  * @count: size of buf
+ * @region_id: id of target region
  * @dev: device that owns this
  * @overlay: Device Tree overlay
  */
@@ -89,6 +90,7 @@ struct fpga_image_info {
 	struct sg_table *sgt;
 	const char *buf;
 	size_t count;
+	int region_id;
 	struct device *dev;
 #ifdef CONFIG_OF
 	struct device_node *overlay;
-- 
cgit v1.2.3


From ecb5fbe299dfaad778033259f35bc696fa1fb743 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:10 +0800
Subject: fpga: mgr: add status for fpga-manager

This patch adds status sysfs interface for fpga manager, it's a
read only interface which allows user to get fpga manager status,
including full/partial reconfiguration error and other status
information. It adds a status callback to fpga_manager_ops too,
allows each fpga_manager driver to define its own method to
collect latest status from hardware.

The following sysfs file is created:
* /sys/class/fpga_manager/<fpga>/status
  Return status of fpga manager, including reconfiguration errors.

Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-class-fpga-manager | 24 +++++++++++++++++++
 drivers/fpga/fpga-mgr.c                            | 28 ++++++++++++++++++++++
 include/linux/fpga/fpga-mgr.h                      |  9 +++++++
 3 files changed, 61 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-fpga-manager b/Documentation/ABI/testing/sysfs-class-fpga-manager
index 23056c532fdd..5284fa33d4c5 100644
--- a/Documentation/ABI/testing/sysfs-class-fpga-manager
+++ b/Documentation/ABI/testing/sysfs-class-fpga-manager
@@ -35,3 +35,27 @@ Description:	Read fpga manager state as a string.
 		* write complete	= Doing post programming steps
 		* write complete error	= Error while doing post programming
 		* operating		= FPGA is programmed and operating
+
+What:		/sys/class/fpga_manager/<fpga>/status
+Date:		June 2018
+KernelVersion:	4.19
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read fpga manager status as a string.
+		If FPGA programming operation fails, it could be caused by crc
+		error or incompatible bitstream image. The intent of this
+		interface is to provide more detailed information for FPGA
+		programming errors to userspace. This is a list of strings for
+		the supported status.
+
+		* reconfig operation error 	- invalid operations detected by
+						  reconfiguration hardware.
+						  e.g. start reconfiguration
+						  with errors not cleared
+		* reconfig CRC error		- CRC error detected by
+						  reconfiguration hardware.
+		* reconfig incompatible image	- reconfiguration image is
+						  incompatible with hardware
+		* reconfig IP protocol error	- protocol errors detected by
+						  reconfiguration hardware
+		* reconfig fifo overflow error	- FIFO overflow detected by
+						  reconfiguration hardware
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index c1564cf827fe..a41b07e37884 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -406,12 +406,40 @@ static ssize_t state_show(struct device *dev,
 	return sprintf(buf, "%s\n", state_str[mgr->state]);
 }
 
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct fpga_manager *mgr = to_fpga_manager(dev);
+	u64 status;
+	int len = 0;
+
+	if (!mgr->mops->status)
+		return -ENOENT;
+
+	status = mgr->mops->status(mgr);
+
+	if (status & FPGA_MGR_STATUS_OPERATION_ERR)
+		len += sprintf(buf + len, "reconfig operation error\n");
+	if (status & FPGA_MGR_STATUS_CRC_ERR)
+		len += sprintf(buf + len, "reconfig CRC error\n");
+	if (status & FPGA_MGR_STATUS_INCOMPATIBLE_IMAGE_ERR)
+		len += sprintf(buf + len, "reconfig incompatible image\n");
+	if (status & FPGA_MGR_STATUS_IP_PROTOCOL_ERR)
+		len += sprintf(buf + len, "reconfig IP protocol error\n");
+	if (status & FPGA_MGR_STATUS_FIFO_OVERFLOW_ERR)
+		len += sprintf(buf + len, "reconfig fifo overflow error\n");
+
+	return len;
+}
+
 static DEVICE_ATTR_RO(name);
 static DEVICE_ATTR_RO(state);
+static DEVICE_ATTR_RO(status);
 
 static struct attribute *fpga_mgr_attrs[] = {
 	&dev_attr_name.attr,
 	&dev_attr_state.attr,
+	&dev_attr_status.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(fpga_mgr);
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 3eb6b9d60d65..e249b7250345 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -101,6 +101,7 @@ struct fpga_image_info {
  * struct fpga_manager_ops - ops for low level fpga manager drivers
  * @initial_header_size: Maximum number of bytes that should be passed into write_init
  * @state: returns an enum value of the FPGA's state
+ * @status: returns status of the FPGA, including reconfiguration error code
  * @write_init: prepare the FPGA to receive confuration data
  * @write: write count bytes of configuration data to the FPGA
  * @write_sg: write the scatter list of configuration data to the FPGA
@@ -115,6 +116,7 @@ struct fpga_image_info {
 struct fpga_manager_ops {
 	size_t initial_header_size;
 	enum fpga_mgr_states (*state)(struct fpga_manager *mgr);
+	u64 (*status)(struct fpga_manager *mgr);
 	int (*write_init)(struct fpga_manager *mgr,
 			  struct fpga_image_info *info,
 			  const char *buf, size_t count);
@@ -126,6 +128,13 @@ struct fpga_manager_ops {
 	const struct attribute_group **groups;
 };
 
+/* FPGA manager status: Partial/Full Reconfiguration errors */
+#define FPGA_MGR_STATUS_OPERATION_ERR		BIT(0)
+#define FPGA_MGR_STATUS_CRC_ERR			BIT(1)
+#define FPGA_MGR_STATUS_INCOMPATIBLE_IMAGE_ERR	BIT(2)
+#define FPGA_MGR_STATUS_IP_PROTOCOL_ERR		BIT(3)
+#define FPGA_MGR_STATUS_FIFO_OVERFLOW_ERR	BIT(4)
+
 /**
  * struct fpga_manager - fpga manager structure
  * @name: name of low level fpga manager
-- 
cgit v1.2.3


From 99a560bde313892f87ca81db568a829d3d205882 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:11 +0800
Subject: fpga: mgr: add compat_id support

This patch introduces compat_id support to fpga manager, it adds
a fpga_compat_id pointer to fpga manager data structure to allow
fpga manager drivers to save the compatibility id. This compat_id
could be used for compatibility checking before doing partial
reconfiguration to associated fpga regions.

Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fpga/fpga-mgr.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index e249b7250345..8942e61f0028 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -135,12 +135,24 @@ struct fpga_manager_ops {
 #define FPGA_MGR_STATUS_IP_PROTOCOL_ERR		BIT(3)
 #define FPGA_MGR_STATUS_FIFO_OVERFLOW_ERR	BIT(4)
 
+/**
+ * struct fpga_compat_id - id for compatibility check
+ *
+ * @id_h: high 64bit of the compat_id
+ * @id_l: low 64bit of the compat_id
+ */
+struct fpga_compat_id {
+	u64 id_h;
+	u64 id_l;
+};
+
 /**
  * struct fpga_manager - fpga manager structure
  * @name: name of low level fpga manager
  * @dev: fpga manager device
  * @ref_mutex: only allows one reference to fpga manager
  * @state: state of fpga manager
+ * @compat_id: FPGA manager id for compatibility check.
  * @mops: pointer to struct of fpga manager ops
  * @priv: low level driver private date
  */
@@ -149,6 +161,7 @@ struct fpga_manager {
 	struct device dev;
 	struct mutex ref_mutex;
 	enum fpga_mgr_states state;
+	struct fpga_compat_id *compat_id;
 	const struct fpga_manager_ops *mops;
 	void *priv;
 };
-- 
cgit v1.2.3


From 41a8b2c56470b7e4e3e2db93324d50bbbf60cdc4 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:12 +0800
Subject: fpga: region: add compat_id support

This patch introduces a compat_id pointer member and sysfs interface
for each fpga region, similar as compat_id for fpga manager, it allows
applications to read the per region compat_id for compatibility
checking before other actions on this fpga-region (e.g. PR).

Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-class-fpga-region |  9 +++++++++
 drivers/fpga/fpga-region.c                        | 22 ++++++++++++++++++++++
 include/linux/fpga/fpga-region.h                  |  2 ++
 3 files changed, 33 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-class-fpga-region

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-fpga-region b/Documentation/ABI/testing/sysfs-class-fpga-region
new file mode 100644
index 000000000000..bc7ec644acc9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-fpga-region
@@ -0,0 +1,9 @@
+What:		/sys/class/fpga_region/<region>/compat_id
+Date:		June 2018
+KernelVersion:	4.19
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	FPGA region id for compatibility check, e.g. compatibility
+		of the FPGA reconfiguration hardware and image. This value
+		is defined or calculated by the layer that is creating the
+		FPGA region. This interface returns the compat_id value or
+		just error code -ENOENT in case compat_id is not used.
diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c
index 6d214d75c7be..0d65220d5ec5 100644
--- a/drivers/fpga/fpga-region.c
+++ b/drivers/fpga/fpga-region.c
@@ -158,6 +158,27 @@ err_put_region:
 }
 EXPORT_SYMBOL_GPL(fpga_region_program_fpga);
 
+static ssize_t compat_id_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct fpga_region *region = to_fpga_region(dev);
+
+	if (!region->compat_id)
+		return -ENOENT;
+
+	return sprintf(buf, "%016llx%016llx\n",
+		       (unsigned long long)region->compat_id->id_h,
+		       (unsigned long long)region->compat_id->id_l);
+}
+
+static DEVICE_ATTR_RO(compat_id);
+
+static struct attribute *fpga_region_attrs[] = {
+	&dev_attr_compat_id.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(fpga_region);
+
 /**
  * fpga_region_create - alloc and init a struct fpga_region
  * @dev: device parent
@@ -258,6 +279,7 @@ static int __init fpga_region_init(void)
 	if (IS_ERR(fpga_region_class))
 		return PTR_ERR(fpga_region_class);
 
+	fpga_region_class->dev_groups = fpga_region_groups;
 	fpga_region_class->dev_release = fpga_region_dev_release;
 
 	return 0;
diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h
index d7071cddd727..0521b7f577a4 100644
--- a/include/linux/fpga/fpga-region.h
+++ b/include/linux/fpga/fpga-region.h
@@ -14,6 +14,7 @@
  * @bridge_list: list of FPGA bridges specified in region
  * @mgr: FPGA manager
  * @info: FPGA image info
+ * @compat_id: FPGA region id for compatibility check.
  * @priv: private data
  * @get_bridges: optional function to get bridges to a list
  */
@@ -23,6 +24,7 @@ struct fpga_region {
 	struct list_head bridge_list;
 	struct fpga_manager *mgr;
 	struct fpga_image_info *info;
+	struct fpga_compat_id *compat_id;
 	void *priv;
 	int (*get_bridges)(struct fpga_region *region);
 };
-- 
cgit v1.2.3


From 5fd778915ad29184a5ff8eb82d1118f6916b79e4 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Thu, 28 Jun 2018 17:45:14 +0200
Subject: sched/sysctl: Remove unused sched_time_avg_ms sysctl

/proc/sys/kernel/sched_time_avg_ms entry is not used anywhere,
remove it.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Morten.Rasmussen@arm.com
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: claudio@evidence.eu.com
Cc: daniel.lezcano@linaro.org
Cc: dietmar.eggemann@arm.com
Cc: joel@joelfernandes.org
Cc: juri.lelli@redhat.com
Cc: luca.abeni@santannapisa.it
Cc: patrick.bellasi@arm.com
Cc: quentin.perret@arm.com
Cc: rjw@rjwysocki.net
Cc: valentin.schneider@arm.com
Cc: viresh.kumar@linaro.org
Link: http://lkml.kernel.org/r/1530200714-4504-12-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/sysctl.h | 1 -
 kernel/sched/core.c          | 8 --------
 kernel/sched/sched.h         | 1 -
 kernel/sysctl.c              | 8 --------
 4 files changed, 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 1c1a1512ec55..913488d828cb 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -40,7 +40,6 @@ extern unsigned int sysctl_numa_balancing_scan_size;
 #ifdef CONFIG_SCHED_DEBUG
 extern __read_mostly unsigned int sysctl_sched_migration_cost;
 extern __read_mostly unsigned int sysctl_sched_nr_migrate;
-extern __read_mostly unsigned int sysctl_sched_time_avg;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *length,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a691b07390ab..ba6bb805693a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -46,14 +46,6 @@ const_debug unsigned int sysctl_sched_features =
  */
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
-/*
- * period over which we average the RT time consumption, measured
- * in ms.
- *
- * default: 1s
- */
-const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
-
 /*
  * period over which we measure -rt task CPU usage in us.
  * default: 1s
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14aac2d2de80..ebb4b3c3ece7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1713,7 +1713,6 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
 
 extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 
-extern const_debug unsigned int sysctl_sched_time_avg;
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2d9837c0aff4..f22f76b7a138 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -368,14 +368,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "sched_time_avg_ms",
-		.data		= &sysctl_sched_time_avg,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
-	},
 #ifdef CONFIG_SCHEDSTATS
 	{
 		.procname	= "sched_schedstats",
-- 
cgit v1.2.3


From 788faab70d5a882693286b8d5022779559c79904 Mon Sep 17 00:00:00 2001
From: Tobias Tefke <tobias.tefke@gmail.com>
Date: Mon, 9 Jul 2018 12:57:15 +0200
Subject: perf, tools: Use correct articles in comments

Some of the comments in the perf events code use articles incorrectly,
using 'a' for words beginning with a vowel sound, where 'an' should be
used.

Signed-off-by: Tobias Tefke <tobias.tefke@tutanota.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: alexander.shishkin@linux.intel.com
Cc: jolsa@redhat.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/20180709105715.22938-1-tobias.tefke@tutanota.com
[ Fix a few more perf related 'a event' typo fixes from all around the kernel and tooling tree. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/perf/core-book3s.c          |  6 +++---
 include/linux/perf_event.h               |  2 +-
 kernel/events/core.c                     | 16 ++++++++--------
 kernel/events/uprobes.c                  |  6 +++---
 tools/perf/Documentation/perf-list.txt   |  2 +-
 tools/perf/Documentation/perf-record.txt |  4 ++--
 6 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 3f66fcf8ad99..19d8ab49d1bd 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1469,7 +1469,7 @@ static int collect_events(struct perf_event *group, int max_count,
 }
 
 /*
- * Add a event to the PMU.
+ * Add an event to the PMU.
  * If all events are not already frozen, then we disable and
  * re-enable the PMU in order to get hw_perf_enable to do the
  * actual work of reconfiguring the PMU.
@@ -1548,7 +1548,7 @@ nocheck:
 }
 
 /*
- * Remove a event from the PMU.
+ * Remove an event from the PMU.
  */
 static void power_pmu_del(struct perf_event *event, int ef_flags)
 {
@@ -1742,7 +1742,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
 /*
  * Return 1 if we might be able to put event on a limited PMC,
  * or 0 if not.
- * A event can only go on a limited PMC if it counts something
+ * An event can only go on a limited PMC if it counts something
  * that a limited PMC can count, doesn't require interrupts, and
  * doesn't exclude any processor mode.
  */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1fa12887ec02..e6dd3a2f8ec4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -490,7 +490,7 @@ struct perf_addr_filters_head {
 };
 
 /**
- * enum perf_event_state - the states of a event
+ * enum perf_event_state - the states of an event:
  */
 enum perf_event_state {
 	PERF_EVENT_STATE_DEAD		= -4,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a4d5ac6d74af..86b31e5a5a9a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1656,7 +1656,7 @@ perf_event_groups_next(struct perf_event *event)
 				typeof(*event), group_node))
 
 /*
- * Add a event from the lists for its context.
+ * Add an event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
  */
 static void
@@ -1844,7 +1844,7 @@ static void perf_group_attach(struct perf_event *event)
 }
 
 /*
- * Remove a event from the lists for its context.
+ * Remove an event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
  */
 static void
@@ -2148,7 +2148,7 @@ static void __perf_event_disable(struct perf_event *event,
 }
 
 /*
- * Disable a event.
+ * Disable an event.
  *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
@@ -2677,7 +2677,7 @@ static void __perf_event_enable(struct perf_event *event,
 }
 
 /*
- * Enable a event.
+ * Enable an event.
  *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
@@ -2755,7 +2755,7 @@ static int __perf_event_stop(void *info)
 	 * events will refuse to restart because of rb::aux_mmap_count==0,
 	 * see comments in perf_aux_output_begin().
 	 *
-	 * Since this is happening on a event-local CPU, no trace is lost
+	 * Since this is happening on an event-local CPU, no trace is lost
 	 * while restarting.
 	 */
 	if (sd->restart)
@@ -4827,7 +4827,7 @@ __perf_read(struct perf_event *event, char __user *buf, size_t count)
 	int ret;
 
 	/*
-	 * Return end-of-file for a read on a event that is in
+	 * Return end-of-file for a read on an event that is in
 	 * error state (i.e. because it was pinned but it couldn't be
 	 * scheduled on to the CPU at some point).
 	 */
@@ -9898,7 +9898,7 @@ enabled:
 }
 
 /*
- * Allocate and initialize a event structure
+ * Allocate and initialize an event structure
  */
 static struct perf_event *
 perf_event_alloc(struct perf_event_attr *attr, int cpu,
@@ -11229,7 +11229,7 @@ const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 }
 
 /*
- * Inherit a event from parent task to child task.
+ * Inherit an event from parent task to child task.
  *
  * Returns:
  *  - valid pointer on success
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ccc579a7d32e..aed1ba569954 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -918,7 +918,7 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 EXPORT_SYMBOL_GPL(uprobe_register);
 
 /*
- * uprobe_apply - unregister a already registered probe.
+ * uprobe_apply - unregister an already registered probe.
  * @inode: the file in which the probe has to be removed.
  * @offset: offset from the start of the file.
  * @uc: consumer which wants to add more or remove some breakpoints
@@ -947,7 +947,7 @@ int uprobe_apply(struct inode *inode, loff_t offset,
 }
 
 /*
- * uprobe_unregister - unregister a already registered probe.
+ * uprobe_unregister - unregister an already registered probe.
  * @inode: the file in which the probe has to be removed.
  * @offset: offset from the start of the file.
  * @uc: identify which probe if multiple probes are colocated.
@@ -1403,7 +1403,7 @@ static struct return_instance *free_ret_instance(struct return_instance *ri)
 
 /*
  * Called with no locks held.
- * Called in context of a exiting or a exec-ing thread.
+ * Called in context of an exiting or an exec-ing thread.
  */
 void uprobe_free_utask(struct task_struct *t)
 {
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 11300dbe35c5..14e13512c05f 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -234,7 +234,7 @@ perf also supports group leader sampling using the :S specifier.
   perf record -e '{cycles,instructions}:S' ...
   perf report --group
 
-Normally all events in a event group sample, but with :S only
+Normally all events in an event group sample, but with :S only
 the first event (the leader) samples, and it only reads the values of the
 other events in the group.
 
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 04168da4268e..246dee081efd 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -94,7 +94,7 @@ OPTIONS
 	  "perf report" to view group events together.
 
 --filter=<filter>::
-        Event filter. This option should follow a event selector (-e) which
+        Event filter. This option should follow an event selector (-e) which
 	selects either tracepoint event(s) or a hardware trace PMU
 	(e.g. Intel PT or CoreSight).
 
@@ -153,7 +153,7 @@ OPTIONS
 
 --exclude-perf::
 	Don't record events issued by perf itself. This option should follow
-	a event selector (-e) which selects tracepoint event(s). It adds a
+	an event selector (-e) which selects tracepoint event(s). It adds a
 	filter expression 'common_pid != $PERFPID' to filters. If other
 	'--filter' exists, the new filter expression will be combined with
 	them by '&&'.
-- 
cgit v1.2.3


From 3eb420e70d879ce0e6bf752accf5cdedb0a59de8 Mon Sep 17 00:00:00 2001
From: Sai Praneeth <sai.praneeth.prakhya@intel.com>
Date: Wed, 11 Jul 2018 11:40:35 +0200
Subject: efi: Use a work queue to invoke EFI Runtime Services

Presently, when a user process requests the kernel to execute any
UEFI runtime service, the kernel temporarily switches to a separate
set of page tables that describe the virtual mapping of the UEFI
runtime services regions in memory. Since UEFI runtime services are
typically invoked with interrupts enabled, any code that may be called
during this time, will have an incorrect view of the process's address
space. Although it is unusual for code running in interrupt context to
make assumptions about the process context it runs in, there are cases
(such as the perf subsystem taking samples) where this causes problems.

So let's set up a work queue for calling UEFI runtime services, so that
the actual calls are made when the work queue items are dispatched by a
work queue worker running in a separate kernel thread. Such threads are
not expected to have userland mappings in the first place, and so the
additional mappings created for the UEFI runtime services can never
clash with any.

The ResetSystem() runtime service is not covered by the work queue
handling, since it is not expected to return, and may be called at a
time when the kernel is torn down to the point where we cannot expect
work queues to still be operational.

The non-blocking variants of SetVariable() and QueryVariableInfo()
are also excluded: these are intended to be used from atomic context,
which obviously rules out waiting for a completion to be signalled by
another thread. Note that these variants are currently only used for
UEFI runtime services calls that occur very early in the boot, and
for ones that occur in critical conditions, e.g., to flush kernel logs
to UEFI variables via efi-pstore.

Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
[ardb: exclude ResetSystem() from the workqueue treatment
       merge from 2 separate patches and rewrite commit log]
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20180711094040.12506-4-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/efi.c              |  14 +++
 drivers/firmware/efi/runtime-wrappers.c | 202 +++++++++++++++++++++++++++++---
 include/linux/efi.h                     |   3 +
 3 files changed, 204 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 232f4915223b..1379a375dfa8 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -84,6 +84,8 @@ struct mm_struct efi_mm = {
 	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
 };
 
+struct workqueue_struct *efi_rts_wq;
+
 static bool disable_runtime;
 static int __init setup_noefi(char *arg)
 {
@@ -337,6 +339,18 @@ static int __init efisubsys_init(void)
 	if (!efi_enabled(EFI_BOOT))
 		return 0;
 
+	/*
+	 * Since we process only one efi_runtime_service() at a time, an
+	 * ordered workqueue (which creates only one execution context)
+	 * should suffice all our needs.
+	 */
+	efi_rts_wq = alloc_ordered_workqueue("efi_rts_wq", 0);
+	if (!efi_rts_wq) {
+		pr_err("Creating efi_rts_wq failed, EFI runtime services disabled.\n");
+		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+		return 0;
+	}
+
 	/* We register the efi directory at /sys/firmware/efi */
 	efi_kobj = kobject_create_and_add("efi", firmware_kobj);
 	if (!efi_kobj) {
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index ae54870b2788..aa66cbf23512 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -1,6 +1,15 @@
 /*
  * runtime-wrappers.c - Runtime Services function call wrappers
  *
+ * Implementation summary:
+ * -----------------------
+ * 1. When user/kernel thread requests to execute efi_runtime_service(),
+ * enqueue work to efi_rts_wq.
+ * 2. Caller thread waits for completion until the work is finished
+ * because it's dependent on the return status and execution of
+ * efi_runtime_service().
+ * For instance, get_variable() and get_next_variable().
+ *
  * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * Split off from arch/x86/platform/efi/efi.c
@@ -22,6 +31,9 @@
 #include <linux/mutex.h>
 #include <linux/semaphore.h>
 #include <linux/stringify.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
 #include <asm/efi.h>
 
 /*
@@ -33,6 +45,76 @@
 #define __efi_call_virt(f, args...) \
 	__efi_call_virt_pointer(efi.systab->runtime, f, args)
 
+/* efi_runtime_service() function identifiers */
+enum efi_rts_ids {
+	GET_TIME,
+	SET_TIME,
+	GET_WAKEUP_TIME,
+	SET_WAKEUP_TIME,
+	GET_VARIABLE,
+	GET_NEXT_VARIABLE,
+	SET_VARIABLE,
+	QUERY_VARIABLE_INFO,
+	GET_NEXT_HIGH_MONO_COUNT,
+	UPDATE_CAPSULE,
+	QUERY_CAPSULE_CAPS,
+};
+
+/*
+ * efi_runtime_work:	Details of EFI Runtime Service work
+ * @arg<1-5>:		EFI Runtime Service function arguments
+ * @status:		Status of executing EFI Runtime Service
+ * @efi_rts_id:		EFI Runtime Service function identifier
+ * @efi_rts_comp:	Struct used for handling completions
+ */
+struct efi_runtime_work {
+	void *arg1;
+	void *arg2;
+	void *arg3;
+	void *arg4;
+	void *arg5;
+	efi_status_t status;
+	struct work_struct work;
+	enum efi_rts_ids efi_rts_id;
+	struct completion efi_rts_comp;
+};
+
+/*
+ * efi_queue_work:	Queue efi_runtime_service() and wait until it's done
+ * @rts:		efi_runtime_service() function identifier
+ * @rts_arg<1-5>:	efi_runtime_service() function arguments
+ *
+ * Accesses to efi_runtime_services() are serialized by a binary
+ * semaphore (efi_runtime_lock) and caller waits until the work is
+ * finished, hence _only_ one work is queued at a time and the caller
+ * thread waits for completion.
+ */
+#define efi_queue_work(_rts, _arg1, _arg2, _arg3, _arg4, _arg5)		\
+({									\
+	struct efi_runtime_work efi_rts_work;				\
+	efi_rts_work.status = EFI_ABORTED;				\
+									\
+	init_completion(&efi_rts_work.efi_rts_comp);			\
+	INIT_WORK_ONSTACK(&efi_rts_work.work, efi_call_rts);		\
+	efi_rts_work.arg1 = _arg1;					\
+	efi_rts_work.arg2 = _arg2;					\
+	efi_rts_work.arg3 = _arg3;					\
+	efi_rts_work.arg4 = _arg4;					\
+	efi_rts_work.arg5 = _arg5;					\
+	efi_rts_work.efi_rts_id = _rts;					\
+									\
+	/*								\
+	 * queue_work() returns 0 if work was already on queue,         \
+	 * _ideally_ this should never happen.                          \
+	 */								\
+	if (queue_work(efi_rts_wq, &efi_rts_work.work))			\
+		wait_for_completion(&efi_rts_work.efi_rts_comp);	\
+	else								\
+		pr_err("Failed to queue work to efi_rts_wq.\n");	\
+									\
+	efi_rts_work.status;						\
+})
+
 void efi_call_virt_check_flags(unsigned long flags, const char *call)
 {
 	unsigned long cur_flags, mismatch;
@@ -90,13 +172,98 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call)
  */
 static DEFINE_SEMAPHORE(efi_runtime_lock);
 
+/*
+ * Calls the appropriate efi_runtime_service() with the appropriate
+ * arguments.
+ *
+ * Semantics followed by efi_call_rts() to understand efi_runtime_work:
+ * 1. If argument was a pointer, recast it from void pointer to original
+ * pointer type.
+ * 2. If argument was a value, recast it from void pointer to original
+ * pointer type and dereference it.
+ */
+static void efi_call_rts(struct work_struct *work)
+{
+	struct efi_runtime_work *efi_rts_work;
+	void *arg1, *arg2, *arg3, *arg4, *arg5;
+	efi_status_t status = EFI_NOT_FOUND;
+
+	efi_rts_work = container_of(work, struct efi_runtime_work, work);
+	arg1 = efi_rts_work->arg1;
+	arg2 = efi_rts_work->arg2;
+	arg3 = efi_rts_work->arg3;
+	arg4 = efi_rts_work->arg4;
+	arg5 = efi_rts_work->arg5;
+
+	switch (efi_rts_work->efi_rts_id) {
+	case GET_TIME:
+		status = efi_call_virt(get_time, (efi_time_t *)arg1,
+				       (efi_time_cap_t *)arg2);
+		break;
+	case SET_TIME:
+		status = efi_call_virt(set_time, (efi_time_t *)arg1);
+		break;
+	case GET_WAKEUP_TIME:
+		status = efi_call_virt(get_wakeup_time, (efi_bool_t *)arg1,
+				       (efi_bool_t *)arg2, (efi_time_t *)arg3);
+		break;
+	case SET_WAKEUP_TIME:
+		status = efi_call_virt(set_wakeup_time, *(efi_bool_t *)arg1,
+				       (efi_time_t *)arg2);
+		break;
+	case GET_VARIABLE:
+		status = efi_call_virt(get_variable, (efi_char16_t *)arg1,
+				       (efi_guid_t *)arg2, (u32 *)arg3,
+				       (unsigned long *)arg4, (void *)arg5);
+		break;
+	case GET_NEXT_VARIABLE:
+		status = efi_call_virt(get_next_variable, (unsigned long *)arg1,
+				       (efi_char16_t *)arg2,
+				       (efi_guid_t *)arg3);
+		break;
+	case SET_VARIABLE:
+		status = efi_call_virt(set_variable, (efi_char16_t *)arg1,
+				       (efi_guid_t *)arg2, *(u32 *)arg3,
+				       *(unsigned long *)arg4, (void *)arg5);
+		break;
+	case QUERY_VARIABLE_INFO:
+		status = efi_call_virt(query_variable_info, *(u32 *)arg1,
+				       (u64 *)arg2, (u64 *)arg3, (u64 *)arg4);
+		break;
+	case GET_NEXT_HIGH_MONO_COUNT:
+		status = efi_call_virt(get_next_high_mono_count, (u32 *)arg1);
+		break;
+	case UPDATE_CAPSULE:
+		status = efi_call_virt(update_capsule,
+				       (efi_capsule_header_t **)arg1,
+				       *(unsigned long *)arg2,
+				       *(unsigned long *)arg3);
+		break;
+	case QUERY_CAPSULE_CAPS:
+		status = efi_call_virt(query_capsule_caps,
+				       (efi_capsule_header_t **)arg1,
+				       *(unsigned long *)arg2, (u64 *)arg3,
+				       (int *)arg4);
+		break;
+	default:
+		/*
+		 * Ideally, we should never reach here because a caller of this
+		 * function should have put the right efi_runtime_service()
+		 * function identifier into efi_rts_work->efi_rts_id
+		 */
+		pr_err("Requested executing invalid EFI Runtime Service.\n");
+	}
+	efi_rts_work->status = status;
+	complete(&efi_rts_work->efi_rts_comp);
+}
+
 static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
 	efi_status_t status;
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(get_time, tm, tc);
+	status = efi_queue_work(GET_TIME, tm, tc, NULL, NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -107,7 +274,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(set_time, tm);
+	status = efi_queue_work(SET_TIME, tm, NULL, NULL, NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -120,7 +287,8 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(get_wakeup_time, enabled, pending, tm);
+	status = efi_queue_work(GET_WAKEUP_TIME, enabled, pending, tm, NULL,
+				NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -131,7 +299,8 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(set_wakeup_time, enabled, tm);
+	status = efi_queue_work(SET_WAKEUP_TIME, &enabled, tm, NULL, NULL,
+				NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -146,8 +315,8 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(get_variable, name, vendor, attr, data_size,
-			       data);
+	status = efi_queue_work(GET_VARIABLE, name, vendor, attr, data_size,
+				data);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -160,7 +329,8 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(get_next_variable, name_size, name, vendor);
+	status = efi_queue_work(GET_NEXT_VARIABLE, name_size, name, vendor,
+				NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -175,8 +345,8 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(set_variable, name, vendor, attr, data_size,
-			       data);
+	status = efi_queue_work(SET_VARIABLE, name, vendor, &attr, &data_size,
+				data);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -210,8 +380,8 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(query_variable_info, attr, storage_space,
-			       remaining_space, max_variable_size);
+	status = efi_queue_work(QUERY_VARIABLE_INFO, &attr, storage_space,
+				remaining_space, max_variable_size, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -242,7 +412,8 @@ static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(get_next_high_mono_count, count);
+	status = efi_queue_work(GET_NEXT_HIGH_MONO_COUNT, count, NULL, NULL,
+				NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -272,7 +443,8 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(update_capsule, capsules, count, sg_list);
+	status = efi_queue_work(UPDATE_CAPSULE, capsules, &count, &sg_list,
+				NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -289,8 +461,8 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(query_capsule_caps, capsules, count, max_size,
-			       reset_type);
+	status = efi_queue_work(QUERY_CAPSULE_CAPS, capsules, &count,
+				max_size, reset_type, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 56add823f190..8ba0cdd244b2 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1651,4 +1651,7 @@ struct linux_efi_tpm_eventlog {
 
 extern int efi_tpm_eventlog_init(void);
 
+/* Workqueue to queue EFI Runtime Services */
+extern struct workqueue_struct *efi_rts_wq;
+
 #endif /* _LINUX_EFI_H */
-- 
cgit v1.2.3


From f5dcc214aae29a68b37b2b4183f7171724e7b02d Mon Sep 17 00:00:00 2001
From: Sai Praneeth <sai.praneeth.prakhya@intel.com>
Date: Wed, 11 Jul 2018 11:40:37 +0200
Subject: efi: Remove the declaration of efi_late_init() as the function is
 unused

The following commit:

  7b0a911478c74 ("efi/x86: Move the EFI BGRT init code to early init code")

... removed the implementation and all the references to
efi_late_init() but the function is still declared at
include/linux/efi.h.

Hence, remove the unnecessary declaration.

Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20180711094040.12506-6-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8ba0cdd244b2..e190652f5ef9 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -988,14 +988,12 @@ extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
 extern void efi_gettimeofday (struct timespec64 *ts);
 extern void efi_enter_virtual_mode (void);	/* switch EFI to virtual mode, if possible */
 #ifdef CONFIG_X86
-extern void efi_late_init(void);
 extern void efi_free_boot_services(void);
 extern efi_status_t efi_query_variable_store(u32 attributes,
 					     unsigned long size,
 					     bool nonblocking);
 extern void efi_find_mirror(void);
 #else
-static inline void efi_late_init(void) {}
 static inline void efi_free_boot_services(void) {}
 
 static inline efi_status_t efi_query_variable_store(u32 attributes,
-- 
cgit v1.2.3


From 784abe24c903b093af04cf1a043140faa556cad7 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:35 +0300
Subject: net: Add decrypted field to skb

The decrypted bit is propogated to cloned/copied skbs.
This will be used later by the inline crypto receive side offload
of tls.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 ++++++-
 net/core/skbuff.c      | 6 ++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7601838c2513..3ceb8dcc54da 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -630,6 +630,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@xmit_more: More SKBs are pending for this queue
+ *	@decrypted: Decrypted SKB
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -736,7 +737,11 @@ struct sk_buff {
 				peeked:1,
 				head_frag:1,
 				xmit_more:1,
-				__unused:1; /* one bit hole */
+#ifdef CONFIG_TLS_DEVICE
+				decrypted:1;
+#else
+				__unused:1;
+#endif
 
 	/* fields enclosed in headers_start/headers_end are copied
 	 * using a single memcpy() in __copy_skb_header()
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c4e24ac27464..cfd6c6f35f9c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -805,6 +805,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	 * It is not yet because we do not want to have a 16 bit hole
 	 */
 	new->queue_mapping = old->queue_mapping;
+#ifdef CONFIG_TLS_DEVICE
+	new->decrypted = old->decrypted;
+#endif
 
 	memcpy(&new->headers_start, &old->headers_start,
 	       offsetof(struct sk_buff, headers_end) -
@@ -865,6 +868,9 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(head_frag);
 	C(data);
 	C(truesize);
+#ifdef CONFIG_TLS_DEVICE
+	C(decrypted);
+#endif
 	refcount_set(&n->users, 1);
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
-- 
cgit v1.2.3


From 14136564c8ee94566945e85014019cbdb1716dca Mon Sep 17 00:00:00 2001
From: Ilya Lesokhin <ilyal@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:36 +0300
Subject: net: Add TLS RX offload feature

This patch adds a netdev feature to configure TLS RX inline crypto offload.

Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 2 ++
 net/core/ethtool.c              | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 623bb8ced060..2b2a6dce1630 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -79,6 +79,7 @@ enum {
 	NETIF_F_HW_ESP_TX_CSUM_BIT,	/* ESP with TX checksum offload */
 	NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */
 	NETIF_F_HW_TLS_TX_BIT,		/* Hardware TLS TX offload */
+	NETIF_F_HW_TLS_RX_BIT,		/* Hardware TLS RX offload */
 
 	NETIF_F_GRO_HW_BIT,		/* Hardware Generic receive offload */
 	NETIF_F_HW_TLS_RECORD_BIT,	/* Offload TLS record */
@@ -151,6 +152,7 @@ enum {
 #define NETIF_F_HW_TLS_RECORD	__NETIF_F(HW_TLS_RECORD)
 #define NETIF_F_GSO_UDP_L4	__NETIF_F(GSO_UDP_L4)
 #define NETIF_F_HW_TLS_TX	__NETIF_F(HW_TLS_TX)
+#define NETIF_F_HW_TLS_RX	__NETIF_F(HW_TLS_RX)
 
 #define for_each_netdev_feature(mask_addr, bit)	\
 	for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index e677a20180cf..c9993c6c2fd4 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -111,6 +111,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] =	 "rx-udp_tunnel-port-offload",
 	[NETIF_F_HW_TLS_RECORD_BIT] =	"tls-hw-record",
 	[NETIF_F_HW_TLS_TX_BIT] =	 "tls-hw-tx-offload",
+	[NETIF_F_HW_TLS_RX_BIT] =	 "tls-hw-rx-offload",
 };
 
 static const char
-- 
cgit v1.2.3


From 16e4edc297ffc9b643b8dd3da6b0d579753ea2b3 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:37 +0300
Subject: net: Add TLS rx resync NDO

Add new netdev tls op for resynchronizing HW tls context

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4fa7f7a3f8b3..3514d67112b3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -903,6 +903,8 @@ struct tlsdev_ops {
 	void (*tls_dev_del)(struct net_device *netdev,
 			    struct tls_context *ctx,
 			    enum tls_offload_ctx_dir direction);
+	void (*tls_dev_resync_rx)(struct net_device *netdev,
+				  struct sock *sk, u32 seq, u64 rcd_sn);
 };
 #endif
 
-- 
cgit v1.2.3


From ab412e1dd7db132c2abeb9385b4bf0dc8e6c5a65 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:46 +0300
Subject: net/mlx5: Accel, add TLS rx offload routines

In Innova TLS, TLS contexts are added or deleted
via a command message over the SBU connection.
The HW then sends a response message over the same connection.

Complete the implementation for Innova TLS (FPGA-based) hardware by
adding support for rx inline crypto offload.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/accel/tls.c    |  23 +++--
 .../net/ethernet/mellanox/mlx5/core/accel/tls.h    |  26 +++--
 drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 113 ++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h |  18 ++--
 include/linux/mlx5/mlx5_ifc_fpga.h                 |   1 +
 5 files changed, 135 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
index 77ac19f38cbe..da7bd26368f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
@@ -37,17 +37,26 @@
 #include "mlx5_core.h"
 #include "fpga/tls.h"
 
-int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-			       struct tls_crypto_info *crypto_info,
-			       u32 start_offload_tcp_sn, u32 *p_swid)
+int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+			    struct tls_crypto_info *crypto_info,
+			    u32 start_offload_tcp_sn, u32 *p_swid,
+			    bool direction_sx)
 {
-	return mlx5_fpga_tls_add_tx_flow(mdev, flow, crypto_info,
-					 start_offload_tcp_sn, p_swid);
+	return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info,
+				      start_offload_tcp_sn, p_swid,
+				      direction_sx);
 }
 
-void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid)
+void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+			     bool direction_sx)
 {
-	mlx5_fpga_tls_del_tx_flow(mdev, swid, GFP_KERNEL);
+	mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx);
+}
+
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+			     u64 rcd_sn)
+{
+	return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn);
 }
 
 bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
index 6f9c9f446ecc..2228c1083528 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
@@ -60,10 +60,14 @@ struct mlx5_ifc_tls_flow_bits {
 	u8         reserved_at_2[0x1e];
 };
 
-int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-			       struct tls_crypto_info *crypto_info,
-			       u32 start_offload_tcp_sn, u32 *p_swid);
-void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid);
+int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+			    struct tls_crypto_info *crypto_info,
+			    u32 start_offload_tcp_sn, u32 *p_swid,
+			    bool direction_sx);
+void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+			     bool direction_sx);
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+			     u64 rcd_sn);
 bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
 u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
 int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
@@ -71,11 +75,15 @@ void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev);
 
 #else
 
-static inline int
-mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-			   struct tls_crypto_info *crypto_info,
-			   u32 start_offload_tcp_sn, u32 *p_swid) { return 0; }
-static inline void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid) { }
+static int
+mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+			struct tls_crypto_info *crypto_info,
+			u32 start_offload_tcp_sn, u32 *p_swid,
+			bool direction_sx) { return -ENOTSUPP; }
+static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+					   bool direction_sx) { }
+static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle,
+					   u32 seq, u64 rcd_sn) { return 0; }
 static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
 static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
 static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
index c9736238604a..5cf5f2a9d51f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
@@ -129,6 +129,7 @@ static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev,
 static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
 				    void *ptr)
 {
+	unsigned long flags;
 	int ret;
 
 	/* TLS metadata format is 1 byte for syndrome followed
@@ -139,9 +140,9 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
 	BUILD_BUG_ON((SWID_END - 1) & 0xFF000000);
 
 	idr_preload(GFP_KERNEL);
-	spin_lock_irq(idr_spinlock);
+	spin_lock_irqsave(idr_spinlock, flags);
 	ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC);
-	spin_unlock_irq(idr_spinlock);
+	spin_unlock_irqrestore(idr_spinlock, flags);
 	idr_preload_end();
 
 	return ret;
@@ -157,6 +158,13 @@ static void mlx5_fpga_tls_release_swid(struct idr *idr,
 	spin_unlock_irqrestore(idr_spinlock, flags);
 }
 
+static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
+				   struct mlx5_fpga_device *fdev,
+				   struct mlx5_fpga_dma_buf *buf, u8 status)
+{
+	kfree(buf);
+}
+
 struct mlx5_teardown_stream_context {
 	struct mlx5_fpga_tls_command_context cmd;
 	u32 swid;
@@ -178,9 +186,13 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
 			mlx5_fpga_err(fdev,
 				      "Teardown stream failed with syndrome = %d",
 				      syndrome);
-		else
+		else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx))
 			mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr,
-						   &fdev->tls->idr_spinlock,
+						   &fdev->tls->tx_idr_spinlock,
+						   ctx->swid);
+		else
+			mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr,
+						   &fdev->tls->rx_idr_spinlock,
 						   ctx->swid);
 	}
 	mlx5_fpga_tls_put_command_ctx(cmd);
@@ -196,6 +208,40 @@ static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
 		 MLX5_GET(tls_flow, flow, direction_sx));
 }
 
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+			    u64 rcd_sn)
+{
+	struct mlx5_fpga_dma_buf *buf;
+	int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE;
+	void *flow;
+	void *cmd;
+	int ret;
+
+	buf = kzalloc(size, GFP_ATOMIC);
+	if (!buf)
+		return -ENOMEM;
+
+	cmd = (buf + 1);
+
+	rcu_read_lock();
+	flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
+	rcu_read_unlock();
+	mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+
+	MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
+	MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
+	MLX5_SET(tls_cmd, cmd, tcp_sn, seq);
+	MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX);
+
+	buf->sg[0].data = cmd;
+	buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+	buf->complete = mlx_tls_kfree_complete;
+
+	ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
+
+	return ret;
+}
+
 static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
 					    void *flow, u32 swid, gfp_t flags)
 {
@@ -223,14 +269,18 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
 			       mlx5_fpga_tls_teardown_completion);
 }
 
-void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid,
-			       gfp_t flags)
+void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+			    gfp_t flags, bool direction_sx)
 {
 	struct mlx5_fpga_tls *tls = mdev->fpga->tls;
 	void *flow;
 
 	rcu_read_lock();
-	flow = idr_find(&tls->tx_idr, swid);
+	if (direction_sx)
+		flow = idr_find(&tls->tx_idr, swid);
+	else
+		flow = idr_find(&tls->rx_idr, swid);
+
 	rcu_read_unlock();
 
 	if (!flow) {
@@ -289,9 +339,11 @@ mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn,
 		 * the command context because we might not have received
 		 * the tx completion yet.
 		 */
-		mlx5_fpga_tls_del_tx_flow(fdev->mdev,
-					  MLX5_GET(tls_cmd, tls_cmd, swid),
-					  GFP_ATOMIC);
+		mlx5_fpga_tls_del_flow(fdev->mdev,
+				       MLX5_GET(tls_cmd, tls_cmd, swid),
+				       GFP_ATOMIC,
+				       MLX5_GET(tls_cmd, tls_cmd,
+						direction_sx));
 	}
 
 	mlx5_fpga_tls_put_command_ctx(cmd);
@@ -415,8 +467,7 @@ int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
 	if (err)
 		goto error;
 
-	if (!(tls->caps & (MLX5_ACCEL_TLS_TX | MLX5_ACCEL_TLS_V12 |
-				 MLX5_ACCEL_TLS_AES_GCM128))) {
+	if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) {
 		err = -ENOTSUPP;
 		goto error;
 	}
@@ -438,7 +489,9 @@ int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
 	INIT_LIST_HEAD(&tls->pending_cmds);
 
 	idr_init(&tls->tx_idr);
-	spin_lock_init(&tls->idr_spinlock);
+	idr_init(&tls->rx_idr);
+	spin_lock_init(&tls->tx_idr_spinlock);
+	spin_lock_init(&tls->rx_idr_spinlock);
 	fdev->tls = tls;
 	return 0;
 
@@ -500,9 +553,9 @@ static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps,
 	return 0;
 }
 
-static int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
-				  struct tls_crypto_info *crypto_info, u32 swid,
-				  u32 tcp_sn)
+static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+				   struct tls_crypto_info *crypto_info,
+				   u32 swid, u32 tcp_sn)
 {
 	u32 caps = mlx5_fpga_tls_device_caps(mdev);
 	struct mlx5_setup_stream_context *ctx;
@@ -533,30 +586,42 @@ out:
 	return ret;
 }
 
-int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-			      struct tls_crypto_info *crypto_info,
-			      u32 start_offload_tcp_sn, u32 *p_swid)
+int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+			   struct tls_crypto_info *crypto_info,
+			   u32 start_offload_tcp_sn, u32 *p_swid,
+			   bool direction_sx)
 {
 	struct mlx5_fpga_tls *tls = mdev->fpga->tls;
 	int ret = -ENOMEM;
 	u32 swid;
 
-	ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr, &tls->idr_spinlock, flow);
+	if (direction_sx)
+		ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr,
+					       &tls->tx_idr_spinlock, flow);
+	else
+		ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr,
+					       &tls->rx_idr_spinlock, flow);
+
 	if (ret < 0)
 		return ret;
 
 	swid = ret;
-	MLX5_SET(tls_flow, flow, direction_sx, 1);
+	MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0);
 
-	ret = mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
-				     start_offload_tcp_sn);
+	ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
+				      start_offload_tcp_sn);
 	if (ret && ret != -EINTR)
 		goto free_swid;
 
 	*p_swid = swid;
 	return 0;
 free_swid:
-	mlx5_fpga_tls_release_swid(&tls->tx_idr, &tls->idr_spinlock, swid);
+	if (direction_sx)
+		mlx5_fpga_tls_release_swid(&tls->tx_idr,
+					   &tls->tx_idr_spinlock, swid);
+	else
+		mlx5_fpga_tls_release_swid(&tls->rx_idr,
+					   &tls->rx_idr_spinlock, swid);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
index 800a214e4e49..3b2e37bf76fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
@@ -46,15 +46,18 @@ struct mlx5_fpga_tls {
 	struct mlx5_fpga_conn *conn;
 
 	struct idr tx_idr;
-	spinlock_t idr_spinlock; /* protects the IDR */
+	struct idr rx_idr;
+	spinlock_t tx_idr_spinlock; /* protects the IDR */
+	spinlock_t rx_idr_spinlock; /* protects the IDR */
 };
 
-int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-			      struct tls_crypto_info *crypto_info,
-			      u32 start_offload_tcp_sn, u32 *p_swid);
+int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+			   struct tls_crypto_info *crypto_info,
+			   u32 start_offload_tcp_sn, u32 *p_swid,
+			   bool direction_sx);
 
-void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid,
-			       gfp_t flags);
+void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+			    gfp_t flags, bool direction_sx);
 
 bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev);
 int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev);
@@ -65,4 +68,7 @@ static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
 	return mdev->fpga->tls->caps;
 }
 
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+			    u64 rcd_sn);
+
 #endif /* __MLX5_FPGA_TLS_H__ */
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 64d0f40d4cc3..37e065a80a43 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -576,6 +576,7 @@ struct mlx5_ifc_fpga_ipsec_sa {
 enum fpga_tls_cmds {
 	CMD_SETUP_STREAM		= 0x1001,
 	CMD_TEARDOWN_STREAM		= 0x1002,
+	CMD_RESYNC_RX			= 0x1003,
 };
 
 #define MLX5_TLS_1_2 (0)
-- 
cgit v1.2.3


From a94a7483a91cb6a0d15a4413e8fc853f33a4e1b9 Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Wed, 30 May 2018 10:11:43 +0800
Subject: mmc: core: Adjust and reuse the macro of R1_STATUS(x)

R1_STATUS(x) now is only used by ioctl_rpmb_card_status_poll(),
which checks all bits as possible. But according to the spec,
bit 17 and bit 18 should be ignored, as well bit 14 which is
reserved(must be set to 0) quoting from the spec and these rule
apply to all places checking the device status. So change
its checking from 0xFFFFE000 to 0xFFF9A000.

As a bonus, we reuse it for mmc_do_erase() as well as
mmc_switch_status_error().
(1) Currently mmc_switch_status_error() doesn't check bit 25, but
it means device is locked but not unlocked by CMD42 prior to any
operations which need check busy, which is also not allowed.
(2) mmc_do_erase() also forgot to to check bit 15, WP_ERASE_SKIP.
The spec says "Only partial address space was erased due to existing
write protected blocks.", which obviously means we should fail this I/O.
Otherwise, the partial erased data stored in nonvalatile flash violates
the data integrity from the view of I/O owner, which probably confuse
it when further used.

So reusing R1_STATUS for them not only improve the readability but also
slove real problems.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/core.c    | 2 +-
 drivers/mmc/core/mmc_ops.c | 2 +-
 include/linux/mmc/mmc.h    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 281826d1fcca..6780c2b81050 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2078,7 +2078,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
 		cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 		/* Do not retry else we can't see errors */
 		err = mmc_wait_for_cmd(card->host, &cmd, 0);
-		if (err || (cmd.resp[0] & 0xFDF92000)) {
+		if (err || R1_STATUS(cmd.resp[0])) {
 			pr_err("error %d requesting status %#x\n",
 				err, cmd.resp[0]);
 			err = -EIO;
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 42d6aa89a48a..873b2aa0c155 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -417,7 +417,7 @@ static int mmc_switch_status_error(struct mmc_host *host, u32 status)
 		if (status & R1_SPI_ILLEGAL_COMMAND)
 			return -EBADMSG;
 	} else {
-		if (status & 0xFDFFA000)
+		if (R1_STATUS(status))
 			pr_warn("%s: unexpected status %#x after switch\n",
 				mmc_hostname(host), status);
 		if (status & R1_SWITCH_ERROR)
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 3ffc27aaeeaf..897a87c4c827 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -144,7 +144,7 @@ static inline bool mmc_op_multi(u32 opcode)
 #define R1_WP_ERASE_SKIP	(1 << 15)	/* sx, c */
 #define R1_CARD_ECC_DISABLED	(1 << 14)	/* sx, a */
 #define R1_ERASE_RESET		(1 << 13)	/* sr, c */
-#define R1_STATUS(x)            (x & 0xFFFFE000)
+#define R1_STATUS(x)            (x & 0xFFF9A000)
 #define R1_CURRENT_STATE(x)	((x & 0x00001E00) >> 9)	/* sx, b (4 bits) */
 #define R1_READY_FOR_DATA	(1 << 8)	/* sx, a */
 #define R1_SWITCH_ERROR		(1 << 7)	/* sx, c */
-- 
cgit v1.2.3


From ba6c7ac3a2f421635ae4446269526359d8bff721 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms+renesas@verge.net.au>
Date: Mon, 18 Jun 2018 14:57:49 +0200
Subject: mmc: core: more fine-grained hooks for HS400 tuning

This adds two new HS400 tuning operations:
* hs400_downgrade
* hs400_complete

These supplement the existing HS400 operation:
* prepare_hs400_tuning

This is motivated by a requirement of Renesas SDHI for the following:
1. Disabling SCC before selecting to HS if selection of HS400 has occurred.
   This can be done in an implementation of prepare_hs400_tuning_downgrade
2. Updating registers after switching to HS400
   This can be done in an implementation of complete_hs400_tuning

If hs400_downgrade or hs400_complete are not implemented then they are not
called. Thus means there should be no affect for existing drivers as none
implemt these ops.

Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc.c   | 10 ++++++++++
 include/linux/mmc/host.h |  7 +++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 4466f5de54d4..63a52379e8ab 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1169,6 +1169,10 @@ static int mmc_select_hs400(struct mmc_card *card)
 	/* Set host controller to HS timing */
 	mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
 
+	/* Prepare host to downgrade to HS timing */
+	if (host->ops->hs400_downgrade)
+		host->ops->hs400_downgrade(host);
+
 	/* Reduce frequency to HS frequency */
 	max_dtr = card->ext_csd.hs_max_dtr;
 	mmc_set_clock(host, max_dtr);
@@ -1209,6 +1213,9 @@ static int mmc_select_hs400(struct mmc_card *card)
 	if (err)
 		goto out_err;
 
+	if (host->ops->hs400_complete)
+		host->ops->hs400_complete(host);
+
 	return 0;
 
 out_err:
@@ -1256,6 +1263,9 @@ int mmc_hs400_to_hs200(struct mmc_card *card)
 
 	mmc_set_timing(host, MMC_TIMING_MMC_HS);
 
+	if (host->ops->hs400_downgrade)
+		host->ops->hs400_downgrade(host);
+
 	err = mmc_switch_status(card);
 	if (err)
 		goto out_err;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 64300a48dcce..a39e2925c84c 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -146,6 +146,13 @@ struct mmc_host_ops {
 
 	/* Prepare HS400 target operating frequency depending host driver */
 	int	(*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
+
+	/* Prepare for switching from HS400 to HS200 */
+	void	(*hs400_downgrade)(struct mmc_host *host);
+
+	/* Complete selection of HS400 */
+	void	(*hs400_complete)(struct mmc_host *host);
+
 	/* Prepare enhanced strobe depending host driver */
 	void	(*hs400_enhanced_strobe)(struct mmc_host *host,
 					 struct mmc_ios *ios);
-- 
cgit v1.2.3


From db924bba47c8031188545c84bf943f4058b659d0 Mon Sep 17 00:00:00 2001
From: Masaharu Hayakawa <masaharu.hayakawa.ry@renesas.com>
Date: Mon, 18 Jun 2018 14:57:50 +0200
Subject: mmc: tmio: add eMMC HS400 mode support

This patch adds processing for selecting HS400 mode.

Signed-off-by: Masaharu Hayakawa <masaharu.hayakawa.ry@renesas.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/tmio_mmc.h      |  6 +++++
 drivers/mmc/host/tmio_mmc_core.c | 47 ++++++++++++++++++++++++++++++++++++++--
 include/linux/mfd/tmio.h         |  3 +++
 3 files changed, 54 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index e7d651352dc9..5d141f79e175 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -46,6 +46,7 @@
 #define CTL_DMA_ENABLE 0xd8
 #define CTL_RESET_SD 0xe0
 #define CTL_VERSION 0xe2
+#define CTL_SDIF_MODE 0xe6
 #define CTL_SDIO_REGS 0x100
 #define CTL_CLK_AND_WAIT_CTL 0x138
 #define CTL_RESET_SDIO 0x1e0
@@ -191,6 +192,11 @@ struct tmio_mmc_host {
 	/* Tuning values: 1 for success, 0 for failure */
 	DECLARE_BITMAP(taps, BITS_PER_BYTE * sizeof(long));
 	unsigned int tap_num;
+	unsigned long tap_set;
+
+	void (*prepare_hs400_tuning)(struct tmio_mmc_host *host);
+	void (*hs400_downgrade)(struct tmio_mmc_host *host);
+	void (*hs400_complete)(struct tmio_mmc_host *host);
 
 	const struct tmio_mmc_dma_ops *dma_ops;
 };
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 308029930304..416f9e078fda 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -199,6 +199,14 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
 		tmio_mmc_clk_stop(host);
 		return;
 	}
+	/*
+	 * Both HS400 and HS200/SD104 set 200MHz, but some devices need to
+	 * set 400MHz to distinguish the CPG settings in HS400.
+	 */
+	if (host->mmc->ios.timing == MMC_TIMING_MMC_HS400 &&
+	    host->pdata->flags & TMIO_MMC_HAVE_4TAP_HS400 &&
+	    new_clock == 200000000)
+		new_clock = 400000000;
 
 	if (host->clk_update)
 		clock = host->clk_update(host, new_clock) / 512;
@@ -209,8 +217,13 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
 		clock <<= 1;
 
 	/* 1/1 clock is option */
-	if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) && ((clk >> 22) & 0x1))
-		clk |= 0xff;
+	if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) &&
+	    ((clk >> 22) & 0x1)) {
+		if (!(host->mmc->ios.timing == MMC_TIMING_MMC_HS400))
+			clk |= 0xff;
+		else
+			clk &= ~0xff;
+	}
 
 	if (host->set_clk_div)
 		host->set_clk_div(host->pdev, (clk >> 22) & 1);
@@ -1087,6 +1100,33 @@ static int tmio_multi_io_quirk(struct mmc_card *card,
 	return blk_size;
 }
 
+static int tmio_mmc_prepare_hs400_tuning(struct mmc_host *mmc,
+					 struct mmc_ios *ios)
+{
+	struct tmio_mmc_host *host = mmc_priv(mmc);
+
+	if (host->prepare_hs400_tuning)
+		host->prepare_hs400_tuning(host);
+
+	return 0;
+}
+
+static void tmio_mmc_hs400_downgrade(struct mmc_host *mmc)
+{
+	struct tmio_mmc_host *host = mmc_priv(mmc);
+
+	if (host->hs400_downgrade)
+		host->hs400_downgrade(host);
+}
+
+static void tmio_mmc_hs400_complete(struct mmc_host *mmc)
+{
+	struct tmio_mmc_host *host = mmc_priv(mmc);
+
+	if (host->hs400_complete)
+		host->hs400_complete(host);
+}
+
 static const struct mmc_host_ops tmio_mmc_ops = {
 	.request	= tmio_mmc_request,
 	.set_ios	= tmio_mmc_set_ios,
@@ -1096,6 +1136,9 @@ static const struct mmc_host_ops tmio_mmc_ops = {
 	.multi_io_quirk	= tmio_multi_io_quirk,
 	.hw_reset	= tmio_mmc_hw_reset,
 	.execute_tuning = tmio_mmc_execute_tuning,
+	.prepare_hs400_tuning = tmio_mmc_prepare_hs400_tuning,
+	.hs400_downgrade = tmio_mmc_hs400_downgrade,
+	.hs400_complete	= tmio_mmc_hs400_complete,
 };
 
 static int tmio_mmc_init_ocr(struct tmio_mmc_host *host)
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 91f92215ca74..77866214ab51 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -90,6 +90,9 @@
 /* Some controllers have a CBSY bit */
 #define TMIO_MMC_HAVE_CBSY		BIT(11)
 
+/* Some controllers that support HS400 use use 4 taps while others use 8. */
+#define TMIO_MMC_HAVE_4TAP_HS400	BIT(13)
+
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
 void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state);
-- 
cgit v1.2.3


From 29772f8a73d88a5de648177d9d822055ab7d1ba6 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 26 Jun 2018 16:51:31 +0200
Subject: mmc: core: Drop the unused mmc_power_save|restore_host()

The last user of mmc_power_save|restore_host() APIs is gone, hence let's
drop them. Drop also the corresponding bus_ops callback,
->power_save|restore() as those becomes redundant.

Cc: Tony Lindgren <tony@atomide.com>
Cc: Eyal Reizer <eyalreizer@gmail.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/core.c  | 46 ----------------------------------------------
 drivers/mmc/core/core.h  |  2 --
 drivers/mmc/core/sdio.c  |  1 -
 include/linux/mmc/host.h |  3 ---
 4 files changed, 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 6780c2b81050..50a5c340307b 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2716,52 +2716,6 @@ void mmc_stop_host(struct mmc_host *host)
 	mmc_release_host(host);
 }
 
-int mmc_power_save_host(struct mmc_host *host)
-{
-	int ret = 0;
-
-	pr_debug("%s: %s: powering down\n", mmc_hostname(host), __func__);
-
-	mmc_bus_get(host);
-
-	if (!host->bus_ops || host->bus_dead) {
-		mmc_bus_put(host);
-		return -EINVAL;
-	}
-
-	if (host->bus_ops->power_save)
-		ret = host->bus_ops->power_save(host);
-
-	mmc_bus_put(host);
-
-	mmc_power_off(host);
-
-	return ret;
-}
-EXPORT_SYMBOL(mmc_power_save_host);
-
-int mmc_power_restore_host(struct mmc_host *host)
-{
-	int ret;
-
-	pr_debug("%s: %s: powering up\n", mmc_hostname(host), __func__);
-
-	mmc_bus_get(host);
-
-	if (!host->bus_ops || host->bus_dead) {
-		mmc_bus_put(host);
-		return -EINVAL;
-	}
-
-	mmc_power_up(host, host->card->ocr);
-	ret = host->bus_ops->power_restore(host);
-
-	mmc_bus_put(host);
-
-	return ret;
-}
-EXPORT_SYMBOL(mmc_power_restore_host);
-
 #ifdef CONFIG_PM_SLEEP
 /* Do the card removal on suspend if card is assumed removeable
  * Do that in pm notifier while userspace isn't yet frozen, so we will be able
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 9d8f09ac0821..087ba68b2920 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -28,8 +28,6 @@ struct mmc_bus_ops {
 	int (*resume)(struct mmc_host *);
 	int (*runtime_suspend)(struct mmc_host *);
 	int (*runtime_resume)(struct mmc_host *);
-	int (*power_save)(struct mmc_host *);
-	int (*power_restore)(struct mmc_host *);
 	int (*alive)(struct mmc_host *);
 	int (*shutdown)(struct mmc_host *);
 	int (*hw_reset)(struct mmc_host *);
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index a86490dbca70..d8e17ea6126d 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -1076,7 +1076,6 @@ static const struct mmc_bus_ops mmc_sdio_ops = {
 	.resume = mmc_sdio_resume,
 	.runtime_suspend = mmc_sdio_runtime_suspend,
 	.runtime_resume = mmc_sdio_runtime_resume,
-	.power_restore = mmc_sdio_power_restore,
 	.alive = mmc_sdio_alive,
 	.hw_reset = mmc_sdio_hw_reset,
 	.sw_reset = mmc_sdio_sw_reset,
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a39e2925c84c..beed7121c781 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -481,9 +481,6 @@ static inline void *mmc_priv(struct mmc_host *host)
 #define mmc_classdev(x)	(&(x)->class_dev)
 #define mmc_hostname(x)	(dev_name(&(x)->class_dev))
 
-int mmc_power_save_host(struct mmc_host *host);
-int mmc_power_restore_host(struct mmc_host *host);
-
 void mmc_detect_change(struct mmc_host *, unsigned long delay);
 void mmc_request_done(struct mmc_host *, struct mmc_request *);
 void mmc_command_done(struct mmc_host *host, struct mmc_request *mrq);
-- 
cgit v1.2.3


From 86f495c57f7146bc5d363d27483957928bb9d1d8 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Thu, 28 Jun 2018 10:13:29 +0200
Subject: mmc: sdhci-esdhc-imx: get rid of support_vsel

The field support_vsel is currently only used in the device tree
case. Get rid of it. No change in behavior.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c          | 8 ++------
 include/linux/platform_data/mmc-esdhc-imx.h | 2 --
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index f9aa832735e4..85fd5a8b0b6d 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -1151,18 +1151,14 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
 			     &boarddata->tuning_start_tap);
 
 	if (of_find_property(np, "no-1-8-v", NULL))
-		boarddata->support_vsel = false;
-	else
-		boarddata->support_vsel = true;
+		host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
 
 	if (of_property_read_u32(np, "fsl,delay-line", &boarddata->delay_line))
 		boarddata->delay_line = 0;
 
 	mmc_of_parse_voltage(np, &host->ocr_mask);
 
-	/* sdr50 and sdr104 need work on 1.8v signal voltage */
-	if ((boarddata->support_vsel) && esdhc_is_usdhc(imx_data) &&
-	    !IS_ERR(imx_data->pins_default)) {
+	if (esdhc_is_usdhc(imx_data) && !IS_ERR(imx_data->pins_default)) {
 		imx_data->pins_100mhz = pinctrl_lookup_state(imx_data->pinctrl,
 						ESDHC_PINCTRL_STATE_100MHZ);
 		imx_data->pins_200mhz = pinctrl_lookup_state(imx_data->pinctrl,
diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h
index 7daa78a2f342..640dec8b5b0c 100644
--- a/include/linux/platform_data/mmc-esdhc-imx.h
+++ b/include/linux/platform_data/mmc-esdhc-imx.h
@@ -34,7 +34,6 @@ enum cd_types {
  * @cd_gpio:	gpio for card_detect interrupt
  * @wp_type:	type of write_protect method (see wp_types enum above)
  * @cd_type:	type of card_detect method (see cd_types enum above)
- * @support_vsel:  indicate it supports 1.8v switching
  */
 
 struct esdhc_platform_data {
@@ -43,7 +42,6 @@ struct esdhc_platform_data {
 	enum wp_types wp_type;
 	enum cd_types cd_type;
 	int max_bus_width;
-	bool support_vsel;
 	unsigned int delay_line;
 	unsigned int tuning_step;       /* The delay cell steps in tuning procedure */
 	unsigned int tuning_start_tap;	/* The start delay cell point in tuning procedure */
-- 
cgit v1.2.3


From 8b0bbd956228ae87139673e5611c4c880ddb9529 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Wed, 11 Jul 2018 09:41:30 -0500
Subject: serial: sh-sci: Add support for R7S9210

Add support for a "RZ_SCIFA" which is different than a traditional
SCIFA. It looks like a normal SCIF with FIFO data, but with a
compressed address space. Also, the break out of interrupts
are different then traditinal SCIF: ERI/BRI, RXI, TXI, TEI, DRI.
The R7S9210 (RZ/A2) contains this type of SCIF.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 77 +++++++++++++++++++++++++++++++++++++++++++--
 include/linux/serial_sci.h  |  1 +
 2 files changed, 76 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 6eb65160e015..5808edfe3f7b 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -65,6 +65,7 @@ enum {
 	SCIx_RXI_IRQ,
 	SCIx_TXI_IRQ,
 	SCIx_BRI_IRQ,
+	SCIx_TEIDRI_IRQ,
 	SCIx_NR_IRQS,
 
 	SCIx_MUX_IRQ = SCIx_NR_IRQS,	/* special case */
@@ -76,6 +77,9 @@ enum {
 	((port)->irqs[SCIx_ERI_IRQ] &&	\
 	 ((port)->irqs[SCIx_RXI_IRQ] < 0))
 
+#define SCIx_TEIDRI_IRQ_EXISTS(port)		\
+	((port)->irqs[SCIx_TEIDRI_IRQ] > 0)
+
 enum SCI_CLKS {
 	SCI_FCK,		/* Functional Clock */
 	SCI_SCK,		/* Optional External Clock */
@@ -289,6 +293,33 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = {
 		.error_clear = SCIF_ERROR_CLEAR,
 	},
 
+	/*
+	 * The "SCIFA" that is in RZ/T and RZ/A2.
+	 * It looks like a normal SCIF with FIFO data, but with a
+	 * compressed address space. Also, the break out of interrupts
+	 * are different: ERI/BRI, RXI, TXI, TEI, DRI.
+	 */
+	[SCIx_RZ_SCIFA_REGTYPE] = {
+		.regs = {
+			[SCSMR]		= { 0x00, 16 },
+			[SCBRR]		= { 0x02,  8 },
+			[SCSCR]		= { 0x04, 16 },
+			[SCxTDR]	= { 0x06,  8 },
+			[SCxSR]		= { 0x08, 16 },
+			[SCxRDR]	= { 0x0A,  8 },
+			[SCFCR]		= { 0x0C, 16 },
+			[SCFDR]		= { 0x0E, 16 },
+			[SCSPTR]	= { 0x10, 16 },
+			[SCLSR]		= { 0x12, 16 },
+		},
+		.fifosize = 16,
+		.overrun_reg = SCLSR,
+		.overrun_mask = SCLSR_ORER,
+		.sampling_rate_mask = SCI_SR(32),
+		.error_mask = SCIF_DEFAULT_ERROR_MASK,
+		.error_clear = SCIF_ERROR_CLEAR,
+	},
+
 	/*
 	 * Common SH-3 SCIF definitions.
 	 */
@@ -1682,11 +1713,26 @@ static irqreturn_t sci_tx_interrupt(int irq, void *ptr)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t sci_br_interrupt(int irq, void *ptr);
+
 static irqreturn_t sci_er_interrupt(int irq, void *ptr)
 {
 	struct uart_port *port = ptr;
 	struct sci_port *s = to_sci_port(port);
 
+	if (SCIx_TEIDRI_IRQ_EXISTS(s)) {
+		/* Break and Error interrupts are muxed */
+		unsigned short ssr_status = serial_port_in(port, SCxSR);
+
+		/* Break Interrupt */
+		if (ssr_status & SCxSR_BRK(port))
+			sci_br_interrupt(irq, ptr);
+
+		/* Break only? */
+		if (!(ssr_status & SCxSR_ERRORS(port)))
+			return IRQ_HANDLED;
+	}
+
 	/* Handle errors */
 	if (port->type == PORT_SCI) {
 		if (sci_handle_errors(port)) {
@@ -1826,8 +1872,31 @@ static int sci_request_irq(struct sci_port *port)
 		}
 
 		desc = sci_irq_desc + i;
-		port->irqstr[j] = kasprintf(GFP_KERNEL, "%s:%s",
-					    dev_name(up->dev), desc->desc);
+		if (SCIx_TEIDRI_IRQ_EXISTS(port)) {
+			/*
+			 * ERI and BRI are muxed, just register ERI and
+			 * ignore BRI.
+			 * TEI and DRI are muxed, but only DRI
+			 * is enabled, so use RXI handler
+			 */
+			if (i == SCIx_ERI_IRQ)
+				port->irqstr[j] = kasprintf(GFP_KERNEL,
+							    "%s:err + break",
+							    dev_name(up->dev));
+			if (i == SCIx_BRI_IRQ)
+				continue;
+			if (i == SCIx_TEIDRI_IRQ) {
+				port->irqstr[j] = kasprintf(GFP_KERNEL,
+							    "%s:tx end + rx ready",
+							    dev_name(up->dev));
+				desc = sci_irq_desc + SCIx_RXI_IRQ;
+			}
+		}
+
+		if (!port->irqstr[j])
+			port->irqstr[j] = kasprintf(GFP_KERNEL, "%s:%s",
+						    dev_name(up->dev),
+						    desc->desc);
 		if (!port->irqstr[j]) {
 			ret = -ENOMEM;
 			goto out_nomem;
@@ -3074,6 +3143,10 @@ static const struct of_device_id of_sci_match[] = {
 		.compatible = "renesas,scif-r7s72100",
 		.data = SCI_OF_DATA(PORT_SCIF, SCIx_SH2_SCIF_FIFODATA_REGTYPE),
 	},
+	{
+		.compatible = "renesas,scif-r7s9210",
+		.data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE),
+	},
 	/* Family-specific types */
 	{
 		.compatible = "renesas,rcar-gen1-scif",
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index c0e795d95477..1c89611e0e06 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -36,6 +36,7 @@ enum {
 	SCIx_SH4_SCIF_FIFODATA_REGTYPE,
 	SCIx_SH7705_SCIF_REGTYPE,
 	SCIx_HSCIF_REGTYPE,
+	SCIx_RZ_SCIFA_REGTYPE,
 
 	SCIx_NR_REGTYPES,
 };
-- 
cgit v1.2.3


From 3ec78790ec9a1e23fd9b6c8681a7cd3b64d3ad23 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 15 Jul 2018 10:31:28 -0700
Subject: sysfs.h: fix non-kernel-doc comment

Don't use "/**" to begin this comment block since it is not a
kernel-doc comment block.

Also adjust comment line to fit in 80 characters.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b8bfdc173ec0..8e08a90b1d15 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -91,9 +91,9 @@ struct attribute_group {
 	struct bin_attribute	**bin_attrs;
 };
 
-/**
- * Use these macros to make defining attributes easier. See include/linux/device.h
- * for examples..
+/*
+ * Use these macros to make defining attributes easier.
+ * See include/linux/device.h for examples..
  */
 
 #define SYSFS_PREALLOC 010000
-- 
cgit v1.2.3


From 726e41097920a73e4c7c33385dcc0debb1281e18 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 10 Jul 2018 10:29:10 +1000
Subject: drivers: core: Remove glue dirs from sysfs earlier

For devices with a class, we create a "glue" directory between
the parent device and the new device with the class name.

This directory is never "explicitely" removed when empty however,
this is left to the implicit sysfs removal done by kobject_release()
when the object loses its last reference via kobject_put().

This is problematic because as long as it's not been removed from
sysfs, it is still present in the class kset and in sysfs directory
structure.

The presence in the class kset exposes a use after free bug fixed
by the previous patch, but the presence in sysfs means that until
the kobject is released, which can take a while (especially with
kobject debugging), any attempt at re-creating such as binding a
new device for that class/parent pair, will result in a sysfs
duplicate file name error.

This fixes it by instead doing an explicit kobject_del() when
the glue dir is empty, by keeping track of the number of
child devices of the gluedir.

This is made easy by the fact that all glue dir operations are
done with a global mutex, and there's already a function
(cleanup_glue_dir) called in all the right places taking that
mutex that can be enhanced for this. It appears that this was
in fact the intent of the function, but the implementation was
wrong.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c     |  2 ++
 include/linux/kobject.h | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 9a6c71038616..cf8c605ec32e 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1597,6 +1597,8 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
 		return;
 
 	mutex_lock(&gdp_mutex);
+	if (!kobject_has_children(glue_dir))
+		kobject_del(glue_dir);
 	kobject_put(glue_dir);
 	mutex_unlock(&gdp_mutex);
 }
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 7f6f93c3df9c..270b40515e79 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -116,6 +116,23 @@ extern void kobject_put(struct kobject *kobj);
 extern const void *kobject_namespace(struct kobject *kobj);
 extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
 
+/**
+ * kobject_has_children - Returns whether a kobject has children.
+ * @kobj: the object to test
+ *
+ * This will return whether a kobject has other kobjects as children.
+ *
+ * It does NOT account for the presence of attribute files, only sub
+ * directories. It also assumes there is no concurrent addition or
+ * removal of such children, and thus relies on external locking.
+ */
+static inline bool kobject_has_children(struct kobject *kobj)
+{
+	WARN_ON_ONCE(kref_read(&kobj->kref) == 0);
+
+	return kobj->sd && kobj->sd->dir.subdirs;
+}
+
 struct kobj_type {
 	void (*release)(struct kobject *kobj);
 	const struct sysfs_ops *sysfs_ops;
-- 
cgit v1.2.3


From a4f9edb29d9c19f9f8dcd2df7ddfe4eb7ad58996 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 30 May 2018 17:29:52 +0100
Subject: irqchip/gic-v3: Expose GICD_TYPER in the rdist structure

Instead of exposing the GIC distributor IntID field in the rdist
structure that is passed to the ITS, let's replace it with a
copy of the whole GICD_TYPER register. We are going to need
some of this information at a later time.

No functionnal change.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v3-its.c   | 2 +-
 drivers/irqchip/irq-gic-v3.c       | 4 ++--
 include/linux/irqchip/arm-gic-v3.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 9c5b85577053..efe6d1a6c32e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1616,7 +1616,7 @@ static int __init its_alloc_lpi_tables(void)
 {
 	phys_addr_t paddr;
 
-	lpi_id_bits = gic_rdists->id_bits;
+	lpi_id_bits = GICD_TYPER_ID_BITS(gic_rdists->gicd_typer);
 	gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT);
 	if (!gic_rdists->prop_page) {
 		pr_err("Failed to allocate PROPBASE\n");
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 76ea56d779a1..e214181b77b7 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -877,7 +877,7 @@ static struct irq_chip gic_eoimode1_chip = {
 	.flags			= IRQCHIP_SET_TYPE_MASKED,
 };
 
-#define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
+#define GIC_ID_NR	(1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
 
 static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 			      irq_hw_number_t hw)
@@ -1091,7 +1091,7 @@ static int __init gic_init_bases(void __iomem *dist_base,
 	 * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
 	 */
 	typer = readl_relaxed(gic_data.dist_base + GICD_TYPER);
-	gic_data.rdists.id_bits = GICD_TYPER_ID_BITS(typer);
+	gic_data.rdists.gicd_typer = typer;
 	gic_irqs = GICD_TYPER_IRQS(typer);
 	if (gic_irqs > 1020)
 		gic_irqs = 1020;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index cbb872c1b607..396cd99af02f 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -576,8 +576,8 @@ struct rdists {
 		phys_addr_t	phys_base;
 	} __percpu		*rdist;
 	struct page		*prop_page;
-	int			id_bits;
 	u64			flags;
+	u32			gicd_typer;
 	bool			has_vlpis;
 	bool			has_direct_lpi;
 };
-- 
cgit v1.2.3


From 12b2905af183c931bedcab4292c81d3a415e080f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 31 May 2018 09:01:59 +0100
Subject: irqchip/gic-v3-its: Honor hypervisor enforced LPI range

A recent extension to the GIC architecture allows a hypervisor to
arbitrarily reduce the number of LPIs available to a guest, no
matter what the GIC says about the valid range of IntIDs.

Let's factor in this information when computing the number of
available LPIs

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v3-its.c   | 9 +++++++++
 include/linux/irqchip/arm-gic-v3.h | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index efe6d1a6c32e..f56c84977241 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1541,8 +1541,17 @@ out:
 static int __init its_lpi_init(u32 id_bits)
 {
 	u32 lpis = (1UL << id_bits) - 8192;
+	u32 numlpis;
 	int err;
 
+	numlpis = 1UL << GICD_TYPER_NUM_LPIS(gic_rdists->gicd_typer);
+
+	if (numlpis > 2 && !WARN_ON(numlpis > lpis)) {
+		lpis = numlpis;
+		pr_info("ITS: Using hypervisor restricted LPI range [%u]\n",
+			lpis);
+	}
+
 	/*
 	 * Initializing the allocator is just the same as freeing the
 	 * full range of LPIs.
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 396cd99af02f..9d2ea3e907d0 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -73,6 +73,7 @@
 #define GICD_TYPER_MBIS			(1U << 16)
 
 #define GICD_TYPER_ID_BITS(typer)	((((typer) >> 19) & 0x1f) + 1)
+#define GICD_TYPER_NUM_LPIS(typer)	((((typer) >> 11) & 0x1f) + 1)
 #define GICD_TYPER_IRQS(typer)		((((typer) & 0x1f) + 1) * 32)
 
 #define GICD_IROUTER_SPI_MODE_ONE	(0U << 31)
-- 
cgit v1.2.3


From 0f292f023ffcc67ec49d63dcb7fe388711cbb83a Mon Sep 17 00:00:00 2001
From: Jorge Sanjuan <jorge.sanjuan@codethink.co.uk>
Date: Wed, 11 Jul 2018 13:37:53 +0100
Subject: ALSA: usb-audio: Add support for Processing Units in UAC3

This patch adds support for the Processig Units defined in
the UAC3 spec. The main difference with the previous specs
is the lack of on/off switches in the controls for these
units and the addiction of the new Multi Function Processing
Unit.

The current version of the UAC3 spec doesn't define any
useful controls for the new Multi Function Processing Unit
so no control will get created once this unit is parsed.

Signed-off-by: Jorge Sanjuan <jorge.sanjuan@codethink.co.uk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio-v3.h   | 15 +++++++++++++
 include/uapi/linux/usb/audio.h | 49 ++++++++++++++++++++++++++++++++--------
 sound/usb/mixer.c              | 51 ++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 104 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio-v3.h b/include/linux/usb/audio-v3.h
index a710e28b5215..334bfa6dfb47 100644
--- a/include/linux/usb/audio-v3.h
+++ b/include/linux/usb/audio-v3.h
@@ -387,6 +387,12 @@ struct uac3_interrupt_data_msg {
 #define UAC3_CONNECTORS			0x0f
 #define UAC3_POWER_DOMAIN		0x10
 
+/* A.20 PROCESSING UNIT PROCESS TYPES */
+#define UAC3_PROCESS_UNDEFINED		0x00
+#define UAC3_PROCESS_UP_DOWNMIX		0x01
+#define UAC3_PROCESS_STEREO_EXTENDER	0x02
+#define UAC3_PROCESS_MULTI_FUNCTION	0x03
+
 /* A.22 AUDIO CLASS-SPECIFIC REQUEST CODES */
 /* see audio-v2.h for the rest, which is identical to v2 */
 #define UAC3_CS_REQ_INTEN			0x04
@@ -406,6 +412,15 @@ struct uac3_interrupt_data_msg {
 #define UAC3_TE_OVERFLOW			0x04
 #define UAC3_TE_LATENCY 			0x05
 
+/* A.23.10 PROCESSING UNITS CONTROL SELECTROS */
+
+/* Up/Down Mixer */
+#define UAC3_UD_MODE_SELECT			0x01
+
+/* Stereo Extender */
+#define UAC3_EXT_WIDTH_CONTROL			0x01
+
+
 /* BADD predefined Unit/Terminal values */
 #define UAC3_BADD_IT_ID1	1  /* Input Terminal ID1: bTerminalID = 1 */
 #define UAC3_BADD_FU_ID2	2  /* Feature Unit ID2: bUnitID = 2 */
diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h
index 74e520fb944f..ddc5396800aa 100644
--- a/include/uapi/linux/usb/audio.h
+++ b/include/uapi/linux/usb/audio.h
@@ -390,33 +390,64 @@ static inline __u8 uac_processing_unit_iChannelNames(struct uac_processing_unit_
 static inline __u8 uac_processing_unit_bControlSize(struct uac_processing_unit_descriptor *desc,
 						    int protocol)
 {
-	return (protocol == UAC_VERSION_1) ?
-		desc->baSourceID[desc->bNrInPins + 4] :
-		2; /* in UAC2, this value is constant */
+	switch (protocol) {
+	case UAC_VERSION_1:
+		return desc->baSourceID[desc->bNrInPins + 4];
+	case UAC_VERSION_2:
+		return 2; /* in UAC2, this value is constant */
+	case UAC_VERSION_3:
+		return 4; /* in UAC3, this value is constant */
+	default:
+		return 1;
+	}
 }
 
 static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_descriptor *desc,
 						   int protocol)
 {
-	return (protocol == UAC_VERSION_1) ?
-		&desc->baSourceID[desc->bNrInPins + 5] :
-		&desc->baSourceID[desc->bNrInPins + 6];
+	switch (protocol) {
+	case UAC_VERSION_1:
+		return &desc->baSourceID[desc->bNrInPins + 5];
+	case UAC_VERSION_2:
+		return &desc->baSourceID[desc->bNrInPins + 6];
+	case UAC_VERSION_3:
+		return &desc->baSourceID[desc->bNrInPins + 2];
+	default:
+		return NULL;
+	}
 }
 
 static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_descriptor *desc,
 						   int protocol)
 {
 	__u8 control_size = uac_processing_unit_bControlSize(desc, protocol);
-	return *(uac_processing_unit_bmControls(desc, protocol)
-			+ control_size);
+
+	switch (protocol) {
+	case UAC_VERSION_1:
+	case UAC_VERSION_2:
+	default:
+		return *(uac_processing_unit_bmControls(desc, protocol)
+			 + control_size);
+	case UAC_VERSION_3:
+		return 0; /* UAC3 does not have this field */
+	}
 }
 
 static inline __u8 *uac_processing_unit_specific(struct uac_processing_unit_descriptor *desc,
 						 int protocol)
 {
 	__u8 control_size = uac_processing_unit_bControlSize(desc, protocol);
-	return uac_processing_unit_bmControls(desc, protocol)
+
+	switch (protocol) {
+	case UAC_VERSION_1:
+	case UAC_VERSION_2:
+	default:
+		return uac_processing_unit_bmControls(desc, protocol)
 			+ control_size + 1;
+	case UAC_VERSION_3:
+		return uac_processing_unit_bmControls(desc, protocol)
+			+ control_size;
+	}
 }
 
 /* 4.5.2 Class-Specific AS Interface Descriptor */
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index bfb3484096a6..39fde49e8749 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -953,6 +953,23 @@ static int check_input_term(struct mixer_build *state, int id,
 
 				return 0;
 			}
+			case UAC3_PROCESSING_UNIT: {
+				struct uac_processing_unit_descriptor *d = p1;
+
+				if (!d->bNrInPins)
+					return -EINVAL;
+
+				/* call recursively to retrieve the channel info */
+				err = check_input_term(state, d->baSourceID[0], term);
+				if (err < 0)
+					return err;
+
+				term->type = d->bDescriptorSubtype << 16; /* virtual type */
+				term->id = id;
+				term->name = 0; /* TODO: UAC3 Class-specific strings */
+
+				return 0;
+			}
 			default:
 				return -ENODEV;
 			}
@@ -2180,6 +2197,11 @@ struct procunit_info {
 	struct procunit_value_info *values;
 };
 
+static struct procunit_value_info undefined_proc_info[] = {
+	{ 0x00, "Control Undefined", 0 },
+	{ 0 }
+};
+
 static struct procunit_value_info updown_proc_info[] = {
 	{ UAC_UD_ENABLE, "Switch", USB_MIXER_BOOLEAN },
 	{ UAC_UD_MODE_SELECT, "Mode Select", USB_MIXER_U8, 1 },
@@ -2228,6 +2250,23 @@ static struct procunit_info procunits[] = {
 	{ UAC_PROCESS_DYN_RANGE_COMP, "DCR", dcr_proc_info },
 	{ 0 },
 };
+
+static struct procunit_value_info uac3_updown_proc_info[] = {
+	{ UAC3_UD_MODE_SELECT, "Mode Select", USB_MIXER_U8, 1 },
+	{ 0 }
+};
+static struct procunit_value_info uac3_stereo_ext_proc_info[] = {
+	{ UAC3_EXT_WIDTH_CONTROL, "Width Control", USB_MIXER_U8 },
+	{ 0 }
+};
+
+static struct procunit_info uac3_procunits[] = {
+	{ UAC3_PROCESS_UP_DOWNMIX, "Up Down", uac3_updown_proc_info },
+	{ UAC3_PROCESS_STEREO_EXTENDER, "3D Stereo Extender", uac3_stereo_ext_proc_info },
+	{ UAC3_PROCESS_MULTI_FUNCTION, "Multi-Function", undefined_proc_info },
+	{ 0 },
+};
+
 /*
  * predefined data for extension units
  */
@@ -2388,8 +2427,16 @@ static int build_audio_procunit(struct mixer_build *state, int unitid,
 static int parse_audio_processing_unit(struct mixer_build *state, int unitid,
 				       void *raw_desc)
 {
-	return build_audio_procunit(state, unitid, raw_desc,
-				    procunits, "Processing Unit");
+	switch (state->mixer->protocol) {
+	case UAC_VERSION_1:
+	case UAC_VERSION_2:
+	default:
+		return build_audio_procunit(state, unitid, raw_desc,
+				procunits, "Processing Unit");
+	case UAC_VERSION_3:
+		return build_audio_procunit(state, unitid, raw_desc,
+				uac3_procunits, "Processing Unit");
+	}
 }
 
 static int parse_audio_extension_unit(struct mixer_build *state, int unitid,
-- 
cgit v1.2.3


From d7e5a9a50245b91f016c814b0f076f7e55cbb980 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 17:49:43 +0200
Subject: netfilter: utils: move nf_ip_checksum* from ipv4 to utils

allows to make nf_ip_checksum_partial static, it no longer
has an external caller.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv4.h | 11 ---------
 net/ipv4/netfilter.c           | 53 ----------------------------------------
 net/netfilter/utils.c          | 55 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index b31dabfdb453..95ab5cc64422 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -23,9 +23,6 @@ struct nf_queue_entry;
 #ifdef CONFIG_INET
 __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 		       unsigned int dataoff, u_int8_t protocol);
-__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
-			       unsigned int dataoff, unsigned int len,
-			       u_int8_t protocol);
 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		bool strict);
 int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry);
@@ -35,14 +32,6 @@ static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 {
 	return 0;
 }
-static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb,
-					     unsigned int hook,
-					     unsigned int dataoff,
-					     unsigned int len,
-					     u_int8_t protocol)
-{
-	return 0;
-}
 static inline int nf_ip_route(struct net *net, struct dst_entry **dst,
 			      struct flowi *fl, bool strict)
 {
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index e6774ccb7731..8d2e5dc9a827 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -98,59 +98,6 @@ int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
 }
 EXPORT_SYMBOL_GPL(nf_ip_reroute);
 
-__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
-			    unsigned int dataoff, u_int8_t protocol)
-{
-	const struct iphdr *iph = ip_hdr(skb);
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
-			break;
-		if ((protocol == 0 && !csum_fold(skb->csum)) ||
-		    !csum_tcpudp_magic(iph->saddr, iph->daddr,
-				       skb->len - dataoff, protocol,
-				       skb->csum)) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			break;
-		}
-		/* fall through */
-	case CHECKSUM_NONE:
-		if (protocol == 0)
-			skb->csum = 0;
-		else
-			skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-						       skb->len - dataoff,
-						       protocol, 0);
-		csum = __skb_checksum_complete(skb);
-	}
-	return csum;
-}
-EXPORT_SYMBOL(nf_ip_checksum);
-
-__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
-			       unsigned int dataoff, unsigned int len,
-			       u_int8_t protocol)
-{
-	const struct iphdr *iph = ip_hdr(skb);
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (len == skb->len - dataoff)
-			return nf_ip_checksum(skb, hook, dataoff, protocol);
-		/* fall through */
-	case CHECKSUM_NONE:
-		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
-					       skb->len - dataoff, 0);
-		skb->ip_summed = CHECKSUM_NONE;
-		return __skb_checksum_complete_head(skb, dataoff + len);
-	}
-	return csum;
-}
-EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
-
 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		bool strict __always_unused)
 {
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 0b660c568156..8980c8a0fe5c 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -1,9 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_queue.h>
 
+#ifdef CONFIG_INET
+__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+		       unsigned int dataoff, u8 protocol)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+			break;
+		if ((protocol == 0 && !csum_fold(skb->csum)) ||
+		    !csum_tcpudp_magic(iph->saddr, iph->daddr,
+				       skb->len - dataoff, protocol,
+				       skb->csum)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+		/* fall through */
+	case CHECKSUM_NONE:
+		if (protocol == 0)
+			skb->csum = 0;
+		else
+			skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+						       skb->len - dataoff,
+						       protocol, 0);
+		csum = __skb_checksum_complete(skb);
+	}
+	return csum;
+}
+EXPORT_SYMBOL(nf_ip_checksum);
+#endif
+
+static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+				      unsigned int dataoff, unsigned int len,
+				      u8 protocol)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (len == skb->len - dataoff)
+			return nf_ip_checksum(skb, hook, dataoff, protocol);
+		/* fall through */
+	case CHECKSUM_NONE:
+		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
+					       skb->len - dataoff, 0);
+		skb->ip_summed = CHECKSUM_NONE;
+		return __skb_checksum_complete_head(skb, dataoff + len);
+	}
+	return csum;
+}
+
 __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
 		    unsigned int dataoff, u_int8_t protocol,
 		    unsigned short family)
-- 
cgit v1.2.3


From ebee5a50d0b7cdc576aa8081f05b86971880054d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 17:49:59 +0200
Subject: netfilter: utils: move nf_ip6_checksum* from ipv6 to utils

similar to previous change, this also allows to remove it
from nf_ipv6_ops and avoid the indirection.

It also removes the bogus dependency of nf_conntrack_ipv6 on ipv6 module:
ipv6 checksum functions are built into kernel even if CONFIG_IPV6=m,
but ipv6/netfilter.o isn't.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h |  5 ---
 net/ipv6/netfilter.c           | 62 ----------------------------------
 net/netfilter/utils.c          | 76 ++++++++++++++++++++++++++++++++++++------
 3 files changed, 65 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 288c597e75b3..c0dc4dd78887 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -30,11 +30,6 @@ struct nf_ipv6_ops {
 	void (*route_input)(struct sk_buff *skb);
 	int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
 			int (*output)(struct net *, struct sock *, struct sk_buff *));
-	__sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
-			    unsigned int dataoff, u_int8_t protocol);
-	__sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
-				    unsigned int dataoff, unsigned int len,
-				    u_int8_t protocol);
 	int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		     bool strict);
 	int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 531d6957af36..5ae8e1c51079 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -15,7 +15,6 @@
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #include <net/xfrm.h>
-#include <net/ip6_checksum.h>
 #include <net/netfilter/nf_queue.h>
 
 int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
@@ -106,71 +105,10 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst,
 	return err;
 }
 
-__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
-			     unsigned int dataoff, u_int8_t protocol)
-{
-	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
-			break;
-		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-				     skb->len - dataoff, protocol,
-				     csum_sub(skb->csum,
-					      skb_checksum(skb, 0,
-							   dataoff, 0)))) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			break;
-		}
-		/* fall through */
-	case CHECKSUM_NONE:
-		skb->csum = ~csum_unfold(
-				csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-					     skb->len - dataoff,
-					     protocol,
-					     csum_sub(0,
-						      skb_checksum(skb, 0,
-								   dataoff, 0))));
-		csum = __skb_checksum_complete(skb);
-	}
-	return csum;
-}
-EXPORT_SYMBOL(nf_ip6_checksum);
-
-static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
-				       unsigned int dataoff, unsigned int len,
-				       u_int8_t protocol)
-{
-	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	__wsum hsum;
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (len == skb->len - dataoff)
-			return nf_ip6_checksum(skb, hook, dataoff, protocol);
-		/* fall through */
-	case CHECKSUM_NONE:
-		hsum = skb_checksum(skb, 0, dataoff, 0);
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
-							 &ip6h->daddr,
-							 skb->len - dataoff,
-							 protocol,
-							 csum_sub(0, hsum)));
-		skb->ip_summed = CHECKSUM_NONE;
-		return __skb_checksum_complete_head(skb, dataoff + len);
-	}
-	return csum;
-};
-
 static const struct nf_ipv6_ops ipv6ops = {
 	.chk_addr		= ipv6_chk_addr,
 	.route_input    	= ip6_route_input,
 	.fragment		= ip6_fragment,
-	.checksum		= nf_ip6_checksum,
-	.checksum_partial	= nf_ip6_checksum_partial,
 	.route			= nf_ip6_route,
 	.reroute		= nf_ip6_reroute,
 };
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 8980c8a0fe5c..e8da9a9bba73 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -4,6 +4,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/ip6_checksum.h>
 
 #ifdef CONFIG_INET
 __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
@@ -59,11 +60,69 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
 	return csum;
 }
 
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+			unsigned int dataoff, u8 protocol)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+			break;
+		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+				     skb->len - dataoff, protocol,
+				     csum_sub(skb->csum,
+					      skb_checksum(skb, 0,
+							   dataoff, 0)))) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+		/* fall through */
+	case CHECKSUM_NONE:
+		skb->csum = ~csum_unfold(
+				csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					     skb->len - dataoff,
+					     protocol,
+					     csum_sub(0,
+						      skb_checksum(skb, 0,
+								   dataoff, 0))));
+		csum = __skb_checksum_complete(skb);
+	}
+	return csum;
+}
+EXPORT_SYMBOL(nf_ip6_checksum);
+
+static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
+				       unsigned int dataoff, unsigned int len,
+				       u8 protocol)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	__wsum hsum;
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (len == skb->len - dataoff)
+			return nf_ip6_checksum(skb, hook, dataoff, protocol);
+		/* fall through */
+	case CHECKSUM_NONE:
+		hsum = skb_checksum(skb, 0, dataoff, 0);
+		skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+							 &ip6h->daddr,
+							 skb->len - dataoff,
+							 protocol,
+							 csum_sub(0, hsum)));
+		skb->ip_summed = CHECKSUM_NONE;
+		return __skb_checksum_complete_head(skb, dataoff + len);
+	}
+	return csum;
+};
+
 __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
-		    unsigned int dataoff, u_int8_t protocol,
+		    unsigned int dataoff, u8 protocol,
 		    unsigned short family)
 {
-	const struct nf_ipv6_ops *v6ops;
 	__sum16 csum = 0;
 
 	switch (family) {
@@ -71,9 +130,7 @@ __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
 		csum = nf_ip_checksum(skb, hook, dataoff, protocol);
 		break;
 	case AF_INET6:
-		v6ops = rcu_dereference(nf_ipv6_ops);
-		if (v6ops)
-			csum = v6ops->checksum(skb, hook, dataoff, protocol);
+		csum = nf_ip6_checksum(skb, hook, dataoff, protocol);
 		break;
 	}
 
@@ -83,9 +140,8 @@ EXPORT_SYMBOL_GPL(nf_checksum);
 
 __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
 			    unsigned int dataoff, unsigned int len,
-			    u_int8_t protocol, unsigned short family)
+			    u8 protocol, unsigned short family)
 {
-	const struct nf_ipv6_ops *v6ops;
 	__sum16 csum = 0;
 
 	switch (family) {
@@ -94,10 +150,8 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
 					      protocol);
 		break;
 	case AF_INET6:
-		v6ops = rcu_dereference(nf_ipv6_ops);
-		if (v6ops)
-			csum = v6ops->checksum_partial(skb, hook, dataoff, len,
-						       protocol);
+		csum = nf_ip6_checksum_partial(skb, hook, dataoff, len,
+					       protocol);
 		break;
 	}
 
-- 
cgit v1.2.3


From d1b47a7c9efcf3c3384b70f6e3c8f1423b44d8c7 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Mon, 16 Jul 2018 11:16:30 -0400
Subject: mm: don't do zero_resv_unavail if memmap is not allocated

Moving zero_resv_unavail before memmap_init_zone(), caused a regression on
x86-32.

The cause is that we access struct pages before they are allocated when
CONFIG_FLAT_NODE_MEM_MAP is used.

free_area_init_nodes()
  zero_resv_unavail()
    mm_zero_struct_page(pfn_to_page(pfn)); <- struct page is not alloced
  free_area_init_node()
    if CONFIG_FLAT_NODE_MEM_MAP
      alloc_node_mem_map()
        memblock_virt_alloc_node_nopanic() <- struct page alloced here

On the other hand memblock_virt_alloc_node_nopanic() zeroes all the memory
that it returns, so we do not need to do zero_resv_unavail() here.

Fixes: e181ae0c5db9 ("mm: zero unavailable pages before memmap init")
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Matt Hart <matt@mattface.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 mm/page_alloc.c    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a0fbb9ffe380..3982c83fdcbf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2132,7 +2132,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
 					struct mminit_pfnnid_cache *state);
 #endif
 
-#ifdef CONFIG_HAVE_MEMBLOCK
+#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP)
 void zero_resv_unavail(void);
 #else
 static inline void zero_resv_unavail(void) {}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5d800d61ddb7..a790ef4be74e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6383,7 +6383,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	free_area_init_core(pgdat);
 }
 
-#ifdef CONFIG_HAVE_MEMBLOCK
+#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP)
 /*
  * Only struct pages that are backed by physical memory are zeroed and
  * initialized by going through __init_single_page(). But, there are some
@@ -6421,7 +6421,7 @@ void __paginginit zero_resv_unavail(void)
 	if (pgcnt)
 		pr_info("Reserved but unavailable: %lld pages", pgcnt);
 }
-#endif /* CONFIG_HAVE_MEMBLOCK */
+#endif /* CONFIG_HAVE_MEMBLOCK && !CONFIG_FLAT_NODE_MEM_MAP */
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 
-- 
cgit v1.2.3


From 6e2059b53f9885f202b086d7b4ca10a98926e974 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 10 Jul 2018 22:41:26 +0800
Subject: ipv4/igmp: init group mode as INCLUDE when join source group

Based on RFC3376 5.1
   If no interface
   state existed for that multicast address before the change (i.e., the
   change consisted of creating a new per-interface record), or if no
   state exists after the change (i.e., the change consisted of deleting
   a per-interface record), then the "non-existent" state is considered
   to have a filter mode of INCLUDE and an empty source list.

Which means a new multicast group should start with state IN().

Function ip_mc_join_group() works correctly for IGMP ASM(Any-Source Multicast)
mode. It adds a group with state EX() and inits crcount to mc_qrv,
so the kernel will send a TO_EX() report message after adding group.

But for IGMPv3 SSM(Source-specific multicast) JOIN_SOURCE_GROUP mode, we
split the group joining into two steps. First we join the group like ASM,
i.e. via ip_mc_join_group(). So the state changes from IN() to EX().

Then we add the source-specific address with INCLUDE mode. So the state
changes from EX() to IN(A).

Before the first step sends a group change record, we finished the second
step. So we will only send the second change record. i.e. TO_IN(A).

Regarding the RFC stands, we should actually send an ALLOW(A) message for
SSM JOIN_SOURCE_GROUP as the state should mimic the 'IN() to IN(A)'
transition.

The issue was exposed by commit a052517a8ff65 ("net/multicast: should not
send source list records when have filter mode change"). Before this change,
we used to send both ALLOW(A) and TO_IN(A). After this change we only send
TO_IN(A).

Fix it by adding a new parameter to init group mode. Also add new wrapper
functions so we don't need to change too much code.

v1 -> v2:
In my first version I only cleared the group change record. But this is not
enough. Because when a new group join, it will init as EXCLUDE and trigger
an filter mode change in ip/ip6_mc_add_src(), which will clear all source
addresses' sf_crcount. This will prevent early joined address sending state
change records if multi source addressed joined at the same time.

In v2 patch, I fixed it by directly initializing the mode to INCLUDE for SSM
JOIN_SOURCE_GROUP. I also split the original patch into two separated patches
for IPv4 and IPv6.

Fixes: a052517a8ff65 ("net/multicast: should not send source list records when have filter mode change")
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h   |  2 ++
 net/ipv4/igmp.c        | 58 ++++++++++++++++++++++++++++++++++++--------------
 net/ipv4/ip_sockglue.c |  4 ++--
 3 files changed, 46 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index f8231854b5d6..119f53941c12 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -109,6 +109,8 @@ struct ip_mc_list {
 extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto);
 extern int igmp_rcv(struct sk_buff *);
 extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr);
+extern int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr,
+				unsigned int mode);
 extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr);
 extern void ip_mc_drop_socket(struct sock *sk);
 extern int ip_mc_source(int add, int omode, struct sock *sk,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 85b617b655bc..b3c899a630a0 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1200,13 +1200,14 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	spin_lock_bh(&im->lock);
 	if (pmc) {
 		im->interface = pmc->interface;
-		im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
 		im->sfmode = pmc->sfmode;
 		if (pmc->sfmode == MCAST_INCLUDE) {
 			im->tomb = pmc->tomb;
 			im->sources = pmc->sources;
 			for (psf = im->sources; psf; psf = psf->sf_next)
-				psf->sf_crcount = im->crcount;
+				psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+		} else {
+			im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
 		}
 		in_dev_put(pmc->interface);
 		kfree(pmc);
@@ -1288,7 +1289,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
 #endif
 }
 
-static void igmp_group_added(struct ip_mc_list *im)
+static void igmp_group_added(struct ip_mc_list *im, unsigned int mode)
 {
 	struct in_device *in_dev = im->interface;
 #ifdef CONFIG_IP_MULTICAST
@@ -1316,7 +1317,13 @@ static void igmp_group_added(struct ip_mc_list *im)
 	}
 	/* else, v3 */
 
-	im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+	/* Based on RFC3376 5.1, for newly added INCLUDE SSM, we should
+	 * not send filter-mode change record as the mode should be from
+	 * IN() to IN(A).
+	 */
+	if (mode == MCAST_EXCLUDE)
+		im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+
 	igmp_ifc_event(in_dev);
 #endif
 }
@@ -1381,8 +1388,7 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
 /*
  *	A socket has joined a multicast group on device dev.
  */
-
-void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
+void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode)
 {
 	struct ip_mc_list *im;
 #ifdef CONFIG_IP_MULTICAST
@@ -1394,7 +1400,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	for_each_pmc_rtnl(in_dev, im) {
 		if (im->multiaddr == addr) {
 			im->users++;
-			ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
+			ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0);
 			goto out;
 		}
 	}
@@ -1408,8 +1414,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	in_dev_hold(in_dev);
 	im->multiaddr = addr;
 	/* initial mode is (EX, empty) */
-	im->sfmode = MCAST_EXCLUDE;
-	im->sfcount[MCAST_EXCLUDE] = 1;
+	im->sfmode = mode;
+	im->sfcount[mode] = 1;
 	refcount_set(&im->refcnt, 1);
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
@@ -1426,12 +1432,17 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 #ifdef CONFIG_IP_MULTICAST
 	igmpv3_del_delrec(in_dev, im);
 #endif
-	igmp_group_added(im);
+	igmp_group_added(im, mode);
 	if (!in_dev->dead)
 		ip_rt_multicast_event(in_dev);
 out:
 	return;
 }
+
+void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
+{
+	__ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE);
+}
 EXPORT_SYMBOL(ip_mc_inc_group);
 
 static int ip_mc_check_iphdr(struct sk_buff *skb)
@@ -1688,7 +1699,7 @@ void ip_mc_remap(struct in_device *in_dev)
 #ifdef CONFIG_IP_MULTICAST
 		igmpv3_del_delrec(in_dev, pmc);
 #endif
-		igmp_group_added(pmc);
+		igmp_group_added(pmc, pmc->sfmode);
 	}
 }
 
@@ -1751,7 +1762,7 @@ void ip_mc_up(struct in_device *in_dev)
 #ifdef CONFIG_IP_MULTICAST
 		igmpv3_del_delrec(in_dev, pmc);
 #endif
-		igmp_group_added(pmc);
+		igmp_group_added(pmc, pmc->sfmode);
 	}
 }
 
@@ -2130,8 +2141,8 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
 
 /* Join a multicast group
  */
-
-int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
+			      unsigned int mode)
 {
 	__be32 addr = imr->imr_multiaddr.s_addr;
 	struct ip_mc_socklist *iml, *i;
@@ -2172,15 +2183,30 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
 	memcpy(&iml->multi, imr, sizeof(*imr));
 	iml->next_rcu = inet->mc_list;
 	iml->sflist = NULL;
-	iml->sfmode = MCAST_EXCLUDE;
+	iml->sfmode = mode;
 	rcu_assign_pointer(inet->mc_list, iml);
-	ip_mc_inc_group(in_dev, addr);
+	__ip_mc_inc_group(in_dev, addr, mode);
 	err = 0;
 done:
 	return err;
 }
+
+/* Join ASM (Any-Source Multicast) group
+ */
+int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+{
+	return __ip_mc_join_group(sk, imr, MCAST_EXCLUDE);
+}
 EXPORT_SYMBOL(ip_mc_join_group);
 
+/* Join SSM (Source-Specific Multicast) group
+ */
+int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr,
+			 unsigned int mode)
+{
+	return __ip_mc_join_group(sk, imr, mode);
+}
+
 static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 			   struct in_device *in_dev)
 {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index fc32fdbeefa6..64c76dcf7386 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -984,7 +984,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
 			mreq.imr_address.s_addr = mreqs.imr_interface;
 			mreq.imr_ifindex = 0;
-			err = ip_mc_join_group(sk, &mreq);
+			err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
 			if (err && err != -EADDRINUSE)
 				break;
 			omode = MCAST_INCLUDE;
@@ -1061,7 +1061,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			mreq.imr_multiaddr = psin->sin_addr;
 			mreq.imr_address.s_addr = 0;
 			mreq.imr_ifindex = greqs.gsr_interface;
-			err = ip_mc_join_group(sk, &mreq);
+			err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
 			if (err && err != -EADDRINUSE)
 				break;
 			greqs.gsr_interface = mreq.imr_ifindex;
-- 
cgit v1.2.3


From 377179cd28cd417dcfb4396edb824533431e607e Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Fri, 13 Jul 2018 14:05:56 -0400
Subject: security: define new LSM hook named security_kernel_load_data

Differentiate between the kernel reading a file specified by userspace
from the kernel loading a buffer containing data provided by userspace.
This patch defines a new LSM hook named security_kernel_load_data().

Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Serge Hallyn <serge@hallyn.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 include/linux/lsm_hooks.h |  6 ++++++
 include/linux/security.h  | 27 +++++++++++++++++++++++++++
 security/security.c       |  5 +++++
 3 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 8f1131c8dd54..a08bc2587b96 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -576,6 +576,10 @@
  *	userspace to load a kernel module with the given name.
  *	@kmod_name name of the module requested by the kernel
  *	Return 0 if successful.
+ * @kernel_load_data:
+ *	Load data provided by userspace.
+ *	@id kernel load data identifier
+ *	Return 0 if permission is granted.
  * @kernel_read_file:
  *	Read a file specified by userspace.
  *	@file contains the file structure pointing to the file being read
@@ -1582,6 +1586,7 @@ union security_list_options {
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
 	int (*kernel_module_request)(char *kmod_name);
+	int (*kernel_load_data)(enum kernel_load_data_id id);
 	int (*kernel_read_file)(struct file *file, enum kernel_read_file_id id);
 	int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size,
 				     enum kernel_read_file_id id);
@@ -1872,6 +1877,7 @@ struct security_hook_heads {
 	struct hlist_head cred_getsecid;
 	struct hlist_head kernel_act_as;
 	struct hlist_head kernel_create_files_as;
+	struct hlist_head kernel_load_data;
 	struct hlist_head kernel_read_file;
 	struct hlist_head kernel_post_read_file;
 	struct hlist_head kernel_module_request;
diff --git a/include/linux/security.h b/include/linux/security.h
index 63030c85ee19..3410acfe139c 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -159,6 +159,27 @@ extern int mmap_min_addr_handler(struct ctl_table *table, int write,
 typedef int (*initxattrs) (struct inode *inode,
 			   const struct xattr *xattr_array, void *fs_data);
 
+
+/* Keep the kernel_load_data_id enum in sync with kernel_read_file_id */
+#define __data_id_enumify(ENUM, dummy) LOADING_ ## ENUM,
+#define __data_id_stringify(dummy, str) #str,
+
+enum kernel_load_data_id {
+	__kernel_read_file_id(__data_id_enumify)
+};
+
+static const char * const kernel_load_data_str[] = {
+	__kernel_read_file_id(__data_id_stringify)
+};
+
+static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id)
+{
+	if ((unsigned)id >= LOADING_MAX_ID)
+		return kernel_load_data_str[LOADING_UNKNOWN];
+
+	return kernel_load_data_str[id];
+}
+
 #ifdef CONFIG_SECURITY
 
 struct security_mnt_opts {
@@ -320,6 +341,7 @@ void security_cred_getsecid(const struct cred *c, u32 *secid);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
+int security_kernel_load_data(enum kernel_load_data_id id);
 int security_kernel_read_file(struct file *file, enum kernel_read_file_id id);
 int security_kernel_post_read_file(struct file *file, char *buf, loff_t size,
 				   enum kernel_read_file_id id);
@@ -909,6 +931,11 @@ static inline int security_kernel_module_request(char *kmod_name)
 	return 0;
 }
 
+static inline int security_kernel_load_data(enum kernel_load_data_id id)
+{
+	return 0;
+}
+
 static inline int security_kernel_read_file(struct file *file,
 					    enum kernel_read_file_id id)
 {
diff --git a/security/security.c b/security/security.c
index 68f46d849abe..c2de2f134854 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1056,6 +1056,11 @@ int security_kernel_post_read_file(struct file *file, char *buf, loff_t size,
 }
 EXPORT_SYMBOL_GPL(security_kernel_post_read_file);
 
+int security_kernel_load_data(enum kernel_load_data_id id)
+{
+	return call_int_hook(kernel_load_data, 0, id);
+}
+
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags)
 {
-- 
cgit v1.2.3


From 16c267aac86b463b1fcccd43c89f4c8e5c5c86fa Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Fri, 13 Jul 2018 14:05:58 -0400
Subject: ima: based on policy require signed kexec kernel images

The original kexec_load syscall can not verify file signatures, nor can
the kexec image be measured.  Based on policy, deny the kexec_load
syscall.

Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 include/linux/ima.h                 |  7 +++++++
 security/integrity/ima/ima.h        |  1 +
 security/integrity/ima/ima_main.c   | 27 +++++++++++++++++++++++++++
 security/integrity/ima/ima_policy.c |  2 ++
 security/security.c                 |  7 ++++++-
 5 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e4647e0eb60..84806b54b50a 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -11,6 +11,7 @@
 #define _LINUX_IMA_H
 
 #include <linux/fs.h>
+#include <linux/security.h>
 #include <linux/kexec.h>
 struct linux_binprm;
 
@@ -19,6 +20,7 @@ extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_file_check(struct file *file, int mask, int opened);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
+extern int ima_load_data(enum kernel_load_data_id id);
 extern int ima_read_file(struct file *file, enum kernel_read_file_id id);
 extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
 			      enum kernel_read_file_id id);
@@ -49,6 +51,11 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot)
 	return 0;
 }
 
+static inline int ima_load_data(enum kernel_load_data_id id)
+{
+	return 0;
+}
+
 static inline int ima_read_file(struct file *file, enum kernel_read_file_id id)
 {
 	return 0;
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 354bb5716ce3..78c15264b17b 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -232,6 +232,7 @@ int ima_policy_show(struct seq_file *m, void *v);
 #define IMA_APPRAISE_MODULES	0x08
 #define IMA_APPRAISE_FIRMWARE	0x10
 #define IMA_APPRAISE_POLICY	0x20
+#define IMA_APPRAISE_KEXEC	0x40
 
 #ifdef CONFIG_IMA_APPRAISE
 int ima_appraise_measurement(enum ima_hooks func,
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index dca44cf7838e..71fecfef0939 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -496,6 +496,33 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size,
 				   MAY_READ, func, 0);
 }
 
+/**
+ * ima_load_data - appraise decision based on policy
+ * @id: kernel load data caller identifier
+ *
+ * Callers of this LSM hook can not measure, appraise, or audit the
+ * data provided by userspace.  Enforce policy rules requring a file
+ * signature (eg. kexec'ed kernel image).
+ *
+ * For permission return 0, otherwise return -EACCES.
+ */
+int ima_load_data(enum kernel_load_data_id id)
+{
+	if ((ima_appraise & IMA_APPRAISE_ENFORCE) != IMA_APPRAISE_ENFORCE)
+		return 0;
+
+	switch (id) {
+	case LOADING_KEXEC_IMAGE:
+		if (ima_appraise & IMA_APPRAISE_KEXEC) {
+			pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n");
+			return -EACCES;	/* INTEGRITY_UNKNOWN */
+		}
+	default:
+		break;
+	}
+	return 0;
+}
+
 static int __init init_ima(void)
 {
 	int error;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index cdcc9a7b4e24..d5b4958decc5 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -448,6 +448,8 @@ static int ima_appraise_flag(enum ima_hooks func)
 		return IMA_APPRAISE_FIRMWARE;
 	else if (func == POLICY_CHECK)
 		return IMA_APPRAISE_POLICY;
+	else if (func == KEXEC_KERNEL_CHECK)
+		return IMA_APPRAISE_KEXEC;
 	return 0;
 }
 
diff --git a/security/security.c b/security/security.c
index c2de2f134854..4927e7cc7d96 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1058,7 +1058,12 @@ EXPORT_SYMBOL_GPL(security_kernel_post_read_file);
 
 int security_kernel_load_data(enum kernel_load_data_id id)
 {
-	return call_int_hook(kernel_load_data, 0, id);
+	int ret;
+
+	ret = call_int_hook(kernel_load_data, 0, id);
+	if (ret)
+		return ret;
+	return ima_load_data(id);
 }
 
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
-- 
cgit v1.2.3


From 2b9672ddb6f347467d7b33b86c5dfc4d5c0501a8 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 12 Jul 2018 21:32:53 +0200
Subject: net: phy: add phy_speed_down and phy_speed_up

Some network drivers include functionality to speed down the PHY when
suspending and just waiting for a WoL packet because this saves energy.
This functionality is quite generic, therefore let's factor it out to
phylib.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/phy.h   |  2 ++
 2 files changed, 80 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index c4aa360dedff..d2baedc4ea91 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -551,6 +551,84 @@ int phy_start_aneg(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_start_aneg);
 
+static int phy_poll_aneg_done(struct phy_device *phydev)
+{
+	unsigned int retries = 100;
+	int ret;
+
+	do {
+		msleep(100);
+		ret = phy_aneg_done(phydev);
+	} while (!ret && --retries);
+
+	if (!ret)
+		return -ETIMEDOUT;
+
+	return ret < 0 ? ret : 0;
+}
+
+/**
+ * phy_speed_down - set speed to lowest speed supported by both link partners
+ * @phydev: the phy_device struct
+ * @sync: perform action synchronously
+ *
+ * Description: Typically used to save energy when waiting for a WoL packet
+ *
+ * WARNING: Setting sync to false may cause the system being unable to suspend
+ * in case the PHY generates an interrupt when finishing the autonegotiation.
+ * This interrupt may wake up the system immediately after suspend.
+ * Therefore use sync = false only if you're sure it's safe with the respective
+ * network chip.
+ */
+int phy_speed_down(struct phy_device *phydev, bool sync)
+{
+	u32 adv = phydev->lp_advertising & phydev->supported;
+	u32 adv_old = phydev->advertising;
+	int ret;
+
+	if (phydev->autoneg != AUTONEG_ENABLE)
+		return 0;
+
+	if (adv & PHY_10BT_FEATURES)
+		phydev->advertising &= ~(PHY_100BT_FEATURES |
+					 PHY_1000BT_FEATURES);
+	else if (adv & PHY_100BT_FEATURES)
+		phydev->advertising &= ~PHY_1000BT_FEATURES;
+
+	if (phydev->advertising == adv_old)
+		return 0;
+
+	ret = phy_config_aneg(phydev);
+	if (ret)
+		return ret;
+
+	return sync ? phy_poll_aneg_done(phydev) : 0;
+}
+EXPORT_SYMBOL_GPL(phy_speed_down);
+
+/**
+ * phy_speed_up - (re)set advertised speeds to all supported speeds
+ * @phydev: the phy_device struct
+ *
+ * Description: Used to revert the effect of phy_speed_down
+ */
+int phy_speed_up(struct phy_device *phydev)
+{
+	u32 mask = PHY_10BT_FEATURES | PHY_100BT_FEATURES | PHY_1000BT_FEATURES;
+	u32 adv_old = phydev->advertising;
+
+	if (phydev->autoneg != AUTONEG_ENABLE)
+		return 0;
+
+	phydev->advertising = (adv_old & ~mask) | (phydev->supported & mask);
+
+	if (phydev->advertising == adv_old)
+		return 0;
+
+	return phy_config_aneg(phydev);
+}
+EXPORT_SYMBOL_GPL(phy_speed_up);
+
 /**
  * phy_start_machine - start PHY state machine tracking
  * @phydev: the phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 6cd09098427c..075c2f770d3e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -942,6 +942,8 @@ void phy_start(struct phy_device *phydev);
 void phy_stop(struct phy_device *phydev);
 int phy_start_aneg(struct phy_device *phydev);
 int phy_aneg_done(struct phy_device *phydev);
+int phy_speed_down(struct phy_device *phydev, bool sync);
+int phy_speed_up(struct phy_device *phydev);
 
 int phy_stop_interrupts(struct phy_device *phydev);
 int phy_restart_aneg(struct phy_device *phydev);
-- 
cgit v1.2.3


From d9f37d01e294e5338aa3e9d3b2eda61b59b619df Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Fri, 13 Jul 2018 14:41:36 +0800
Subject: net: convert gro_count to bitmask

gro_hash size is 192 bytes, and uses 3 cache lines, if there is few
flows, gro_hash may be not fully used, so it is unnecessary to iterate
all gro_hash in napi_gro_flush(), to occupy unnecessary cacheline.

convert gro_count to a bitmask, and rename it as gro_bitmask, each bit
represents a element of gro_hash, only flush a gro_hash element if the
related bit is set, to speed up napi_gro_flush().

and update gro_bitmask only if it will be changed, to reduce cache
update

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Cc: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  9 +++++++--
 net/core/dev.c            | 36 ++++++++++++++++++++++++------------
 2 files changed, 31 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3514d67112b3..c1295c7a452e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -308,9 +308,14 @@ struct gro_list {
 };
 
 /*
- * Structure for NAPI scheduling similar to tasklet but with weighting
+ * size of gro hash buckets, must less than bit number of
+ * napi_struct::gro_bitmask
  */
 #define GRO_HASH_BUCKETS	8
+
+/*
+ * Structure for NAPI scheduling similar to tasklet but with weighting
+ */
 struct napi_struct {
 	/* The poll_list must only be managed by the entity which
 	 * changes the state of the NAPI_STATE_SCHED bit.  This means
@@ -322,7 +327,7 @@ struct napi_struct {
 
 	unsigned long		state;
 	int			weight;
-	unsigned int		gro_count;
+	unsigned long		gro_bitmask;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
 	int			poll_owner;
diff --git a/net/core/dev.c b/net/core/dev.c
index 0df1771a12f9..c883b17ee0fe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5282,9 +5282,11 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
 		list_del(&skb->list);
 		skb->next = NULL;
 		napi_gro_complete(skb);
-		napi->gro_count--;
 		napi->gro_hash[index].count--;
 	}
+
+	if (!napi->gro_hash[index].count)
+		__clear_bit(index, &napi->gro_bitmask);
 }
 
 /* napi->gro_hash[].list contains packets ordered by age.
@@ -5295,8 +5297,10 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
 	u32 i;
 
-	for (i = 0; i < GRO_HASH_BUCKETS; i++)
-		__napi_gro_flush_chain(napi, i, flush_old);
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		if (test_bit(i, &napi->gro_bitmask))
+			__napi_gro_flush_chain(napi, i, flush_old);
+	}
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
@@ -5388,8 +5392,8 @@ static void gro_flush_oldest(struct list_head *head)
 	if (WARN_ON_ONCE(!oldest))
 		return;
 
-	/* Do not adjust napi->gro_count, caller is adding a new SKB to
-	 * the chain.
+	/* Do not adjust napi->gro_hash[].count, caller is adding a new
+	 * SKB to the chain.
 	 */
 	list_del(&oldest->list);
 	napi_gro_complete(oldest);
@@ -5464,7 +5468,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		list_del(&pp->list);
 		pp->next = NULL;
 		napi_gro_complete(pp);
-		napi->gro_count--;
 		napi->gro_hash[hash].count--;
 	}
 
@@ -5477,7 +5480,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
 		gro_flush_oldest(gro_head);
 	} else {
-		napi->gro_count++;
 		napi->gro_hash[hash].count++;
 	}
 	NAPI_GRO_CB(skb)->count = 1;
@@ -5492,6 +5494,13 @@ pull:
 	if (grow > 0)
 		gro_pull_from_frag0(skb, grow);
 ok:
+	if (napi->gro_hash[hash].count) {
+		if (!test_bit(hash, &napi->gro_bitmask))
+			__set_bit(hash, &napi->gro_bitmask);
+	} else if (test_bit(hash, &napi->gro_bitmask)) {
+		__clear_bit(hash, &napi->gro_bitmask);
+	}
+
 	return ret;
 
 normal:
@@ -5890,7 +5899,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
-	if (n->gro_count) {
+	if (n->gro_bitmask) {
 		unsigned long timeout = 0;
 
 		if (work_done)
@@ -6099,7 +6108,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (napi->gro_count && !napi_disable_pending(napi) &&
+	if (napi->gro_bitmask && !napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
@@ -6114,7 +6123,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	INIT_LIST_HEAD(&napi->poll_list);
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
-	napi->gro_count = 0;
+	napi->gro_bitmask = 0;
 	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
 		INIT_LIST_HEAD(&napi->gro_hash[i].list);
 		napi->gro_hash[i].count = 0;
@@ -6174,7 +6183,7 @@ void netif_napi_del(struct napi_struct *napi)
 	napi_free_frags(napi);
 
 	flush_gro_hash(napi);
-	napi->gro_count = 0;
+	napi->gro_bitmask = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
 
@@ -6216,7 +6225,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
-	if (n->gro_count) {
+	if (n->gro_bitmask) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
 		 */
@@ -9272,6 +9281,9 @@ static struct hlist_head * __net_init netdev_create_hash(void)
 /* Initialize per network namespace state */
 static int __net_init netdev_init(struct net *net)
 {
+	BUILD_BUG_ON(GRO_HASH_BUCKETS >
+			FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+
 	if (net != &init_net)
 		INIT_LIST_HEAD(&net->dev_base_head);
 
-- 
cgit v1.2.3


From c133459765fae249ba482f62e12f987aec4376f0 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 13 Jul 2018 21:25:19 -0700
Subject: net/ethernet/freescale/fman: fix cross-build error

  CC [M]  drivers/net/ethernet/freescale/fman/fman.o
In file included from ../drivers/net/ethernet/freescale/fman/fman.c:35:
../include/linux/fsl/guts.h: In function 'guts_set_dmacr':
../include/linux/fsl/guts.h:165:2: error: implicit declaration of function 'clrsetbits_be32' [-Werror=implicit-function-declaration]
  clrsetbits_be32(&guts->dmacr, 3 << shift, device << shift);
  ^~~~~~~~~~~~~~~

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Madalin Bucur <madalin.bucur@nxp.com>
Cc: netdev@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fsl/guts.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h
index 3efa3b861d44..941b11811f85 100644
--- a/include/linux/fsl/guts.h
+++ b/include/linux/fsl/guts.h
@@ -16,6 +16,7 @@
 #define __FSL_GUTS_H__
 
 #include <linux/types.h>
+#include <linux/io.h>
 
 /**
  * Global Utility Registers.
-- 
cgit v1.2.3


From 3d85b2703783636366560c94842affd8608ec9d1 Mon Sep 17 00:00:00 2001
From: Andrea Parri <andrea.parri@amarulasolutions.com>
Date: Mon, 16 Jul 2018 11:06:02 -0700
Subject: locking/spinlock, sched/core: Clarify requirements for
 smp_mb__after_spinlock()

There are 11 interpretations of the requirements described in the header
comment for smp_mb__after_spinlock(): one for each LKMM maintainer, and
one currently encoded in the Cat file. Stick to the latter (until a more
satisfactory solution is available).

This also reworks some snippets related to the barrier to illustrate the
requirements and to link them to the idioms which are relied upon at its
call sites.

Suggested-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: akiyks@gmail.com
Cc: dhowells@redhat.com
Cc: j.alglave@ucl.ac.uk
Cc: linux-arch@vger.kernel.org
Cc: luc.maranget@inria.fr
Cc: npiggin@gmail.com
Cc: parri.andrea@gmail.com
Cc: stern@rowland.harvard.edu
Link: http://lkml.kernel.org/r/20180716180605.16115-11-paulmck@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/spinlock.h | 53 ++++++++++++++++++++++++++++++++----------------
 kernel/sched/core.c      | 41 +++++++++++++++++++------------------
 2 files changed, 57 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index fd57888d4942..3190997df9ca 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -114,29 +114,48 @@ do {								\
 #endif /*arch_spin_is_contended*/
 
 /*
- * This barrier must provide two things:
+ * smp_mb__after_spinlock() provides the equivalent of a full memory barrier
+ * between program-order earlier lock acquisitions and program-order later
+ * memory accesses.
  *
- *   - it must guarantee a STORE before the spin_lock() is ordered against a
- *     LOAD after it, see the comments at its two usage sites.
+ * This guarantees that the following two properties hold:
  *
- *   - it must ensure the critical section is RCsc.
+ *   1) Given the snippet:
  *
- * The latter is important for cases where we observe values written by other
- * CPUs in spin-loops, without barriers, while being subject to scheduling.
+ *	  { X = 0;  Y = 0; }
  *
- * CPU0			CPU1			CPU2
+ *	  CPU0				CPU1
  *
- *			for (;;) {
- *			  if (READ_ONCE(X))
- *			    break;
- *			}
- * X=1
- *			<sched-out>
- *						<sched-in>
- *						r = X;
+ *	  WRITE_ONCE(X, 1);		WRITE_ONCE(Y, 1);
+ *	  spin_lock(S);			smp_mb();
+ *	  smp_mb__after_spinlock();	r1 = READ_ONCE(X);
+ *	  r0 = READ_ONCE(Y);
+ *	  spin_unlock(S);
  *
- * without transitivity it could be that CPU1 observes X!=0 breaks the loop,
- * we get migrated and CPU2 sees X==0.
+ *      it is forbidden that CPU0 does not observe CPU1's store to Y (r0 = 0)
+ *      and CPU1 does not observe CPU0's store to X (r1 = 0); see the comments
+ *      preceding the call to smp_mb__after_spinlock() in __schedule() and in
+ *      try_to_wake_up().
+ *
+ *   2) Given the snippet:
+ *
+ *  { X = 0;  Y = 0; }
+ *
+ *  CPU0		CPU1				CPU2
+ *
+ *  spin_lock(S);	spin_lock(S);			r1 = READ_ONCE(Y);
+ *  WRITE_ONCE(X, 1);	smp_mb__after_spinlock();	smp_rmb();
+ *  spin_unlock(S);	r0 = READ_ONCE(X);		r2 = READ_ONCE(X);
+ *			WRITE_ONCE(Y, 1);
+ *			spin_unlock(S);
+ *
+ *      it is forbidden that CPU0's critical section executes before CPU1's
+ *      critical section (r0 = 1), CPU2 observes CPU1's store to Y (r1 = 1)
+ *      and CPU2 does not observe CPU0's store to X (r2 = 0); see the comments
+ *      preceding the calls to smp_rmb() in try_to_wake_up() for similar
+ *      snippets but "projected" onto two CPUs.
+ *
+ * Property (2) upgrades the lock to an RCsc lock.
  *
  * Since most load-store architectures implement ACQUIRE with an smp_mb() after
  * the LL/SC loop, they need no further barriers. Similarly all our TSO
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe365c9a08e9..0c5ec2abdf93 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1998,21 +1998,20 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * be possible to, falsely, observe p->on_rq == 0 and get stuck
 	 * in smp_cond_load_acquire() below.
 	 *
-	 * sched_ttwu_pending()                 try_to_wake_up()
-	 *   [S] p->on_rq = 1;                  [L] P->state
-	 *       UNLOCK rq->lock  -----.
-	 *                              \
-	 *				 +---   RMB
-	 * schedule()                   /
-	 *       LOCK rq->lock    -----'
-	 *       UNLOCK rq->lock
+	 * sched_ttwu_pending()			try_to_wake_up()
+	 *   STORE p->on_rq = 1			  LOAD p->state
+	 *   UNLOCK rq->lock
+	 *
+	 * __schedule() (switch to task 'p')
+	 *   LOCK rq->lock			  smp_rmb();
+	 *   smp_mb__after_spinlock();
+	 *   UNLOCK rq->lock
 	 *
 	 * [task p]
-	 *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+	 *   STORE p->state = UNINTERRUPTIBLE	  LOAD p->on_rq
 	 *
-	 * Pairs with the UNLOCK+LOCK on rq->lock from the
-	 * last wakeup of our task and the schedule that got our task
-	 * current.
+	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+	 * __schedule().  See the comment for smp_mb__after_spinlock().
 	 */
 	smp_rmb();
 	if (p->on_rq && ttwu_remote(p, wake_flags))
@@ -2026,15 +2025,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * One must be running (->on_cpu == 1) in order to remove oneself
 	 * from the runqueue.
 	 *
-	 *  [S] ->on_cpu = 1;	[L] ->on_rq
-	 *      UNLOCK rq->lock
-	 *			RMB
-	 *      LOCK   rq->lock
-	 *  [S] ->on_rq = 0;    [L] ->on_cpu
+	 * __schedule() (switch to task 'p')	try_to_wake_up()
+	 *   STORE p->on_cpu = 1		  LOAD p->on_rq
+	 *   UNLOCK rq->lock
+	 *
+	 * __schedule() (put 'p' to sleep)
+	 *   LOCK rq->lock			  smp_rmb();
+	 *   smp_mb__after_spinlock();
+	 *   STORE p->on_rq = 0			  LOAD p->on_cpu
 	 *
-	 * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
-	 * from the consecutive calls to schedule(); the first switching to our
-	 * task, the second putting it to sleep.
+	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+	 * __schedule().  See the comment for smp_mb__after_spinlock().
 	 */
 	smp_rmb();
 
-- 
cgit v1.2.3


From 7696f9910a9a40b8a952f57d3428515fabd2d889 Mon Sep 17 00:00:00 2001
From: Andrea Parri <andrea.parri@amarulasolutions.com>
Date: Mon, 16 Jul 2018 11:06:03 -0700
Subject: sched/Documentation: Update wake_up() & co. memory-barrier guarantees

Both the implementation and the users' expectation [1] for the various
wakeup primitives have evolved over time, but the documentation has not
kept up with these changes: brings it into 2018.

[1] http://lkml.kernel.org/r/20180424091510.GB4064@hirez.programming.kicks-ass.net

Also applied feedback from Alan Stern.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Akira Yokosawa <akiyks@gmail.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Daniel Lustig <dlustig@nvidia.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Jade Alglave <j.alglave@ucl.ac.uk>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luc Maranget <luc.maranget@inria.fr>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: parri.andrea@gmail.com
Link: http://lkml.kernel.org/r/20180716180605.16115-12-paulmck@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/memory-barriers.txt | 43 ++++++++++++++++++++++++---------------
 include/linux/sched.h             |  4 ++--
 kernel/sched/completion.c         |  8 ++++----
 kernel/sched/core.c               | 30 +++++++++++----------------
 kernel/sched/wait.c               |  8 ++++----
 5 files changed, 49 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index a02d6bbfc9d0..0d8d7ef131e9 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -2179,32 +2179,41 @@ or:
 	event_indicated = 1;
 	wake_up_process(event_daemon);
 
-A write memory barrier is implied by wake_up() and co.  if and only if they
-wake something up.  The barrier occurs before the task state is cleared, and so
-sits between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+A general memory barrier is executed by wake_up() if it wakes something up.
+If it doesn't wake anything up then a memory barrier may or may not be
+executed; you must not rely on it.  The barrier occurs before the task state
+is accessed, in particular, it sits between the STORE to indicate the event
+and the STORE to set TASK_RUNNING:
 
-	CPU 1				CPU 2
+	CPU 1 (Sleeper)			CPU 2 (Waker)
 	===============================	===============================
 	set_current_state();		STORE event_indicated
 	  smp_store_mb();		wake_up();
-	    STORE current->state	  <write barrier>
-	    <general barrier>		  STORE current->state
-	LOAD event_indicated
+	    STORE current->state	  ...
+	    <general barrier>		  <general barrier>
+	LOAD event_indicated		  if ((LOAD task->state) & TASK_NORMAL)
+					    STORE task->state
 
-To repeat, this write memory barrier is present if and only if something
-is actually awakened.  To see this, consider the following sequence of
-events, where X and Y are both initially zero:
+where "task" is the thread being woken up and it equals CPU 1's "current".
+
+To repeat, a general memory barrier is guaranteed to be executed by wake_up()
+if something is actually awakened, but otherwise there is no such guarantee.
+To see this, consider the following sequence of events, where X and Y are both
+initially zero:
 
 	CPU 1				CPU 2
 	===============================	===============================
-	X = 1;				STORE event_indicated
+	X = 1;				Y = 1;
 	smp_mb();			wake_up();
-	Y = 1;				wait_event(wq, Y == 1);
-	wake_up();			  load from Y sees 1, no memory barrier
-					load from X might see 0
+	LOAD Y				LOAD X
+
+If a wakeup does occur, one (at least) of the two loads must see 1.  If, on
+the other hand, a wakeup does not occur, both loads might see 0.
 
-In contrast, if a wakeup does occur, CPU 2's load from X would be guaranteed
-to see 1.
+wake_up_process() always executes a general memory barrier.  The barrier again
+occurs before the task state is accessed.  In particular, if the wake_up() in
+the previous snippet were replaced by a call to wake_up_process() then one of
+the two loads would be guaranteed to see 1.
 
 The available waker functions include:
 
@@ -2224,6 +2233,8 @@ The available waker functions include:
 	wake_up_poll();
 	wake_up_process();
 
+In terms of memory ordering, these functions all provide the same guarantees of
+a wake_up() (or stronger).
 
 [!] Note that the memory barriers implied by the sleeper and the waker do _not_
 order multiple stores before the wake-up with respect to loads of those stored
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43731fe51c97..05cd419f962d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -167,8 +167,8 @@ struct task_group;
  *   need_sleep = false;
  *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
  *
- * Where wake_up_state() (and all other wakeup primitives) imply enough
- * barriers to order the store of the variable against wakeup.
+ * where wake_up_state() executes a full memory barrier before accessing the
+ * task state.
  *
  * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
  * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index e426b0cb9ac6..a1ad5b7d5521 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -22,8 +22,8 @@
  *
  * See also complete_all(), wait_for_completion() and related routines.
  *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
+ * If this function wakes up a task, it executes a full memory barrier before
+ * accessing the task state.
  */
 void complete(struct completion *x)
 {
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(complete);
  *
  * This will wake up all threads waiting on this particular completion event.
  *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
+ * If this function wakes up a task, it executes a full memory barrier before
+ * accessing the task state.
  *
  * Since complete_all() sets the completion of @x permanently to done
  * to allow multiple waiters to finish, a call to reinit_completion()
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0c5ec2abdf93..a0065c84e73f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -412,8 +412,8 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 	 * its already queued (either by us or someone else) and will get the
 	 * wakeup due to that.
 	 *
-	 * This cmpxchg() implies a full barrier, which pairs with the write
-	 * barrier implied by the wakeup in wake_up_q().
+	 * This cmpxchg() executes a full barrier, which pairs with the full
+	 * barrier executed by the wakeup in wake_up_q().
 	 */
 	if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
 		return;
@@ -441,8 +441,8 @@ void wake_up_q(struct wake_q_head *head)
 		task->wake_q.next = NULL;
 
 		/*
-		 * wake_up_process() implies a wmb() to pair with the queueing
-		 * in wake_q_add() so as not to miss wakeups.
+		 * wake_up_process() executes a full barrier, which pairs with
+		 * the queueing in wake_q_add() so as not to miss wakeups.
 		 */
 		wake_up_process(task);
 		put_task_struct(task);
@@ -1879,8 +1879,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  *     rq(c1)->lock (if not at the same time, then in that order).
  *  C) LOCK of the rq(c1)->lock scheduling in task
  *
- * Transitivity guarantees that B happens after A and C after B.
- * Note: we only require RCpc transitivity.
+ * Release/acquire chaining guarantees that B happens after A and C after B.
  * Note: the CPU doing B need not be c0 or c1
  *
  * Example:
@@ -1942,16 +1941,9 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  *   UNLOCK rq(0)->lock
  *
  *
- * However; for wakeups there is a second guarantee we must provide, namely we
- * must observe the state that lead to our wakeup. That is, not only must our
- * task observe its own prior state, it must also observe the stores prior to
- * its wakeup.
- *
- * This means that any means of doing remote wakeups must order the CPU doing
- * the wakeup against the CPU the task is going to end up running on. This,
- * however, is already required for the regular Program-Order guarantee above,
- * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire).
- *
+ * However, for wakeups there is a second guarantee we must provide, namely we
+ * must ensure that CONDITION=1 done by the caller can not be reordered with
+ * accesses to the task state; see try_to_wake_up() and set_current_state().
  */
 
 /**
@@ -1967,6 +1959,9 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  * Atomic against schedule() which would dequeue a task, also see
  * set_current_state().
  *
+ * This function executes a full memory barrier before accessing the task
+ * state; see set_current_state().
+ *
  * Return: %true if @p->state changes (an actual wakeup was done),
  *	   %false otherwise.
  */
@@ -2141,8 +2136,7 @@ out:
  *
  * Return: 1 if the process was woken up, 0 if it was already running.
  *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
+ * This function executes a full memory barrier before accessing the task state.
  */
 int wake_up_process(struct task_struct *p)
 {
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index a7a2aaa3026a..870f97b313e3 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -134,8 +134,8 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
  *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
+ * If this function wakes up a task, it executes a full memory barrier before
+ * accessing the task state.
  */
 void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -180,8 +180,8 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
  *
  * On UP it can prevent extra preemption.
  *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
+ * If this function wakes up a task, it executes a full memory barrier before
+ * accessing the task state.
  */
 void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
-- 
cgit v1.2.3


From c1a2f7f0c06454387c2cd7b93ff1491c715a8c69 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 16 Jul 2018 15:03:31 -0400
Subject: mm: Allocate the mm_cpumask (mm->cpu_bitmap[]) dynamically based on
 nr_cpu_ids

The mm_struct always contains a cpumask bitmap, regardless of
CONFIG_CPUMASK_OFFSTACK. That means the first step can be to
simplify things, and simply have one bitmask at the end of the
mm_struct for the mm_cpumask.

This does necessitate moving everything else in mm_struct into
an anonymous sub-structure, which can be randomized when struct
randomization is enabled.

The second step is to determine the correct size for the
mm_struct slab object from the size of the mm_struct
(excluding the CPU bitmap) and the size the cpumask.

For init_mm we can simply allocate the maximum size this
kernel is compiled for, since we only have one init_mm
in the system, anyway.

Pointer magic by Mike Galbraith, to evade -Wstringop-overflow
getting confused by the dynamically sized array.

Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Rik van Riel <riel@surriel.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-team@fb.com
Cc: luto@kernel.org
Link: http://lkml.kernel.org/r/20180716190337.26133-2-riel@surriel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/efi.c |   1 +
 include/linux/mm_types.h   | 241 +++++++++++++++++++++++----------------------
 kernel/fork.c              |  15 +--
 mm/init-mm.c               |  11 +++
 4 files changed, 145 insertions(+), 123 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 232f4915223b..7f0b19410a95 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -82,6 +82,7 @@ struct mm_struct efi_mm = {
 	.mmap_sem		= __RWSEM_INITIALIZER(efi_mm.mmap_sem),
 	.page_table_lock	= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
 	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
+	.cpu_bitmap		= { [BITS_TO_LONGS(NR_CPUS)] = 0},
 };
 
 static bool disable_runtime;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 99ce070e7dcb..efdc24dd9e97 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -335,176 +335,183 @@ struct core_state {
 
 struct kioctx_table;
 struct mm_struct {
-	struct vm_area_struct *mmap;		/* list of VMAs */
-	struct rb_root mm_rb;
-	u32 vmacache_seqnum;                   /* per-thread vmacache */
+	struct {
+		struct vm_area_struct *mmap;		/* list of VMAs */
+		struct rb_root mm_rb;
+		u32 vmacache_seqnum;                   /* per-thread vmacache */
 #ifdef CONFIG_MMU
-	unsigned long (*get_unmapped_area) (struct file *filp,
+		unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
 #endif
-	unsigned long mmap_base;		/* base of mmap area */
-	unsigned long mmap_legacy_base;         /* base of mmap area in bottom-up allocations */
+		unsigned long mmap_base;	/* base of mmap area */
+		unsigned long mmap_legacy_base;	/* base of mmap area in bottom-up allocations */
 #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
-	/* Base adresses for compatible mmap() */
-	unsigned long mmap_compat_base;
-	unsigned long mmap_compat_legacy_base;
+		/* Base adresses for compatible mmap() */
+		unsigned long mmap_compat_base;
+		unsigned long mmap_compat_legacy_base;
 #endif
-	unsigned long task_size;		/* size of task vm space */
-	unsigned long highest_vm_end;		/* highest vma end address */
-	pgd_t * pgd;
-
-	/**
-	 * @mm_users: The number of users including userspace.
-	 *
-	 * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops
-	 * to 0 (i.e. when the task exits and there are no other temporary
-	 * reference holders), we also release a reference on @mm_count
-	 * (which may then free the &struct mm_struct if @mm_count also
-	 * drops to 0).
-	 */
-	atomic_t mm_users;
-
-	/**
-	 * @mm_count: The number of references to &struct mm_struct
-	 * (@mm_users count as 1).
-	 *
-	 * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
-	 * &struct mm_struct is freed.
-	 */
-	atomic_t mm_count;
+		unsigned long task_size;	/* size of task vm space */
+		unsigned long highest_vm_end;	/* highest vma end address */
+		pgd_t * pgd;
+
+		/**
+		 * @mm_users: The number of users including userspace.
+		 *
+		 * Use mmget()/mmget_not_zero()/mmput() to modify. When this
+		 * drops to 0 (i.e. when the task exits and there are no other
+		 * temporary reference holders), we also release a reference on
+		 * @mm_count (which may then free the &struct mm_struct if
+		 * @mm_count also drops to 0).
+		 */
+		atomic_t mm_users;
+
+		/**
+		 * @mm_count: The number of references to &struct mm_struct
+		 * (@mm_users count as 1).
+		 *
+		 * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
+		 * &struct mm_struct is freed.
+		 */
+		atomic_t mm_count;
 
 #ifdef CONFIG_MMU
-	atomic_long_t pgtables_bytes;		/* PTE page table pages */
+		atomic_long_t pgtables_bytes;	/* PTE page table pages */
 #endif
-	int map_count;				/* number of VMAs */
+		int map_count;			/* number of VMAs */
 
-	spinlock_t page_table_lock;		/* Protects page tables and some counters */
-	struct rw_semaphore mmap_sem;
+		spinlock_t page_table_lock; /* Protects page tables and some
+					     * counters
+					     */
+		struct rw_semaphore mmap_sem;
 
-	struct list_head mmlist;		/* List of maybe swapped mm's.	These are globally strung
-						 * together off init_mm.mmlist, and are protected
-						 * by mmlist_lock
-						 */
+		struct list_head mmlist; /* List of maybe swapped mm's.	These
+					  * are globally strung together off
+					  * init_mm.mmlist, and are protected
+					  * by mmlist_lock
+					  */
 
 
-	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
-	unsigned long hiwater_vm;	/* High-water virtual memory usage */
+		unsigned long hiwater_rss; /* High-watermark of RSS usage */
+		unsigned long hiwater_vm;  /* High-water virtual memory usage */
 
-	unsigned long total_vm;		/* Total pages mapped */
-	unsigned long locked_vm;	/* Pages that have PG_mlocked set */
-	unsigned long pinned_vm;	/* Refcount permanently increased */
-	unsigned long data_vm;		/* VM_WRITE & ~VM_SHARED & ~VM_STACK */
-	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE & ~VM_STACK */
-	unsigned long stack_vm;		/* VM_STACK */
-	unsigned long def_flags;
+		unsigned long total_vm;	   /* Total pages mapped */
+		unsigned long locked_vm;   /* Pages that have PG_mlocked set */
+		unsigned long pinned_vm;   /* Refcount permanently increased */
+		unsigned long data_vm;	   /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
+		unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
+		unsigned long stack_vm;	   /* VM_STACK */
+		unsigned long def_flags;
 
-	spinlock_t arg_lock; /* protect the below fields */
-	unsigned long start_code, end_code, start_data, end_data;
-	unsigned long start_brk, brk, start_stack;
-	unsigned long arg_start, arg_end, env_start, env_end;
+		spinlock_t arg_lock; /* protect the below fields */
+		unsigned long start_code, end_code, start_data, end_data;
+		unsigned long start_brk, brk, start_stack;
+		unsigned long arg_start, arg_end, env_start, env_end;
 
-	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
+		unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
-	/*
-	 * Special counters, in some configurations protected by the
-	 * page_table_lock, in other configurations by being atomic.
-	 */
-	struct mm_rss_stat rss_stat;
-
-	struct linux_binfmt *binfmt;
+		/*
+		 * Special counters, in some configurations protected by the
+		 * page_table_lock, in other configurations by being atomic.
+		 */
+		struct mm_rss_stat rss_stat;
 
-	cpumask_var_t cpu_vm_mask_var;
+		struct linux_binfmt *binfmt;
 
-	/* Architecture-specific MM context */
-	mm_context_t context;
+		/* Architecture-specific MM context */
+		mm_context_t context;
 
-	unsigned long flags; /* Must use atomic bitops to access the bits */
+		unsigned long flags; /* Must use atomic bitops to access */
 
-	struct core_state *core_state; /* coredumping support */
+		struct core_state *core_state; /* coredumping support */
 #ifdef CONFIG_MEMBARRIER
-	atomic_t membarrier_state;
+		atomic_t membarrier_state;
 #endif
 #ifdef CONFIG_AIO
-	spinlock_t			ioctx_lock;
-	struct kioctx_table __rcu	*ioctx_table;
+		spinlock_t			ioctx_lock;
+		struct kioctx_table __rcu	*ioctx_table;
 #endif
 #ifdef CONFIG_MEMCG
-	/*
-	 * "owner" points to a task that is regarded as the canonical
-	 * user/owner of this mm. All of the following must be true in
-	 * order for it to be changed:
-	 *
-	 * current == mm->owner
-	 * current->mm != mm
-	 * new_owner->mm == mm
-	 * new_owner->alloc_lock is held
-	 */
-	struct task_struct __rcu *owner;
+		/*
+		 * "owner" points to a task that is regarded as the canonical
+		 * user/owner of this mm. All of the following must be true in
+		 * order for it to be changed:
+		 *
+		 * current == mm->owner
+		 * current->mm != mm
+		 * new_owner->mm == mm
+		 * new_owner->alloc_lock is held
+		 */
+		struct task_struct __rcu *owner;
 #endif
-	struct user_namespace *user_ns;
+		struct user_namespace *user_ns;
 
-	/* store ref to file /proc/<pid>/exe symlink points to */
-	struct file __rcu *exe_file;
+		/* store ref to file /proc/<pid>/exe symlink points to */
+		struct file __rcu *exe_file;
 #ifdef CONFIG_MMU_NOTIFIER
-	struct mmu_notifier_mm *mmu_notifier_mm;
+		struct mmu_notifier_mm *mmu_notifier_mm;
 #endif
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
-#endif
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	struct cpumask cpumask_allocation;
+		pgtable_t pmd_huge_pte; /* protected by page_table_lock */
 #endif
 #ifdef CONFIG_NUMA_BALANCING
-	/*
-	 * numa_next_scan is the next time that the PTEs will be marked
-	 * pte_numa. NUMA hinting faults will gather statistics and migrate
-	 * pages to new nodes if necessary.
-	 */
-	unsigned long numa_next_scan;
+		/*
+		 * numa_next_scan is the next time that the PTEs will be marked
+		 * pte_numa. NUMA hinting faults will gather statistics and
+		 * migrate pages to new nodes if necessary.
+		 */
+		unsigned long numa_next_scan;
 
-	/* Restart point for scanning and setting pte_numa */
-	unsigned long numa_scan_offset;
+		/* Restart point for scanning and setting pte_numa */
+		unsigned long numa_scan_offset;
 
-	/* numa_scan_seq prevents two threads setting pte_numa */
-	int numa_scan_seq;
+		/* numa_scan_seq prevents two threads setting pte_numa */
+		int numa_scan_seq;
 #endif
-	/*
-	 * An operation with batched TLB flushing is going on. Anything that
-	 * can move process memory needs to flush the TLB when moving a
-	 * PROT_NONE or PROT_NUMA mapped page.
-	 */
-	atomic_t tlb_flush_pending;
+		/*
+		 * An operation with batched TLB flushing is going on. Anything
+		 * that can move process memory needs to flush the TLB when
+		 * moving a PROT_NONE or PROT_NUMA mapped page.
+		 */
+		atomic_t tlb_flush_pending;
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-	/* See flush_tlb_batched_pending() */
-	bool tlb_flush_batched;
+		/* See flush_tlb_batched_pending() */
+		bool tlb_flush_batched;
 #endif
-	struct uprobes_state uprobes_state;
+		struct uprobes_state uprobes_state;
 #ifdef CONFIG_HUGETLB_PAGE
-	atomic_long_t hugetlb_usage;
+		atomic_long_t hugetlb_usage;
 #endif
-	struct work_struct async_put_work;
+		struct work_struct async_put_work;
 
 #if IS_ENABLED(CONFIG_HMM)
-	/* HMM needs to track a few things per mm */
-	struct hmm *hmm;
+		/* HMM needs to track a few things per mm */
+		struct hmm *hmm;
 #endif
-} __randomize_layout;
+	} __randomize_layout;
+
+	/*
+	 * The mm_cpumask needs to be at the end of mm_struct, because it
+	 * is dynamically sized based on nr_cpu_ids.
+	 */
+	unsigned long cpu_bitmap[];
+};
 
 extern struct mm_struct init_mm;
 
+/* Pointer magic because the dynamic array size confuses some compilers. */
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	mm->cpu_vm_mask_var = &mm->cpumask_allocation;
-#endif
-	cpumask_clear(mm->cpu_vm_mask_var);
+	unsigned long cpu_bitmap = (unsigned long)mm;
+
+	cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap);
+	cpumask_clear((struct cpumask *)cpu_bitmap);
 }
 
 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
 static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 {
-	return mm->cpu_vm_mask_var;
+	return (struct cpumask *)&mm->cpu_bitmap;
 }
 
 struct mmu_gather;
diff --git a/kernel/fork.c b/kernel/fork.c
index 9440d61b925c..5b64c1b8461e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2253,6 +2253,8 @@ static void sighand_ctor(void *data)
 
 void __init proc_caches_init(void)
 {
+	unsigned int mm_size;
+
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -2269,15 +2271,16 @@ void __init proc_caches_init(void)
 			sizeof(struct fs_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
 			NULL);
+
 	/*
-	 * FIXME! The "sizeof(struct mm_struct)" currently includes the
-	 * whole struct cpumask for the OFFSTACK case. We could change
-	 * this to *only* allocate as much of it as required by the
-	 * maximum number of CPU's we can ever have.  The cpumask_allocation
-	 * is at the end of the structure, exactly for that reason.
+	 * The mm_cpumask is located at the end of mm_struct, and is
+	 * dynamically sized based on the maximum CPU number this system
+	 * can have, taking hotplug into account (nr_cpu_ids).
 	 */
+	mm_size = sizeof(struct mm_struct) + cpumask_size();
+
 	mm_cachep = kmem_cache_create_usercopy("mm_struct",
-			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+			mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
 			offsetof(struct mm_struct, saved_auxv),
 			sizeof_field(struct mm_struct, saved_auxv),
diff --git a/mm/init-mm.c b/mm/init-mm.c
index f0179c9c04c2..a787a319211e 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -15,6 +15,16 @@
 #define INIT_MM_CONTEXT(name)
 #endif
 
+/*
+ * For dynamically allocated mm_structs, there is a dynamically sized cpumask
+ * at the end of the structure, the size of which depends on the maximum CPU
+ * number the system can see. That way we allocate only as much memory for
+ * mm_cpumask() as needed for the hundreds, or thousands of processes that
+ * a system typically runs.
+ *
+ * Since there is only one init_mm in the entire system, keep it simple
+ * and size this cpu_bitmask to NR_CPUS.
+ */
 struct mm_struct init_mm = {
 	.mm_rb		= RB_ROOT,
 	.pgd		= swapper_pg_dir,
@@ -25,5 +35,6 @@ struct mm_struct init_mm = {
 	.arg_lock	=  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
 	.user_ns	= &init_user_ns,
+	.cpu_bitmap	= { [BITS_TO_LONGS(NR_CPUS)] = 0},
 	INIT_MM_CONTEXT(init_mm)
 };
-- 
cgit v1.2.3


From ffc59c496bf8498657321c59433f55bbcf2d9c38 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 10 Jul 2018 23:42:16 +0200
Subject: i2c: recovery: require either get_sda or set_sda

For bus recovery, we either need to bail out early if we can read SDA or
we need to send STOP after every pulse. Otherwise recovery might be
misinterpreted as an unwanted write. So, require one of those SDA
handling functions to avoid this problem.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c |  7 ++++++-
 include/linux/i2c.h         | 12 ++++++------
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 301285c54603..871a9731894f 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -202,7 +202,8 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap)
 		/*
 		 * If we can set SDA, we will always create STOP here to ensure
 		 * the additional pulses will do no harm. This is achieved by
-		 * letting SDA follow SCL half a cycle later.
+		 * letting SDA follow SCL half a cycle later. Check the
+		 * 'incomplete_write_byte' fault injector for details.
 		 */
 		ndelay(RECOVERY_NDELAY / 2);
 		if (bri->set_sda)
@@ -274,6 +275,10 @@ static void i2c_init_recovery(struct i2c_adapter *adap)
 			err_str = "no {get|set}_scl() found";
 			goto err;
 		}
+		if (!bri->set_sda && !bri->get_sda) {
+			err_str = "either get_sda() or set_sda() needed";
+			goto err;
+		}
 	}
 
 	return;
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 24acd69c8874..4c4360f94786 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -581,12 +581,12 @@ struct i2c_timings {
  *      recovery. Populated internally for generic GPIO recovery.
  * @set_scl: This sets/clears the SCL line. Mandatory for generic SCL recovery.
  *      Populated internally for generic GPIO recovery.
- * @get_sda: This gets current value of SDA line. Optional for generic SCL
- *      recovery. Populated internally, if sda_gpio is a valid GPIO, for generic
- *      GPIO recovery.
- * @set_sda: This sets/clears the SDA line. Optional for generic SCL recovery.
- *	Populated internally, if sda_gpio is a valid GPIO, for generic GPIO
- *	recovery.
+ * @get_sda: This gets current value of SDA line. This or set_sda() is mandatory
+ *	for generic SCL recovery. Populated internally, if sda_gpio is a valid
+ *	GPIO, for generic GPIO recovery.
+ * @set_sda: This sets/clears the SDA line. This or get_sda() is mandatory for
+ *	generic SCL recovery. Populated internally, if sda_gpio is a valid GPIO,
+ *	for generic GPIO recovery.
  * @prepare_recovery: This will be called before starting recovery. Platform may
  *	configure padmux here for SDA/SCL line or something else they want.
  * @unprepare_recovery: This will be called after completing recovery. Platform
-- 
cgit v1.2.3


From 7ca5f6be7900ca753ed01c0202dc5f998a41f4ee Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 11 Jul 2018 00:24:22 +0200
Subject: i2c: recovery: add get_bus_free callback

Some IP cores have an internal 'bus free' logic which may be more
advanced than just checking if SDA is high. Add a separate callback to
get this status. Filling it is optional.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 27 +++++++++++++++++++++++----
 include/linux/i2c.h         |  3 +++
 2 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index c7995efd58ea..59f8dfc5be36 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -158,6 +158,22 @@ static void set_sda_gpio_value(struct i2c_adapter *adap, int val)
 	gpiod_set_value_cansleep(adap->bus_recovery_info->sda_gpiod, val);
 }
 
+static int i2c_generic_bus_free(struct i2c_adapter *adap)
+{
+	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
+	int ret = -EOPNOTSUPP;
+
+	if (bri->get_bus_free)
+		ret = bri->get_bus_free(adap);
+	else if (bri->get_sda)
+		ret = bri->get_sda(adap);
+
+	if (ret < 0)
+		return ret;
+
+	return ret ? 0 : -EBUSY;
+}
+
 /*
  * We are generating clock pulses. ndelay() determines durating of clk pulses.
  * We will generate clock with rate 100 KHz and so duration of both clock levels
@@ -169,7 +185,7 @@ static void set_sda_gpio_value(struct i2c_adapter *adap, int val)
 int i2c_generic_scl_recovery(struct i2c_adapter *adap)
 {
 	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
-	int i = 0, val = 1, ret = 0;
+	int i = 0, val = 1, ret;
 
 	if (bri->prepare_recovery)
 		bri->prepare_recovery(adap);
@@ -207,14 +223,17 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap)
 			bri->set_sda(adap, val);
 		ndelay(RECOVERY_NDELAY / 2);
 
-		/* Break if SDA is high */
-		if (val && bri->get_sda) {
-			ret = bri->get_sda(adap) ? 0 : -EBUSY;
+		if (val) {
+			ret = i2c_generic_bus_free(adap);
 			if (ret == 0)
 				break;
 		}
 	}
 
+	/* If we can't check bus status, assume recovery worked */
+	if (ret == -EOPNOTSUPP)
+		ret = 0;
+
 	if (bri->unprepare_recovery)
 		bri->unprepare_recovery(adap);
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 4c4360f94786..bc8d42f8544f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -587,6 +587,8 @@ struct i2c_timings {
  * @set_sda: This sets/clears the SDA line. This or get_sda() is mandatory for
  *	generic SCL recovery. Populated internally, if sda_gpio is a valid GPIO,
  *	for generic GPIO recovery.
+ * @get_bus_free: Returns the bus free state as seen from the IP core in case it
+ *	has a more complex internal logic than just reading SDA. Optional.
  * @prepare_recovery: This will be called before starting recovery. Platform may
  *	configure padmux here for SDA/SCL line or something else they want.
  * @unprepare_recovery: This will be called after completing recovery. Platform
@@ -601,6 +603,7 @@ struct i2c_bus_recovery_info {
 	void (*set_scl)(struct i2c_adapter *adap, int val);
 	int (*get_sda)(struct i2c_adapter *adap);
 	void (*set_sda)(struct i2c_adapter *adap, int val);
+	int (*get_bus_free)(struct i2c_adapter *adap);
 
 	void (*prepare_recovery)(struct i2c_adapter *adap);
 	void (*unprepare_recovery)(struct i2c_adapter *adap);
-- 
cgit v1.2.3


From 4aaf448fa9754e2d5ee188d32327b24ffc15ca4d Mon Sep 17 00:00:00 2001
From: Jim Qu <Jim.Qu@amd.com>
Date: Tue, 17 Jul 2018 16:20:50 +0800
Subject: vga_switcheroo: set audio client id according to bound GPU id

On modern laptop, there are more and more platforms
have two GPUs, and each of them maybe have audio codec
for HDMP/DP output. For some dGPU which is no output,
audio codec usually is disabled.

In currect HDA audio driver, it will set all codec as
VGA_SWITCHEROO_DIS, the audio which is binded to UMA
will be suspended if user use debugfs to contorl power

In HDA driver side, it is difficult to know which GPU
the audio has binded to. So set the bound gpu pci dev
to vga_switcheroo.

if the audio client is not the third registration, audio
id will set in vga_switcheroo enable function. if the
audio client is the last registration when vga_switcheroo
_ready() get true, we should get audio client id from bound
GPU directly.

Signed-off-by: Jim Qu <Jim.Qu@amd.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/gpu/vga/vga_switcheroo.c | 63 +++++++++++++++++++++++++++++++++-------
 include/linux/vga_switcheroo.h   |  8 ++---
 sound/pci/hda/hda_intel.c        | 11 +++----
 3 files changed, 62 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index fc4adf3d34e8..a96bf46bc483 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -103,9 +103,11 @@
  *	runtime pm. If true, writing ON and OFF to the vga_switcheroo debugfs
  *	interface is a no-op so as not to interfere with runtime pm
  * @list: client list
+ * @vga_dev: pci device, indicate which GPU is bound to current audio client
  *
  * Registered client. A client can be either a GPU or an audio device on a GPU.
- * For audio clients, the @fb_info and @active members are bogus.
+ * For audio clients, the @fb_info and @active members are bogus. For GPU
+ * clients, the @vga_dev is bogus.
  */
 struct vga_switcheroo_client {
 	struct pci_dev *pdev;
@@ -116,6 +118,7 @@ struct vga_switcheroo_client {
 	bool active;
 	bool driver_power_control;
 	struct list_head list;
+	struct pci_dev *vga_dev;
 };
 
 /*
@@ -161,9 +164,8 @@ struct vgasr_priv {
 };
 
 #define ID_BIT_AUDIO		0x100
-#define client_is_audio(c)	((c)->id & ID_BIT_AUDIO)
-#define client_is_vga(c)	((c)->id == VGA_SWITCHEROO_UNKNOWN_ID || \
-				 !client_is_audio(c))
+#define client_is_audio(c)		((c)->id & ID_BIT_AUDIO)
+#define client_is_vga(c)		(!client_is_audio(c))
 #define client_id(c)		((c)->id & ~ID_BIT_AUDIO)
 
 static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv);
@@ -192,14 +194,29 @@ static void vga_switcheroo_enable(void)
 		vgasr_priv.handler->init();
 
 	list_for_each_entry(client, &vgasr_priv.clients, list) {
-		if (client->id != VGA_SWITCHEROO_UNKNOWN_ID)
+		if (!client_is_vga(client) ||
+		     client_id(client) != VGA_SWITCHEROO_UNKNOWN_ID)
 			continue;
+
 		ret = vgasr_priv.handler->get_client_id(client->pdev);
 		if (ret < 0)
 			return;
 
 		client->id = ret;
 	}
+
+	list_for_each_entry(client, &vgasr_priv.clients, list) {
+		if (!client_is_audio(client) ||
+		     client_id(client) != VGA_SWITCHEROO_UNKNOWN_ID)
+			continue;
+
+		ret = vgasr_priv.handler->get_client_id(client->vga_dev);
+		if (ret < 0)
+			return;
+
+		client->id = ret | ID_BIT_AUDIO;
+	}
+
 	vga_switcheroo_debugfs_init(&vgasr_priv);
 	vgasr_priv.active = true;
 }
@@ -272,7 +289,9 @@ EXPORT_SYMBOL(vga_switcheroo_handler_flags);
 
 static int register_client(struct pci_dev *pdev,
 			   const struct vga_switcheroo_client_ops *ops,
-			   enum vga_switcheroo_client_id id, bool active,
+			   enum vga_switcheroo_client_id id,
+			   struct pci_dev *vga_dev,
+			   bool active,
 			   bool driver_power_control)
 {
 	struct vga_switcheroo_client *client;
@@ -287,6 +306,7 @@ static int register_client(struct pci_dev *pdev,
 	client->id = id;
 	client->active = active;
 	client->driver_power_control = driver_power_control;
+	client->vga_dev = vga_dev;
 
 	mutex_lock(&vgasr_mutex);
 	list_add_tail(&client->list, &vgasr_priv.clients);
@@ -319,7 +339,7 @@ int vga_switcheroo_register_client(struct pci_dev *pdev,
 				   const struct vga_switcheroo_client_ops *ops,
 				   bool driver_power_control)
 {
-	return register_client(pdev, ops, VGA_SWITCHEROO_UNKNOWN_ID,
+	return register_client(pdev, ops, VGA_SWITCHEROO_UNKNOWN_ID, NULL,
 			       pdev == vga_default_device(),
 			       driver_power_control);
 }
@@ -329,19 +349,40 @@ EXPORT_SYMBOL(vga_switcheroo_register_client);
  * vga_switcheroo_register_audio_client - register audio client
  * @pdev: client pci device
  * @ops: client callbacks
- * @id: client identifier
+ * @vga_dev:  pci device which is bound to current audio client
  *
  * Register audio client (audio device on a GPU). The client is assumed
  * to use runtime PM. Beforehand, vga_switcheroo_client_probe_defer()
  * shall be called to ensure that all prerequisites are met.
  *
- * Return: 0 on success, -ENOMEM on memory allocation error.
+ * Return: 0 on success, -ENOMEM on memory allocation error, -EINVAL on getting
+ * client id error.
  */
 int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 			const struct vga_switcheroo_client_ops *ops,
-			enum vga_switcheroo_client_id id)
+			struct pci_dev *vga_dev)
 {
-	return register_client(pdev, ops, id | ID_BIT_AUDIO, false, true);
+	enum vga_switcheroo_client_id id = VGA_SWITCHEROO_UNKNOWN_ID;
+
+	/*
+	 * if vga_switcheroo has enabled, that mean two GPU clients and also
+	 * handler are registered. Get audio client id from bound GPU client
+	 * id directly, otherwise, set it as VGA_SWITCHEROO_UNKNOWN_ID,
+	 * it will set to correct id in later when vga_switcheroo_enable()
+	 * is called.
+	 */
+	mutex_lock(&vgasr_mutex);
+	if (vgasr_priv.active) {
+		id = vgasr_priv.handler->get_client_id(vga_dev);
+		if (id < 0) {
+			mutex_unlock(&vgasr_mutex);
+			return -EINVAL;
+		}
+	}
+	mutex_unlock(&vgasr_mutex);
+
+	return register_client(pdev, ops, id | ID_BIT_AUDIO, vga_dev,
+			       false, true);
 }
 EXPORT_SYMBOL(vga_switcheroo_register_audio_client);
 
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index 77f0f0af3a71..a34539b7f750 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -84,8 +84,8 @@ enum vga_switcheroo_state {
  * Client identifier. Audio clients use the same identifier & 0x100.
  */
 enum vga_switcheroo_client_id {
-	VGA_SWITCHEROO_UNKNOWN_ID = -1,
-	VGA_SWITCHEROO_IGD,
+	VGA_SWITCHEROO_UNKNOWN_ID = 0x1000,
+	VGA_SWITCHEROO_IGD = 0,
 	VGA_SWITCHEROO_DIS,
 	VGA_SWITCHEROO_MAX_CLIENTS,
 };
@@ -151,7 +151,7 @@ int vga_switcheroo_register_client(struct pci_dev *dev,
 				   bool driver_power_control);
 int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 					 const struct vga_switcheroo_client_ops *ops,
-					 enum vga_switcheroo_client_id id);
+					 struct pci_dev *vga_dev);
 
 void vga_switcheroo_client_fb_set(struct pci_dev *dev,
 				  struct fb_info *info);
@@ -180,7 +180,7 @@ static inline int vga_switcheroo_register_handler(const struct vga_switcheroo_ha
 		enum vga_switcheroo_handler_flags_t handler_flags) { return 0; }
 static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	const struct vga_switcheroo_client_ops *ops,
-	enum vga_switcheroo_client_id id) { return 0; }
+	struct pci_dev *vga_dev) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline enum vga_switcheroo_handler_flags_t vga_switcheroo_handler_flags(void) { return 0; }
 static inline int vga_switcheroo_lock_ddc(struct pci_dev *pdev) { return -ENODEV; }
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 1ae1850b3bfd..1967a5537d68 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1319,15 +1319,16 @@ static const struct vga_switcheroo_client_ops azx_vs_ops = {
 static int register_vga_switcheroo(struct azx *chip)
 {
 	struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
+	struct pci_dev *p;
 	int err;
 
 	if (!hda->use_vga_switcheroo)
 		return 0;
-	/* FIXME: currently only handling DIS controller
-	 * is there any machine with two switchable HDMI audio controllers?
-	 */
-	err = vga_switcheroo_register_audio_client(chip->pci, &azx_vs_ops,
-						   VGA_SWITCHEROO_DIS);
+
+	p = get_bound_vga(chip->pci);
+	err = vga_switcheroo_register_audio_client(chip->pci, &azx_vs_ops, p);
+	pci_dev_put(p);
+
 	if (err < 0)
 		return err;
 	hda->vga_switcheroo_registered = 1;
-- 
cgit v1.2.3


From 5b56c182edb1224bc1a97a1c74003eaa0eb59daf Mon Sep 17 00:00:00 2001
From: Wenyou Yang <wenyou.yang@atmel.com>
Date: Tue, 17 Jul 2018 11:26:55 +0300
Subject: ARM: at91: pm: Add ULP1 mode support

In the ULP1 mode, in order to achieve the lowest power consumption
with the system in retention mode and be able to resume on the wake
up events, all the clocks are shut off, inclusive the embedded 12MHz
RC oscillator, and the number of wake up sources is limited as well.
When the wake up event is asserted, the embedded 12MHz RC oscillator
restarts automatically.

The ULP1 (Ultra Low-power mode 1) is introduced by SAMA5D2.

The previous size of pm_suspend.o was 2148 bytes. With the addition of
ULP1 mode the new size of pm_suspend.o raised at 2456 bytes.

Signed-off-by: Wenyou Yang <wenyou.yang@atmel.com>
Signed-off-by: Ludovic Desroches <ludovic.desroches@microchip.com>
[claudiu.beznea@microchip.com: aligned with 4.18-rc1]
Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 arch/arm/mach-at91/pm.c         |   1 +
 arch/arm/mach-at91/pm.h         |   3 +-
 arch/arm/mach-at91/pm_suspend.S | 142 ++++++++++++++++++++++++++++++++++------
 include/linux/clk/at91_pmc.h    |   2 +
 4 files changed, 127 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index d43f00a715d7..099d8094018c 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -42,6 +42,7 @@ extern void at91_pinctrl_gpio_resume(void);
 static const match_table_t pm_modes __initconst = {
 	{ AT91_PM_STANDBY, "standby" },
 	{ AT91_PM_ULP0, "ulp0" },
+	{ AT91_PM_ULP1, "ulp1" },
 	{ AT91_PM_BACKUP, "backup" },
 	{ -1, NULL },
 };
diff --git a/arch/arm/mach-at91/pm.h b/arch/arm/mach-at91/pm.h
index c44eaf17db86..9bd4e6ca672a 100644
--- a/arch/arm/mach-at91/pm.h
+++ b/arch/arm/mach-at91/pm.h
@@ -23,7 +23,8 @@
 
 #define	AT91_PM_STANDBY		0x00
 #define AT91_PM_ULP0		0x01
-#define	AT91_PM_BACKUP		0x02
+#define AT91_PM_ULP1		0x02
+#define	AT91_PM_BACKUP		0x03
 
 #ifndef __ASSEMBLY__
 struct at91_pm_data {
diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S
index 821322d1a64d..a7c6ae13c945 100644
--- a/arch/arm/mach-at91/pm_suspend.S
+++ b/arch/arm/mach-at91/pm_suspend.S
@@ -41,6 +41,15 @@ tmp2	.req	r5
 	beq	1b
 	.endm
 
+/*
+ * Wait for main oscillator selection is done
+ */
+	.macro wait_moscsels
+1:	ldr	tmp1, [pmc, #AT91_PMC_SR]
+	tst	tmp1, #AT91_PMC_MOSCSELS
+	beq	1b
+	.endm
+
 /*
  * Wait until PLLA has locked.
  */
@@ -112,19 +121,20 @@ ENTRY(at91_pm_suspend_in_sram)
 	bl	at91_sramc_self_refresh
 
 	ldr	r0, .pm_mode
-	cmp	r0, #AT91_PM_ULP0
-	beq	ulp0_mode
+	cmp	r0, #AT91_PM_STANDBY
+	beq	standby
 	cmp	r0, #AT91_PM_BACKUP
 	beq	backup_mode
 
+	bl	at91_ulp_mode
+	b	exit_suspend
+
+standby:
 	/* Wait for interrupt */
 	ldr	pmc, .pmc_base
 	at91_cpu_idle
 	b	exit_suspend
 
-ulp0_mode:
-	bl	at91_ulp0_mode
-	b	exit_suspend
 backup_mode:
 	bl	at91_backup_mode
 	b	exit_suspend
@@ -151,7 +161,102 @@ ENTRY(at91_backup_mode)
 	str	tmp1, [r0, #0]
 ENDPROC(at91_backup_mode)
 
-ENTRY(at91_ulp0_mode)
+.macro at91_pm_ulp0_mode
+	ldr	pmc, .pmc_base
+
+	/* Turn off the crystal oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	bic	tmp1, tmp1, #AT91_PMC_MOSCEN
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	/* Wait for interrupt */
+	at91_cpu_idle
+
+	/* Turn on the crystal oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	orr	tmp1, tmp1, #AT91_PMC_MOSCEN
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	wait_moscrdy
+.endm
+
+/**
+ * Note: This procedure only applies on the platform which uses
+ * the external crystal oscillator as a main clock source.
+ */
+.macro at91_pm_ulp1_mode
+	ldr	pmc, .pmc_base
+
+	/* Switch the main clock source to 12-MHz RC oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	bic	tmp1, tmp1, #AT91_PMC_MOSCSEL
+	bic	tmp1, tmp1, #AT91_PMC_KEY_MASK
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	wait_moscsels
+
+	/* Disable the crystal oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	bic	tmp1, tmp1, #AT91_PMC_MOSCEN
+	bic	tmp1, tmp1, #AT91_PMC_KEY_MASK
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	/* Switch the master clock source to main clock */
+	ldr	tmp1, [pmc, #AT91_PMC_MCKR]
+	bic	tmp1, tmp1, #AT91_PMC_CSS
+	orr	tmp1, tmp1, #AT91_PMC_CSS_MAIN
+	str	tmp1, [pmc, #AT91_PMC_MCKR]
+
+	wait_mckrdy
+
+	/* Enter the ULP1 mode by set WAITMODE bit in CKGR_MOR */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	orr	tmp1, tmp1, #AT91_PMC_WAITMODE
+	bic	tmp1, tmp1, #AT91_PMC_KEY_MASK
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	wait_mckrdy
+
+	/* Enable the crystal oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	orr	tmp1, tmp1, #AT91_PMC_MOSCEN
+	bic	tmp1, tmp1, #AT91_PMC_KEY_MASK
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	wait_moscrdy
+
+	/* Switch the master clock source to slow clock */
+	ldr	tmp1, [pmc, #AT91_PMC_MCKR]
+	bic	tmp1, tmp1, #AT91_PMC_CSS
+	str	tmp1, [pmc, #AT91_PMC_MCKR]
+
+	wait_mckrdy
+
+	/* Switch main clock source to crystal oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	orr	tmp1, tmp1, #AT91_PMC_MOSCSEL
+	bic	tmp1, tmp1, #AT91_PMC_KEY_MASK
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	wait_moscsels
+
+	/* Switch the master clock source to main clock */
+	ldr	tmp1, [pmc, #AT91_PMC_MCKR]
+	bic	tmp1, tmp1, #AT91_PMC_CSS
+	orr	tmp1, tmp1, #AT91_PMC_CSS_MAIN
+	str	tmp1, [pmc, #AT91_PMC_MCKR]
+
+	wait_mckrdy
+.endm
+
+ENTRY(at91_ulp_mode)
 	ldr	pmc, .pmc_base
 
 	/* Save Master clock setting */
@@ -174,22 +279,19 @@ ENTRY(at91_ulp0_mode)
 	orr	tmp1, tmp1, #(1 << 29)		/* bit 29 always set */
 	str	tmp1, [pmc, #AT91_CKGR_PLLAR]
 
-	/* Turn off the main oscillator */
-	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
-	bic	tmp1, tmp1, #AT91_PMC_MOSCEN
-	orr	tmp1, tmp1, #AT91_PMC_KEY
-	str	tmp1, [pmc, #AT91_CKGR_MOR]
+	ldr	r0, .pm_mode
+	cmp	r0, #AT91_PM_ULP1
+	beq	ulp1_mode
 
-	/* Wait for interrupt */
-	at91_cpu_idle
+	at91_pm_ulp0_mode
+	b	ulp_exit
 
-	/* Turn on the main oscillator */
-	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
-	orr	tmp1, tmp1, #AT91_PMC_MOSCEN
-	orr	tmp1, tmp1, #AT91_PMC_KEY
-	str	tmp1, [pmc, #AT91_CKGR_MOR]
+ulp1_mode:
+	at91_pm_ulp1_mode
+	b	ulp_exit
 
-	wait_moscrdy
+ulp_exit:
+	ldr	pmc, .pmc_base
 
 	/* Restore PLLA setting */
 	ldr	tmp1, .saved_pllar
@@ -212,7 +314,7 @@ ENTRY(at91_ulp0_mode)
 	wait_mckrdy
 
 	mov	pc, lr
-ENDPROC(at91_ulp0_mode)
+ENDPROC(at91_ulp_mode)
 
 /*
  * void at91_sramc_self_refresh(unsigned int is_active)
diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h
index 6aca5ce8a99a..4ea2cbf9b50d 100644
--- a/include/linux/clk/at91_pmc.h
+++ b/include/linux/clk/at91_pmc.h
@@ -47,8 +47,10 @@
 #define	AT91_CKGR_MOR		0x20			/* Main Oscillator Register [not on SAM9RL] */
 #define		AT91_PMC_MOSCEN		(1    <<  0)		/* Main Oscillator Enable */
 #define		AT91_PMC_OSCBYPASS	(1    <<  1)		/* Oscillator Bypass */
+#define		AT91_PMC_WAITMODE	(1    <<  2)		/* Wait Mode Command */
 #define		AT91_PMC_MOSCRCEN	(1    <<  3)		/* Main On-Chip RC Oscillator Enable [some SAM9] */
 #define		AT91_PMC_OSCOUNT	(0xff <<  8)		/* Main Oscillator Start-up Time */
+#define		AT91_PMC_KEY_MASK	(0xff << 16)
 #define		AT91_PMC_KEY		(0x37 << 16)		/* MOR Writing Key */
 #define		AT91_PMC_MOSCSEL	(1    << 24)		/* Main Oscillator Selection [some SAM9] */
 #define		AT91_PMC_CFDEN		(1    << 25)		/* Clock Failure Detector Enable [some SAM9] */
-- 
cgit v1.2.3


From 3abd729aa468d7346f12d7dfc8f81aba653f6c88 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Tue, 17 Jul 2018 11:26:56 +0300
Subject: ARM: at91: pm: add PMC fast startup registers defines

Add PMC fast startup registers defines.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/clk/at91_pmc.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h
index 4ea2cbf9b50d..931ab05f771d 100644
--- a/include/linux/clk/at91_pmc.h
+++ b/include/linux/clk/at91_pmc.h
@@ -157,6 +157,19 @@
 #define		AT91_PMC_GCKRDY		(1 << 24)		/* Generated Clocks */
 #define	AT91_PMC_IMR		0x6c			/* Interrupt Mask Register */
 
+#define AT91_PMC_FSMR		0x70		/* Fast Startup Mode Register */
+#define AT91_PMC_FSTT(n)	BIT(n)
+#define AT91_PMC_RTCAL		BIT(17)		/* RTC Alarm Enable */
+#define AT91_PMC_USBAL		BIT(18)		/* USB Resume Enable */
+#define AT91_PMC_SDMMC_CD	BIT(19)		/* SDMMC Card Detect Enable */
+#define AT91_PMC_LPM		BIT(20)		/* Low-power Mode */
+#define AT91_PMC_RXLP_MCE	BIT(24)		/* Backup UART Receive Enable */
+#define AT91_PMC_ACC_CE		BIT(25)		/* ACC Enable */
+
+#define AT91_PMC_FSPR		0x74		/* Fast Startup Polarity Reg */
+
+#define AT91_PMC_FS_INPUT_MASK  0x7ff
+
 #define AT91_PMC_PLLICPR	0x80			/* PLL Charge Pump Current Register */
 
 #define AT91_PMC_PROT		0xe4			/* Write Protect Mode Register [some SAM9] */
-- 
cgit v1.2.3


From ba6b055e0f3b4ff4942e4ab273260affcfad9bff Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 13 Jul 2018 16:13:49 +0200
Subject: HID: input: enable Totem on the Dell Canvas 27

The Dell Canvas 27 has a tool that can be put on the surface and acts
as a dial. The firmware processes the detection of the tool and forward
regular HID reports with X, Y, Azimuth, rotation, width/height.

The firmware also exports Contact ID, Countact Count which may hint that
several totems can be used at the same time (the FW only supports one).

We can tell that MT_TOOL_DIAL will be reported by setting the min/max
of ABS_MT_TOOL_TYPE to MT_TOOL_DIAL.

This tool is aimed at being used by the system and not the applications,
so the user space processing should not go through the regular touch
inputs.
We set INPUT_PROP_DIRECT which applies ID_INPUT_TOUCHSCREEN to this new
type of devices, but we will counter this for the time being with the
special udev hwdb entry mentioned above.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1511846

Acked-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c      |  3 +++
 drivers/hid/hid-multitouch.c | 48 +++++++++++++++++++++++++++++++-------------
 include/linux/hid.h          |  6 ++++++
 3 files changed, 43 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index ab93dd5927c3..4e94ea3e280a 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1550,6 +1550,9 @@ static struct hid_input *hidinput_allocate(struct hid_device *hid,
 		case HID_GD_WIRELESS_RADIO_CTLS:
 			suffix = "Wireless Radio Control";
 			break;
+		case HID_GD_SYSTEM_MULTIAXIS:
+			suffix = "System Multi Axis";
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 346e9caef6f3..a4a274ebfbef 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -665,7 +665,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 	/*
 	 * Model touchscreens providing buttons as touchpads.
 	 */
-	if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) {
+	if (field->application == HID_DG_TOUCHSCREEN &&
+	    (usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) {
 		app->mt_flags |= INPUT_MT_POINTER;
 		td->inputmode_value = MT_INPUTMODE_TOUCHPAD;
 	}
@@ -692,6 +693,19 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 
 			set_abs(hi->input, code, field, cls->sn_move);
 
+			/*
+			 * A system multi-axis that exports X and Y has a high
+			 * chance of being used directly on a surface
+			 */
+			if (field->application == HID_GD_SYSTEM_MULTIAXIS) {
+				__set_bit(INPUT_PROP_DIRECT,
+					  hi->input->propbit);
+				input_set_abs_params(hi->input,
+						     ABS_MT_TOOL_TYPE,
+						     MT_TOOL_DIAL,
+						     MT_TOOL_DIAL, 0, 0);
+			}
+
 			return 1;
 		case HID_GD_Y:
 			if (prev_usage && (prev_usage->hid == usage->hid)) {
@@ -725,7 +739,9 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 			MT_STORE_FIELD(confidence_state);
 			return 1;
 		case HID_DG_TIPSWITCH:
-			input_set_capability(hi->input, EV_KEY, BTN_TOUCH);
+			if (field->application != HID_GD_SYSTEM_MULTIAXIS)
+				input_set_capability(hi->input,
+						     EV_KEY, BTN_TOUCH);
 			MT_STORE_FIELD(tip_state);
 			return 1;
 		case HID_DG_CONTACTID:
@@ -802,6 +818,10 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 		    field->application == HID_DG_TOUCHPAD &&
 		    (usage->hid & HID_USAGE) > 1)
 			code--;
+
+		if (field->application == HID_GD_SYSTEM_MULTIAXIS)
+			code = BTN_0  + ((usage->hid - 1) & HID_USAGE);
+
 		hid_map_usage(hi, usage, bit, max, EV_KEY, code);
 		input_set_capability(hi->input, EV_KEY, code);
 		return 1;
@@ -899,6 +919,7 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input,
 	bool inrange_state = false;
 	int active;
 	int slotnum;
+	int tool = MT_TOOL_FINGER;
 
 	if (!slot)
 		return -EINVAL;
@@ -939,8 +960,11 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input,
 
 	active = (*slot->tip_state || inrange_state) && confidence_state;
 
+	if (app->application == HID_GD_SYSTEM_MULTIAXIS)
+		tool = MT_TOOL_DIAL;
+
 	input_mt_slot(input, slotnum);
-	input_mt_report_slot_state(input, MT_TOOL_FINGER, active);
+	input_mt_report_slot_state(input, tool, active);
 	if (active) {
 		/* this finger is in proximity of the sensor */
 		int wide = (*slot->w > *slot->h);
@@ -1203,6 +1227,7 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 	    field->application != HID_GD_SYSTEM_CONTROL &&
 	    field->application != HID_CP_CONSUMER_CONTROL &&
 	    field->application != HID_GD_WIRELESS_RADIO_CTLS &&
+	    field->application != HID_GD_SYSTEM_MULTIAXIS &&
 	    !(field->application == HID_VD_ASUS_CUSTOM_MEDIA_KEYS &&
 	      application->quirks & MT_QUIRK_ASUS_CUSTOM_UP))
 		return -1;
@@ -1230,9 +1255,7 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 		return 1;
 	}
 
-	if (rdata->is_mt_collection &&
-	    (field->application == HID_DG_TOUCHSCREEN ||
-	     field->application == HID_DG_TOUCHPAD))
+	if (rdata->is_mt_collection)
 		return mt_touch_input_mapping(hdev, hi, field, usage, bit, max,
 					      application);
 
@@ -1244,15 +1267,11 @@ static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi,
 		struct hid_field *field, struct hid_usage *usage,
 		unsigned long **bit, int *max)
 {
-	/*
-	 * some egalax touchscreens have "application == HID_DG_TOUCHSCREEN"
-	 * for the stylus.
-	 */
-	if (field->physical == HID_DG_STYLUS)
-		return 0;
+	struct mt_device *td = hid_get_drvdata(hdev);
+	struct mt_report_data *rdata;
 
-	if (field->application == HID_DG_TOUCHSCREEN ||
-	    field->application == HID_DG_TOUCHPAD) {
+	rdata = mt_find_report_data(td, field->report);
+	if (rdata && rdata->is_mt_collection) {
 		/* We own these mappings, tell hid-input to ignore them */
 		return -1;
 	}
@@ -1460,6 +1479,7 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
 		case HID_GD_SYSTEM_CONTROL:
 		case HID_CP_CONSUMER_CONTROL:
 		case HID_GD_WIRELESS_RADIO_CTLS:
+		case HID_GD_SYSTEM_MULTIAXIS:
 			/* already handled by hid core */
 			break;
 		case HID_DG_TOUCHSCREEN:
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 773bcb1d4044..2e4498d52a2f 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -190,6 +190,12 @@ struct hid_item {
  * http://www.usb.org/developers/hidpage/HUTRR40RadioHIDUsagesFinal.pdf
  */
 #define HID_GD_WIRELESS_RADIO_CTLS	0x0001000c
+/*
+ * System Multi-Axis, see:
+ * http://www.usb.org/developers/hidpage/HUTRR62_-_Generic_Desktop_CA_for_System_Multi-Axis_Controllers.txt
+ */
+#define HID_GD_SYSTEM_MULTIAXIS	0x0001000e
+
 #define HID_GD_X		0x00010030
 #define HID_GD_Y		0x00010031
 #define HID_GD_Z		0x00010032
-- 
cgit v1.2.3


From 08a8a7cf14595f95d5cbb28ef5c15c56a6255fb4 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 13 Jul 2018 16:13:50 +0200
Subject: HID: core: do not upper bound the collection stack

Looks like 4 was sufficient until now. However, the Surface Dial needs
a stack of 5 and simply fails at probing.
Dynamically add HID_COLLECTION_STACK_SIZE to the size of the stack if
we hit the upper bound.

Checkpatch complains about bare unsigned, so converting those to
'unsigned int' in struct hid_parser

Acked-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 17 ++++++++++++++---
 include/linux/hid.h    |  9 +++++----
 2 files changed, 19 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 3942ee61bd1c..5de6f18c9bf7 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -128,9 +128,19 @@ static int open_collection(struct hid_parser *parser, unsigned type)
 
 	usage = parser->local.usage[0];
 
-	if (parser->collection_stack_ptr == HID_COLLECTION_STACK_SIZE) {
-		hid_err(parser->device, "collection stack overflow\n");
-		return -EINVAL;
+	if (parser->collection_stack_ptr == parser->collection_stack_size) {
+		unsigned int *collection_stack;
+		unsigned int new_size = parser->collection_stack_size +
+					HID_COLLECTION_STACK_SIZE;
+
+		collection_stack = krealloc(parser->collection_stack,
+					    new_size * sizeof(unsigned int),
+					    GFP_KERNEL);
+		if (!collection_stack)
+			return -ENOMEM;
+
+		parser->collection_stack = collection_stack;
+		parser->collection_stack_size = new_size;
 	}
 
 	if (parser->device->maxcollection == parser->device->collection_size) {
@@ -840,6 +850,7 @@ static int hid_scan_report(struct hid_device *hid)
 		break;
 	}
 
+	kfree(parser->collection_stack);
 	vfree(parser);
 	return 0;
 }
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 2e4498d52a2f..aee281522c6d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -644,12 +644,13 @@ static inline void hid_set_drvdata(struct hid_device *hdev, void *data)
 struct hid_parser {
 	struct hid_global     global;
 	struct hid_global     global_stack[HID_GLOBAL_STACK_SIZE];
-	unsigned              global_stack_ptr;
+	unsigned int          global_stack_ptr;
 	struct hid_local      local;
-	unsigned              collection_stack[HID_COLLECTION_STACK_SIZE];
-	unsigned              collection_stack_ptr;
+	unsigned int         *collection_stack;
+	unsigned int          collection_stack_ptr;
+	unsigned int          collection_stack_size;
 	struct hid_device    *device;
-	unsigned              scan_flags;
+	unsigned int          scan_flags;
 };
 
 struct hid_class_descriptor {
-- 
cgit v1.2.3


From 9afb719e7046c4f2462278862ab3db2961cc141c Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Thu, 5 Jul 2018 17:49:37 -0700
Subject: kbuild: Add build salt to the kernel and modules

In Fedora, the debug information is packaged separately (foo-debuginfo) and
can be installed separately. There's been a long standing issue where only
one version of a debuginfo info package can be installed at a time. There's
been an effort for Fedora for parallel debuginfo to rectify this problem.

Part of the requirement to allow parallel debuginfo to work is that build ids
are unique between builds. The existing upstream rpm implementation ensures
this by re-calculating the build-id using the version and release as a
seed. This doesn't work 100% for the kernel because of the vDSO which is
its own binary and doesn't get updated when embedded.

Fix this by adding some data in an ELF note for both the kernel and modules.
The data is controlled via a Kconfig option so distributions can set it
to an appropriate value to ensure uniqueness between builds.

Suggested-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 include/linux/build-salt.h | 20 ++++++++++++++++++++
 init/Kconfig               |  9 +++++++++
 init/version.c             |  3 +++
 scripts/mod/modpost.c      |  3 +++
 4 files changed, 35 insertions(+)
 create mode 100644 include/linux/build-salt.h

(limited to 'include/linux')

diff --git a/include/linux/build-salt.h b/include/linux/build-salt.h
new file mode 100644
index 000000000000..bb007bd05e7a
--- /dev/null
+++ b/include/linux/build-salt.h
@@ -0,0 +1,20 @@
+#ifndef __BUILD_SALT_H
+#define __BUILD_SALT_H
+
+#include <linux/elfnote.h>
+
+#define LINUX_ELFNOTE_BUILD_SALT       0x100
+
+#ifdef __ASSEMBLER__
+
+#define BUILD_SALT \
+       ELFNOTE(Linux, LINUX_ELFNOTE_BUILD_SALT, .asciz CONFIG_BUILD_SALT)
+
+#else
+
+#define BUILD_SALT \
+       ELFNOTE32("Linux", LINUX_ELFNOTE_BUILD_SALT, CONFIG_BUILD_SALT)
+
+#endif
+
+#endif /* __BUILD_SALT_H */
diff --git a/init/Kconfig b/init/Kconfig
index 041f3a022122..d39b31484c52 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -107,6 +107,15 @@ config LOCALVERSION_AUTO
 
 	  which is done within the script "scripts/setlocalversion".)
 
+config BUILD_SALT
+       string "Build ID Salt"
+       default ""
+       help
+          The build ID is used to link binaries and their debug info. Setting
+          this option will use the value in the calculation of the build id.
+          This is mostly useful for distributions which want to ensure the
+          build is unique between builds. It's safe to leave the default.
+
 config HAVE_KERNEL_GZIP
 	bool
 
diff --git a/init/version.c b/init/version.c
index bfb4e3f4955e..ef4012ec4375 100644
--- a/init/version.c
+++ b/init/version.c
@@ -7,6 +7,7 @@
  */
 
 #include <generated/compile.h>
+#include <linux/build-salt.h>
 #include <linux/export.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
@@ -49,3 +50,5 @@ const char linux_proc_banner[] =
 	"%s version %s"
 	" (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ")"
 	" (" LINUX_COMPILER ") %s\n";
+
+BUILD_SALT;
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 1663fb19343a..dc6d714e4dcb 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -2125,10 +2125,13 @@ static int check_modname_len(struct module *mod)
  **/
 static void add_header(struct buffer *b, struct module *mod)
 {
+	buf_printf(b, "#include <linux/build-salt.h>\n");
 	buf_printf(b, "#include <linux/module.h>\n");
 	buf_printf(b, "#include <linux/vermagic.h>\n");
 	buf_printf(b, "#include <linux/compiler.h>\n");
 	buf_printf(b, "\n");
+	buf_printf(b, "BUILD_SALT;\n");
+	buf_printf(b, "\n");
 	buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n");
 	buf_printf(b, "MODULE_INFO(name, KBUILD_MODNAME);\n");
 	buf_printf(b, "\n");
-- 
cgit v1.2.3


From 2fe31e43124040a67495fa81f7ac67bc4c09ebc8 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 17 Jul 2018 21:48:10 +0200
Subject: hwmon: Add missing HWMON_T_LCRIT_ALARM define

The enum hwmon_temp_lcrit_alarm exists, but the BIT definition is
missing.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/hwmon.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index e5fd2707b6df..1b74ad11a5a4 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -93,6 +93,7 @@ enum hwmon_temp_attributes {
 #define HWMON_T_MIN_ALARM	BIT(hwmon_temp_min_alarm)
 #define HWMON_T_MAX_ALARM	BIT(hwmon_temp_max_alarm)
 #define HWMON_T_CRIT_ALARM	BIT(hwmon_temp_crit_alarm)
+#define HWMON_T_LCRIT_ALARM	BIT(hwmon_temp_lcrit_alarm)
 #define HWMON_T_EMERGENCY_ALARM	BIT(hwmon_temp_emergency_alarm)
 #define HWMON_T_FAULT		BIT(hwmon_temp_fault)
 #define HWMON_T_OFFSET		BIT(hwmon_temp_offset)
-- 
cgit v1.2.3


From aa7f29b07c8702127124d0522e3cd46850cdbc41 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 17 Jul 2018 21:48:11 +0200
Subject: hwmon: Add support for power min, lcrit, min_alarm and lcrit_alarm

Some sensors support reporting minimal and lower critical power, as
well as alarms when these thresholds are reached. Add support for
these attributes to the hwmon core.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/hwmon/hwmon.c | 4 ++++
 include/linux/hwmon.h | 8 ++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index e88c01961948..33d51281272b 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -394,12 +394,16 @@ static const char * const hwmon_power_attr_templates[] = {
 	[hwmon_power_cap_hyst] = "power%d_cap_hyst",
 	[hwmon_power_cap_max] = "power%d_cap_max",
 	[hwmon_power_cap_min] = "power%d_cap_min",
+	[hwmon_power_min] = "power%d_min",
 	[hwmon_power_max] = "power%d_max",
+	[hwmon_power_lcrit] = "power%d_lcrit",
 	[hwmon_power_crit] = "power%d_crit",
 	[hwmon_power_label] = "power%d_label",
 	[hwmon_power_alarm] = "power%d_alarm",
 	[hwmon_power_cap_alarm] = "power%d_cap_alarm",
+	[hwmon_power_min_alarm] = "power%d_min_alarm",
 	[hwmon_power_max_alarm] = "power%d_max_alarm",
+	[hwmon_power_lcrit_alarm] = "power%d_lcrit_alarm",
 	[hwmon_power_crit_alarm] = "power%d_crit_alarm",
 };
 
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 1b74ad11a5a4..b217101ca76e 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -188,12 +188,16 @@ enum hwmon_power_attributes {
 	hwmon_power_cap_hyst,
 	hwmon_power_cap_max,
 	hwmon_power_cap_min,
+	hwmon_power_min,
 	hwmon_power_max,
 	hwmon_power_crit,
+	hwmon_power_lcrit,
 	hwmon_power_label,
 	hwmon_power_alarm,
 	hwmon_power_cap_alarm,
+	hwmon_power_min_alarm,
 	hwmon_power_max_alarm,
+	hwmon_power_lcrit_alarm,
 	hwmon_power_crit_alarm,
 };
 
@@ -214,12 +218,16 @@ enum hwmon_power_attributes {
 #define HWMON_P_CAP_HYST		BIT(hwmon_power_cap_hyst)
 #define HWMON_P_CAP_MAX			BIT(hwmon_power_cap_max)
 #define HWMON_P_CAP_MIN			BIT(hwmon_power_cap_min)
+#define HWMON_P_MIN			BIT(hwmon_power_min)
 #define HWMON_P_MAX			BIT(hwmon_power_max)
+#define HWMON_P_LCRIT			BIT(hwmon_power_lcrit)
 #define HWMON_P_CRIT			BIT(hwmon_power_crit)
 #define HWMON_P_LABEL			BIT(hwmon_power_label)
 #define HWMON_P_ALARM			BIT(hwmon_power_alarm)
 #define HWMON_P_CAP_ALARM		BIT(hwmon_power_cap_alarm)
+#define HWMON_P_MIN_ALARM		BIT(hwmon_power_max_alarm)
 #define HWMON_P_MAX_ALARM		BIT(hwmon_power_max_alarm)
+#define HWMON_P_LCRIT_ALARM		BIT(hwmon_power_lcrit_alarm)
 #define HWMON_P_CRIT_ALARM		BIT(hwmon_power_crit_alarm)
 
 enum hwmon_energy_attributes {
-- 
cgit v1.2.3


From dcb5d0fcaa1ddd0af9c18ef51e6b05e38a6cec71 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 17 Jul 2018 21:48:12 +0200
Subject: hwmon: Add helper to tell if a char is invalid in a name

HWMON device names are not allowed to contain "-* \t\n". Add a helper
which will return true if passed an invalid character. It can be used
to massage a string into a hwmon compatible name by replacing invalid
characters with '_'.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/hwmon.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index b217101ca76e..9493d4a388db 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -398,4 +398,27 @@ devm_hwmon_device_register_with_info(struct device *dev,
 void hwmon_device_unregister(struct device *dev);
 void devm_hwmon_device_unregister(struct device *dev);
 
+/**
+ * hwmon_is_bad_char - Is the char invalid in a hwmon name
+ * @ch: the char to be considered
+ *
+ * hwmon_is_bad_char() can be used to determine if the given character
+ * may not be used in a hwmon name.
+ *
+ * Returns true if the char is invalid, false otherwise.
+ */
+static inline bool hwmon_is_bad_char(const char ch)
+{
+	switch (ch) {
+	case '-':
+	case '*':
+	case ' ':
+	case '\t':
+	case '\n':
+		return true;
+	default:
+		return false;
+	}
+}
+
 #endif
-- 
cgit v1.2.3


From 1323061a018a7514287894a552c4ec2a5f0cb0cd Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 17 Jul 2018 21:48:13 +0200
Subject: net: phy: sfp: Add HWMON support for module sensors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SFP modules can contain a number of sensors. The EEPROM also contains
recommended alarm and critical values for each sensor, and indications
of if these have been exceeded. Export this information via
HWMON. Currently temperature, VCC, bias current, transmit power, and
possibly receiver power is supported.

The sensors in the modules can either return calibrate or uncalibrated
values. Uncalibrated values need to be manipulated, using coefficients
provided in the SFP EEPROM. Uncalibrated receive power values require
floating point maths in order to calibrate them. Performing this in
the kernel is hard. So if the SFP module indicates it uses
uncalibrated values, RX power is not made available.

With this hwmon device, it is possible to view the sensor values using
lm-sensors programs:

in0:          +3.29 V  (crit min =  +2.90 V, min =  +3.00 V)
                       (max =  +3.60 V, crit max =  +3.70 V)
temp1:        +33.0°C  (low  =  -5.0°C, high = +80.0°C)
                       (crit low = -10.0°C, crit = +85.0°C)
power1:      1000.00 nW (max = 794.00 uW, min =  50.00 uW)  ALARM (LCRIT)
                       (lcrit =  40.00 uW, crit = 1000.00 uW)
curr1:        +0.00 A  (crit min =  +0.00 A, min =  +0.00 A)  ALARM (LCRIT, MIN)
                       (max =  +0.01 A, crit max =  +0.01 A)

The scaling sensors performs on the bias current is not particularly
good. The raw values are more useful:

curr1:
  curr1_input: 0.000
  curr1_min: 0.002
  curr1_max: 0.010
  curr1_lcrit: 0.000
  curr1_crit: 0.011
  curr1_min_alarm: 1.000
  curr1_max_alarm: 0.000
  curr1_lcrit_alarm: 1.000
  curr1_crit_alarm: 0.000

In order to keep the I2C overhead to a minimum, the constant values,
such as limits and calibration coefficients are read once at module
insertion time. Thus only reading *_input and *_alarm properties
requires i2c read operations.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig |   1 +
 drivers/net/phy/sfp.c   | 727 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sfp.h     |  72 ++++-
 3 files changed, 799 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index ceede09a2845..9beac427f9e8 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -215,6 +215,7 @@ config SFP
 	tristate "SFP cage support"
 	depends on I2C && PHYLINK
 	select MDIO_I2C
+	imply HWMON
 
 config AMD_PHY
 	tristate "AMD PHYs"
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index c4c92db86dfa..5661226cf75b 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -1,5 +1,7 @@
+#include <linux/ctype.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/hwmon.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
@@ -131,6 +133,12 @@ struct sfp {
 	unsigned int sm_retries;
 
 	struct sfp_eeprom_id id;
+#if IS_ENABLED(CONFIG_HWMON)
+	struct sfp_diag diag;
+	struct device *hwmon_dev;
+	char *hwmon_name;
+#endif
+
 };
 
 static bool sff_module_supported(const struct sfp_eeprom_id *id)
@@ -316,6 +324,719 @@ static unsigned int sfp_check(void *buf, size_t len)
 	return check;
 }
 
+/* hwmon */
+#if IS_ENABLED(CONFIG_HWMON)
+static umode_t sfp_hwmon_is_visible(const void *data,
+				    enum hwmon_sensor_types type,
+				    u32 attr, int channel)
+{
+	const struct sfp *sfp = data;
+
+	switch (type) {
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_input:
+		case hwmon_temp_min_alarm:
+		case hwmon_temp_max_alarm:
+		case hwmon_temp_lcrit_alarm:
+		case hwmon_temp_crit_alarm:
+		case hwmon_temp_min:
+		case hwmon_temp_max:
+		case hwmon_temp_lcrit:
+		case hwmon_temp_crit:
+			return 0444;
+		default:
+			return 0;
+		}
+	case hwmon_in:
+		switch (attr) {
+		case hwmon_in_input:
+		case hwmon_in_min_alarm:
+		case hwmon_in_max_alarm:
+		case hwmon_in_lcrit_alarm:
+		case hwmon_in_crit_alarm:
+		case hwmon_in_min:
+		case hwmon_in_max:
+		case hwmon_in_lcrit:
+		case hwmon_in_crit:
+			return 0444;
+		default:
+			return 0;
+		}
+	case hwmon_curr:
+		switch (attr) {
+		case hwmon_curr_input:
+		case hwmon_curr_min_alarm:
+		case hwmon_curr_max_alarm:
+		case hwmon_curr_lcrit_alarm:
+		case hwmon_curr_crit_alarm:
+		case hwmon_curr_min:
+		case hwmon_curr_max:
+		case hwmon_curr_lcrit:
+		case hwmon_curr_crit:
+			return 0444;
+		default:
+			return 0;
+		}
+	case hwmon_power:
+		/* External calibration of receive power requires
+		 * floating point arithmetic. Doing that in the kernel
+		 * is not easy, so just skip it. If the module does
+		 * not require external calibration, we can however
+		 * show receiver power, since FP is then not needed.
+		 */
+		if (sfp->id.ext.diagmon & SFP_DIAGMON_EXT_CAL &&
+		    channel == 1)
+			return 0;
+		switch (attr) {
+		case hwmon_power_input:
+		case hwmon_power_min_alarm:
+		case hwmon_power_max_alarm:
+		case hwmon_power_lcrit_alarm:
+		case hwmon_power_crit_alarm:
+		case hwmon_power_min:
+		case hwmon_power_max:
+		case hwmon_power_lcrit:
+		case hwmon_power_crit:
+			return 0444;
+		default:
+			return 0;
+		}
+	default:
+		return 0;
+	}
+}
+
+static int sfp_hwmon_read_sensor(struct sfp *sfp, int reg, long *value)
+{
+	__be16 val;
+	int err;
+
+	err = sfp_read(sfp, true, reg, &val, sizeof(val));
+	if (err < 0)
+		return err;
+
+	*value = be16_to_cpu(val);
+
+	return 0;
+}
+
+static void sfp_hwmon_to_rx_power(long *value)
+{
+	*value = DIV_ROUND_CLOSEST(*value, 100);
+}
+
+static void sfp_hwmon_calibrate(struct sfp *sfp, unsigned int slope, int offset,
+				long *value)
+{
+	if (sfp->id.ext.diagmon & SFP_DIAGMON_EXT_CAL)
+		*value = DIV_ROUND_CLOSEST(*value * slope, 256) + offset;
+}
+
+static void sfp_hwmon_calibrate_temp(struct sfp *sfp, long *value)
+{
+	sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_t_slope),
+			    be16_to_cpu(sfp->diag.cal_t_offset), value);
+
+	if (*value >= 0x8000)
+		*value -= 0x10000;
+
+	*value = DIV_ROUND_CLOSEST(*value * 1000, 256);
+}
+
+static void sfp_hwmon_calibrate_vcc(struct sfp *sfp, long *value)
+{
+	sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_v_slope),
+			    be16_to_cpu(sfp->diag.cal_v_offset), value);
+
+	*value = DIV_ROUND_CLOSEST(*value, 10);
+}
+
+static void sfp_hwmon_calibrate_bias(struct sfp *sfp, long *value)
+{
+	sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_txi_slope),
+			    be16_to_cpu(sfp->diag.cal_txi_offset), value);
+
+	*value = DIV_ROUND_CLOSEST(*value, 500);
+}
+
+static void sfp_hwmon_calibrate_tx_power(struct sfp *sfp, long *value)
+{
+	sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_txpwr_slope),
+			    be16_to_cpu(sfp->diag.cal_txpwr_offset), value);
+
+	*value = DIV_ROUND_CLOSEST(*value, 10);
+}
+
+static int sfp_hwmon_read_temp(struct sfp *sfp, int reg, long *value)
+{
+	int err;
+
+	err = sfp_hwmon_read_sensor(sfp, reg, value);
+	if (err < 0)
+		return err;
+
+	sfp_hwmon_calibrate_temp(sfp, value);
+
+	return 0;
+}
+
+static int sfp_hwmon_read_vcc(struct sfp *sfp, int reg, long *value)
+{
+	int err;
+
+	err = sfp_hwmon_read_sensor(sfp, reg, value);
+	if (err < 0)
+		return err;
+
+	sfp_hwmon_calibrate_vcc(sfp, value);
+
+	return 0;
+}
+
+static int sfp_hwmon_read_bias(struct sfp *sfp, int reg, long *value)
+{
+	int err;
+
+	err = sfp_hwmon_read_sensor(sfp, reg, value);
+	if (err < 0)
+		return err;
+
+	sfp_hwmon_calibrate_bias(sfp, value);
+
+	return 0;
+}
+
+static int sfp_hwmon_read_tx_power(struct sfp *sfp, int reg, long *value)
+{
+	int err;
+
+	err = sfp_hwmon_read_sensor(sfp, reg, value);
+	if (err < 0)
+		return err;
+
+	sfp_hwmon_calibrate_tx_power(sfp, value);
+
+	return 0;
+}
+
+static int sfp_hwmon_read_rx_power(struct sfp *sfp, int reg, long *value)
+{
+	int err;
+
+	err = sfp_hwmon_read_sensor(sfp, reg, value);
+	if (err < 0)
+		return err;
+
+	sfp_hwmon_to_rx_power(value);
+
+	return 0;
+}
+
+static int sfp_hwmon_temp(struct sfp *sfp, u32 attr, long *value)
+{
+	u8 status;
+	int err;
+
+	switch (attr) {
+	case hwmon_temp_input:
+		return sfp_hwmon_read_temp(sfp, SFP_TEMP, value);
+
+	case hwmon_temp_lcrit:
+		*value = be16_to_cpu(sfp->diag.temp_low_alarm);
+		sfp_hwmon_calibrate_temp(sfp, value);
+		return 0;
+
+	case hwmon_temp_min:
+		*value = be16_to_cpu(sfp->diag.temp_low_warn);
+		sfp_hwmon_calibrate_temp(sfp, value);
+		return 0;
+	case hwmon_temp_max:
+		*value = be16_to_cpu(sfp->diag.temp_high_warn);
+		sfp_hwmon_calibrate_temp(sfp, value);
+		return 0;
+
+	case hwmon_temp_crit:
+		*value = be16_to_cpu(sfp->diag.temp_high_alarm);
+		sfp_hwmon_calibrate_temp(sfp, value);
+		return 0;
+
+	case hwmon_temp_lcrit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TEMP_LOW);
+		return 0;
+
+	case hwmon_temp_min_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TEMP_LOW);
+		return 0;
+
+	case hwmon_temp_max_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TEMP_HIGH);
+		return 0;
+
+	case hwmon_temp_crit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TEMP_HIGH);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_vcc(struct sfp *sfp, u32 attr, long *value)
+{
+	u8 status;
+	int err;
+
+	switch (attr) {
+	case hwmon_in_input:
+		return sfp_hwmon_read_vcc(sfp, SFP_VCC, value);
+
+	case hwmon_in_lcrit:
+		*value = be16_to_cpu(sfp->diag.volt_low_alarm);
+		sfp_hwmon_calibrate_vcc(sfp, value);
+		return 0;
+
+	case hwmon_in_min:
+		*value = be16_to_cpu(sfp->diag.volt_low_warn);
+		sfp_hwmon_calibrate_vcc(sfp, value);
+		return 0;
+
+	case hwmon_in_max:
+		*value = be16_to_cpu(sfp->diag.volt_high_warn);
+		sfp_hwmon_calibrate_vcc(sfp, value);
+		return 0;
+
+	case hwmon_in_crit:
+		*value = be16_to_cpu(sfp->diag.volt_high_alarm);
+		sfp_hwmon_calibrate_vcc(sfp, value);
+		return 0;
+
+	case hwmon_in_lcrit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_VCC_LOW);
+		return 0;
+
+	case hwmon_in_min_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_VCC_LOW);
+		return 0;
+
+	case hwmon_in_max_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_VCC_HIGH);
+		return 0;
+
+	case hwmon_in_crit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_VCC_HIGH);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_bias(struct sfp *sfp, u32 attr, long *value)
+{
+	u8 status;
+	int err;
+
+	switch (attr) {
+	case hwmon_curr_input:
+		return sfp_hwmon_read_bias(sfp, SFP_TX_BIAS, value);
+
+	case hwmon_curr_lcrit:
+		*value = be16_to_cpu(sfp->diag.bias_low_alarm);
+		sfp_hwmon_calibrate_bias(sfp, value);
+		return 0;
+
+	case hwmon_curr_min:
+		*value = be16_to_cpu(sfp->diag.bias_low_warn);
+		sfp_hwmon_calibrate_bias(sfp, value);
+		return 0;
+
+	case hwmon_curr_max:
+		*value = be16_to_cpu(sfp->diag.bias_high_warn);
+		sfp_hwmon_calibrate_bias(sfp, value);
+		return 0;
+
+	case hwmon_curr_crit:
+		*value = be16_to_cpu(sfp->diag.bias_high_alarm);
+		sfp_hwmon_calibrate_bias(sfp, value);
+		return 0;
+
+	case hwmon_curr_lcrit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TX_BIAS_LOW);
+		return 0;
+
+	case hwmon_curr_min_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TX_BIAS_LOW);
+		return 0;
+
+	case hwmon_curr_max_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TX_BIAS_HIGH);
+		return 0;
+
+	case hwmon_curr_crit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TX_BIAS_HIGH);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_tx_power(struct sfp *sfp, u32 attr, long *value)
+{
+	u8 status;
+	int err;
+
+	switch (attr) {
+	case hwmon_power_input:
+		return sfp_hwmon_read_tx_power(sfp, SFP_TX_POWER, value);
+
+	case hwmon_power_lcrit:
+		*value = be16_to_cpu(sfp->diag.txpwr_low_alarm);
+		sfp_hwmon_calibrate_tx_power(sfp, value);
+		return 0;
+
+	case hwmon_power_min:
+		*value = be16_to_cpu(sfp->diag.txpwr_low_warn);
+		sfp_hwmon_calibrate_tx_power(sfp, value);
+		return 0;
+
+	case hwmon_power_max:
+		*value = be16_to_cpu(sfp->diag.txpwr_high_warn);
+		sfp_hwmon_calibrate_tx_power(sfp, value);
+		return 0;
+
+	case hwmon_power_crit:
+		*value = be16_to_cpu(sfp->diag.txpwr_high_alarm);
+		sfp_hwmon_calibrate_tx_power(sfp, value);
+		return 0;
+
+	case hwmon_power_lcrit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TXPWR_LOW);
+		return 0;
+
+	case hwmon_power_min_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TXPWR_LOW);
+		return 0;
+
+	case hwmon_power_max_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TXPWR_HIGH);
+		return 0;
+
+	case hwmon_power_crit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TXPWR_HIGH);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_rx_power(struct sfp *sfp, u32 attr, long *value)
+{
+	u8 status;
+	int err;
+
+	switch (attr) {
+	case hwmon_power_input:
+		return sfp_hwmon_read_rx_power(sfp, SFP_RX_POWER, value);
+
+	case hwmon_power_lcrit:
+		*value = be16_to_cpu(sfp->diag.rxpwr_low_alarm);
+		sfp_hwmon_to_rx_power(value);
+		return 0;
+
+	case hwmon_power_min:
+		*value = be16_to_cpu(sfp->diag.rxpwr_low_warn);
+		sfp_hwmon_to_rx_power(value);
+		return 0;
+
+	case hwmon_power_max:
+		*value = be16_to_cpu(sfp->diag.rxpwr_high_warn);
+		sfp_hwmon_to_rx_power(value);
+		return 0;
+
+	case hwmon_power_crit:
+		*value = be16_to_cpu(sfp->diag.rxpwr_high_alarm);
+		sfp_hwmon_to_rx_power(value);
+		return 0;
+
+	case hwmon_power_lcrit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM1, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM1_RXPWR_LOW);
+		return 0;
+
+	case hwmon_power_min_alarm:
+		err = sfp_read(sfp, true, SFP_WARN1, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN1_RXPWR_LOW);
+		return 0;
+
+	case hwmon_power_max_alarm:
+		err = sfp_read(sfp, true, SFP_WARN1, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN1_RXPWR_HIGH);
+		return 0;
+
+	case hwmon_power_crit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM1, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM1_RXPWR_HIGH);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			  u32 attr, int channel, long *value)
+{
+	struct sfp *sfp = dev_get_drvdata(dev);
+
+	switch (type) {
+	case hwmon_temp:
+		return sfp_hwmon_temp(sfp, attr, value);
+	case hwmon_in:
+		return sfp_hwmon_vcc(sfp, attr, value);
+	case hwmon_curr:
+		return sfp_hwmon_bias(sfp, attr, value);
+	case hwmon_power:
+		switch (channel) {
+		case 0:
+			return sfp_hwmon_tx_power(sfp, attr, value);
+		case 1:
+			return sfp_hwmon_rx_power(sfp, attr, value);
+		default:
+			return -EOPNOTSUPP;
+		}
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct hwmon_ops sfp_hwmon_ops = {
+	.is_visible = sfp_hwmon_is_visible,
+	.read = sfp_hwmon_read,
+};
+
+static u32 sfp_hwmon_chip_config[] = {
+	HWMON_C_REGISTER_TZ,
+	0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_chip = {
+	.type = hwmon_chip,
+	.config = sfp_hwmon_chip_config,
+};
+
+static u32 sfp_hwmon_temp_config[] = {
+	HWMON_T_INPUT |
+	HWMON_T_MAX | HWMON_T_MIN |
+	HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM |
+	HWMON_T_CRIT | HWMON_T_LCRIT |
+	HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM,
+	0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_temp_channel_info = {
+	.type = hwmon_temp,
+	.config = sfp_hwmon_temp_config,
+};
+
+static u32 sfp_hwmon_vcc_config[] = {
+	HWMON_I_INPUT |
+	HWMON_I_MAX | HWMON_I_MIN |
+	HWMON_I_MAX_ALARM | HWMON_I_MIN_ALARM |
+	HWMON_I_CRIT | HWMON_I_LCRIT |
+	HWMON_I_CRIT_ALARM | HWMON_I_LCRIT_ALARM,
+	0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_vcc_channel_info = {
+	.type = hwmon_in,
+	.config = sfp_hwmon_vcc_config,
+};
+
+static u32 sfp_hwmon_bias_config[] = {
+	HWMON_C_INPUT |
+	HWMON_C_MAX | HWMON_C_MIN |
+	HWMON_C_MAX_ALARM | HWMON_C_MIN_ALARM |
+	HWMON_C_CRIT | HWMON_C_LCRIT |
+	HWMON_C_CRIT_ALARM | HWMON_C_LCRIT_ALARM,
+	0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_bias_channel_info = {
+	.type = hwmon_curr,
+	.config = sfp_hwmon_bias_config,
+};
+
+static u32 sfp_hwmon_power_config[] = {
+	/* Transmit power */
+	HWMON_P_INPUT |
+	HWMON_P_MAX | HWMON_P_MIN |
+	HWMON_P_MAX_ALARM | HWMON_P_MIN_ALARM |
+	HWMON_P_CRIT | HWMON_P_LCRIT |
+	HWMON_P_CRIT_ALARM | HWMON_P_LCRIT_ALARM,
+	/* Receive power */
+	HWMON_P_INPUT |
+	HWMON_P_MAX | HWMON_P_MIN |
+	HWMON_P_MAX_ALARM | HWMON_P_MIN_ALARM |
+	HWMON_P_CRIT | HWMON_P_LCRIT |
+	HWMON_P_CRIT_ALARM | HWMON_P_LCRIT_ALARM,
+	0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_power_channel_info = {
+	.type = hwmon_power,
+	.config = sfp_hwmon_power_config,
+};
+
+static const struct hwmon_channel_info *sfp_hwmon_info[] = {
+	&sfp_hwmon_chip,
+	&sfp_hwmon_vcc_channel_info,
+	&sfp_hwmon_temp_channel_info,
+	&sfp_hwmon_bias_channel_info,
+	&sfp_hwmon_power_channel_info,
+	NULL,
+};
+
+static const struct hwmon_chip_info sfp_hwmon_chip_info = {
+	.ops = &sfp_hwmon_ops,
+	.info = sfp_hwmon_info,
+};
+
+static int sfp_hwmon_insert(struct sfp *sfp)
+{
+	int err, i;
+
+	if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE)
+		return 0;
+
+	if (!(sfp->id.ext.diagmon & SFP_DIAGMON_DDM))
+		return 0;
+
+	if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)
+		/* This driver in general does not support address
+		 * change.
+		 */
+		return 0;
+
+	err = sfp_read(sfp, true, 0, &sfp->diag, sizeof(sfp->diag));
+	if (err < 0)
+		return err;
+
+	sfp->hwmon_name = kstrdup(dev_name(sfp->dev), GFP_KERNEL);
+	if (!sfp->hwmon_name)
+		return -ENODEV;
+
+	for (i = 0; sfp->hwmon_name[i]; i++)
+		if (hwmon_is_bad_char(sfp->hwmon_name[i]))
+			sfp->hwmon_name[i] = '_';
+
+	sfp->hwmon_dev = hwmon_device_register_with_info(sfp->dev,
+							 sfp->hwmon_name, sfp,
+							 &sfp_hwmon_chip_info,
+							 NULL);
+
+	return PTR_ERR_OR_ZERO(sfp->hwmon_dev);
+}
+
+static void sfp_hwmon_remove(struct sfp *sfp)
+{
+	hwmon_device_unregister(sfp->hwmon_dev);
+	kfree(sfp->hwmon_name);
+}
+#else
+static int sfp_hwmon_insert(struct sfp *sfp)
+{
+	return 0;
+}
+
+static void sfp_hwmon_remove(struct sfp *sfp)
+{
+}
+#endif
+
 /* Helpers */
 static void sfp_module_tx_disable(struct sfp *sfp)
 {
@@ -636,6 +1357,10 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
 		dev_warn(sfp->dev,
 			 "module address swap to access page 0xA2 is not supported.\n");
 
+	ret = sfp_hwmon_insert(sfp);
+	if (ret < 0)
+		return ret;
+
 	ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
 	if (ret < 0)
 		return ret;
@@ -647,6 +1372,8 @@ static void sfp_sm_mod_remove(struct sfp *sfp)
 {
 	sfp_module_remove(sfp->sfp_bus);
 
+	sfp_hwmon_remove(sfp);
+
 	if (sfp->mod_phy)
 		sfp_sm_phy_detach(sfp);
 
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index ebce9e24906a..d37518e89db2 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -231,6 +231,50 @@ struct sfp_eeprom_id {
 	struct sfp_eeprom_ext ext;
 } __packed;
 
+struct sfp_diag {
+	__be16 temp_high_alarm;
+	__be16 temp_low_alarm;
+	__be16 temp_high_warn;
+	__be16 temp_low_warn;
+	__be16 volt_high_alarm;
+	__be16 volt_low_alarm;
+	__be16 volt_high_warn;
+	__be16 volt_low_warn;
+	__be16 bias_high_alarm;
+	__be16 bias_low_alarm;
+	__be16 bias_high_warn;
+	__be16 bias_low_warn;
+	__be16 txpwr_high_alarm;
+	__be16 txpwr_low_alarm;
+	__be16 txpwr_high_warn;
+	__be16 txpwr_low_warn;
+	__be16 rxpwr_high_alarm;
+	__be16 rxpwr_low_alarm;
+	__be16 rxpwr_high_warn;
+	__be16 rxpwr_low_warn;
+	__be16 laser_temp_high_alarm;
+	__be16 laser_temp_low_alarm;
+	__be16 laser_temp_high_warn;
+	__be16 laser_temp_low_warn;
+	__be16 tec_cur_high_alarm;
+	__be16 tec_cur_low_alarm;
+	__be16 tec_cur_high_warn;
+	__be16 tec_cur_low_warn;
+	__be32 cal_rxpwr4;
+	__be32 cal_rxpwr3;
+	__be32 cal_rxpwr2;
+	__be32 cal_rxpwr1;
+	__be32 cal_rxpwr0;
+	__be16 cal_txi_slope;
+	__be16 cal_txi_offset;
+	__be16 cal_txpwr_slope;
+	__be16 cal_txpwr_offset;
+	__be16 cal_t_slope;
+	__be16 cal_t_offset;
+	__be16 cal_v_slope;
+	__be16 cal_v_offset;
+} __packed;
+
 /* SFP EEPROM registers */
 enum {
 	SFP_PHYS_ID			= 0x00,
@@ -384,7 +428,33 @@ enum {
 	SFP_TEC_CUR			= 0x6c,
 
 	SFP_STATUS			= 0x6e,
-	SFP_ALARM			= 0x70,
+	SFP_ALARM0			= 0x70,
+	SFP_ALARM0_TEMP_HIGH		= BIT(7),
+	SFP_ALARM0_TEMP_LOW		= BIT(6),
+	SFP_ALARM0_VCC_HIGH		= BIT(5),
+	SFP_ALARM0_VCC_LOW		= BIT(4),
+	SFP_ALARM0_TX_BIAS_HIGH		= BIT(3),
+	SFP_ALARM0_TX_BIAS_LOW		= BIT(2),
+	SFP_ALARM0_TXPWR_HIGH		= BIT(1),
+	SFP_ALARM0_TXPWR_LOW		= BIT(0),
+
+	SFP_ALARM1			= 0x71,
+	SFP_ALARM1_RXPWR_HIGH		= BIT(7),
+	SFP_ALARM1_RXPWR_LOW		= BIT(6),
+
+	SFP_WARN0			= 0x74,
+	SFP_WARN0_TEMP_HIGH		= BIT(7),
+	SFP_WARN0_TEMP_LOW		= BIT(6),
+	SFP_WARN0_VCC_HIGH		= BIT(5),
+	SFP_WARN0_VCC_LOW		= BIT(4),
+	SFP_WARN0_TX_BIAS_HIGH		= BIT(3),
+	SFP_WARN0_TX_BIAS_LOW		= BIT(2),
+	SFP_WARN0_TXPWR_HIGH		= BIT(1),
+	SFP_WARN0_TXPWR_LOW		= BIT(0),
+
+	SFP_WARN1			= 0x75,
+	SFP_WARN1_RXPWR_HIGH		= BIT(7),
+	SFP_WARN1_RXPWR_LOW		= BIT(6),
 
 	SFP_EXT_STATUS			= 0x76,
 	SFP_VSL				= 0x78,
-- 
cgit v1.2.3


From 753d433b586d1d43c487e3d660f5778c7c8d58ea Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Fri, 22 Jun 2018 09:15:32 +1000
Subject: random: Return nbytes filled from hw RNG

Currently the function get_random_bytes_arch() has return value 'void'.
If the hw RNG fails we currently fall back to using get_random_bytes().
This defeats the purpose of requesting random material from the hw RNG
in the first place.

There are currently no intree users of get_random_bytes_arch().

Only get random bytes from the hw RNG, make function return the number
of bytes retrieved from the hw RNG.

Acked-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Tobin C. Harding <me@tobin.cc>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c  | 16 +++++++++-------
 include/linux/random.h |  2 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 8e53e9515a1d..34ddfd57419b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1725,26 +1725,28 @@ EXPORT_SYMBOL(del_random_ready_callback);
  * key known by the NSA).  So it's useful if we need the speed, but
  * only if we're willing to trust the hardware manufacturer not to
  * have put in a back door.
+ *
+ * Return number of bytes filled in.
  */
-void get_random_bytes_arch(void *buf, int nbytes)
+int __must_check get_random_bytes_arch(void *buf, int nbytes)
 {
+	int left = nbytes;
 	char *p = buf;
 
-	trace_get_random_bytes_arch(nbytes, _RET_IP_);
-	while (nbytes) {
+	trace_get_random_bytes_arch(left, _RET_IP_);
+	while (left) {
 		unsigned long v;
-		int chunk = min(nbytes, (int)sizeof(unsigned long));
+		int chunk = min_t(int, left, sizeof(unsigned long));
 
 		if (!arch_get_random_long(&v))
 			break;
 
 		memcpy(p, &v, chunk);
 		p += chunk;
-		nbytes -= chunk;
+		left -= chunk;
 	}
 
-	if (nbytes)
-		get_random_bytes(p, nbytes);
+	return nbytes - left;
 }
 EXPORT_SYMBOL(get_random_bytes_arch);
 
diff --git a/include/linux/random.h b/include/linux/random.h
index 2ddf13b4281e..f1c9bc5cd231 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -38,7 +38,7 @@ extern void get_random_bytes(void *buf, int nbytes);
 extern int wait_for_random_bytes(void);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
-extern void get_random_bytes_arch(void *buf, int nbytes);
+extern int __must_check get_random_bytes_arch(void *buf, int nbytes);
 
 #ifndef MODULE
 extern const struct file_operations random_fops, urandom_fops;
-- 
cgit v1.2.3


From 9ba546c01976a426292af99e682a557075d6c010 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 15:48:46 +0200
Subject: aio: don't expose __aio_sigset in uapi

glibc uses a different defintion of sigset_t than the kernel does,
and the current version would pull in both.  To fix this just do not
expose the type at all - this somewhat mirrors pselect() where we
do not even have a type for the magic sigmask argument, but just
use pointer arithmetics.

Fixes: 7a074e96 ("aio: implement io_pgetevents")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Adrian Reber <adrian@lisas.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c                     | 5 +++++
 include/linux/syscalls.h     | 1 +
 include/uapi/linux/aio_abi.h | 6 ------
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index e1d20124ec0e..b1a42e45698b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2042,6 +2042,11 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
 	return ret;
 }
 
+struct __aio_sigset {
+	const sigset_t __user	*sigmask;
+	size_t		sigsetsize;
+};
+
 SYSCALL_DEFINE6(io_pgetevents,
 		aio_context_t, ctx_id,
 		long, min_nr,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 73810808cdf2..b06b5eeda8e8 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -11,6 +11,7 @@
 #ifndef _LINUX_SYSCALLS_H
 #define _LINUX_SYSCALLS_H
 
+struct __aio_sigset;
 struct epoll_event;
 struct iattr;
 struct inode;
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index d00221345c19..ce43d340f010 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -29,7 +29,6 @@
 
 #include <linux/types.h>
 #include <linux/fs.h>
-#include <linux/signal.h>
 #include <asm/byteorder.h>
 
 typedef __kernel_ulong_t aio_context_t;
@@ -108,10 +107,5 @@ struct iocb {
 #undef IFBIG
 #undef IFLITTLE
 
-struct __aio_sigset {
-	const sigset_t __user	*sigmask;
-	size_t		sigsetsize;
-};
-
 #endif /* __LINUX__AIO_ABI_H */
 
-- 
cgit v1.2.3


From 181ace9e1b2e1c5c2be590bc9603baa65f3a16d8 Mon Sep 17 00:00:00 2001
From: Abhishek Sahu <absahu@codeaurora.org>
Date: Wed, 20 Jun 2018 12:57:28 +0530
Subject: mtd: rawnand: helper function for setting up ECC configuration

commit 2c8f8afa7f92 ("mtd: nand: add generic helpers to check,
match, maximize ECC settings") provides generic helpers which
drivers can use for setting up ECC parameters.

Since same board can have different ECC strength nand chips so
following is the logic for setting up ECC strength and ECC step
size, which can be used by most of the drivers.

1. If both ECC step size and ECC strength are already set
   (usually by DT) then just check whether this setting
   is supported by NAND controller.
2. If NAND_ECC_MAXIMIZE is set, then select maximum ECC strength
   supported by NAND controller.
3. Otherwise, try to match the ECC step size and ECC strength closest
   to the chip's requirement. If available OOB size can't fit the chip
   requirement then select maximum ECC strength which can be fit with
   available OOB size.

This patch introduces nand_ecc_choose_conf function which calls the
required helper functions for the above logic. The drivers can use
this single function instead of calling the 3 helper functions
individually.

Signed-off-by: Abhishek Sahu <absahu@codeaurora.org>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/mtd/rawnand.h      |  3 +++
 2 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index dbf6e80e9ab5..2858d0b09c1d 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -6295,6 +6295,39 @@ int nand_maximize_ecc(struct nand_chip *chip,
 }
 EXPORT_SYMBOL_GPL(nand_maximize_ecc);
 
+/**
+ * nand_ecc_choose_conf - Set the ECC strength and ECC step size
+ * @chip: nand chip info structure
+ * @caps: ECC engine caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * Choose the ECC configuration according to following logic
+ *
+ * 1. If both ECC step size and ECC strength are already set (usually by DT)
+ *    then check if it is supported by this controller.
+ * 2. If NAND_ECC_MAXIMIZE is set, then select maximum ECC strength.
+ * 3. Otherwise, try to match the ECC step size and ECC strength closest
+ *    to the chip's requirement. If available OOB size can't fit the chip
+ *    requirement then fallback to the maximum ECC step size and ECC strength.
+ *
+ * On success, the chosen ECC settings are set.
+ */
+int nand_ecc_choose_conf(struct nand_chip *chip,
+			 const struct nand_ecc_caps *caps, int oobavail)
+{
+	if (chip->ecc.size && chip->ecc.strength)
+		return nand_check_ecc_caps(chip, caps, oobavail);
+
+	if (chip->ecc.options & NAND_ECC_MAXIMIZE)
+		return nand_maximize_ecc(chip, caps, oobavail);
+
+	if (!nand_match_ecc_req(chip, caps, oobavail))
+		return 0;
+
+	return nand_maximize_ecc(chip, caps, oobavail);
+}
+EXPORT_SYMBOL_GPL(nand_ecc_choose_conf);
+
 /*
  * Check if the chip configuration meet the datasheet requirements.
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index fbac4741da52..2ce305388471 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1672,6 +1672,9 @@ int nand_match_ecc_req(struct nand_chip *chip,
 int nand_maximize_ecc(struct nand_chip *chip,
 		      const struct nand_ecc_caps *caps, int oobavail);
 
+int nand_ecc_choose_conf(struct nand_chip *chip,
+			 const struct nand_ecc_caps *caps, int oobavail);
+
 /* Default write_oob implementation */
 int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
 
-- 
cgit v1.2.3


From 0cf5c7dbaa392d11cfae532cedeaad0ac48b2165 Mon Sep 17 00:00:00 2001
From: Abhishek Sahu <absahu@codeaurora.org>
Date: Wed, 20 Jun 2018 12:57:42 +0530
Subject: mtd: rawnand: provide only single helper function for ECC conf

Function nand_ecc_choose_conf() will be help for all the cases, so
other helper functions can be made static.

nand_check_ecc_caps(): Invoke nand_ecc_choose_conf() with
                       both chip->ecc.size and chip->ecc.strength
                       value set.

nand_maximize_ecc(): Invoke nand_ecc_choose_conf() with
                     NAND_ECC_MAXIMIZE flag.

nand_match_ecc_req(): Invoke nand_ecc_choose_conf() with either
                      chip->ecc.size or chip->ecc.strength value
                      set and without NAND_ECC_MAXIMIZE flag.

CC: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Abhishek Sahu <absahu@codeaurora.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 39 +++++++++++++++------------------------
 include/linux/mtd/rawnand.h      |  9 ---------
 2 files changed, 15 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 2858d0b09c1d..faac82b1e058 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -6085,24 +6085,17 @@ static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
  * by the controller and the calculated ECC bytes fit within the chip's OOB.
  * On success, the calculated ECC bytes is set.
  */
-int nand_check_ecc_caps(struct nand_chip *chip,
-			const struct nand_ecc_caps *caps, int oobavail)
+static int
+nand_check_ecc_caps(struct nand_chip *chip,
+		    const struct nand_ecc_caps *caps, int oobavail)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct nand_ecc_step_info *stepinfo;
 	int preset_step = chip->ecc.size;
 	int preset_strength = chip->ecc.strength;
-	int nsteps, ecc_bytes;
+	int ecc_bytes, nsteps = mtd->writesize / preset_step;
 	int i, j;
 
-	if (WARN_ON(oobavail < 0))
-		return -EINVAL;
-
-	if (!preset_step || !preset_strength)
-		return -ENODATA;
-
-	nsteps = mtd->writesize / preset_step;
-
 	for (i = 0; i < caps->nstepinfos; i++) {
 		stepinfo = &caps->stepinfos[i];
 
@@ -6135,7 +6128,6 @@ int nand_check_ecc_caps(struct nand_chip *chip,
 
 	return -ENOTSUPP;
 }
-EXPORT_SYMBOL_GPL(nand_check_ecc_caps);
 
 /**
  * nand_match_ecc_req - meet the chip's requirement with least ECC bytes
@@ -6147,8 +6139,9 @@ EXPORT_SYMBOL_GPL(nand_check_ecc_caps);
  * number of ECC bytes (i.e. with the largest number of OOB-free bytes).
  * On success, the chosen ECC settings are set.
  */
-int nand_match_ecc_req(struct nand_chip *chip,
-		       const struct nand_ecc_caps *caps, int oobavail)
+static int
+nand_match_ecc_req(struct nand_chip *chip,
+		   const struct nand_ecc_caps *caps, int oobavail)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct nand_ecc_step_info *stepinfo;
@@ -6159,9 +6152,6 @@ int nand_match_ecc_req(struct nand_chip *chip,
 	int best_ecc_bytes_total = INT_MAX;
 	int i, j;
 
-	if (WARN_ON(oobavail < 0))
-		return -EINVAL;
-
 	/* No information provided by the NAND chip */
 	if (!req_step || !req_strength)
 		return -ENOTSUPP;
@@ -6220,7 +6210,6 @@ int nand_match_ecc_req(struct nand_chip *chip,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(nand_match_ecc_req);
 
 /**
  * nand_maximize_ecc - choose the max ECC strength available
@@ -6231,8 +6220,9 @@ EXPORT_SYMBOL_GPL(nand_match_ecc_req);
  * Choose the max ECC strength that is supported on the controller, and can fit
  * within the chip's OOB.  On success, the chosen ECC settings are set.
  */
-int nand_maximize_ecc(struct nand_chip *chip,
-		      const struct nand_ecc_caps *caps, int oobavail)
+static int
+nand_maximize_ecc(struct nand_chip *chip,
+		  const struct nand_ecc_caps *caps, int oobavail)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct nand_ecc_step_info *stepinfo;
@@ -6242,9 +6232,6 @@ int nand_maximize_ecc(struct nand_chip *chip,
 	int best_strength, best_ecc_bytes;
 	int i, j;
 
-	if (WARN_ON(oobavail < 0))
-		return -EINVAL;
-
 	for (i = 0; i < caps->nstepinfos; i++) {
 		stepinfo = &caps->stepinfos[i];
 		step_size = stepinfo->stepsize;
@@ -6293,7 +6280,6 @@ int nand_maximize_ecc(struct nand_chip *chip,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(nand_maximize_ecc);
 
 /**
  * nand_ecc_choose_conf - Set the ECC strength and ECC step size
@@ -6315,6 +6301,11 @@ EXPORT_SYMBOL_GPL(nand_maximize_ecc);
 int nand_ecc_choose_conf(struct nand_chip *chip,
 			 const struct nand_ecc_caps *caps, int oobavail)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	if (WARN_ON(oobavail < 0 || oobavail > mtd->oobsize))
+		return -EINVAL;
+
 	if (chip->ecc.size && chip->ecc.strength)
 		return nand_check_ecc_caps(chip, caps, oobavail);
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 2ce305388471..0c6fb316b409 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1663,15 +1663,6 @@ int nand_check_erased_ecc_chunk(void *data, int datalen,
 				void *extraoob, int extraooblen,
 				int threshold);
 
-int nand_check_ecc_caps(struct nand_chip *chip,
-			const struct nand_ecc_caps *caps, int oobavail);
-
-int nand_match_ecc_req(struct nand_chip *chip,
-		       const struct nand_ecc_caps *caps,  int oobavail);
-
-int nand_maximize_ecc(struct nand_chip *chip,
-		      const struct nand_ecc_caps *caps, int oobavail);
-
 int nand_ecc_choose_conf(struct nand_chip *chip,
 			 const struct nand_ecc_caps *caps, int oobavail);
 
-- 
cgit v1.2.3


From 7529df4652482c33ae1a99ee8189401146f13cb7 Mon Sep 17 00:00:00 2001
From: Peter Pan <peterpandong@micron.com>
Date: Fri, 22 Jun 2018 14:28:23 +0200
Subject: mtd: nand: Add core infrastructure to support SPI NANDs

Add a SPI NAND framework based on the generic NAND framework and the
spi-mem infrastructure.

In its current state, this framework supports the following features:

- single/dual/quad IO modes
- on-die ECC

Signed-off-by: Peter Pan <peterpandong@micron.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/Kconfig      |    1 +
 drivers/mtd/nand/Makefile     |    1 +
 drivers/mtd/nand/spi/Kconfig  |    7 +
 drivers/mtd/nand/spi/Makefile |    3 +
 drivers/mtd/nand/spi/core.c   | 1136 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spinand.h   |  416 +++++++++++++++
 include/linux/spi/spi-mem.h   |    4 +-
 7 files changed, 1567 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/nand/spi/Kconfig
 create mode 100644 drivers/mtd/nand/spi/Makefile
 create mode 100644 drivers/mtd/nand/spi/core.c
 create mode 100644 include/linux/mtd/spinand.h

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 88c7d3b4ff8b..9033215e62ea 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -4,3 +4,4 @@ config MTD_NAND_CORE
 source "drivers/mtd/nand/onenand/Kconfig"
 
 source "drivers/mtd/nand/raw/Kconfig"
+source "drivers/mtd/nand/spi/Kconfig"
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 3f0cb87f1a57..7ecd80c0a66e 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o
 
 obj-y	+= onenand/
 obj-y	+= raw/
+obj-y	+= spi/
diff --git a/drivers/mtd/nand/spi/Kconfig b/drivers/mtd/nand/spi/Kconfig
new file mode 100644
index 000000000000..7c37d2929b68
--- /dev/null
+++ b/drivers/mtd/nand/spi/Kconfig
@@ -0,0 +1,7 @@
+menuconfig MTD_SPI_NAND
+	tristate "SPI NAND device Support"
+	select MTD_NAND_CORE
+	depends on SPI_MASTER
+	select SPI_MEM
+	help
+	  This is the framework for the SPI NAND device drivers.
diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile
new file mode 100644
index 000000000000..2c473b765027
--- /dev/null
+++ b/drivers/mtd/nand/spi/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+spinand-objs := core.o
+obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
new file mode 100644
index 000000000000..4efbeb0d85b4
--- /dev/null
+++ b/drivers/mtd/nand/spi/core.c
@@ -0,0 +1,1136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2016-2017 Micron Technology, Inc.
+ *
+ * Authors:
+ *	Peter Pan <peterpandong@micron.com>
+ *	Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+
+#define pr_fmt(fmt)	"spi-nand: " fmt
+
+#include <linux/device.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mtd/spinand.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+static void spinand_cache_op_adjust_colum(struct spinand_device *spinand,
+					  const struct nand_page_io_req *req,
+					  u16 *column)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int shift;
+
+	if (nand->memorg.planes_per_lun < 2)
+		return;
+
+	/* The plane number is passed in MSB just above the column address */
+	shift = fls(nand->memorg.pagesize);
+	*column |= req->pos.plane << shift;
+}
+
+static int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val)
+{
+	struct spi_mem_op op = SPINAND_GET_FEATURE_OP(reg,
+						      spinand->scratchbuf);
+	int ret;
+
+	ret = spi_mem_exec_op(spinand->spimem, &op);
+	if (ret)
+		return ret;
+
+	*val = *spinand->scratchbuf;
+	return 0;
+}
+
+static int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val)
+{
+	struct spi_mem_op op = SPINAND_SET_FEATURE_OP(reg,
+						      spinand->scratchbuf);
+
+	*spinand->scratchbuf = val;
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int spinand_read_status(struct spinand_device *spinand, u8 *status)
+{
+	return spinand_read_reg_op(spinand, REG_STATUS, status);
+}
+
+static int spinand_get_cfg(struct spinand_device *spinand, u8 *cfg)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+
+	if (WARN_ON(spinand->cur_target < 0 ||
+		    spinand->cur_target >= nand->memorg.ntargets))
+		return -EINVAL;
+
+	*cfg = spinand->cfg_cache[spinand->cur_target];
+	return 0;
+}
+
+static int spinand_set_cfg(struct spinand_device *spinand, u8 cfg)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	int ret;
+
+	if (WARN_ON(spinand->cur_target < 0 ||
+		    spinand->cur_target >= nand->memorg.ntargets))
+		return -EINVAL;
+
+	if (spinand->cfg_cache[spinand->cur_target] == cfg)
+		return 0;
+
+	ret = spinand_write_reg_op(spinand, REG_CFG, cfg);
+	if (ret)
+		return ret;
+
+	spinand->cfg_cache[spinand->cur_target] = cfg;
+	return 0;
+}
+
+/**
+ * spinand_upd_cfg() - Update the configuration register
+ * @spinand: the spinand device
+ * @mask: the mask encoding the bits to update in the config reg
+ * @val: the new value to apply
+ *
+ * Update the configuration register.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val)
+{
+	int ret;
+	u8 cfg;
+
+	ret = spinand_get_cfg(spinand, &cfg);
+	if (ret)
+		return ret;
+
+	cfg &= ~mask;
+	cfg |= val;
+
+	return spinand_set_cfg(spinand, cfg);
+}
+
+/**
+ * spinand_select_target() - Select a specific NAND target/die
+ * @spinand: the spinand device
+ * @target: the target/die to select
+ *
+ * Select a new target/die. If chip only has one die, this function is a NOOP.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int spinand_select_target(struct spinand_device *spinand, unsigned int target)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	int ret;
+
+	if (WARN_ON(target >= nand->memorg.ntargets))
+		return -EINVAL;
+
+	if (spinand->cur_target == target)
+		return 0;
+
+	if (nand->memorg.ntargets == 1) {
+		spinand->cur_target = target;
+		return 0;
+	}
+
+	ret = spinand->select_target(spinand, target);
+	if (ret)
+		return ret;
+
+	spinand->cur_target = target;
+	return 0;
+}
+
+static int spinand_init_cfg_cache(struct spinand_device *spinand)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	struct device *dev = &spinand->spimem->spi->dev;
+	unsigned int target;
+	int ret;
+
+	spinand->cfg_cache = devm_kcalloc(dev,
+					  nand->memorg.ntargets,
+					  sizeof(*spinand->cfg_cache),
+					  GFP_KERNEL);
+	if (!spinand->cfg_cache)
+		return -ENOMEM;
+
+	for (target = 0; target < nand->memorg.ntargets; target++) {
+		ret = spinand_select_target(spinand, target);
+		if (ret)
+			return ret;
+
+		/*
+		 * We use spinand_read_reg_op() instead of spinand_get_cfg()
+		 * here to bypass the config cache.
+		 */
+		ret = spinand_read_reg_op(spinand, REG_CFG,
+					  &spinand->cfg_cache[target]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int spinand_init_quad_enable(struct spinand_device *spinand)
+{
+	bool enable = false;
+
+	if (!(spinand->flags & SPINAND_HAS_QE_BIT))
+		return 0;
+
+	if (spinand->op_templates.read_cache->data.buswidth == 4 ||
+	    spinand->op_templates.write_cache->data.buswidth == 4 ||
+	    spinand->op_templates.update_cache->data.buswidth == 4)
+		enable = true;
+
+	return spinand_upd_cfg(spinand, CFG_QUAD_ENABLE,
+			       enable ? CFG_QUAD_ENABLE : 0);
+}
+
+static int spinand_ecc_enable(struct spinand_device *spinand,
+			      bool enable)
+{
+	return spinand_upd_cfg(spinand, CFG_ECC_ENABLE,
+			       enable ? CFG_ECC_ENABLE : 0);
+}
+
+static int spinand_write_enable_op(struct spinand_device *spinand)
+{
+	struct spi_mem_op op = SPINAND_WR_EN_DIS_OP(true);
+
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int spinand_load_page_op(struct spinand_device *spinand,
+				const struct nand_page_io_req *req)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int row = nanddev_pos_to_row(nand, &req->pos);
+	struct spi_mem_op op = SPINAND_PAGE_READ_OP(row);
+
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int spinand_read_from_cache_op(struct spinand_device *spinand,
+				      const struct nand_page_io_req *req)
+{
+	struct spi_mem_op op = *spinand->op_templates.read_cache;
+	struct nand_device *nand = spinand_to_nand(spinand);
+	struct mtd_info *mtd = nanddev_to_mtd(nand);
+	struct nand_page_io_req adjreq = *req;
+	unsigned int nbytes = 0;
+	void *buf = NULL;
+	u16 column = 0;
+	int ret;
+
+	if (req->datalen) {
+		adjreq.datalen = nanddev_page_size(nand);
+		adjreq.dataoffs = 0;
+		adjreq.databuf.in = spinand->databuf;
+		buf = spinand->databuf;
+		nbytes = adjreq.datalen;
+	}
+
+	if (req->ooblen) {
+		adjreq.ooblen = nanddev_per_page_oobsize(nand);
+		adjreq.ooboffs = 0;
+		adjreq.oobbuf.in = spinand->oobbuf;
+		nbytes += nanddev_per_page_oobsize(nand);
+		if (!buf) {
+			buf = spinand->oobbuf;
+			column = nanddev_page_size(nand);
+		}
+	}
+
+	spinand_cache_op_adjust_colum(spinand, &adjreq, &column);
+	op.addr.val = column;
+
+	/*
+	 * Some controllers are limited in term of max RX data size. In this
+	 * case, just repeat the READ_CACHE operation after updating the
+	 * column.
+	 */
+	while (nbytes) {
+		op.data.buf.in = buf;
+		op.data.nbytes = nbytes;
+		ret = spi_mem_adjust_op_size(spinand->spimem, &op);
+		if (ret)
+			return ret;
+
+		ret = spi_mem_exec_op(spinand->spimem, &op);
+		if (ret)
+			return ret;
+
+		buf += op.data.nbytes;
+		nbytes -= op.data.nbytes;
+		op.addr.val += op.data.nbytes;
+	}
+
+	if (req->datalen)
+		memcpy(req->databuf.in, spinand->databuf + req->dataoffs,
+		       req->datalen);
+
+	if (req->ooblen) {
+		if (req->mode == MTD_OPS_AUTO_OOB)
+			mtd_ooblayout_get_databytes(mtd, req->oobbuf.in,
+						    spinand->oobbuf,
+						    req->ooboffs,
+						    req->ooblen);
+		else
+			memcpy(req->oobbuf.in, spinand->oobbuf + req->ooboffs,
+			       req->ooblen);
+	}
+
+	return 0;
+}
+
+static int spinand_write_to_cache_op(struct spinand_device *spinand,
+				     const struct nand_page_io_req *req)
+{
+	struct spi_mem_op op = *spinand->op_templates.write_cache;
+	struct nand_device *nand = spinand_to_nand(spinand);
+	struct mtd_info *mtd = nanddev_to_mtd(nand);
+	struct nand_page_io_req adjreq = *req;
+	unsigned int nbytes = 0;
+	void *buf = NULL;
+	u16 column = 0;
+	int ret;
+
+	memset(spinand->databuf, 0xff,
+	       nanddev_page_size(nand) +
+	       nanddev_per_page_oobsize(nand));
+
+	if (req->datalen) {
+		memcpy(spinand->databuf + req->dataoffs, req->databuf.out,
+		       req->datalen);
+		adjreq.dataoffs = 0;
+		adjreq.datalen = nanddev_page_size(nand);
+		adjreq.databuf.out = spinand->databuf;
+		nbytes = adjreq.datalen;
+		buf = spinand->databuf;
+	}
+
+	if (req->ooblen) {
+		if (req->mode == MTD_OPS_AUTO_OOB)
+			mtd_ooblayout_set_databytes(mtd, req->oobbuf.out,
+						    spinand->oobbuf,
+						    req->ooboffs,
+						    req->ooblen);
+		else
+			memcpy(spinand->oobbuf + req->ooboffs, req->oobbuf.out,
+			       req->ooblen);
+
+		adjreq.ooblen = nanddev_per_page_oobsize(nand);
+		adjreq.ooboffs = 0;
+		nbytes += nanddev_per_page_oobsize(nand);
+		if (!buf) {
+			buf = spinand->oobbuf;
+			column = nanddev_page_size(nand);
+		}
+	}
+
+	spinand_cache_op_adjust_colum(spinand, &adjreq, &column);
+
+	op = *spinand->op_templates.write_cache;
+	op.addr.val = column;
+
+	/*
+	 * Some controllers are limited in term of max TX data size. In this
+	 * case, split the operation into one LOAD CACHE and one or more
+	 * LOAD RANDOM CACHE.
+	 */
+	while (nbytes) {
+		op.data.buf.out = buf;
+		op.data.nbytes = nbytes;
+
+		ret = spi_mem_adjust_op_size(spinand->spimem, &op);
+		if (ret)
+			return ret;
+
+		ret = spi_mem_exec_op(spinand->spimem, &op);
+		if (ret)
+			return ret;
+
+		buf += op.data.nbytes;
+		nbytes -= op.data.nbytes;
+		op.addr.val += op.data.nbytes;
+
+		/*
+		 * We need to use the RANDOM LOAD CACHE operation if there's
+		 * more than one iteration, because the LOAD operation resets
+		 * the cache to 0xff.
+		 */
+		if (nbytes) {
+			column = op.addr.val;
+			op = *spinand->op_templates.update_cache;
+			op.addr.val = column;
+		}
+	}
+
+	return 0;
+}
+
+static int spinand_program_op(struct spinand_device *spinand,
+			      const struct nand_page_io_req *req)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int row = nanddev_pos_to_row(nand, &req->pos);
+	struct spi_mem_op op = SPINAND_PROG_EXEC_OP(row);
+
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int spinand_erase_op(struct spinand_device *spinand,
+			    const struct nand_pos *pos)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int row = nanddev_pos_to_row(nand, pos);
+	struct spi_mem_op op = SPINAND_BLK_ERASE_OP(row);
+
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int spinand_wait(struct spinand_device *spinand, u8 *s)
+{
+	unsigned long timeo =  jiffies + msecs_to_jiffies(400);
+	u8 status;
+	int ret;
+
+	do {
+		ret = spinand_read_status(spinand, &status);
+		if (ret)
+			return ret;
+
+		if (!(status & STATUS_BUSY))
+			goto out;
+	} while (time_before(jiffies, timeo));
+
+	/*
+	 * Extra read, just in case the STATUS_READY bit has changed
+	 * since our last check
+	 */
+	ret = spinand_read_status(spinand, &status);
+	if (ret)
+		return ret;
+
+out:
+	if (s)
+		*s = status;
+
+	return status & STATUS_BUSY ? -ETIMEDOUT : 0;
+}
+
+static int spinand_read_id_op(struct spinand_device *spinand, u8 *buf)
+{
+	struct spi_mem_op op = SPINAND_READID_OP(0, spinand->scratchbuf,
+						 SPINAND_MAX_ID_LEN);
+	int ret;
+
+	ret = spi_mem_exec_op(spinand->spimem, &op);
+	if (!ret)
+		memcpy(buf, spinand->scratchbuf, SPINAND_MAX_ID_LEN);
+
+	return ret;
+}
+
+static int spinand_reset_op(struct spinand_device *spinand)
+{
+	struct spi_mem_op op = SPINAND_RESET_OP;
+	int ret;
+
+	ret = spi_mem_exec_op(spinand->spimem, &op);
+	if (ret)
+		return ret;
+
+	return spinand_wait(spinand, NULL);
+}
+
+static int spinand_lock_block(struct spinand_device *spinand, u8 lock)
+{
+	return spinand_write_reg_op(spinand, REG_BLOCK_LOCK, lock);
+}
+
+static int spinand_check_ecc_status(struct spinand_device *spinand, u8 status)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+
+	if (spinand->eccinfo.get_status)
+		return spinand->eccinfo.get_status(spinand, status);
+
+	switch (status & STATUS_ECC_MASK) {
+	case STATUS_ECC_NO_BITFLIPS:
+		return 0;
+
+	case STATUS_ECC_HAS_BITFLIPS:
+		/*
+		 * We have no way to know exactly how many bitflips have been
+		 * fixed, so let's return the maximum possible value so that
+		 * wear-leveling layers move the data immediately.
+		 */
+		return nand->eccreq.strength;
+
+	case STATUS_ECC_UNCOR_ERROR:
+		return -EBADMSG;
+
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static int spinand_read_page(struct spinand_device *spinand,
+			     const struct nand_page_io_req *req,
+			     bool ecc_enabled)
+{
+	u8 status;
+	int ret;
+
+	ret = spinand_load_page_op(spinand, req);
+	if (ret)
+		return ret;
+
+	ret = spinand_wait(spinand, &status);
+	if (ret < 0)
+		return ret;
+
+	ret = spinand_read_from_cache_op(spinand, req);
+	if (ret)
+		return ret;
+
+	if (!ecc_enabled)
+		return 0;
+
+	return spinand_check_ecc_status(spinand, status);
+}
+
+static int spinand_write_page(struct spinand_device *spinand,
+			      const struct nand_page_io_req *req)
+{
+	u8 status;
+	int ret;
+
+	ret = spinand_write_enable_op(spinand);
+	if (ret)
+		return ret;
+
+	ret = spinand_write_to_cache_op(spinand, req);
+	if (ret)
+		return ret;
+
+	ret = spinand_program_op(spinand, req);
+	if (ret)
+		return ret;
+
+	ret = spinand_wait(spinand, &status);
+	if (!ret && (status & STATUS_PROG_FAILED))
+		ret = -EIO;
+
+	return ret;
+}
+
+static int spinand_mtd_read(struct mtd_info *mtd, loff_t from,
+			    struct mtd_oob_ops *ops)
+{
+	struct spinand_device *spinand = mtd_to_spinand(mtd);
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	unsigned int max_bitflips = 0;
+	struct nand_io_iter iter;
+	bool enable_ecc = false;
+	bool ecc_failed = false;
+	int ret = 0;
+
+	if (ops->mode != MTD_OPS_RAW && spinand->eccinfo.ooblayout)
+		enable_ecc = true;
+
+	mutex_lock(&spinand->lock);
+
+	nanddev_io_for_each_page(nand, from, ops, &iter) {
+		ret = spinand_select_target(spinand, iter.req.pos.target);
+		if (ret)
+			break;
+
+		ret = spinand_ecc_enable(spinand, enable_ecc);
+		if (ret)
+			break;
+
+		ret = spinand_read_page(spinand, &iter.req, enable_ecc);
+		if (ret < 0 && ret != -EBADMSG)
+			break;
+
+		if (ret == -EBADMSG) {
+			ecc_failed = true;
+			mtd->ecc_stats.failed++;
+			ret = 0;
+		} else {
+			mtd->ecc_stats.corrected += ret;
+			max_bitflips = max_t(unsigned int, max_bitflips, ret);
+		}
+
+		ops->retlen += iter.req.datalen;
+		ops->oobretlen += iter.req.ooblen;
+	}
+
+	mutex_unlock(&spinand->lock);
+
+	if (ecc_failed && !ret)
+		ret = -EBADMSG;
+
+	return ret ? ret : max_bitflips;
+}
+
+static int spinand_mtd_write(struct mtd_info *mtd, loff_t to,
+			     struct mtd_oob_ops *ops)
+{
+	struct spinand_device *spinand = mtd_to_spinand(mtd);
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	struct nand_io_iter iter;
+	bool enable_ecc = false;
+	int ret = 0;
+
+	if (ops->mode != MTD_OPS_RAW && mtd->ooblayout)
+		enable_ecc = true;
+
+	mutex_lock(&spinand->lock);
+
+	nanddev_io_for_each_page(nand, to, ops, &iter) {
+		ret = spinand_select_target(spinand, iter.req.pos.target);
+		if (ret)
+			break;
+
+		ret = spinand_ecc_enable(spinand, enable_ecc);
+		if (ret)
+			break;
+
+		ret = spinand_write_page(spinand, &iter.req);
+		if (ret)
+			break;
+
+		ops->retlen += iter.req.datalen;
+		ops->oobretlen += iter.req.ooblen;
+	}
+
+	mutex_unlock(&spinand->lock);
+
+	return ret;
+}
+
+static bool spinand_isbad(struct nand_device *nand, const struct nand_pos *pos)
+{
+	struct spinand_device *spinand = nand_to_spinand(nand);
+	struct nand_page_io_req req = {
+		.pos = *pos,
+		.ooblen = 2,
+		.ooboffs = 0,
+		.oobbuf.in = spinand->oobbuf,
+		.mode = MTD_OPS_RAW,
+	};
+
+	memset(spinand->oobbuf, 0, 2);
+	spinand_select_target(spinand, pos->target);
+	spinand_read_page(spinand, &req, false);
+	if (spinand->oobbuf[0] != 0xff || spinand->oobbuf[1] != 0xff)
+		return true;
+
+	return false;
+}
+
+static int spinand_mtd_block_isbad(struct mtd_info *mtd, loff_t offs)
+{
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	struct spinand_device *spinand = nand_to_spinand(nand);
+	struct nand_pos pos;
+	int ret;
+
+	nanddev_offs_to_pos(nand, offs, &pos);
+	mutex_lock(&spinand->lock);
+	ret = nanddev_isbad(nand, &pos);
+	mutex_unlock(&spinand->lock);
+
+	return ret;
+}
+
+static int spinand_markbad(struct nand_device *nand, const struct nand_pos *pos)
+{
+	struct spinand_device *spinand = nand_to_spinand(nand);
+	struct nand_page_io_req req = {
+		.pos = *pos,
+		.ooboffs = 0,
+		.ooblen = 2,
+		.oobbuf.out = spinand->oobbuf,
+	};
+	int ret;
+
+	/* Erase block before marking it bad. */
+	ret = spinand_select_target(spinand, pos->target);
+	if (ret)
+		return ret;
+
+	ret = spinand_write_enable_op(spinand);
+	if (ret)
+		return ret;
+
+	spinand_erase_op(spinand, pos);
+
+	memset(spinand->oobbuf, 0, 2);
+	return spinand_write_page(spinand, &req);
+}
+
+static int spinand_mtd_block_markbad(struct mtd_info *mtd, loff_t offs)
+{
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	struct spinand_device *spinand = nand_to_spinand(nand);
+	struct nand_pos pos;
+	int ret;
+
+	nanddev_offs_to_pos(nand, offs, &pos);
+	mutex_lock(&spinand->lock);
+	ret = nanddev_markbad(nand, &pos);
+	mutex_unlock(&spinand->lock);
+
+	return ret;
+}
+
+static int spinand_erase(struct nand_device *nand, const struct nand_pos *pos)
+{
+	struct spinand_device *spinand = nand_to_spinand(nand);
+	u8 status;
+	int ret;
+
+	ret = spinand_select_target(spinand, pos->target);
+	if (ret)
+		return ret;
+
+	ret = spinand_write_enable_op(spinand);
+	if (ret)
+		return ret;
+
+	ret = spinand_erase_op(spinand, pos);
+	if (ret)
+		return ret;
+
+	ret = spinand_wait(spinand, &status);
+	if (!ret && (status & STATUS_ERASE_FAILED))
+		ret = -EIO;
+
+	return ret;
+}
+
+static int spinand_mtd_erase(struct mtd_info *mtd,
+			     struct erase_info *einfo)
+{
+	struct spinand_device *spinand = mtd_to_spinand(mtd);
+	int ret;
+
+	mutex_lock(&spinand->lock);
+	ret = nanddev_mtd_erase(mtd, einfo);
+	mutex_unlock(&spinand->lock);
+
+	return ret;
+}
+
+static int spinand_mtd_block_isreserved(struct mtd_info *mtd, loff_t offs)
+{
+	struct spinand_device *spinand = mtd_to_spinand(mtd);
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	struct nand_pos pos;
+	int ret;
+
+	nanddev_offs_to_pos(nand, offs, &pos);
+	mutex_lock(&spinand->lock);
+	ret = nanddev_isreserved(nand, &pos);
+	mutex_unlock(&spinand->lock);
+
+	return ret;
+}
+
+static const struct nand_ops spinand_ops = {
+	.erase = spinand_erase,
+	.markbad = spinand_markbad,
+	.isbad = spinand_isbad,
+};
+
+static int spinand_manufacturer_detect(struct spinand_device *spinand)
+{
+	return -ENOTSUPP;
+}
+
+static int spinand_manufacturer_init(struct spinand_device *spinand)
+{
+	if (spinand->manufacturer->ops->init)
+		return spinand->manufacturer->ops->init(spinand);
+
+	return 0;
+}
+
+static void spinand_manufacturer_cleanup(struct spinand_device *spinand)
+{
+	/* Release manufacturer private data */
+	if (spinand->manufacturer->ops->cleanup)
+		return spinand->manufacturer->ops->cleanup(spinand);
+}
+
+static const struct spi_mem_op *
+spinand_select_op_variant(struct spinand_device *spinand,
+			  const struct spinand_op_variants *variants)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int i;
+
+	for (i = 0; i < variants->nops; i++) {
+		struct spi_mem_op op = variants->ops[i];
+		unsigned int nbytes;
+		int ret;
+
+		nbytes = nanddev_per_page_oobsize(nand) +
+			 nanddev_page_size(nand);
+
+		while (nbytes) {
+			op.data.nbytes = nbytes;
+			ret = spi_mem_adjust_op_size(spinand->spimem, &op);
+			if (ret)
+				break;
+
+			if (!spi_mem_supports_op(spinand->spimem, &op))
+				break;
+
+			nbytes -= op.data.nbytes;
+		}
+
+		if (!nbytes)
+			return &variants->ops[i];
+	}
+
+	return NULL;
+}
+
+/**
+ * spinand_match_and_init() - Try to find a match between a device ID and an
+ *			      entry in a spinand_info table
+ * @spinand: SPI NAND object
+ * @table: SPI NAND device description table
+ * @table_size: size of the device description table
+ *
+ * Should be used by SPI NAND manufacturer drivers when they want to find a
+ * match between a device ID retrieved through the READ_ID command and an
+ * entry in the SPI NAND description table. If a match is found, the spinand
+ * object will be initialized with information provided by the matching
+ * spinand_info entry.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int spinand_match_and_init(struct spinand_device *spinand,
+			   const struct spinand_info *table,
+			   unsigned int table_size, u8 devid)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int i;
+
+	for (i = 0; i < table_size; i++) {
+		const struct spinand_info *info = &table[i];
+		const struct spi_mem_op *op;
+
+		if (devid != info->devid)
+			continue;
+
+		nand->memorg = table[i].memorg;
+		nand->eccreq = table[i].eccreq;
+		spinand->eccinfo = table[i].eccinfo;
+		spinand->flags = table[i].flags;
+		spinand->select_target = table[i].select_target;
+
+		op = spinand_select_op_variant(spinand,
+					       info->op_variants.read_cache);
+		if (!op)
+			return -ENOTSUPP;
+
+		spinand->op_templates.read_cache = op;
+
+		op = spinand_select_op_variant(spinand,
+					       info->op_variants.write_cache);
+		if (!op)
+			return -ENOTSUPP;
+
+		spinand->op_templates.write_cache = op;
+
+		op = spinand_select_op_variant(spinand,
+					       info->op_variants.update_cache);
+		spinand->op_templates.update_cache = op;
+
+		return 0;
+	}
+
+	return -ENOTSUPP;
+}
+
+static int spinand_detect(struct spinand_device *spinand)
+{
+	struct device *dev = &spinand->spimem->spi->dev;
+	struct nand_device *nand = spinand_to_nand(spinand);
+	int ret;
+
+	ret = spinand_reset_op(spinand);
+	if (ret)
+		return ret;
+
+	ret = spinand_read_id_op(spinand, spinand->id.data);
+	if (ret)
+		return ret;
+
+	spinand->id.len = SPINAND_MAX_ID_LEN;
+
+	ret = spinand_manufacturer_detect(spinand);
+	if (ret) {
+		dev_err(dev, "unknown raw ID %*phN\n", SPINAND_MAX_ID_LEN,
+			spinand->id.data);
+		return ret;
+	}
+
+	if (nand->memorg.ntargets > 1 && !spinand->select_target) {
+		dev_err(dev,
+			"SPI NANDs with more than one die must implement ->select_target()\n");
+		return -EINVAL;
+	}
+
+	dev_info(&spinand->spimem->spi->dev,
+		 "%s SPI NAND was found.\n", spinand->manufacturer->name);
+	dev_info(&spinand->spimem->spi->dev,
+		 "%llu MiB, block size: %zu KiB, page size: %zu, OOB size: %u\n",
+		 nanddev_size(nand) >> 20, nanddev_eraseblock_size(nand) >> 10,
+		 nanddev_page_size(nand), nanddev_per_page_oobsize(nand));
+
+	return 0;
+}
+
+static int spinand_noecc_ooblayout_ecc(struct mtd_info *mtd, int section,
+				       struct mtd_oob_region *region)
+{
+	return -ERANGE;
+}
+
+static int spinand_noecc_ooblayout_free(struct mtd_info *mtd, int section,
+					struct mtd_oob_region *region)
+{
+	if (section)
+		return -ERANGE;
+
+	/* Reserve 2 bytes for the BBM. */
+	region->offset = 2;
+	region->length = 62;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops spinand_noecc_ooblayout = {
+	.ecc = spinand_noecc_ooblayout_ecc,
+	.free = spinand_noecc_ooblayout_free,
+};
+
+static int spinand_init(struct spinand_device *spinand)
+{
+	struct device *dev = &spinand->spimem->spi->dev;
+	struct mtd_info *mtd = spinand_to_mtd(spinand);
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	int ret, i;
+
+	/*
+	 * We need a scratch buffer because the spi_mem interface requires that
+	 * buf passed in spi_mem_op->data.buf be DMA-able.
+	 */
+	spinand->scratchbuf = kzalloc(SPINAND_MAX_ID_LEN, GFP_KERNEL);
+	if (!spinand->scratchbuf)
+		return -ENOMEM;
+
+	ret = spinand_detect(spinand);
+	if (ret)
+		goto err_free_bufs;
+
+	/*
+	 * Use kzalloc() instead of devm_kzalloc() here, because some drivers
+	 * may use this buffer for DMA access.
+	 * Memory allocated by devm_ does not guarantee DMA-safe alignment.
+	 */
+	spinand->databuf = kzalloc(nanddev_page_size(nand) +
+			       nanddev_per_page_oobsize(nand),
+			       GFP_KERNEL);
+	if (!spinand->databuf) {
+		ret = -ENOMEM;
+		goto err_free_bufs;
+	}
+
+	spinand->oobbuf = spinand->databuf + nanddev_page_size(nand);
+
+	ret = spinand_init_cfg_cache(spinand);
+	if (ret)
+		goto err_free_bufs;
+
+	ret = spinand_init_quad_enable(spinand);
+	if (ret)
+		goto err_free_bufs;
+
+	ret = spinand_upd_cfg(spinand, CFG_OTP_ENABLE, 0);
+	if (ret)
+		goto err_free_bufs;
+
+	ret = spinand_manufacturer_init(spinand);
+	if (ret) {
+		dev_err(dev,
+			"Failed to initialize the SPI NAND chip (err = %d)\n",
+			ret);
+		goto err_free_bufs;
+	}
+
+	/* After power up, all blocks are locked, so unlock them here. */
+	for (i = 0; i < nand->memorg.ntargets; i++) {
+		ret = spinand_select_target(spinand, i);
+		if (ret)
+			goto err_free_bufs;
+
+		ret = spinand_lock_block(spinand, BL_ALL_UNLOCKED);
+		if (ret)
+			goto err_free_bufs;
+	}
+
+	ret = nanddev_init(nand, &spinand_ops, THIS_MODULE);
+	if (ret)
+		goto err_manuf_cleanup;
+
+	/*
+	 * Right now, we don't support ECC, so let the whole oob
+	 * area is available for user.
+	 */
+	mtd->_read_oob = spinand_mtd_read;
+	mtd->_write_oob = spinand_mtd_write;
+	mtd->_block_isbad = spinand_mtd_block_isbad;
+	mtd->_block_markbad = spinand_mtd_block_markbad;
+	mtd->_block_isreserved = spinand_mtd_block_isreserved;
+	mtd->_erase = spinand_mtd_erase;
+
+	if (spinand->eccinfo.ooblayout)
+		mtd_set_ooblayout(mtd, spinand->eccinfo.ooblayout);
+	else
+		mtd_set_ooblayout(mtd, &spinand_noecc_ooblayout);
+
+	ret = mtd_ooblayout_count_freebytes(mtd);
+	if (ret < 0)
+		goto err_cleanup_nanddev;
+
+	mtd->oobavail = ret;
+
+	return 0;
+
+err_cleanup_nanddev:
+	nanddev_cleanup(nand);
+
+err_manuf_cleanup:
+	spinand_manufacturer_cleanup(spinand);
+
+err_free_bufs:
+	kfree(spinand->databuf);
+	kfree(spinand->scratchbuf);
+	return ret;
+}
+
+static void spinand_cleanup(struct spinand_device *spinand)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+
+	nanddev_cleanup(nand);
+	spinand_manufacturer_cleanup(spinand);
+	kfree(spinand->databuf);
+	kfree(spinand->scratchbuf);
+}
+
+static int spinand_probe(struct spi_mem *mem)
+{
+	struct spinand_device *spinand;
+	struct mtd_info *mtd;
+	int ret;
+
+	spinand = devm_kzalloc(&mem->spi->dev, sizeof(*spinand),
+			       GFP_KERNEL);
+	if (!spinand)
+		return -ENOMEM;
+
+	spinand->spimem = mem;
+	spi_mem_set_drvdata(mem, spinand);
+	spinand_set_of_node(spinand, mem->spi->dev.of_node);
+	mutex_init(&spinand->lock);
+	mtd = spinand_to_mtd(spinand);
+	mtd->dev.parent = &mem->spi->dev;
+
+	ret = spinand_init(spinand);
+	if (ret)
+		return ret;
+
+	ret = mtd_device_register(mtd, NULL, 0);
+	if (ret)
+		goto err_spinand_cleanup;
+
+	return 0;
+
+err_spinand_cleanup:
+	spinand_cleanup(spinand);
+
+	return ret;
+}
+
+static int spinand_remove(struct spi_mem *mem)
+{
+	struct spinand_device *spinand;
+	struct mtd_info *mtd;
+	int ret;
+
+	spinand = spi_mem_get_drvdata(mem);
+	mtd = spinand_to_mtd(spinand);
+
+	ret = mtd_device_unregister(mtd);
+	if (ret)
+		return ret;
+
+	spinand_cleanup(spinand);
+
+	return 0;
+}
+
+static const struct spi_device_id spinand_ids[] = {
+	{ .name = "spi-nand" },
+	{ /* sentinel */ },
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id spinand_of_ids[] = {
+	{ .compatible = "spi-nand" },
+	{ /* sentinel */ },
+};
+#endif
+
+static struct spi_mem_driver spinand_drv = {
+	.spidrv = {
+		.id_table = spinand_ids,
+		.driver = {
+			.name = "spi-nand",
+			.of_match_table = of_match_ptr(spinand_of_ids),
+		},
+	},
+	.probe = spinand_probe,
+	.remove = spinand_remove,
+};
+module_spi_mem_driver(spinand_drv);
+
+MODULE_DESCRIPTION("SPI NAND framework");
+MODULE_AUTHOR("Peter Pan<peterpandong@micron.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
new file mode 100644
index 000000000000..d3efe62dc9de
--- /dev/null
+++ b/include/linux/mtd/spinand.h
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2016-2017 Micron Technology, Inc.
+ *
+ *  Authors:
+ *	Peter Pan <peterpandong@micron.com>
+ */
+#ifndef __LINUX_MTD_SPINAND_H
+#define __LINUX_MTD_SPINAND_H
+
+#include <linux/mutex.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+/**
+ * Standard SPI NAND flash operations
+ */
+
+#define SPINAND_RESET_OP						\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xff, 1),				\
+		   SPI_MEM_OP_NO_ADDR,					\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_NO_DATA)
+
+#define SPINAND_WR_EN_DIS_OP(enable)					\
+	SPI_MEM_OP(SPI_MEM_OP_CMD((enable) ? 0x06 : 0x04, 1),		\
+		   SPI_MEM_OP_NO_ADDR,					\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_NO_DATA)
+
+#define SPINAND_READID_OP(ndummy, buf, len)				\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1),				\
+		   SPI_MEM_OP_NO_ADDR,					\
+		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 1))
+
+#define SPINAND_SET_FEATURE_OP(reg, valptr)				\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x1f, 1),				\
+		   SPI_MEM_OP_ADDR(1, reg, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_DATA_OUT(1, valptr, 1))
+
+#define SPINAND_GET_FEATURE_OP(reg, valptr)				\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x0f, 1),				\
+		   SPI_MEM_OP_ADDR(1, reg, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_DATA_IN(1, valptr, 1))
+
+#define SPINAND_BLK_ERASE_OP(addr)					\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xd8, 1),				\
+		   SPI_MEM_OP_ADDR(3, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_NO_DATA)
+
+#define SPINAND_PAGE_READ_OP(addr)					\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x13, 1),				\
+		   SPI_MEM_OP_ADDR(3, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_NO_DATA)
+
+#define SPINAND_PAGE_READ_FROM_CACHE_OP(fast, addr, ndummy, buf, len)	\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(fast ? 0x0b : 0x03, 1),		\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 1))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP(addr, ndummy, buf, len)	\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 2))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP(addr, ndummy, buf, len)	\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 4))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(addr, ndummy, buf, len)	\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 2),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 2),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 2))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(addr, ndummy, buf, len)	\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 4),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 4),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 4))
+
+#define SPINAND_PROG_EXEC_OP(addr)					\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x10, 1),				\
+		   SPI_MEM_OP_ADDR(3, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_NO_DATA)
+
+#define SPINAND_PROG_LOAD(reset, addr, buf, len)			\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x02 : 0x84, 1),		\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_DATA_OUT(len, buf, 1))
+
+#define SPINAND_PROG_LOAD_X4(reset, addr, buf, len)			\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x32 : 0x34, 1),		\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_DATA_OUT(len, buf, 4))
+
+/**
+ * Standard SPI NAND flash commands
+ */
+#define SPINAND_CMD_PROG_LOAD_X4		0x32
+#define SPINAND_CMD_PROG_LOAD_RDM_DATA_X4	0x34
+
+/* feature register */
+#define REG_BLOCK_LOCK		0xa0
+#define BL_ALL_UNLOCKED		0x00
+
+/* configuration register */
+#define REG_CFG			0xb0
+#define CFG_OTP_ENABLE		BIT(6)
+#define CFG_ECC_ENABLE		BIT(4)
+#define CFG_QUAD_ENABLE		BIT(0)
+
+/* status register */
+#define REG_STATUS		0xc0
+#define STATUS_BUSY		BIT(0)
+#define STATUS_ERASE_FAILED	BIT(2)
+#define STATUS_PROG_FAILED	BIT(3)
+#define STATUS_ECC_MASK		GENMASK(5, 4)
+#define STATUS_ECC_NO_BITFLIPS	(0 << 4)
+#define STATUS_ECC_HAS_BITFLIPS	(1 << 4)
+#define STATUS_ECC_UNCOR_ERROR	(2 << 4)
+
+struct spinand_op;
+struct spinand_device;
+
+#define SPINAND_MAX_ID_LEN	4
+
+/**
+ * struct spinand_id - SPI NAND id structure
+ * @data: buffer containing the id bytes. Currently 4 bytes large, but can
+ *	  be extended if required
+ * @len: ID length
+ *
+ * struct_spinand_id->data contains all bytes returned after a READ_ID command,
+ * including dummy bytes if the chip does not emit ID bytes right after the
+ * READ_ID command. The responsibility to extract real ID bytes is left to
+ * struct_manufacurer_ops->detect().
+ */
+struct spinand_id {
+	u8 data[SPINAND_MAX_ID_LEN];
+	int len;
+};
+
+/**
+ * struct manufacurer_ops - SPI NAND manufacturer specific operations
+ * @detect: detect a SPI NAND device. Every time a SPI NAND device is probed
+ *	    the core calls the struct_manufacurer_ops->detect() hook of each
+ *	    registered manufacturer until one of them return 1. Note that
+ *	    the first thing to check in this hook is that the manufacturer ID
+ *	    in struct_spinand_device->id matches the manufacturer whose
+ *	    ->detect() hook has been called. Should return 1 if there's a
+ *	    match, 0 if the manufacturer ID does not match and a negative
+ *	    error code otherwise. When true is returned, the core assumes
+ *	    that properties of the NAND chip (spinand->base.memorg and
+ *	    spinand->base.eccreq) have been filled
+ * @init: initialize a SPI NAND device
+ * @cleanup: cleanup a SPI NAND device
+ *
+ * Each SPI NAND manufacturer driver should implement this interface so that
+ * NAND chips coming from this vendor can be detected and initialized properly.
+ */
+struct spinand_manufacturer_ops {
+	int (*detect)(struct spinand_device *spinand);
+	int (*init)(struct spinand_device *spinand);
+	void (*cleanup)(struct spinand_device *spinand);
+};
+
+/**
+ * struct spinand_manufacturer - SPI NAND manufacturer instance
+ * @id: manufacturer ID
+ * @name: manufacturer name
+ * @ops: manufacturer operations
+ */
+struct spinand_manufacturer {
+	u8 id;
+	char *name;
+	const struct spinand_manufacturer_ops *ops;
+};
+
+/**
+ * struct spinand_op_variants - SPI NAND operation variants
+ * @ops: the list of variants for a given operation
+ * @nops: the number of variants
+ *
+ * Some operations like read-from-cache/write-to-cache have several variants
+ * depending on the number of IO lines you use to transfer data or address
+ * cycles. This structure is a way to describe the different variants supported
+ * by a chip and let the core pick the best one based on the SPI mem controller
+ * capabilities.
+ */
+struct spinand_op_variants {
+	const struct spi_mem_op *ops;
+	unsigned int nops;
+};
+
+#define SPINAND_OP_VARIANTS(name, ...)					\
+	const struct spinand_op_variants name = {			\
+		.ops = (struct spi_mem_op[]) { __VA_ARGS__ },		\
+		.nops = sizeof((struct spi_mem_op[]){ __VA_ARGS__ }) /	\
+			sizeof(struct spi_mem_op),			\
+	}
+
+/**
+ * spinand_ecc_info - description of the on-die ECC implemented by a SPI NAND
+ *		      chip
+ * @get_status: get the ECC status. Should return a positive number encoding
+ *		the number of corrected bitflips if correction was possible or
+ *		-EBADMSG if there are uncorrectable errors. I can also return
+ *		other negative error codes if the error is not caused by
+ *		uncorrectable bitflips
+ * @ooblayout: the OOB layout used by the on-die ECC implementation
+ */
+struct spinand_ecc_info {
+	int (*get_status)(struct spinand_device *spinand, u8 status);
+	const struct mtd_ooblayout_ops *ooblayout;
+};
+
+#define SPINAND_HAS_QE_BIT		BIT(0)
+
+/**
+ * struct spinand_info - Structure used to describe SPI NAND chips
+ * @model: model name
+ * @devid: device ID
+ * @flags: OR-ing of the SPINAND_XXX flags
+ * @memorg: memory organization
+ * @eccreq: ECC requirements
+ * @eccinfo: on-die ECC info
+ * @op_variants: operations variants
+ * @op_variants.read_cache: variants of the read-cache operation
+ * @op_variants.write_cache: variants of the write-cache operation
+ * @op_variants.update_cache: variants of the update-cache operation
+ * @select_target: function used to select a target/die. Required only for
+ *		   multi-die chips
+ *
+ * Each SPI NAND manufacturer driver should have a spinand_info table
+ * describing all the chips supported by the driver.
+ */
+struct spinand_info {
+	const char *model;
+	u8 devid;
+	u32 flags;
+	struct nand_memory_organization memorg;
+	struct nand_ecc_req eccreq;
+	struct spinand_ecc_info eccinfo;
+	struct {
+		const struct spinand_op_variants *read_cache;
+		const struct spinand_op_variants *write_cache;
+		const struct spinand_op_variants *update_cache;
+	} op_variants;
+	int (*select_target)(struct spinand_device *spinand,
+			     unsigned int target);
+};
+
+#define SPINAND_INFO_OP_VARIANTS(__read, __write, __update)		\
+	{								\
+		.read_cache = __read,					\
+		.write_cache = __write,					\
+		.update_cache = __update,				\
+	}
+
+#define SPINAND_ECCINFO(__ooblayout, __get_status)			\
+	.eccinfo = {							\
+		.ooblayout = __ooblayout,				\
+		.get_status = __get_status,				\
+	}
+
+#define SPINAND_SELECT_TARGET(__func)					\
+	.select_target = __func,
+
+#define SPINAND_INFO(__model, __id, __memorg, __eccreq, __op_variants,	\
+		     __flags, ...)					\
+	{								\
+		.model = __model,					\
+		.devid = __id,						\
+		.memorg = __memorg,					\
+		.eccreq = __eccreq,					\
+		.op_variants = __op_variants,				\
+		.flags = __flags,					\
+		__VA_ARGS__						\
+	}
+
+/**
+ * struct spinand_device - SPI NAND device instance
+ * @base: NAND device instance
+ * @spimem: pointer to the SPI mem object
+ * @lock: lock used to serialize accesses to the NAND
+ * @id: NAND ID as returned by READ_ID
+ * @flags: NAND flags
+ * @op_templates: various SPI mem op templates
+ * @op_templates.read_cache: read cache op template
+ * @op_templates.write_cache: write cache op template
+ * @op_templates.update_cache: update cache op template
+ * @select_target: select a specific target/die. Usually called before sending
+ *		   a command addressing a page or an eraseblock embedded in
+ *		   this die. Only required if your chip exposes several dies
+ * @cur_target: currently selected target/die
+ * @eccinfo: on-die ECC information
+ * @cfg_cache: config register cache. One entry per die
+ * @databuf: bounce buffer for data
+ * @oobbuf: bounce buffer for OOB data
+ * @scratchbuf: buffer used for everything but page accesses. This is needed
+ *		because the spi-mem interface explicitly requests that buffers
+ *		passed in spi_mem_op be DMA-able, so we can't based the bufs on
+ *		the stack
+ * @manufacturer: SPI NAND manufacturer information
+ * @priv: manufacturer private data
+ */
+struct spinand_device {
+	struct nand_device base;
+	struct spi_mem *spimem;
+	struct mutex lock;
+	struct spinand_id id;
+	u32 flags;
+
+	struct {
+		const struct spi_mem_op *read_cache;
+		const struct spi_mem_op *write_cache;
+		const struct spi_mem_op *update_cache;
+	} op_templates;
+
+	int (*select_target)(struct spinand_device *spinand,
+			     unsigned int target);
+	unsigned int cur_target;
+
+	struct spinand_ecc_info eccinfo;
+
+	u8 *cfg_cache;
+	u8 *databuf;
+	u8 *oobbuf;
+	u8 *scratchbuf;
+	const struct spinand_manufacturer *manufacturer;
+	void *priv;
+};
+
+/**
+ * mtd_to_spinand() - Get the SPI NAND device attached to an MTD instance
+ * @mtd: MTD instance
+ *
+ * Return: the SPI NAND device attached to @mtd.
+ */
+static inline struct spinand_device *mtd_to_spinand(struct mtd_info *mtd)
+{
+	return container_of(mtd_to_nanddev(mtd), struct spinand_device, base);
+}
+
+/**
+ * spinand_to_mtd() - Get the MTD device embedded in a SPI NAND device
+ * @spinand: SPI NAND device
+ *
+ * Return: the MTD device embedded in @spinand.
+ */
+static inline struct mtd_info *spinand_to_mtd(struct spinand_device *spinand)
+{
+	return nanddev_to_mtd(&spinand->base);
+}
+
+/**
+ * nand_to_spinand() - Get the SPI NAND device embedding an NAND object
+ * @nand: NAND object
+ *
+ * Return: the SPI NAND device embedding @nand.
+ */
+static inline struct spinand_device *nand_to_spinand(struct nand_device *nand)
+{
+	return container_of(nand, struct spinand_device, base);
+}
+
+/**
+ * spinand_to_nand() - Get the NAND device embedded in a SPI NAND object
+ * @spinand: SPI NAND device
+ *
+ * Return: the NAND device embedded in @spinand.
+ */
+static inline struct nand_device *
+spinand_to_nand(struct spinand_device *spinand)
+{
+	return &spinand->base;
+}
+
+/**
+ * spinand_set_of_node - Attach a DT node to a SPI NAND device
+ * @spinand: SPI NAND device
+ * @np: DT node
+ *
+ * Attach a DT node to a SPI NAND device.
+ */
+static inline void spinand_set_of_node(struct spinand_device *spinand,
+				       struct device_node *np)
+{
+	nanddev_set_of_node(&spinand->base, np);
+}
+
+int spinand_match_and_init(struct spinand_device *dev,
+			   const struct spinand_info *table,
+			   unsigned int table_size, u8 devid);
+
+int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val);
+int spinand_select_target(struct spinand_device *spinand, unsigned int target);
+
+#endif /* __LINUX_MTD_SPINAND_H */
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index bb4bd15ae1f6..4fa34a227a0f 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -3,7 +3,9 @@
  * Copyright (C) 2018 Exceet Electronics GmbH
  * Copyright (C) 2018 Bootlin
  *
- * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ * Author:
+ *	Peter Pan <peterpandong@micron.com>
+ *	Boris Brezillon <boris.brezillon@bootlin.com>
  */
 
 #ifndef __LINUX_SPI_MEM_H
-- 
cgit v1.2.3


From a508e8875e135d7a1df26d8131b5443cb07005ff Mon Sep 17 00:00:00 2001
From: Peter Pan <peterpandong@micron.com>
Date: Fri, 22 Jun 2018 14:28:25 +0200
Subject: mtd: spinand: Add initial support for Micron MT29F2G01ABAGD

Add a basic driver for Micron SPI NANDs. Only one device is supported
right now, but the driver will be extended to support more devices
afterwards.

Signed-off-by: Peter Pan <peterpandong@micron.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/spi/Makefile |   2 +-
 drivers/mtd/nand/spi/core.c   |  17 ++++++
 drivers/mtd/nand/spi/micron.c | 133 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spinand.h   |   3 +
 4 files changed, 154 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/nand/spi/micron.c

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile
index 2c473b765027..a1df25398f20 100644
--- a/drivers/mtd/nand/spi/Makefile
+++ b/drivers/mtd/nand/spi/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-spinand-objs := core.o
+spinand-objs := core.o micron.o
 obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 4efbeb0d85b4..9cc502d7e745 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -763,8 +763,25 @@ static const struct nand_ops spinand_ops = {
 	.isbad = spinand_isbad,
 };
 
+static const struct spinand_manufacturer *spinand_manufacturers[] = {
+	&micron_spinand_manufacturer,
+};
+
 static int spinand_manufacturer_detect(struct spinand_device *spinand)
 {
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(spinand_manufacturers); i++) {
+		ret = spinand_manufacturers[i]->ops->detect(spinand);
+		if (ret > 0) {
+			spinand->manufacturer = spinand_manufacturers[i];
+			return 0;
+		} else if (ret < 0) {
+			return ret;
+		}
+	}
+
 	return -ENOTSUPP;
 }
 
diff --git a/drivers/mtd/nand/spi/micron.c b/drivers/mtd/nand/spi/micron.c
new file mode 100644
index 000000000000..9c4381d6847b
--- /dev/null
+++ b/drivers/mtd/nand/spi/micron.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2016-2017 Micron Technology, Inc.
+ *
+ * Authors:
+ *	Peter Pan <peterpandong@micron.com>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mtd/spinand.h>
+
+#define SPINAND_MFR_MICRON		0x2c
+
+#define MICRON_STATUS_ECC_MASK		GENMASK(7, 4)
+#define MICRON_STATUS_ECC_NO_BITFLIPS	(0 << 4)
+#define MICRON_STATUS_ECC_1TO3_BITFLIPS	(1 << 4)
+#define MICRON_STATUS_ECC_4TO6_BITFLIPS	(3 << 4)
+#define MICRON_STATUS_ECC_7TO8_BITFLIPS	(5 << 4)
+
+static SPINAND_OP_VARIANTS(read_cache_variants,
+		SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+
+static SPINAND_OP_VARIANTS(write_cache_variants,
+		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
+		SPINAND_PROG_LOAD(true, 0, NULL, 0));
+
+static SPINAND_OP_VARIANTS(update_cache_variants,
+		SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
+		SPINAND_PROG_LOAD(false, 0, NULL, 0));
+
+static int mt29f2g01abagd_ooblayout_ecc(struct mtd_info *mtd, int section,
+					struct mtd_oob_region *region)
+{
+	if (section)
+		return -ERANGE;
+
+	region->offset = 64;
+	region->length = 64;
+
+	return 0;
+}
+
+static int mt29f2g01abagd_ooblayout_free(struct mtd_info *mtd, int section,
+					 struct mtd_oob_region *region)
+{
+	if (section)
+		return -ERANGE;
+
+	/* Reserve 2 bytes for the BBM. */
+	region->offset = 2;
+	region->length = 62;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops mt29f2g01abagd_ooblayout = {
+	.ecc = mt29f2g01abagd_ooblayout_ecc,
+	.free = mt29f2g01abagd_ooblayout_free,
+};
+
+static int mt29f2g01abagd_ecc_get_status(struct spinand_device *spinand,
+					 u8 status)
+{
+	switch (status & MICRON_STATUS_ECC_MASK) {
+	case STATUS_ECC_NO_BITFLIPS:
+		return 0;
+
+	case STATUS_ECC_UNCOR_ERROR:
+		return -EBADMSG;
+
+	case MICRON_STATUS_ECC_1TO3_BITFLIPS:
+		return 3;
+
+	case MICRON_STATUS_ECC_4TO6_BITFLIPS:
+		return 6;
+
+	case MICRON_STATUS_ECC_7TO8_BITFLIPS:
+		return 8;
+
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static const struct spinand_info micron_spinand_table[] = {
+	SPINAND_INFO("MT29F2G01ABAGD", 0x24,
+		     NAND_MEMORG(1, 2048, 128, 64, 2048, 2, 1, 1),
+		     NAND_ECCREQ(8, 512),
+		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+					      &write_cache_variants,
+					      &update_cache_variants),
+		     0,
+		     SPINAND_ECCINFO(&mt29f2g01abagd_ooblayout,
+				     mt29f2g01abagd_ecc_get_status)),
+};
+
+static int micron_spinand_detect(struct spinand_device *spinand)
+{
+	u8 *id = spinand->id.data;
+	int ret;
+
+	/*
+	 * Micron SPI NAND read ID need a dummy byte,
+	 * so the first byte in raw_id is dummy.
+	 */
+	if (id[1] != SPINAND_MFR_MICRON)
+		return 0;
+
+	ret = spinand_match_and_init(spinand, micron_spinand_table,
+				     ARRAY_SIZE(micron_spinand_table), id[2]);
+	if (ret)
+		return ret;
+
+	return 1;
+}
+
+static const struct spinand_manufacturer_ops micron_spinand_manuf_ops = {
+	.detect = micron_spinand_detect,
+};
+
+const struct spinand_manufacturer micron_spinand_manufacturer = {
+	.id = SPINAND_MFR_MICRON,
+	.name = "Micron",
+	.ops = &micron_spinand_manuf_ops,
+};
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index d3efe62dc9de..717b272940f1 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -193,6 +193,9 @@ struct spinand_manufacturer {
 	const struct spinand_manufacturer_ops *ops;
 };
 
+/* SPI NAND manufacturers */
+extern const struct spinand_manufacturer micron_spinand_manufacturer;
+
 /**
  * struct spinand_op_variants - SPI NAND operation variants
  * @ops: the list of variants for a given operation
-- 
cgit v1.2.3


From 1075492bb9e26312bc8ddeec1a93e2de5f9c76b4 Mon Sep 17 00:00:00 2001
From: Frieder Schrempf <frieder.schrempf@exceet.de>
Date: Fri, 22 Jun 2018 14:28:26 +0200
Subject: mtd: spinand: Add initial support for Winbond W25M02GV

Add support for the W25M02GV chip.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/spi/Makefile  |   2 +-
 drivers/mtd/nand/spi/core.c    |   1 +
 drivers/mtd/nand/spi/winbond.c | 141 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spinand.h    |   1 +
 4 files changed, 144 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/nand/spi/winbond.c

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile
index a1df25398f20..100008d202ed 100644
--- a/drivers/mtd/nand/spi/Makefile
+++ b/drivers/mtd/nand/spi/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-spinand-objs := core.o micron.o
+spinand-objs := core.o micron.o winbond.o
 obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 9cc502d7e745..4803a6bfe8ec 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -765,6 +765,7 @@ static const struct nand_ops spinand_ops = {
 
 static const struct spinand_manufacturer *spinand_manufacturers[] = {
 	&micron_spinand_manufacturer,
+	&winbond_spinand_manufacturer,
 };
 
 static int spinand_manufacturer_detect(struct spinand_device *spinand)
diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c
new file mode 100644
index 000000000000..67baa1b32c00
--- /dev/null
+++ b/drivers/mtd/nand/spi/winbond.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017 exceet electronics GmbH
+ *
+ * Authors:
+ *	Frieder Schrempf <frieder.schrempf@exceet.de>
+ *	Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mtd/spinand.h>
+
+#define SPINAND_MFR_WINBOND		0xEF
+
+#define WINBOND_CFG_BUF_READ		BIT(3)
+
+static SPINAND_OP_VARIANTS(read_cache_variants,
+		SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+
+static SPINAND_OP_VARIANTS(write_cache_variants,
+		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
+		SPINAND_PROG_LOAD(true, 0, NULL, 0));
+
+static SPINAND_OP_VARIANTS(update_cache_variants,
+		SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
+		SPINAND_PROG_LOAD(false, 0, NULL, 0));
+
+static int w25m02gv_ooblayout_ecc(struct mtd_info *mtd, int section,
+				  struct mtd_oob_region *region)
+{
+	if (section > 3)
+		return -ERANGE;
+
+	region->offset = (16 * section) + 8;
+	region->length = 8;
+
+	return 0;
+}
+
+static int w25m02gv_ooblayout_free(struct mtd_info *mtd, int section,
+				   struct mtd_oob_region *region)
+{
+	if (section > 3)
+		return -ERANGE;
+
+	region->offset = (16 * section) + 2;
+	region->length = 6;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops w25m02gv_ooblayout = {
+	.ecc = w25m02gv_ooblayout_ecc,
+	.free = w25m02gv_ooblayout_free,
+};
+
+static int w25m02gv_select_target(struct spinand_device *spinand,
+				  unsigned int target)
+{
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(0xc2, 1),
+					  SPI_MEM_OP_NO_ADDR,
+					  SPI_MEM_OP_NO_DUMMY,
+					  SPI_MEM_OP_DATA_OUT(1,
+							spinand->scratchbuf,
+							1));
+
+	*spinand->scratchbuf = target;
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static const struct spinand_info winbond_spinand_table[] = {
+	SPINAND_INFO("W25M02GV", 0xAB,
+		     NAND_MEMORG(1, 2048, 64, 64, 1024, 1, 1, 2),
+		     NAND_ECCREQ(1, 512),
+		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+					      &write_cache_variants,
+					      &update_cache_variants),
+		     0,
+		     SPINAND_ECCINFO(&w25m02gv_ooblayout, NULL),
+		     SPINAND_SELECT_TARGET(w25m02gv_select_target)),
+};
+
+/**
+ * winbond_spinand_detect - initialize device related part in spinand_device
+ * struct if it is a Winbond device.
+ * @spinand: SPI NAND device structure
+ */
+static int winbond_spinand_detect(struct spinand_device *spinand)
+{
+	u8 *id = spinand->id.data;
+	int ret;
+
+	/*
+	 * Winbond SPI NAND read ID need a dummy byte,
+	 * so the first byte in raw_id is dummy.
+	 */
+	if (id[1] != SPINAND_MFR_WINBOND)
+		return 0;
+
+	ret = spinand_match_and_init(spinand, winbond_spinand_table,
+				     ARRAY_SIZE(winbond_spinand_table), id[2]);
+	if (ret)
+		return ret;
+
+	return 1;
+}
+
+static int winbond_spinand_init(struct spinand_device *spinand)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int i;
+
+	/*
+	 * Make sure all dies are in buffer read mode and not continuous read
+	 * mode.
+	 */
+	for (i = 0; i < nand->memorg.ntargets; i++) {
+		spinand_select_target(spinand, i);
+		spinand_upd_cfg(spinand, WINBOND_CFG_BUF_READ,
+				WINBOND_CFG_BUF_READ);
+	}
+
+	return 0;
+}
+
+static const struct spinand_manufacturer_ops winbond_spinand_manuf_ops = {
+	.detect = winbond_spinand_detect,
+	.init = winbond_spinand_init,
+};
+
+const struct spinand_manufacturer winbond_spinand_manufacturer = {
+	.id = SPINAND_MFR_WINBOND,
+	.name = "Winbond",
+	.ops = &winbond_spinand_manuf_ops,
+};
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 717b272940f1..f0f16b9029e7 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -195,6 +195,7 @@ struct spinand_manufacturer {
 
 /* SPI NAND manufacturers */
 extern const struct spinand_manufacturer micron_spinand_manufacturer;
+extern const struct spinand_manufacturer winbond_spinand_manufacturer;
 
 /**
  * struct spinand_op_variants - SPI NAND operation variants
-- 
cgit v1.2.3


From b02308af05e62c7d995f4fc75b0bc2ae3c3026f7 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 22 Jun 2018 14:28:27 +0200
Subject: mtd: spinand: Add initial support for the MX35LF1GE4AB chip

Add minimal support for the MX35LF1GE4AB SPI NAND chip.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/spi/Makefile   |   2 +-
 drivers/mtd/nand/spi/core.c     |   1 +
 drivers/mtd/nand/spi/macronix.c | 136 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spinand.h     |   1 +
 4 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/nand/spi/macronix.c

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile
index 100008d202ed..b74e074b363a 100644
--- a/drivers/mtd/nand/spi/Makefile
+++ b/drivers/mtd/nand/spi/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-spinand-objs := core.o micron.o winbond.o
+spinand-objs := core.o macronix.o micron.o winbond.o
 obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 4803a6bfe8ec..30f83649c481 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -764,6 +764,7 @@ static const struct nand_ops spinand_ops = {
 };
 
 static const struct spinand_manufacturer *spinand_manufacturers[] = {
+	&macronix_spinand_manufacturer,
 	&micron_spinand_manufacturer,
 	&winbond_spinand_manufacturer,
 };
diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c
new file mode 100644
index 000000000000..8ba3489d332a
--- /dev/null
+++ b/drivers/mtd/nand/spi/macronix.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Macronix
+ *
+ * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mtd/spinand.h>
+
+#define SPINAND_MFR_MACRONIX		0xC2
+
+static SPINAND_OP_VARIANTS(read_cache_variants,
+		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+
+static SPINAND_OP_VARIANTS(write_cache_variants,
+		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
+		SPINAND_PROG_LOAD(true, 0, NULL, 0));
+
+static SPINAND_OP_VARIANTS(update_cache_variants,
+		SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
+		SPINAND_PROG_LOAD(false, 0, NULL, 0));
+
+static int mx35lfxge4ab_ooblayout_ecc(struct mtd_info *mtd, int section,
+				      struct mtd_oob_region *region)
+{
+	return -ERANGE;
+}
+
+static int mx35lfxge4ab_ooblayout_free(struct mtd_info *mtd, int section,
+				       struct mtd_oob_region *region)
+{
+	if (section)
+		return -ERANGE;
+
+	region->offset = 2;
+	region->length = mtd->oobsize - 2;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops mx35lfxge4ab_ooblayout = {
+	.ecc = mx35lfxge4ab_ooblayout_ecc,
+	.free = mx35lfxge4ab_ooblayout_free,
+};
+
+static int mx35lf1ge4ab_get_eccsr(struct spinand_device *spinand, u8 *eccsr)
+{
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(0x7c, 1),
+					  SPI_MEM_OP_NO_ADDR,
+					  SPI_MEM_OP_DUMMY(1, 1),
+					  SPI_MEM_OP_DATA_IN(1, eccsr, 1));
+
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int mx35lf1ge4ab_ecc_get_status(struct spinand_device *spinand,
+				       u8 status)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	u8 eccsr;
+
+	switch (status & STATUS_ECC_MASK) {
+	case STATUS_ECC_NO_BITFLIPS:
+		return 0;
+
+	case STATUS_ECC_UNCOR_ERROR:
+		return -EBADMSG;
+
+	case STATUS_ECC_HAS_BITFLIPS:
+		/*
+		 * Let's try to retrieve the real maximum number of bitflips
+		 * in order to avoid forcing the wear-leveling layer to move
+		 * data around if it's not necessary.
+		 */
+		if (mx35lf1ge4ab_get_eccsr(spinand, &eccsr))
+			return nand->eccreq.strength;
+
+		if (WARN_ON(eccsr > nand->eccreq.strength || !eccsr))
+			return nand->eccreq.strength;
+
+		return eccsr;
+
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static const struct spinand_info macronix_spinand_table[] = {
+	SPINAND_INFO("MX35LF1GE4AB", 0x12,
+		     NAND_MEMORG(1, 2048, 64, 64, 1024, 1, 1, 1),
+		     NAND_ECCREQ(4, 512),
+		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+					      &write_cache_variants,
+					      &update_cache_variants),
+		     SPINAND_HAS_QE_BIT,
+		     SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
+				     mx35lf1ge4ab_ecc_get_status)),
+};
+
+static int macronix_spinand_detect(struct spinand_device *spinand)
+{
+	u8 *id = spinand->id.data;
+	int ret;
+
+	/*
+	 * Macronix SPI NAND read ID needs a dummy byte, so the first byte in
+	 * raw_id is garbage.
+	 */
+	if (id[1] != SPINAND_MFR_MACRONIX)
+		return 0;
+
+	ret = spinand_match_and_init(spinand, macronix_spinand_table,
+				     ARRAY_SIZE(macronix_spinand_table),
+				     id[2]);
+	if (ret)
+		return ret;
+
+	return 1;
+}
+
+static const struct spinand_manufacturer_ops macronix_spinand_manuf_ops = {
+	.detect = macronix_spinand_detect,
+};
+
+const struct spinand_manufacturer macronix_spinand_manufacturer = {
+	.id = SPINAND_MFR_MACRONIX,
+	.name = "Macronix",
+	.ops = &macronix_spinand_manuf_ops,
+};
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index f0f16b9029e7..088ff96c3eb6 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -194,6 +194,7 @@ struct spinand_manufacturer {
 };
 
 /* SPI NAND manufacturers */
+extern const struct spinand_manufacturer macronix_spinand_manufacturer;
 extern const struct spinand_manufacturer micron_spinand_manufacturer;
 extern const struct spinand_manufacturer winbond_spinand_manufacturer;
 
-- 
cgit v1.2.3


From a4c025372d9d4756e7be98a98f5dde302c6df562 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:27 +0200
Subject: mtd: rawnand: Remove nand_do_read() prototype from rawnand.h

nand_do_read() is a static function implemented in nand_base.c. There's
no good reason to expose its prototype in rawnand.h.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/rawnand.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 0c6fb316b409..4d7a46c42900 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1545,8 +1545,6 @@ int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs);
 int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt);
 int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		    int allowbbt);
-int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len,
-		 size_t *retlen, uint8_t *buf);
 
 /**
  * struct platform_nand_chip - chip level device structure
-- 
cgit v1.2.3


From 707329aca6e0cf5781ca7f62c39bdcb5f87e9c23 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:28 +0200
Subject: mtd: rawnand: Remove forward declaration of mtd_info

struct mtd_info is defined in linux/mtd/mtd.h which is included
at the beginning of nand_base.c, there's thus no need for the
forward declaration of mtd_info.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/rawnand.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 4d7a46c42900..32145b302585 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -23,7 +23,6 @@
 #include <linux/mtd/bbm.h>
 #include <linux/types.h>
 
-struct mtd_info;
 struct nand_flash_dev;
 struct device_node;
 
-- 
cgit v1.2.3


From 1c3ab61ebcf8cfb5308d2e68e03fd4b4df484e62 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:29 +0200
Subject: mtd: rawnand: Remove forward declaration of device_node

struct device_node is defined in linux/of.h. Let's include this file
instead of having a forward declaration of this struct.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/rawnand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 32145b302585..83ab6779144e 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -21,10 +21,10 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/flashchip.h>
 #include <linux/mtd/bbm.h>
+#include <linux/of.h>
 #include <linux/types.h>
 
 struct nand_flash_dev;
-struct device_node;
 
 /* Scan and identify a NAND device */
 int nand_scan_with_ids(struct mtd_info *mtd, int max_chips,
-- 
cgit v1.2.3


From 44b07b921dea5ec997fb929ceaa82582a7415816 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:30 +0200
Subject: mtd: rawnand: Rename nand_default_bbt() into nand_create_bbt()

Rename nand_default_bbt() into nand_create_bbt() and pass it a nand_chip
object to prepare removal of the chip->scan_bbt() hook.

We add a temporary nand_default_bbt() wrapper which will be dropped
after the removal of ->scan_bbt().

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c |  5 +++++
 drivers/mtd/nand/raw/nand_bbt.c  | 10 +++++-----
 include/linux/mtd/rawnand.h      |  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index faac82b1e058..0476e13d47b1 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4924,6 +4924,11 @@ static void nand_shutdown(struct mtd_info *mtd)
 	nand_get_device(mtd, FL_PM_SUSPENDED);
 }
 
+static int nand_default_bbt(struct mtd_info *mtd)
+{
+	return nand_create_bbt(mtd_to_nand(mtd));
+}
+
 /* Set default functions */
 static void nand_set_defaults(struct nand_chip *chip)
 {
diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c
index d9f4ceff2568..39db352f8757 100644
--- a/drivers/mtd/nand/raw/nand_bbt.c
+++ b/drivers/mtd/nand/raw/nand_bbt.c
@@ -1349,15 +1349,14 @@ static int nand_create_badblock_pattern(struct nand_chip *this)
 }
 
 /**
- * nand_default_bbt - [NAND Interface] Select a default bad block table for the device
- * @mtd: MTD device structure
+ * nand_create_bbt - [NAND Interface] Select a default bad block table for the device
+ * @this: NAND chip object
  *
  * This function selects the default bad block table support for the device and
  * calls the nand_scan_bbt function.
  */
-int nand_default_bbt(struct mtd_info *mtd)
+int nand_create_bbt(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	int ret;
 
 	/* Is a flash based bad block table requested? */
@@ -1383,8 +1382,9 @@ int nand_default_bbt(struct mtd_info *mtd)
 			return ret;
 	}
 
-	return nand_scan_bbt(mtd, this->badblock_pattern);
+	return nand_scan_bbt(nand_to_mtd(this), this->badblock_pattern);
 }
+EXPORT_SYMBOL(nand_create_bbt);
 
 /**
  * nand_isreserved_bbt - [NAND Interface] Check if a block is reserved
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 83ab6779144e..186f9fb1e7eb 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1538,7 +1538,7 @@ extern const struct nand_manufacturer_ops micron_nand_manuf_ops;
 extern const struct nand_manufacturer_ops amd_nand_manuf_ops;
 extern const struct nand_manufacturer_ops macronix_nand_manuf_ops;
 
-int nand_default_bbt(struct mtd_info *mtd);
+int nand_create_bbt(struct nand_chip *chip);
 int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
 int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs);
 int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt);
-- 
cgit v1.2.3


From e80eba7581512625083ba540f120479b935425de Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:31 +0200
Subject: mtd: rawnand: Kill the chip->scan_bbt() hook

None of the existing drivers are overloading the ->scan_bbt() method,
let's get rid of it and replace calls to ->scan_bbt() by
nand_create_bbt() ones.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/diskonchip.c          | 4 ++--
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +-
 drivers/mtd/nand/raw/nand_base.c           | 9 +--------
 drivers/mtd/nand/raw/nandsim.c             | 2 +-
 include/linux/mtd/rawnand.h                | 2 --
 5 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 8d10061abb4b..3c46188dd6d2 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -1291,7 +1291,7 @@ static int __init nftl_scan_bbt(struct mtd_info *mtd)
 		this->bbt_md = NULL;
 	}
 
-	ret = this->scan_bbt(mtd);
+	ret = nand_create_bbt(this);
 	if (ret)
 		return ret;
 
@@ -1338,7 +1338,7 @@ static int __init inftl_scan_bbt(struct mtd_info *mtd)
 		this->bbt_md->pattern = "TBB_SYSM";
 	}
 
-	ret = this->scan_bbt(mtd);
+	ret = nand_create_bbt(this);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 58d4e4a93a93..599513321e71 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -1944,7 +1944,7 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
 	ret = nand_boot_init(this);
 	if (ret)
 		goto err_nand_cleanup;
-	ret = chip->scan_bbt(mtd);
+	ret = nand_create_bbt(chip);
 	if (ret)
 		goto err_nand_cleanup;
 
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 0476e13d47b1..4fa5e20d9690 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4924,11 +4924,6 @@ static void nand_shutdown(struct mtd_info *mtd)
 	nand_get_device(mtd, FL_PM_SUSPENDED);
 }
 
-static int nand_default_bbt(struct mtd_info *mtd)
-{
-	return nand_create_bbt(mtd_to_nand(mtd));
-}
-
 /* Set default functions */
 static void nand_set_defaults(struct nand_chip *chip)
 {
@@ -4970,8 +4965,6 @@ static void nand_set_defaults(struct nand_chip *chip)
 		chip->write_byte = busw ? nand_write_byte16 : nand_write_byte;
 	if (!chip->read_buf || chip->read_buf == nand_read_buf)
 		chip->read_buf = busw ? nand_read_buf16 : nand_read_buf;
-	if (!chip->scan_bbt)
-		chip->scan_bbt = nand_default_bbt;
 
 	if (!chip->controller) {
 		chip->controller = &chip->hwcontrol;
@@ -6673,7 +6666,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 		return 0;
 
 	/* Build bad block table */
-	ret = chip->scan_bbt(mtd);
+	ret = nand_create_bbt(chip);
 	if (ret)
 		goto err_nand_manuf_cleanup;
 
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index f8edacde49ab..8a3b36cfe5ea 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2337,7 +2337,7 @@ static int __init ns_init_module(void)
 	if ((retval = init_nandsim(nsmtd)) != 0)
 		goto err_exit;
 
-	if ((retval = chip->scan_bbt(nsmtd)) != 0)
+	if ((retval = nand_create_bbt(chip)) != 0)
 		goto err_exit;
 
 	if ((retval = parse_badblocks(nand, nsmtd)) != 0)
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 186f9fb1e7eb..ac0007ddadf2 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1199,7 +1199,6 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  * @buf_align:		minimum buffer alignment required by a platform
  * @hwcontrol:		platform-specific hardware control structure
  * @erase:		[REPLACEABLE] erase function
- * @scan_bbt:		[REPLACEABLE] function to scan bad block table
  * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transferring
  *			data from array to read regs (tR).
  * @state:		[INTERN] the current state of the NAND device
@@ -1292,7 +1291,6 @@ struct nand_chip {
 		       const struct nand_operation *op,
 		       bool check_only);
 	int (*erase)(struct mtd_info *mtd, int page);
-	int (*scan_bbt)(struct mtd_info *mtd);
 	int (*set_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			    int feature_addr, uint8_t *subfeature_para);
 	int (*get_features)(struct mtd_info *mtd, struct nand_chip *chip,
-- 
cgit v1.2.3


From f0f01838f76420988b50b23f315704c9a5cd2fa9 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:32 +0200
Subject: mtd: rawnand: orion_nand: Kill orion_nand_data.dev_ready()

None of the boards seem to overload the ->dev_ready() hook, just drop
this field from orion_nand_data.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/orion_nand.c            | 3 ---
 include/linux/platform_data/mtd-orion_nand.h | 1 -
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index 7825fd3ce66b..1a4828442675 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -153,9 +153,6 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	if (board->width == 16)
 		nc->options |= NAND_BUSWIDTH_16;
 
-	if (board->dev_ready)
-		nc->dev_ready = board->dev_ready;
-
 	platform_set_drvdata(pdev, info);
 
 	/* Not all platforms can gate the clock, so it is not
diff --git a/include/linux/platform_data/mtd-orion_nand.h b/include/linux/platform_data/mtd-orion_nand.h
index a7ce77c7c1a8..34828eb85982 100644
--- a/include/linux/platform_data/mtd-orion_nand.h
+++ b/include/linux/platform_data/mtd-orion_nand.h
@@ -12,7 +12,6 @@
  */
 struct orion_nand_data {
 	struct mtd_partition *parts;
-	int (*dev_ready)(struct mtd_info *mtd);
 	u32 nr_parts;
 	u8 ale;		/* address line number connected to ALE */
 	u8 cle;		/* address line number connected to CLE */
-- 
cgit v1.2.3


From dc2d8856a758627d4f126d17bc113eedd72b2d60 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:33 +0200
Subject: mtd: rawnand: plat_nand: Kill pdata->ctrl.{hwcontrol, read_byte}()

None of the board files are overloading those hooks, so let's drop them
from struct platform_nand_ctrl.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 arch/arm/mach-pxa/balloon3.c     | 1 -
 arch/arm/mach-pxa/em-x270.c      | 1 -
 drivers/mtd/nand/raw/plat_nand.c | 2 --
 include/linux/mtd/rawnand.h      | 4 ----
 4 files changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index f4f8f23bda8c..af46d2182533 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -688,7 +688,6 @@ struct platform_nand_data balloon3_nand_pdata = {
 		.chip_delay	= 50,
 	},
 	.ctrl = {
-		.hwcontrol	= 0,
 		.dev_ready	= balloon3_nand_dev_ready,
 		.select_chip	= balloon3_nand_select_chip,
 		.cmd_ctrl	= balloon3_nand_cmd_ctl,
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 49022ad338e9..29be04c6cc48 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -346,7 +346,6 @@ struct platform_nand_data em_x270_nand_platdata = {
 		.chip_delay = 20,
 	},
 	.ctrl = {
-		.hwcontrol = 0,
 		.dev_ready = em_x270_nand_device_ready,
 		.select_chip = 0,
 		.cmd_ctrl = em_x270_nand_cmd_ctl,
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 925a1323604d..222626df4b96 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -67,12 +67,10 @@ static int plat_nand_probe(struct platform_device *pdev)
 	data->chip.select_chip = pdata->ctrl.select_chip;
 	data->chip.write_buf = pdata->ctrl.write_buf;
 	data->chip.read_buf = pdata->ctrl.read_buf;
-	data->chip.read_byte = pdata->ctrl.read_byte;
 	data->chip.chip_delay = pdata->chip.chip_delay;
 	data->chip.options |= pdata->chip.options;
 	data->chip.bbt_options |= pdata->chip.bbt_options;
 
-	data->chip.ecc.hwctl = pdata->ctrl.hwcontrol;
 	data->chip.ecc.mode = NAND_ECC_SOFT;
 	data->chip.ecc.algo = NAND_ECC_HAMMING;
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index ac0007ddadf2..11c2426fc363 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1572,14 +1572,12 @@ struct platform_device;
  * struct platform_nand_ctrl - controller level device structure
  * @probe:		platform specific function to probe/setup hardware
  * @remove:		platform specific function to remove/teardown hardware
- * @hwcontrol:		platform specific hardware control structure
  * @dev_ready:		platform specific function to read ready/busy pin
  * @select_chip:	platform specific chip select function
  * @cmd_ctrl:		platform specific function for controlling
  *			ALE/CLE/nCE. Also used to write command and address
  * @write_buf:		platform specific function for write buffer
  * @read_buf:		platform specific function for read buffer
- * @read_byte:		platform specific function to read one byte from chip
  * @priv:		private data to transport driver specific settings
  *
  * All fields are optional and depend on the hardware driver requirements
@@ -1587,13 +1585,11 @@ struct platform_device;
 struct platform_nand_ctrl {
 	int (*probe)(struct platform_device *pdev);
 	void (*remove)(struct platform_device *pdev);
-	void (*hwcontrol)(struct mtd_info *mtd, int cmd);
 	int (*dev_ready)(struct mtd_info *mtd);
 	void (*select_chip)(struct mtd_info *mtd, int chip);
 	void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl);
 	void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len);
 	void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len);
-	unsigned char (*read_byte)(struct mtd_info *mtd);
 	void *priv;
 };
 
-- 
cgit v1.2.3


From dc2865ac3527d76fe04ee1f1a3ffc101a60faba0 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Mon, 9 Jul 2018 22:09:40 +0200
Subject: MIPS: txx9: Move the ndfc.h header to
 include/linux/platform_data/txx9

This way we will be able to compile the ndfmc driver when
COMPILE_TEST=y.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Paul Burton <paul.burton@mips.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 arch/mips/include/asm/txx9/ndfmc.h       | 30 ------------------------------
 arch/mips/txx9/generic/setup.c           |  2 +-
 arch/mips/txx9/generic/setup_tx4938.c    |  2 +-
 arch/mips/txx9/generic/setup_tx4939.c    |  2 +-
 drivers/mtd/nand/raw/txx9ndfmc.c         |  2 +-
 include/linux/platform_data/txx9/ndfmc.h | 30 ++++++++++++++++++++++++++++++
 6 files changed, 34 insertions(+), 34 deletions(-)
 delete mode 100644 arch/mips/include/asm/txx9/ndfmc.h
 create mode 100644 include/linux/platform_data/txx9/ndfmc.h

(limited to 'include/linux')

diff --git a/arch/mips/include/asm/txx9/ndfmc.h b/arch/mips/include/asm/txx9/ndfmc.h
deleted file mode 100644
index fa67f3df78fc..000000000000
--- a/arch/mips/include/asm/txx9/ndfmc.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * (C) Copyright TOSHIBA CORPORATION 2007
- */
-#ifndef __ASM_TXX9_NDFMC_H
-#define __ASM_TXX9_NDFMC_H
-
-#define NDFMC_PLAT_FLAG_USE_BSPRT	0x01
-#define NDFMC_PLAT_FLAG_NO_RSTR		0x02
-#define NDFMC_PLAT_FLAG_HOLDADD		0x04
-#define NDFMC_PLAT_FLAG_DUMMYWRITE	0x08
-
-struct txx9ndfmc_platform_data {
-	unsigned int shift;
-	unsigned int gbus_clock;
-	unsigned int hold;		/* hold time in nanosecond */
-	unsigned int spw;		/* strobe pulse width in nanosecond */
-	unsigned int flags;
-	unsigned char ch_mask;		/* available channel bitmask */
-	unsigned char wp_mask;		/* write-protect bitmask */
-	unsigned char wide_mask;	/* 16bit-nand bitmask */
-};
-
-void txx9_ndfmc_init(unsigned long baseaddr,
-		     const struct txx9ndfmc_platform_data *plat_data);
-
-#endif /* __ASM_TXX9_NDFMC_H */
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index 1791a44ee570..aa47932abd28 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -20,6 +20,7 @@
 #include <linux/err.h>
 #include <linux/gpio/driver.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/txx9/ndfmc.h>
 #include <linux/serial_core.h>
 #include <linux/mtd/physmap.h>
 #include <linux/leds.h>
@@ -35,7 +36,6 @@
 #include <asm/txx9/generic.h>
 #include <asm/txx9/pci.h>
 #include <asm/txx9tmr.h>
-#include <asm/txx9/ndfmc.h>
 #include <asm/txx9/dmac.h>
 #ifdef CONFIG_CPU_TX49XX
 #include <asm/txx9/tx4938.h>
diff --git a/arch/mips/txx9/generic/setup_tx4938.c b/arch/mips/txx9/generic/setup_tx4938.c
index 85d1795652da..17395d5d15ca 100644
--- a/arch/mips/txx9/generic/setup_tx4938.c
+++ b/arch/mips/txx9/generic/setup_tx4938.c
@@ -17,13 +17,13 @@
 #include <linux/ptrace.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/txx9/ndfmc.h>
 #include <asm/reboot.h>
 #include <asm/traps.h>
 #include <asm/txx9irq.h>
 #include <asm/txx9tmr.h>
 #include <asm/txx9pio.h>
 #include <asm/txx9/generic.h>
-#include <asm/txx9/ndfmc.h>
 #include <asm/txx9/dmac.h>
 #include <asm/txx9/tx4938.h>
 
diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c
index 274928987a21..360c388f4c82 100644
--- a/arch/mips/txx9/generic/setup_tx4939.c
+++ b/arch/mips/txx9/generic/setup_tx4939.c
@@ -21,13 +21,13 @@
 #include <linux/ptrace.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/txx9/ndfmc.h>
 #include <asm/bootinfo.h>
 #include <asm/reboot.h>
 #include <asm/traps.h>
 #include <asm/txx9irq.h>
 #include <asm/txx9tmr.h>
 #include <asm/txx9/generic.h>
-#include <asm/txx9/ndfmc.h>
 #include <asm/txx9/dmac.h>
 #include <asm/txx9/tx4939.h>
 
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index b567d212fe7d..04d57474ef97 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -20,7 +20,7 @@
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
-#include <asm/txx9/ndfmc.h>
+#include <linux/platform_data/txx9/ndfmc.h>
 
 /* TXX9 NDFMC Registers */
 #define TXX9_NDFDTR	0x00
diff --git a/include/linux/platform_data/txx9/ndfmc.h b/include/linux/platform_data/txx9/ndfmc.h
new file mode 100644
index 000000000000..fc172627d54e
--- /dev/null
+++ b/include/linux/platform_data/txx9/ndfmc.h
@@ -0,0 +1,30 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * (C) Copyright TOSHIBA CORPORATION 2007
+ */
+#ifndef __TXX9_NDFMC_H
+#define __TXX9_NDFMC_H
+
+#define NDFMC_PLAT_FLAG_USE_BSPRT	0x01
+#define NDFMC_PLAT_FLAG_NO_RSTR		0x02
+#define NDFMC_PLAT_FLAG_HOLDADD		0x04
+#define NDFMC_PLAT_FLAG_DUMMYWRITE	0x08
+
+struct txx9ndfmc_platform_data {
+	unsigned int shift;
+	unsigned int gbus_clock;
+	unsigned int hold;		/* hold time in nanosecond */
+	unsigned int spw;		/* strobe pulse width in nanosecond */
+	unsigned int flags;
+	unsigned char ch_mask;		/* available channel bitmask */
+	unsigned char wp_mask;		/* write-protect bitmask */
+	unsigned char wide_mask;	/* 16bit-nand bitmask */
+};
+
+void txx9_ndfmc_init(unsigned long baseaddr,
+		     const struct txx9ndfmc_platform_data *plat_data);
+
+#endif /* __TXX9_NDFMC_H */
-- 
cgit v1.2.3


From e65e3a50702f4e7a01c0b36ff08d6ed8dde7ee7b Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Mon, 9 Jul 2018 22:09:42 +0200
Subject: MIPS: jz4740: Move jz4740_nand.h header to
 include/linux/platform_data/jz4740

This way we will be able to compile the jz4740_nand driver when
COMPILE_TEST=y.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Paul Burton <paul.burton@mips.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 arch/mips/include/asm/mach-jz4740/jz4740_nand.h  | 34 ------------------------
 arch/mips/jz4740/board-qi_lb60.c                 |  3 ++-
 drivers/mtd/nand/raw/jz4740_nand.c               |  2 +-
 include/linux/platform_data/jz4740/jz4740_nand.h | 34 ++++++++++++++++++++++++
 4 files changed, 37 insertions(+), 36 deletions(-)
 delete mode 100644 arch/mips/include/asm/mach-jz4740/jz4740_nand.h
 create mode 100644 include/linux/platform_data/jz4740/jz4740_nand.h

(limited to 'include/linux')

diff --git a/arch/mips/include/asm/mach-jz4740/jz4740_nand.h b/arch/mips/include/asm/mach-jz4740/jz4740_nand.h
deleted file mode 100644
index f381d465e768..000000000000
--- a/arch/mips/include/asm/mach-jz4740/jz4740_nand.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- *  JZ4740 SoC NAND controller driver
- *
- *  This program is free software; you can redistribute	 it and/or modify it
- *  under  the terms of	 the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the	License, or (at your
- *  option) any later version.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef __ASM_MACH_JZ4740_JZ4740_NAND_H__
-#define __ASM_MACH_JZ4740_JZ4740_NAND_H__
-
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
-
-#define JZ_NAND_NUM_BANKS 4
-
-struct jz_nand_platform_data {
-	int			num_partitions;
-	struct mtd_partition	*partitions;
-
-	unsigned char banks[JZ_NAND_NUM_BANKS];
-
-	void (*ident_callback)(struct platform_device *, struct mtd_info *,
-				struct mtd_partition **, int *num_partitions);
-};
-
-#endif
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index 60f0767507c6..af0c8ace0141 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c
@@ -29,10 +29,11 @@
 #include <linux/power/gpio-charger.h>
 #include <linux/pwm.h>
 
+#include <linux/platform_data/jz4740/jz4740_nand.h>
+
 #include <asm/mach-jz4740/gpio.h>
 #include <asm/mach-jz4740/jz4740_fb.h>
 #include <asm/mach-jz4740/jz4740_mmc.h>
-#include <asm/mach-jz4740/jz4740_nand.h>
 
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index 613b00a9604b..a0254461812d 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -25,7 +25,7 @@
 
 #include <linux/gpio.h>
 
-#include <asm/mach-jz4740/jz4740_nand.h>
+#include <linux/platform_data/jz4740/jz4740_nand.h>
 
 #define JZ_REG_NAND_CTRL	0x50
 #define JZ_REG_NAND_ECC_CTRL	0x100
diff --git a/include/linux/platform_data/jz4740/jz4740_nand.h b/include/linux/platform_data/jz4740/jz4740_nand.h
new file mode 100644
index 000000000000..bc571f6d5ced
--- /dev/null
+++ b/include/linux/platform_data/jz4740/jz4740_nand.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
+ *  JZ4740 SoC NAND controller driver
+ *
+ *  This program is free software; you can redistribute	 it and/or modify it
+ *  under  the terms of	 the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __JZ4740_NAND_H__
+#define __JZ4740_NAND_H__
+
+#include <linux/mtd/rawnand.h>
+#include <linux/mtd/partitions.h>
+
+#define JZ_NAND_NUM_BANKS 4
+
+struct jz_nand_platform_data {
+	int			num_partitions;
+	struct mtd_partition	*partitions;
+
+	unsigned char banks[JZ_NAND_NUM_BANKS];
+
+	void (*ident_callback)(struct platform_device *, struct mtd_info *,
+				struct mtd_partition **, int *num_partitions);
+};
+
+#endif
-- 
cgit v1.2.3


From be2ab5b4d5c0bf041a34ec2e1397d50afbfb095e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 11 Jul 2018 13:45:12 +0200
Subject: netfilter: nf_tables: take module reference when starting a batch

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h | 1 +
 net/netfilter/nf_tables_api.c       | 1 +
 net/netfilter/nfnetlink.c           | 9 +++++++++
 3 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 3ecc3050be0e..4a520d3304a2 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -29,6 +29,7 @@ struct nfnetlink_subsystem {
 	__u8 subsys_id;			/* nfnetlink subsystem ID */
 	__u8 cb_count;			/* number of callbacks */
 	const struct nfnl_callback *cb;	/* callback for individual types */
+	struct module *owner;
 	int (*commit)(struct net *net, struct sk_buff *skb);
 	int (*abort)(struct net *net, struct sk_buff *skb);
 	void (*cleanup)(struct net *net);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 594b395442d6..c16c481fc52a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6603,6 +6603,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
 	.abort		= nf_tables_abort,
 	.cleanup	= nf_tables_cleanup,
 	.valid_genid	= nf_tables_valid_genid,
+	.owner		= THIS_MODULE,
 };
 
 int nft_chain_validate_dependency(const struct nft_chain *chain,
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 94f9bcaa0799..dd1d7bc23b03 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -337,7 +337,14 @@ replay:
 		return kfree_skb(skb);
 	}
 
+	if (!try_module_get(ss->owner)) {
+		nfnl_unlock(subsys_id);
+		netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
+		return kfree_skb(skb);
+	}
+
 	if (!ss->valid_genid(net, genid)) {
+		module_put(ss->owner);
 		nfnl_unlock(subsys_id);
 		netlink_ack(oskb, nlh, -ERESTART, NULL);
 		return kfree_skb(skb);
@@ -472,6 +479,7 @@ done:
 		nfnl_err_reset(&err_list);
 		nfnl_unlock(subsys_id);
 		kfree_skb(skb);
+		module_put(ss->owner);
 		goto replay;
 	} else if (status == NFNL_BATCH_DONE) {
 		err = ss->commit(net, oskb);
@@ -491,6 +499,7 @@ done:
 	nfnl_err_deliver(&err_list, oskb);
 	nfnl_unlock(subsys_id);
 	kfree_skb(skb);
+	module_put(ss->owner);
 }
 
 static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = {
-- 
cgit v1.2.3


From e2861fa71641c6414831d628a1f4f793b6562580 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@google.com>
Date: Fri, 8 Jun 2018 14:57:42 -0700
Subject: evm: Don't deadlock if a crypto algorithm is unavailable

When EVM attempts to appraise a file signed with a crypto algorithm the
kernel doesn't have support for, it will cause the kernel to trigger a
module load. If the EVM policy includes appraisal of kernel modules this
will in turn call back into EVM - since EVM is holding a lock until the
crypto initialisation is complete, this triggers a deadlock. Add a
CRYPTO_NOLOAD flag and skip module loading if it's set, and add that flag
in the EVM case in order to fail gracefully with an error message
instead of deadlocking.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
---
 crypto/api.c                        | 2 +-
 include/linux/crypto.h              | 5 +++++
 security/integrity/evm/evm_crypto.c | 3 ++-
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/api.c b/crypto/api.c
index 0ee632bba064..7aca9f86c5f3 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -229,7 +229,7 @@ static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
 	mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD);
 
 	alg = crypto_alg_lookup(name, type, mask);
-	if (!alg) {
+	if (!alg && !(mask & CRYPTO_NOLOAD)) {
 		request_module("crypto-%s", name);
 
 		if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask &
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 6eb06101089f..e8839d3a7559 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -112,6 +112,11 @@
  */
 #define CRYPTO_ALG_OPTIONAL_KEY		0x00004000
 
+/*
+ * Don't trigger module loading
+ */
+#define CRYPTO_NOLOAD			0x00008000
+
 /*
  * Transform masks and values (for crt_flags).
  */
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index b60524310855..c20e3142b541 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -97,7 +97,8 @@ static struct shash_desc *init_desc(char type)
 		mutex_lock(&mutex);
 		if (*tfm)
 			goto out;
-		*tfm = crypto_alloc_shash(algo, 0, CRYPTO_ALG_ASYNC);
+		*tfm = crypto_alloc_shash(algo, 0,
+					  CRYPTO_ALG_ASYNC | CRYPTO_NOLOAD);
 		if (IS_ERR(*tfm)) {
 			rc = PTR_ERR(*tfm);
 			pr_err("Can not allocate %s (reason: %ld)\n", algo, rc);
-- 
cgit v1.2.3


From 6eb864c1d9dd1ef32b88e03c3f49d8be0dab7dcf Mon Sep 17 00:00:00 2001
From: Mikhail Kurinnoi <viewizard@viewizard.com>
Date: Wed, 27 Jun 2018 16:33:42 +0300
Subject: integrity: prevent deadlock during digsig verification.

This patch aimed to prevent deadlock during digsig verification.The point
of issue - user space utility modprobe and/or it's dependencies (ld-*.so,
libz.so.*, libc-*.so and /lib/modules/ files) that could be used for
kernel modules load during digsig verification and could be signed by
digsig in the same time.

First at all, look at crypto_alloc_tfm() work algorithm:
crypto_alloc_tfm() will first attempt to locate an already loaded
algorithm. If that fails and the kernel supports dynamically loadable
modules, it will then attempt to load a module of the same name or alias.
If that fails it will send a query to any loaded crypto manager to
construct an algorithm on the fly.

We have situation, when public_key_verify_signature() in case of RSA
algorithm use alg_name to store internal information in order to construct
an algorithm on the fly, but crypto_larval_lookup() will try to use
alg_name in order to load kernel module with same name.

1) we can't do anything with crypto module work, since it designed to work
exactly in this way;
2) we can't globally filter module requests for modprobe, since it
designed to work with any requests.

In this patch, I propose add an exception for "crypto-pkcs1pad(rsa,*)"
module requests only in case of enabled integrity asymmetric keys support.
Since we don't have any real "crypto-pkcs1pad(rsa,*)" kernel modules for
sure, we are safe to fail such module request from crypto_larval_lookup().
In this way we prevent modprobe execution during digsig verification and
avoid possible deadlock if modprobe and/or it's dependencies also signed
with digsig.

Requested "crypto-pkcs1pad(rsa,*)" kernel module name formed by:
1) "pkcs1pad(rsa,%s)" in public_key_verify_signature();
2) "crypto-%s" / "crypto-%s-all" in crypto_larval_lookup().
"crypto-pkcs1pad(rsa," part of request is a constant and unique and could
be used as filter.

Signed-off-by: Mikhail Kurinnoi <viewizard@viewizard.com>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>

 include/linux/integrity.h              | 13 +++++++++++++
 security/integrity/digsig_asymmetric.c | 23 +++++++++++++++++++++++
 security/security.c                    |  7 ++++++-
 3 files changed, 42 insertions(+), 1 deletion(-)
---
 include/linux/integrity.h              | 13 +++++++++++++
 security/integrity/digsig_asymmetric.c | 23 +++++++++++++++++++++++
 security/security.c                    |  7 ++++++-
 3 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/integrity.h b/include/linux/integrity.h
index 858d3f4a2241..54c853ec2fd1 100644
--- a/include/linux/integrity.h
+++ b/include/linux/integrity.h
@@ -44,4 +44,17 @@ static inline void integrity_load_keys(void)
 }
 #endif /* CONFIG_INTEGRITY */
 
+#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
+
+extern int integrity_kernel_module_request(char *kmod_name);
+
+#else
+
+static inline int integrity_kernel_module_request(char *kmod_name)
+{
+	return 0;
+}
+
+#endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */
+
 #endif /* _LINUX_INTEGRITY_H */
diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
index ab6a029062a1..6dc075144508 100644
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -115,3 +115,26 @@ int asymmetric_verify(struct key *keyring, const char *sig,
 	pr_debug("%s() = %d\n", __func__, ret);
 	return ret;
 }
+
+/**
+ * integrity_kernel_module_request - prevent crypto-pkcs1pad(rsa,*) requests
+ * @kmod_name: kernel module name
+ *
+ * We have situation, when public_key_verify_signature() in case of RSA
+ * algorithm use alg_name to store internal information in order to
+ * construct an algorithm on the fly, but crypto_larval_lookup() will try
+ * to use alg_name in order to load kernel module with same name.
+ * Since we don't have any real "crypto-pkcs1pad(rsa,*)" kernel modules,
+ * we are safe to fail such module request from crypto_larval_lookup().
+ *
+ * In this way we prevent modprobe execution during digsig verification
+ * and avoid possible deadlock if modprobe and/or it's dependencies
+ * also signed with digsig.
+ */
+int integrity_kernel_module_request(char *kmod_name)
+{
+	if (strncmp(kmod_name, "crypto-pkcs1pad(rsa,", 20) == 0)
+		return -EINVAL;
+
+	return 0;
+}
diff --git a/security/security.c b/security/security.c
index b49ee810371b..dbca03d3629b 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1032,7 +1032,12 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode)
 
 int security_kernel_module_request(char *kmod_name)
 {
-	return call_int_hook(kernel_module_request, 0, kmod_name);
+	int ret;
+
+	ret = call_int_hook(kernel_module_request, 0, kmod_name);
+	if (ret)
+		return ret;
+	return integrity_kernel_module_request(kmod_name);
 }
 
 int security_kernel_read_file(struct file *file, enum kernel_read_file_id id)
-- 
cgit v1.2.3


From d29ab6e1fa21ebc2a8a771015dd9e0e5d4e28dc1 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 13 Jul 2018 12:41:10 -0700
Subject: bpf: bpf_prog_array_alloc() should return a generic non-rcu pointer

Currently the return type of the bpf_prog_array_alloc() is
struct bpf_prog_array __rcu *, which is not quite correct.
Obviously, the returned pointer is a generic pointer, which
is valid for an indefinite amount of time and it's not shared
with anyone else, so there is no sense in marking it as __rcu.

This change eliminate the following sparse warnings:
kernel/bpf/core.c:1544:31: warning: incorrect type in return expression (different address spaces)
kernel/bpf/core.c:1544:31:    expected struct bpf_prog_array [noderef] <asn:4>*
kernel/bpf/core.c:1544:31:    got void *
kernel/bpf/core.c:1548:17: warning: incorrect type in return expression (different address spaces)
kernel/bpf/core.c:1548:17:    expected struct bpf_prog_array [noderef] <asn:4>*
kernel/bpf/core.c:1548:17:    got struct bpf_prog_array *<noident>
kernel/bpf/core.c:1681:15: warning: incorrect type in assignment (different address spaces)
kernel/bpf/core.c:1681:15:    expected struct bpf_prog_array *array
kernel/bpf/core.c:1681:15:    got struct bpf_prog_array [noderef] <asn:4>*

Fixes: 324bda9e6c5a ("bpf: multi program support for cgroup+bpf")
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h | 2 +-
 kernel/bpf/core.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8827e797ff97..943fb08d8287 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -352,7 +352,7 @@ struct bpf_prog_array {
 	struct bpf_prog *progs[0];
 };
 
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
+struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
 void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
 int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
 int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1e5625d46414..253aa8e79c7b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1538,7 +1538,7 @@ static struct {
 	.null_prog = NULL,
 };
 
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
+struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
 {
 	if (prog_cnt)
 		return kzalloc(sizeof(struct bpf_prog_array) +
-- 
cgit v1.2.3


From 09728266b6f99ab57cd4f84f3eead65b7b65dbf7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 17 Jul 2018 10:53:23 -0700
Subject: bpf: offload: rename bpf_offload_dev_match() to
 bpf_offload_prog_map_match()

A set of new API functions exported for the drivers will soon use
'bpf_offload_dev_' as a prefix.  Rename the bpf_offload_dev_match()
which is internal to the core (used by the verifier) to avoid any
confusion.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h   | 2 +-
 kernel/bpf/offload.c  | 2 +-
 kernel/bpf/verifier.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 943fb08d8287..9b010d9129f3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -648,7 +648,7 @@ int bpf_map_offload_delete_elem(struct bpf_map *map, void *key);
 int bpf_map_offload_get_next_key(struct bpf_map *map,
 				 void *key, void *next_key);
 
-bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);
+bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index ac747d5cf7c6..6184e48703f4 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -468,7 +468,7 @@ int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
 	return 0;
 }
 
-bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
+bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
 {
 	struct bpf_offloaded_map *offmap;
 	struct bpf_prog_offload *offload;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9e2bf834f13a..15d69b278277 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5054,7 +5054,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 	}
 
 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
-	    !bpf_offload_dev_match(prog, map)) {
+	    !bpf_offload_prog_map_match(prog, map)) {
 		verbose(env, "offload device mismatch between prog and map\n");
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From 9fd7c5559165f4c679b40c5e6ad442955832dfad Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 17 Jul 2018 10:53:24 -0700
Subject: bpf: offload: aggregate offloads per-device

Currently we have two lists of offloaded objects - programs and maps.
Netdevice unregister notifier scans those lists to orphan objects
associated with device being unregistered.  This puts unnecessary
(even if negligible) burden on all netdev unregister calls in BPF-
-enabled kernel.  The lists of objects may potentially get long
making the linear scan even more problematic.  There haven't been
complaints about this mechanisms so far, but it is suboptimal.

Instead of relying on notifiers, make the few BPF-capable drivers
register explicitly for BPF offloads.  The programs and maps will
now be collected per-device not on a global list, and only scanned
for removal when driver unregisters from BPF offloads.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c |  13 +++
 drivers/net/netdevsim/bpf.c                   |   7 ++
 include/linux/bpf.h                           |   3 +
 kernel/bpf/offload.c                          | 142 +++++++++++++++++---------
 4 files changed, 119 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index b95b94d008cf..dee039ada75c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -404,6 +404,16 @@ err_release_free:
 	return -EINVAL;
 }
 
+static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev)
+{
+	return bpf_offload_dev_netdev_register(netdev);
+}
+
+static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev)
+{
+	bpf_offload_dev_netdev_unregister(netdev);
+}
+
 static int nfp_bpf_init(struct nfp_app *app)
 {
 	struct nfp_app_bpf *bpf;
@@ -466,6 +476,9 @@ const struct nfp_app_type app_bpf = {
 
 	.extra_cap	= nfp_bpf_extra_cap,
 
+	.ndo_init	= nfp_bpf_ndo_init,
+	.ndo_uninit	= nfp_bpf_ndo_uninit,
+
 	.vnic_alloc	= nfp_bpf_vnic_alloc,
 	.vnic_free	= nfp_bpf_vnic_free,
 
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 357f9e62f306..c4a2829e0e1f 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -582,6 +582,8 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 
 int nsim_bpf_init(struct netdevsim *ns)
 {
+	int err;
+
 	if (ns->sdev->refcnt == 1) {
 		INIT_LIST_HEAD(&ns->sdev->bpf_bound_progs);
 		INIT_LIST_HEAD(&ns->sdev->bpf_bound_maps);
@@ -592,6 +594,10 @@ int nsim_bpf_init(struct netdevsim *ns)
 			return -ENOMEM;
 	}
 
+	err = bpf_offload_dev_netdev_register(ns->netdev);
+	if (err)
+		return err;
+
 	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
 			   &ns->bpf_offloaded_id);
 
@@ -625,6 +631,7 @@ void nsim_bpf_uninit(struct netdevsim *ns)
 	WARN_ON(ns->xdp.prog);
 	WARN_ON(ns->xdp_hw.prog);
 	WARN_ON(ns->bpf_offloaded);
+	bpf_offload_dev_netdev_unregister(ns->netdev);
 
 	if (ns->sdev->refcnt == 1) {
 		WARN_ON(!list_empty(&ns->sdev->bpf_bound_progs));
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9b010d9129f3..aa2e834a122b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -650,6 +650,9 @@ int bpf_map_offload_get_next_key(struct bpf_map *map,
 
 bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
 
+int bpf_offload_dev_netdev_register(struct net_device *netdev);
+void bpf_offload_dev_netdev_unregister(struct net_device *netdev);
+
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
 
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 6184e48703f4..cd64a26807aa 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -18,19 +18,37 @@
 #include <linux/bug.h>
 #include <linux/kdev_t.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/proc_ns.h>
+#include <linux/rhashtable.h>
 #include <linux/rtnetlink.h>
 #include <linux/rwsem.h>
 
-/* Protects bpf_prog_offload_devs, bpf_map_offload_devs and offload members
+/* Protects offdevs, members of bpf_offload_netdev and offload members
  * of all progs.
  * RTNL lock cannot be taken when holding this lock.
  */
 static DECLARE_RWSEM(bpf_devs_lock);
-static LIST_HEAD(bpf_prog_offload_devs);
-static LIST_HEAD(bpf_map_offload_devs);
+
+struct bpf_offload_netdev {
+	struct rhash_head l;
+	struct net_device *netdev;
+	struct list_head progs;
+	struct list_head maps;
+};
+
+static const struct rhashtable_params offdevs_params = {
+	.nelem_hint		= 4,
+	.key_len		= sizeof(struct net_device *),
+	.key_offset		= offsetof(struct bpf_offload_netdev, netdev),
+	.head_offset		= offsetof(struct bpf_offload_netdev, l),
+	.automatic_shrinking	= true,
+};
+
+static struct rhashtable offdevs;
+static bool offdevs_inited;
 
 static int bpf_dev_offload_check(struct net_device *netdev)
 {
@@ -41,8 +59,19 @@ static int bpf_dev_offload_check(struct net_device *netdev)
 	return 0;
 }
 
+static struct bpf_offload_netdev *
+bpf_offload_find_netdev(struct net_device *netdev)
+{
+	lockdep_assert_held(&bpf_devs_lock);
+
+	if (!offdevs_inited)
+		return NULL;
+	return rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+}
+
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 {
+	struct bpf_offload_netdev *ondev;
 	struct bpf_prog_offload *offload;
 	int err;
 
@@ -66,12 +95,13 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 		goto err_maybe_put;
 
 	down_write(&bpf_devs_lock);
-	if (offload->netdev->reg_state != NETREG_REGISTERED) {
+	ondev = bpf_offload_find_netdev(offload->netdev);
+	if (!ondev) {
 		err = -EINVAL;
 		goto err_unlock;
 	}
 	prog->aux->offload = offload;
-	list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
+	list_add_tail(&offload->offloads, &ondev->progs);
 	dev_put(offload->netdev);
 	up_write(&bpf_devs_lock);
 
@@ -294,6 +324,7 @@ static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
 struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 {
 	struct net *net = current->nsproxy->net_ns;
+	struct bpf_offload_netdev *ondev;
 	struct bpf_offloaded_map *offmap;
 	int err;
 
@@ -316,11 +347,17 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 	if (err)
 		goto err_unlock;
 
+	ondev = bpf_offload_find_netdev(offmap->netdev);
+	if (!ondev) {
+		err = -EINVAL;
+		goto err_unlock;
+	}
+
 	err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC);
 	if (err)
 		goto err_unlock;
 
-	list_add_tail(&offmap->offloads, &bpf_map_offload_devs);
+	list_add_tail(&offmap->offloads, &ondev->maps);
 	up_write(&bpf_devs_lock);
 	rtnl_unlock();
 
@@ -489,56 +526,69 @@ bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
 	return ret;
 }
 
-static void bpf_offload_orphan_all_progs(struct net_device *netdev)
+int bpf_offload_dev_netdev_register(struct net_device *netdev)
 {
-	struct bpf_prog_offload *offload, *tmp;
+	struct bpf_offload_netdev *ondev;
+	int err;
 
-	list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads)
-		if (offload->netdev == netdev)
-			__bpf_prog_offload_destroy(offload->prog);
-}
+	down_write(&bpf_devs_lock);
+	if (!offdevs_inited) {
+		err = rhashtable_init(&offdevs, &offdevs_params);
+		if (err)
+			return err;
+		offdevs_inited = true;
+	}
+	up_write(&bpf_devs_lock);
 
-static void bpf_offload_orphan_all_maps(struct net_device *netdev)
-{
-	struct bpf_offloaded_map *offmap, *tmp;
+	ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
+	if (!ondev)
+		return -ENOMEM;
+
+	ondev->netdev = netdev;
+	INIT_LIST_HEAD(&ondev->progs);
+	INIT_LIST_HEAD(&ondev->maps);
+
+	down_write(&bpf_devs_lock);
+	err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
+	if (err) {
+		netdev_warn(netdev, "failed to register for BPF offload\n");
+		goto err_unlock_free;
+	}
 
-	list_for_each_entry_safe(offmap, tmp, &bpf_map_offload_devs, offloads)
-		if (offmap->netdev == netdev)
-			__bpf_map_offload_destroy(offmap);
+	up_write(&bpf_devs_lock);
+	return 0;
+
+err_unlock_free:
+	up_write(&bpf_devs_lock);
+	kfree(ondev);
+	return err;
 }
+EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
 
-static int bpf_offload_notification(struct notifier_block *notifier,
-				    ulong event, void *ptr)
+void bpf_offload_dev_netdev_unregister(struct net_device *netdev)
 {
-	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct bpf_offloaded_map *offmap, *mtmp;
+	struct bpf_prog_offload *offload, *ptmp;
+	struct bpf_offload_netdev *ondev;
 
 	ASSERT_RTNL();
 
-	switch (event) {
-	case NETDEV_UNREGISTER:
-		/* ignore namespace changes */
-		if (netdev->reg_state != NETREG_UNREGISTERING)
-			break;
-
-		down_write(&bpf_devs_lock);
-		bpf_offload_orphan_all_progs(netdev);
-		bpf_offload_orphan_all_maps(netdev);
-		up_write(&bpf_devs_lock);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
+	down_write(&bpf_devs_lock);
+	ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+	if (WARN_ON(!ondev))
+		goto unlock;
 
-static struct notifier_block bpf_offload_notifier = {
-	.notifier_call = bpf_offload_notification,
-};
+	WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
 
-static int __init bpf_offload_init(void)
-{
-	register_netdevice_notifier(&bpf_offload_notifier);
-	return 0;
-}
+	list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
+		__bpf_prog_offload_destroy(offload->prog);
+	list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
+		__bpf_map_offload_destroy(offmap);
 
-subsys_initcall(bpf_offload_init);
+	WARN_ON(!list_empty(&ondev->progs));
+	WARN_ON(!list_empty(&ondev->maps));
+	kfree(ondev);
+unlock:
+	up_write(&bpf_devs_lock);
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
-- 
cgit v1.2.3


From 602144c224604f1cbff02ee2d1cf46825269ecbd Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 17 Jul 2018 10:53:25 -0700
Subject: bpf: offload: keep the offload state per-ASIC

Create a higher-level entity to represent a device/ASIC to allow
programs and maps to be shared between device ports.  The extra
work is required to make sure we don't destroy BPF objects as
soon as the netdev for which they were loaded gets destroyed,
as other ports may still be using them.  When netdev goes away
all of its BPF objects will be moved to other netdevs of the
device, and only destroyed when last netdev is unregistered.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c | 14 ++++-
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  4 ++
 drivers/net/netdevsim/bpf.c                   | 17 +++++-
 drivers/net/netdevsim/netdevsim.h             |  3 +
 include/linux/bpf.h                           |  9 ++-
 kernel/bpf/offload.c                          | 81 +++++++++++++++++++++------
 6 files changed, 104 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index dee039ada75c..458f49235d06 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -406,12 +406,16 @@ err_release_free:
 
 static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev)
 {
-	return bpf_offload_dev_netdev_register(netdev);
+	struct nfp_app_bpf *bpf = app->priv;
+
+	return bpf_offload_dev_netdev_register(bpf->bpf_dev, netdev);
 }
 
 static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev)
 {
-	bpf_offload_dev_netdev_unregister(netdev);
+	struct nfp_app_bpf *bpf = app->priv;
+
+	bpf_offload_dev_netdev_unregister(bpf->bpf_dev, netdev);
 }
 
 static int nfp_bpf_init(struct nfp_app *app)
@@ -437,6 +441,11 @@ static int nfp_bpf_init(struct nfp_app *app)
 	if (err)
 		goto err_free_neutral_maps;
 
+	bpf->bpf_dev = bpf_offload_dev_create();
+	err = PTR_ERR_OR_ZERO(bpf->bpf_dev);
+	if (err)
+		goto err_free_neutral_maps;
+
 	return 0;
 
 err_free_neutral_maps:
@@ -455,6 +464,7 @@ static void nfp_bpf_clean(struct nfp_app *app)
 {
 	struct nfp_app_bpf *bpf = app->priv;
 
+	bpf_offload_dev_destroy(bpf->bpf_dev);
 	WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
 	WARN_ON(!list_empty(&bpf->map_list));
 	WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 9845c1a2d4c2..bec935468f90 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -110,6 +110,8 @@ enum pkt_vec {
  * struct nfp_app_bpf - bpf app priv structure
  * @app:		backpointer to the app
  *
+ * @bpf_dev:		BPF offload device handle
+ *
  * @tag_allocator:	bitmap of control message tags in use
  * @tag_alloc_next:	next tag bit to allocate
  * @tag_alloc_last:	next tag bit to be freed
@@ -150,6 +152,8 @@ enum pkt_vec {
 struct nfp_app_bpf {
 	struct nfp_app *app;
 
+	struct bpf_offload_dev *bpf_dev;
+
 	DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
 	u16 tag_alloc_next;
 	u16 tag_alloc_last;
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index c4a2829e0e1f..9eab29f67a0e 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -592,11 +592,16 @@ int nsim_bpf_init(struct netdevsim *ns)
 			debugfs_create_dir("bpf_bound_progs", ns->sdev->ddir);
 		if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs))
 			return -ENOMEM;
+
+		ns->sdev->bpf_dev = bpf_offload_dev_create();
+		err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
+		if (err)
+			return err;
 	}
 
-	err = bpf_offload_dev_netdev_register(ns->netdev);
+	err = bpf_offload_dev_netdev_register(ns->sdev->bpf_dev, ns->netdev);
 	if (err)
-		return err;
+		goto err_destroy_bdev;
 
 	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
 			   &ns->bpf_offloaded_id);
@@ -624,6 +629,11 @@ int nsim_bpf_init(struct netdevsim *ns)
 			    &ns->bpf_map_accept);
 
 	return 0;
+
+err_destroy_bdev:
+	if (ns->sdev->refcnt == 1)
+		bpf_offload_dev_destroy(ns->sdev->bpf_dev);
+	return err;
 }
 
 void nsim_bpf_uninit(struct netdevsim *ns)
@@ -631,10 +641,11 @@ void nsim_bpf_uninit(struct netdevsim *ns)
 	WARN_ON(ns->xdp.prog);
 	WARN_ON(ns->xdp_hw.prog);
 	WARN_ON(ns->bpf_offloaded);
-	bpf_offload_dev_netdev_unregister(ns->netdev);
+	bpf_offload_dev_netdev_unregister(ns->sdev->bpf_dev, ns->netdev);
 
 	if (ns->sdev->refcnt == 1) {
 		WARN_ON(!list_empty(&ns->sdev->bpf_bound_progs));
 		WARN_ON(!list_empty(&ns->sdev->bpf_bound_maps));
+		bpf_offload_dev_destroy(ns->sdev->bpf_dev);
 	}
 }
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 98f26fa1e671..02be199eb005 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -27,6 +27,7 @@
 #define NSIM_EA(extack, msg)	NL_SET_ERR_MSG_MOD((extack), msg)
 
 struct bpf_prog;
+struct bpf_offload_dev;
 struct dentry;
 struct nsim_vf_config;
 
@@ -36,6 +37,8 @@ struct netdevsim_shared_dev {
 
 	struct dentry *ddir;
 
+	struct bpf_offload_dev *bpf_dev;
+
 	struct dentry *ddir_bpf_bound_progs;
 	u32 prog_id_gen;
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index aa2e834a122b..913465e45062 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -85,6 +85,7 @@ struct bpf_map {
 	char name[BPF_OBJ_NAME_LEN];
 };
 
+struct bpf_offload_dev;
 struct bpf_offloaded_map;
 
 struct bpf_map_dev_ops {
@@ -650,8 +651,12 @@ int bpf_map_offload_get_next_key(struct bpf_map *map,
 
 bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
 
-int bpf_offload_dev_netdev_register(struct net_device *netdev);
-void bpf_offload_dev_netdev_unregister(struct net_device *netdev);
+struct bpf_offload_dev *bpf_offload_dev_create(void);
+void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev);
+int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
+				    struct net_device *netdev);
+void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
+				       struct net_device *netdev);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index cd64a26807aa..925575f64ff1 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -32,11 +32,17 @@
  */
 static DECLARE_RWSEM(bpf_devs_lock);
 
+struct bpf_offload_dev {
+	struct list_head netdevs;
+};
+
 struct bpf_offload_netdev {
 	struct rhash_head l;
 	struct net_device *netdev;
+	struct bpf_offload_dev *offdev;
 	struct list_head progs;
 	struct list_head maps;
+	struct list_head offdev_netdevs;
 };
 
 static const struct rhashtable_params offdevs_params = {
@@ -526,25 +532,18 @@ bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
 	return ret;
 }
 
-int bpf_offload_dev_netdev_register(struct net_device *netdev)
+int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
+				    struct net_device *netdev)
 {
 	struct bpf_offload_netdev *ondev;
 	int err;
 
-	down_write(&bpf_devs_lock);
-	if (!offdevs_inited) {
-		err = rhashtable_init(&offdevs, &offdevs_params);
-		if (err)
-			return err;
-		offdevs_inited = true;
-	}
-	up_write(&bpf_devs_lock);
-
 	ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
 	if (!ondev)
 		return -ENOMEM;
 
 	ondev->netdev = netdev;
+	ondev->offdev = offdev;
 	INIT_LIST_HEAD(&ondev->progs);
 	INIT_LIST_HEAD(&ondev->maps);
 
@@ -555,6 +554,7 @@ int bpf_offload_dev_netdev_register(struct net_device *netdev)
 		goto err_unlock_free;
 	}
 
+	list_add(&ondev->offdev_netdevs, &offdev->netdevs);
 	up_write(&bpf_devs_lock);
 	return 0;
 
@@ -565,11 +565,12 @@ err_unlock_free:
 }
 EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
 
-void bpf_offload_dev_netdev_unregister(struct net_device *netdev)
+void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
+				       struct net_device *netdev)
 {
+	struct bpf_offload_netdev *ondev, *altdev;
 	struct bpf_offloaded_map *offmap, *mtmp;
 	struct bpf_prog_offload *offload, *ptmp;
-	struct bpf_offload_netdev *ondev;
 
 	ASSERT_RTNL();
 
@@ -579,11 +580,26 @@ void bpf_offload_dev_netdev_unregister(struct net_device *netdev)
 		goto unlock;
 
 	WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
-
-	list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
-		__bpf_prog_offload_destroy(offload->prog);
-	list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
-		__bpf_map_offload_destroy(offmap);
+	list_del(&ondev->offdev_netdevs);
+
+	/* Try to move the objects to another netdev of the device */
+	altdev = list_first_entry_or_null(&offdev->netdevs,
+					  struct bpf_offload_netdev,
+					  offdev_netdevs);
+	if (altdev) {
+		list_for_each_entry(offload, &ondev->progs, offloads)
+			offload->netdev = altdev->netdev;
+		list_splice_init(&ondev->progs, &altdev->progs);
+
+		list_for_each_entry(offmap, &ondev->maps, offloads)
+			offmap->netdev = altdev->netdev;
+		list_splice_init(&ondev->maps, &altdev->maps);
+	} else {
+		list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
+			__bpf_prog_offload_destroy(offload->prog);
+		list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
+			__bpf_map_offload_destroy(offmap);
+	}
 
 	WARN_ON(!list_empty(&ondev->progs));
 	WARN_ON(!list_empty(&ondev->maps));
@@ -592,3 +608,34 @@ unlock:
 	up_write(&bpf_devs_lock);
 }
 EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
+
+struct bpf_offload_dev *bpf_offload_dev_create(void)
+{
+	struct bpf_offload_dev *offdev;
+	int err;
+
+	down_write(&bpf_devs_lock);
+	if (!offdevs_inited) {
+		err = rhashtable_init(&offdevs, &offdevs_params);
+		if (err)
+			return ERR_PTR(err);
+		offdevs_inited = true;
+	}
+	up_write(&bpf_devs_lock);
+
+	offdev = kzalloc(sizeof(*offdev), GFP_KERNEL);
+	if (!offdev)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&offdev->netdevs);
+
+	return offdev;
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_create);
+
+void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev)
+{
+	WARN_ON(!list_empty(&offdev->netdevs));
+	kfree(offdev);
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy);
-- 
cgit v1.2.3


From fd4f227dea0f24d89f52f7c4eb3207f84ddcbcbd Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 17 Jul 2018 10:53:26 -0700
Subject: bpf: offload: allow program and map sharing per-ASIC

Allow programs and maps to be re-used across different netdevs,
as long as they belong to the same struct bpf_offload_dev.
Update the bpf_offload_prog_map_match() helper for the verifier
and export a new helper for the drivers to use when checking
programs at attachment time.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h  |  1 +
 kernel/bpf/offload.c | 42 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 36 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 913465e45062..5b5ad95cf339 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -657,6 +657,7 @@ int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
 				    struct net_device *netdev);
 void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
 				       struct net_device *netdev);
+bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 925575f64ff1..177a52436394 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -511,22 +511,50 @@ int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
 	return 0;
 }
 
-bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
+static bool __bpf_offload_dev_match(struct bpf_prog *prog,
+				    struct net_device *netdev)
 {
-	struct bpf_offloaded_map *offmap;
+	struct bpf_offload_netdev *ondev1, *ondev2;
 	struct bpf_prog_offload *offload;
-	bool ret;
 
 	if (!bpf_prog_is_dev_bound(prog->aux))
 		return false;
-	if (!bpf_map_is_dev_bound(map))
-		return bpf_map_offload_neutral(map);
 
-	down_read(&bpf_devs_lock);
 	offload = prog->aux->offload;
+	if (!offload)
+		return false;
+	if (offload->netdev == netdev)
+		return true;
+
+	ondev1 = bpf_offload_find_netdev(offload->netdev);
+	ondev2 = bpf_offload_find_netdev(netdev);
+
+	return ondev1 && ondev2 && ondev1->offdev == ondev2->offdev;
+}
+
+bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev)
+{
+	bool ret;
+
+	down_read(&bpf_devs_lock);
+	ret = __bpf_offload_dev_match(prog, netdev);
+	up_read(&bpf_devs_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_match);
+
+bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
+{
+	struct bpf_offloaded_map *offmap;
+	bool ret;
+
+	if (!bpf_map_is_dev_bound(map))
+		return bpf_map_offload_neutral(map);
 	offmap = map_to_offmap(map);
 
-	ret = offload && offload->netdev == offmap->netdev;
+	down_read(&bpf_devs_lock);
+	ret = __bpf_offload_dev_match(prog, offmap->netdev);
 	up_read(&bpf_devs_lock);
 
 	return ret;
-- 
cgit v1.2.3


From d3b1084dfd629ef89bc1c4bab95e5cb87e7d08c2 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:40 +0200
Subject: vfs: make open_with_fake_path() not contribute to nr_files

Stacking file operations in overlay will store an extra open file for each
overlay file opened.

The overhead is just that of "struct file" which is about 256bytes, because
overlay already pins an extra dentry and inode when the file is open, which
add up to a much larger overhead.

For fear of breaking working setups, don't start accounting the extra file.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/file_table.c    | 69 +++++++++++++++++++++++++++++++++++++-----------------
 fs/internal.h      |  1 +
 fs/open.c          |  2 +-
 include/linux/fs.h |  3 +++
 4 files changed, 53 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index 9b70ed2bbc4e..0cc7bea6b51a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -52,7 +52,8 @@ static void file_free_rcu(struct rcu_head *head)
 static inline void file_free(struct file *f)
 {
 	security_file_free(f);
-	percpu_counter_dec(&nr_files);
+	if (!(f->f_mode & FMODE_NOACCOUNT))
+		percpu_counter_dec(&nr_files);
 	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
 
@@ -91,6 +92,34 @@ int proc_nr_files(struct ctl_table *table, int write,
 }
 #endif
 
+static struct file *__alloc_file(int flags, const struct cred *cred)
+{
+	struct file *f;
+	int error;
+
+	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+	if (unlikely(!f))
+		return ERR_PTR(-ENOMEM);
+
+	f->f_cred = get_cred(cred);
+	error = security_file_alloc(f);
+	if (unlikely(error)) {
+		file_free_rcu(&f->f_u.fu_rcuhead);
+		return ERR_PTR(error);
+	}
+
+	atomic_long_set(&f->f_count, 1);
+	rwlock_init(&f->f_owner.lock);
+	spin_lock_init(&f->f_lock);
+	mutex_init(&f->f_pos_lock);
+	eventpoll_init_file(f);
+	f->f_flags = flags;
+	f->f_mode = OPEN_FMODE(flags);
+	/* f->f_version: 0 */
+
+	return f;
+}
+
 /* Find an unused file structure and return a pointer to it.
  * Returns an error pointer if some error happend e.g. we over file
  * structures limit, run out of memory or operation is not permitted.
@@ -105,7 +134,6 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
 {
 	static long old_max;
 	struct file *f;
-	int error;
 
 	/*
 	 * Privileged users can go above max_files
@@ -119,26 +147,10 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
 			goto over;
 	}
 
-	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
-	if (unlikely(!f))
-		return ERR_PTR(-ENOMEM);
-
-	f->f_cred = get_cred(cred);
-	error = security_file_alloc(f);
-	if (unlikely(error)) {
-		file_free_rcu(&f->f_u.fu_rcuhead);
-		return ERR_PTR(error);
-	}
+	f = __alloc_file(flags, cred);
+	if (!IS_ERR(f))
+		percpu_counter_inc(&nr_files);
 
-	atomic_long_set(&f->f_count, 1);
-	rwlock_init(&f->f_owner.lock);
-	spin_lock_init(&f->f_lock);
-	mutex_init(&f->f_pos_lock);
-	eventpoll_init_file(f);
-	f->f_flags = flags;
-	f->f_mode = OPEN_FMODE(flags);
-	/* f->f_version: 0 */
-	percpu_counter_inc(&nr_files);
 	return f;
 
 over:
@@ -150,6 +162,21 @@ over:
 	return ERR_PTR(-ENFILE);
 }
 
+/*
+ * Variant of alloc_empty_file() that doesn't check and modify nr_files.
+ *
+ * Should not be used unless there's a very good reason to do so.
+ */
+struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
+{
+	struct file *f = __alloc_file(flags, cred);
+
+	if (!IS_ERR(f))
+		f->f_mode |= FMODE_NOACCOUNT;
+
+	return f;
+}
+
 /**
  * alloc_file - allocate and initialize a 'struct file'
  *
diff --git a/fs/internal.h b/fs/internal.h
index 52a346903748..442098fa0a84 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -94,6 +94,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
  * file_table.c
  */
 extern struct file *alloc_empty_file(int, const struct cred *);
+extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
 
 /*
  * super.c
diff --git a/fs/open.c b/fs/open.c
index dd15711eb658..9c6617dbb2c0 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -928,7 +928,7 @@ EXPORT_SYMBOL(dentry_open);
 struct file *open_with_fake_path(const struct path *path, int flags,
 				struct inode *inode, const struct cred *cred)
 {
-	struct file *f = alloc_empty_file(flags, cred);
+	struct file *f = alloc_empty_file_noaccount(flags, cred);
 	if (!IS_ERR(f)) {
 		int error;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5ce2b413abc6..e1884840d556 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -156,6 +156,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File is capable of returning -EAGAIN if I/O will block */
 #define FMODE_NOWAIT	((__force fmode_t)0x8000000)
 
+/* File does not contribute to nr_files count */
+#define FMODE_NOACCOUNT	((__force fmode_t)0x20000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
-- 
cgit v1.2.3


From 9df6702ad0e85901450fe48a7b5f0f8975353eeb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:40 +0200
Subject: vfs: export vfs_ioctl() to modules

This is needed by the stacked ioctl implementation in overlayfs.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/internal.h      | 1 -
 fs/ioctl.c         | 1 +
 include/linux/fs.h | 2 ++
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/internal.h b/fs/internal.h
index 442098fa0a84..9c3b4c40e582 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -184,7 +184,6 @@ extern const struct dentry_operations ns_dentry_operations;
  */
 extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
-extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 
 /*
  * iomap support:
diff --git a/fs/ioctl.c b/fs/ioctl.c
index b445b13fc59b..3212c29235ce 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -49,6 +49,7 @@ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
  out:
 	return error;
 }
+EXPORT_SYMBOL(vfs_ioctl);
 
 static int ioctl_fibmap(struct file *filp, int __user *p)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e1884840d556..019817a083a0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1634,6 +1634,8 @@ int vfs_mkobj(struct dentry *, umode_t,
 		int (*f)(struct dentry *, umode_t, void *),
 		void *);
 
+extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
 /*
  * VFS file helper functions.
  */
-- 
cgit v1.2.3


From f182536684d876afaf4627c36a16c4e15ea8a2b8 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:40 +0200
Subject: vfs: export vfs_dedupe_file_range_one() to modules

This is needed by the stacked dedupe implementation in overlayfs.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/read_write.c    | 6 +++---
 include/linux/fs.h | 4 ++++
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/read_write.c b/fs/read_write.c
index cce4ebac34a8..39b4a21dd933 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1964,9 +1964,8 @@ out_error:
 }
 EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
 
-static int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-				     struct file *dst_file, loff_t dst_pos,
-				     u64 len)
+int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+			      struct file *dst_file, loff_t dst_pos, u64 len)
 {
 	s64 ret;
 
@@ -2001,6 +2000,7 @@ out_drop_write:
 
 	return ret;
 }
+EXPORT_SYMBOL(vfs_dedupe_file_range_one);
 
 int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 019817a083a0..b67209948f1b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1829,6 +1829,10 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 loff_t len, bool *is_same);
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
+extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+				     struct file *dst_file, loff_t dst_pos,
+				     u64 len);
+
 
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
-- 
cgit v1.2.3


From 88059de155d4db817a3a78ba899cb3b7f4de0fb0 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Revert "ovl: fix relatime for directories"

This reverts commit cd91304e7190b4c4802f8e413ab2214b233e0260.

Overlayfs no longer relies on the vfs correct atime handling.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/inode.c             | 21 ++++-----------------
 fs/overlayfs/super.c   |  3 ---
 include/linux/dcache.h |  3 ---
 3 files changed, 4 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..7ef6f1942757 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1560,24 +1560,11 @@ EXPORT_SYMBOL(bmap);
 static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
 			       bool rcu)
 {
-	struct dentry *upperdentry;
+	if (!rcu) {
+		struct inode *realinode = d_real_inode(dentry);
 
-	/*
-	 * Nothing to do if in rcu or if non-overlayfs
-	 */
-	if (rcu || likely(!(dentry->d_flags & DCACHE_OP_REAL)))
-		return;
-
-	upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
-
-	/*
-	 * If file is on lower then we can't update atime, so no worries about
-	 * stale mtime/ctime.
-	 */
-	if (upperdentry) {
-		struct inode *realinode = d_inode(upperdentry);
-
-		if ((!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) ||
+		if (unlikely(inode != realinode) &&
+		    (!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) ||
 		     !timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) {
 			inode->i_mtime = realinode->i_mtime;
 			inode->i_ctime = realinode->i_ctime;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 211975921a90..a7e2287e4b8f 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -101,9 +101,6 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
 	if (inode && d_inode(dentry) == inode)
 		return dentry;
 
-	if (flags & D_REAL_UPPER)
-		return ovl_dentry_upper(dentry);
-
 	if (!d_is_reg(dentry)) {
 		if (!inode || inode == d_inode(dentry))
 			return dentry;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 66c6e17e61e5..ddae4103d324 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -564,9 +564,6 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
 	return upper;
 }
 
-/* d_real() flags */
-#define D_REAL_UPPER	0x2	/* return upper dentry or NULL if non-upper */
-
 /**
  * d_real - Return the real dentry
  * @dentry: the dentry to query
-- 
cgit v1.2.3


From c6718543463dbb78486ad259f884cb800df802b5 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Revert "vfs: update ovl inode before relatime check"

This reverts commit 598e3c8f72f5b77c84d2cb26cfd936ffb3cfdbaa.

Overlayfs no longer relies on the vfs correct atime handling.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/inode.c         | 33 ++++++---------------------------
 fs/internal.h      |  7 -------
 fs/namei.c         |  2 +-
 include/linux/fs.h |  1 +
 4 files changed, 8 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 7ef6f1942757..077a6174dfd5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1554,37 +1554,17 @@ sector_t bmap(struct inode *inode, sector_t block)
 }
 EXPORT_SYMBOL(bmap);
 
-/*
- * Update times in overlayed inode from underlying real inode
- */
-static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
-			       bool rcu)
-{
-	if (!rcu) {
-		struct inode *realinode = d_real_inode(dentry);
-
-		if (unlikely(inode != realinode) &&
-		    (!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) ||
-		     !timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) {
-			inode->i_mtime = realinode->i_mtime;
-			inode->i_ctime = realinode->i_ctime;
-		}
-	}
-}
-
 /*
  * With relative atime, only update atime if the previous atime is
  * earlier than either the ctime or mtime or if at least a day has
  * passed since the last atime update.
  */
-static int relatime_need_update(const struct path *path, struct inode *inode,
-				struct timespec now, bool rcu)
+static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
+			     struct timespec now)
 {
 
-	if (!(path->mnt->mnt_flags & MNT_RELATIME))
+	if (!(mnt->mnt_flags & MNT_RELATIME))
 		return 1;
-
-	update_ovl_inode_times(path->dentry, inode, rcu);
 	/*
 	 * Is mtime younger than atime? If yes, update atime:
 	 */
@@ -1655,8 +1635,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags)
  *	This function automatically handles read only file systems and media,
  *	as well as the "noatime" flag and inode specific "noatime" markers.
  */
-bool __atime_needs_update(const struct path *path, struct inode *inode,
-			  bool rcu)
+bool atime_needs_update(const struct path *path, struct inode *inode)
 {
 	struct vfsmount *mnt = path->mnt;
 	struct timespec64 now;
@@ -1682,7 +1661,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode,
 
 	now = current_time(inode);
 
-	if (!relatime_need_update(path, inode, timespec64_to_timespec(now), rcu))
+	if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
 		return false;
 
 	if (timespec64_equal(&inode->i_atime, &now))
@@ -1697,7 +1676,7 @@ void touch_atime(const struct path *path)
 	struct inode *inode = d_inode(path->dentry);
 	struct timespec64 now;
 
-	if (!__atime_needs_update(path, inode, false))
+	if (!atime_needs_update(path, inode))
 		return;
 
 	if (!sb_start_write_trylock(inode->i_sb))
diff --git a/fs/internal.h b/fs/internal.h
index abb41f346b18..e54ad4361d0d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -133,13 +133,6 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
 extern void inode_add_lru(struct inode *inode);
 extern int dentry_needs_remove_privs(struct dentry *dentry);
 
-extern bool __atime_needs_update(const struct path *, struct inode *, bool);
-static inline bool atime_needs_update_rcu(const struct path *path,
-					  struct inode *inode)
-{
-	return __atime_needs_update(path, inode, true);
-}
-
 /*
  * fs-writeback.c
  */
diff --git a/fs/namei.c b/fs/namei.c
index d152cc05fdc3..31182b71b313 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1015,7 +1015,7 @@ const char *get_link(struct nameidata *nd)
 	if (!(nd->flags & LOOKUP_RCU)) {
 		touch_atime(&last->link);
 		cond_resched();
-	} else if (atime_needs_update_rcu(&last->link, inode)) {
+	} else if (atime_needs_update(&last->link, inode)) {
 		if (unlikely(unlazy_walk(nd)))
 			return ERR_PTR(-ECHILD);
 		touch_atime(&last->link);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b67209948f1b..8f8c9ac1c9d5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2088,6 +2088,7 @@ enum file_time_flags {
 	S_VERSION = 8,
 };
 
+extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
 static inline void file_accessed(struct file *file)
 {
-- 
cgit v1.2.3


From 4ab30319fd7c691a1b3165325c647a5cd6d282ac Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Revert "vfs: add flags to d_real()"

This reverts commit 495e642939114478a5237a7d91661ba93b76f15a.

No user of "flags" argument of d_real() remain.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/Locking |  2 +-
 Documentation/filesystems/vfs.txt |  2 +-
 fs/open.c                         |  2 +-
 fs/overlayfs/super.c              |  4 ++--
 include/linux/dcache.h            | 11 +++++------
 include/linux/fs.h                |  2 +-
 6 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 2c391338c675..24e1a4f37c83 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -22,7 +22,7 @@ prototypes:
 	struct vfsmount *(*d_automount)(struct path *path);
 	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int, unsigned int);
+				 unsigned int);
 
 locking rules:
 		rename_lock	->d_lock	may block	rcu-walk
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 829a7b7857a4..6417b161f88d 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -1001,7 +1001,7 @@ struct dentry_operations {
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int, unsigned int);
+				 unsigned int);
 };
 
   d_revalidate: called when the VFS needs to revalidate a dentry. This
diff --git a/fs/open.c b/fs/open.c
index a3c03a4a9078..a973ca074896 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -96,7 +96,7 @@ long vfs_truncate(const struct path *path, loff_t length)
 	 * write access on the upper inode, not on the overlay inode.  For
 	 * non-overlay filesystems d_real() is an identity function.
 	 */
-	upperdentry = d_real(path->dentry, NULL, O_WRONLY, 0);
+	upperdentry = d_real(path->dentry, NULL, O_WRONLY);
 	error = PTR_ERR(upperdentry);
 	if (IS_ERR(upperdentry))
 		goto mnt_drop_write_and_out;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index a7e2287e4b8f..5bc261de5041 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -92,7 +92,7 @@ static int ovl_check_append_only(struct inode *inode, int flag)
 
 static struct dentry *ovl_d_real(struct dentry *dentry,
 				 const struct inode *inode,
-				 unsigned int open_flags, unsigned int flags)
+				 unsigned int open_flags)
 {
 	struct dentry *real;
 	int err;
@@ -128,7 +128,7 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
 		goto bug;
 
 	/* Handle recursion */
-	real = d_real(real, inode, open_flags, 0);
+	real = d_real(real, inode, open_flags);
 
 	if (!inode || inode == d_inode(real))
 		return real;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ddae4103d324..8fe4efa94af6 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -146,7 +146,7 @@ struct dentry_operations {
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int, unsigned int);
+				 unsigned int);
 } ____cacheline_aligned;
 
 /*
@@ -568,8 +568,7 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * d_real - Return the real dentry
  * @dentry: the dentry to query
  * @inode: inode to select the dentry from multiple layers (can be NULL)
- * @open_flags: open flags to control copy-up behavior
- * @flags: flags to control what is returned by this function
+ * @flags: open flags to control copy-up behavior
  *
  * If dentry is on a union/overlay, then return the underlying, real dentry.
  * Otherwise return the dentry itself.
@@ -578,10 +577,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  */
 static inline struct dentry *d_real(struct dentry *dentry,
 				    const struct inode *inode,
-				    unsigned int open_flags, unsigned int flags)
+				    unsigned int flags)
 {
 	if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
-		return dentry->d_op->d_real(dentry, inode, open_flags, flags);
+		return dentry->d_op->d_real(dentry, inode, flags);
 	else
 		return dentry;
 }
@@ -596,7 +595,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
 static inline struct inode *d_real_inode(const struct dentry *dentry)
 {
 	/* This usage of d_real() results in const dentry */
-	return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0, 0));
+	return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0));
 }
 
 struct name_snapshot {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8f8c9ac1c9d5..16e2741cec3c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1249,7 +1249,7 @@ static inline struct inode *file_inode(const struct file *f)
 
 static inline struct dentry *file_dentry(const struct file *file)
 {
-	return d_real(file->f_path.dentry, file_inode(file), 0, 0);
+	return d_real(file->f_path.dentry, file_inode(file), 0);
 }
 
 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
-- 
cgit v1.2.3


From de2a4a501e716bbf5ff691ba16faf59a35320228 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Partially revert "locks: fix file locking on overlayfs"

This partially reverts commit c568d68341be7030f5647def68851e469b21ca11.

Overlayfs files will now automatically get the correct locks, no need to
hack overlay support in VFS.

It is a partial revert, because it leaves the locks_inode() calls in place
and defines locks_inode() to file_inode().  We could revert those as well,
but it would be unnecessary code churn and it makes sense to document that
we are getting the inode for locking purposes.

Don't revert MS_NOREMOTELOCK yet since that has been part of the userspace
API for some time (though not in a useful way).  Will try to remove
internal flags later when the dust around the new mount API settles.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c           | 17 ++++++-----------
 fs/overlayfs/super.c |  2 +-
 include/linux/fs.h   | 13 +------------
 3 files changed, 8 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index baa564841c03..dab4e72f8bff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,11 +139,6 @@
 #define IS_OFDLCK(fl)	(fl->fl_flags & FL_OFDLCK)
 #define IS_REMOTELCK(fl)	(fl->fl_pid <= 0)
 
-static inline bool is_remote_lock(struct file *filp)
-{
-	return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK));
-}
-
 static bool lease_breaking(struct file_lock *fl)
 {
 	return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
@@ -1875,7 +1870,7 @@ EXPORT_SYMBOL(generic_setlease);
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
-	if (filp->f_op->setlease && is_remote_lock(filp))
+	if (filp->f_op->setlease)
 		return filp->f_op->setlease(filp, arg, lease, priv);
 	else
 		return generic_setlease(filp, arg, lease, priv);
@@ -2022,7 +2017,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 	if (error)
 		goto out_free;
 
-	if (f.file->f_op->flock && is_remote_lock(f.file))
+	if (f.file->f_op->flock)
 		error = f.file->f_op->flock(f.file,
 					  (can_sleep) ? F_SETLKW : F_SETLK,
 					  lock);
@@ -2048,7 +2043,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
  */
 int vfs_test_lock(struct file *filp, struct file_lock *fl)
 {
-	if (filp->f_op->lock && is_remote_lock(filp))
+	if (filp->f_op->lock)
 		return filp->f_op->lock(filp, F_GETLK, fl);
 	posix_test_lock(filp, fl);
 	return 0;
@@ -2191,7 +2186,7 @@ out:
  */
 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
 {
-	if (filp->f_op->lock && is_remote_lock(filp))
+	if (filp->f_op->lock)
 		return filp->f_op->lock(filp, cmd, fl);
 	else
 		return posix_lock_file(filp, fl, conf);
@@ -2513,7 +2508,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
 	if (list_empty(&flctx->flc_flock))
 		return;
 
-	if (filp->f_op->flock && is_remote_lock(filp))
+	if (filp->f_op->flock)
 		filp->f_op->flock(filp, F_SETLKW, &fl);
 	else
 		flock_lock_inode(inode, &fl);
@@ -2600,7 +2595,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
  */
 int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
 {
-	if (filp->f_op->lock && is_remote_lock(filp))
+	if (filp->f_op->lock)
 		return filp->f_op->lock(filp, F_CANCELLK, fl);
 	return 0;
 }
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 5bc261de5041..c63beccad4fc 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1456,7 +1456,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_op = &ovl_super_operations;
 	sb->s_xattr = ovl_xattr_handlers;
 	sb->s_fs_info = ofs;
-	sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK;
+	sb->s_flags |= SB_POSIXACL;
 
 	err = -ENOMEM;
 	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 16e2741cec3c..1cbcf37c45e1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1054,17 +1054,7 @@ struct file_lock_context {
 
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
-/*
- * Return the inode to use for locking
- *
- * For overlayfs this should be the overlay inode, not the real inode returned
- * by file_inode().  For any other fs file_inode(filp) and locks_inode(filp) are
- * equal.
- */
-static inline struct inode *locks_inode(const struct file *f)
-{
-	return f->f_path.dentry->d_inode;
-}
+#define locks_inode(f) file_inode(f)
 
 #ifdef CONFIG_FILE_LOCKING
 extern int fcntl_getlk(struct file *, unsigned int, struct flock *);
@@ -1305,7 +1295,6 @@ extern int send_sigurg(struct fown_struct *fown);
 
 /* These sb flags are internal to the kernel */
 #define SB_SUBMOUNT     (1<<26)
-#define SB_NOREMOTELOCK	(1<<27)
 #define SB_NOSEC	(1<<28)
 #define SB_BORN		(1<<29)
 #define SB_ACTIVE	(1<<30)
-- 
cgit v1.2.3


From 573e1784817ca1f13d76a0df636929e983e5de3c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:44 +0200
Subject: Revert "fsnotify: support overlayfs"

This reverts commit f3fbbb079263bd29ae592478de6808db7e708267.

Overlayfs now works correctly without adding hacks to fsnotify.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fsnotify.h | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index bdaf22582f6e..fd1ce10553bf 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -30,11 +30,7 @@ static inline int fsnotify_parent(const struct path *path, struct dentry *dentry
 static inline int fsnotify_perm(struct file *file, int mask)
 {
 	const struct path *path = &file->f_path;
-	/*
-	 * Do not use file_inode() here or anywhere in this file to get the
-	 * inode.  That would break *notity on overlayfs.
-	 */
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 fsnotify_mask = 0;
 	int ret;
 
@@ -178,7 +174,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 static inline void fsnotify_access(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 mask = FS_ACCESS;
 
 	if (S_ISDIR(inode->i_mode))
@@ -196,7 +192,7 @@ static inline void fsnotify_access(struct file *file)
 static inline void fsnotify_modify(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 mask = FS_MODIFY;
 
 	if (S_ISDIR(inode->i_mode))
@@ -214,7 +210,7 @@ static inline void fsnotify_modify(struct file *file)
 static inline void fsnotify_open(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 mask = FS_OPEN;
 
 	if (S_ISDIR(inode->i_mode))
@@ -230,7 +226,7 @@ static inline void fsnotify_open(struct file *file)
 static inline void fsnotify_close(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	fmode_t mode = file->f_mode;
 	__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
 
-- 
cgit v1.2.3


From fb16043b46831a75c9b076a7262ae035290b0409 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:44 +0200
Subject: vfs: remove open_flags from d_real()

Opening regular files on overlayfs is now handled via ovl_open().  Remove
the now unused "open_flags" argument from d_op->d_real() and the d_real()
helper.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/Locking |  3 +--
 Documentation/filesystems/vfs.txt | 16 ++++------------
 fs/overlayfs/super.c              | 36 +++---------------------------------
 include/linux/dcache.h            | 11 ++++-------
 include/linux/fs.h                |  2 +-
 5 files changed, 13 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 24e1a4f37c83..08a36e14e8fc 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -21,8 +21,7 @@ prototypes:
 	char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
 	struct vfsmount *(*d_automount)(struct path *path);
 	int (*d_manage)(const struct path *, bool);
-	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int);
+	struct dentry *(*d_real)(struct dentry *, const struct inode *);
 
 locking rules:
 		rename_lock	->d_lock	may block	rcu-walk
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 6417b161f88d..497609f00e3e 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -1000,8 +1000,7 @@ struct dentry_operations {
 	char *(*d_dname)(struct dentry *, char *, int);
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
-	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int);
+	struct dentry *(*d_real)(struct dentry *, const struct inode *);
 };
 
   d_revalidate: called when the VFS needs to revalidate a dentry. This
@@ -1135,22 +1134,15 @@ struct dentry_operations {
 	dentry being transited from.
 
   d_real: overlay/union type filesystems implement this method to return one of
-	the underlying dentries hidden by the overlay.  It is used in three
+	the underlying dentries hidden by the overlay.  It is used in two
 	different modes:
 
-	Called from open it may need to copy-up the file depending on the
-	supplied open flags.  This mode is selected with a non-zero flags
-	argument.  In this mode the d_real method can return an error.
-
 	Called from file_dentry() it returns the real dentry matching the inode
 	argument.  The real dentry may be from a lower layer already copied up,
 	but still referenced from the file.  This mode is selected with a
-	non-NULL inode argument.  This will always succeed.
-
-	With NULL inode and zero flags the topmost real underlying dentry is
-	returned.  This will always succeed.
+	non-NULL inode argument.
 
-	This method is never called with both non-NULL inode and non-zero flags.
+	With NULL inode the topmost real underlying dentry is returned.
 
 Each dentry has a pointer to its parent dentry, as well as a hash list
 of child dentries. Child dentries are basically like files in a
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index c63beccad4fc..0e84593af84e 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -74,28 +74,10 @@ static void ovl_dentry_release(struct dentry *dentry)
 	}
 }
 
-static int ovl_check_append_only(struct inode *inode, int flag)
-{
-	/*
-	 * This test was moot in vfs may_open() because overlay inode does
-	 * not have the S_APPEND flag, so re-check on real upper inode
-	 */
-	if (IS_APPEND(inode)) {
-		if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
-			return -EPERM;
-		if (flag & O_TRUNC)
-			return -EPERM;
-	}
-
-	return 0;
-}
-
 static struct dentry *ovl_d_real(struct dentry *dentry,
-				 const struct inode *inode,
-				 unsigned int open_flags)
+				 const struct inode *inode)
 {
 	struct dentry *real;
-	int err;
 
 	/* It's an overlay file */
 	if (inode && d_inode(dentry) == inode)
@@ -107,28 +89,16 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
 		goto bug;
 	}
 
-	if (open_flags) {
-		err = ovl_open_maybe_copy_up(dentry, open_flags);
-		if (err)
-			return ERR_PTR(err);
-	}
-
 	real = ovl_dentry_upper(dentry);
-	if (real && (!inode || inode == d_inode(real))) {
-		if (!inode) {
-			err = ovl_check_append_only(d_inode(real), open_flags);
-			if (err)
-				return ERR_PTR(err);
-		}
+	if (real && (!inode || inode == d_inode(real)))
 		return real;
-	}
 
 	real = ovl_dentry_lower(dentry);
 	if (!real)
 		goto bug;
 
 	/* Handle recursion */
-	real = d_real(real, inode, open_flags);
+	real = d_real(real, inode);
 
 	if (!inode || inode == d_inode(real))
 		return real;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 8fe4efa94af6..78cea80423a3 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -145,8 +145,7 @@ struct dentry_operations {
 	char *(*d_dname)(struct dentry *, char *, int);
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
-	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int);
+	struct dentry *(*d_real)(struct dentry *, const struct inode *);
 } ____cacheline_aligned;
 
 /*
@@ -568,7 +567,6 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * d_real - Return the real dentry
  * @dentry: the dentry to query
  * @inode: inode to select the dentry from multiple layers (can be NULL)
- * @flags: open flags to control copy-up behavior
  *
  * If dentry is on a union/overlay, then return the underlying, real dentry.
  * Otherwise return the dentry itself.
@@ -576,11 +574,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * See also: Documentation/filesystems/vfs.txt
  */
 static inline struct dentry *d_real(struct dentry *dentry,
-				    const struct inode *inode,
-				    unsigned int flags)
+				    const struct inode *inode)
 {
 	if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
-		return dentry->d_op->d_real(dentry, inode, flags);
+		return dentry->d_op->d_real(dentry, inode);
 	else
 		return dentry;
 }
@@ -595,7 +592,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
 static inline struct inode *d_real_inode(const struct dentry *dentry)
 {
 	/* This usage of d_real() results in const dentry */
-	return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0));
+	return d_backing_inode(d_real((struct dentry *) dentry, NULL));
 }
 
 struct name_snapshot {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1cbcf37c45e1..1fa63d184d1f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1239,7 +1239,7 @@ static inline struct inode *file_inode(const struct file *f)
 
 static inline struct dentry *file_dentry(const struct file *file)
 {
-	return d_real(file->f_path.dentry, file_inode(file), 0);
+	return d_real(file->f_path.dentry, file_inode(file));
 }
 
 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
-- 
cgit v1.2.3


From 3f289dcb4b265416a57ca79cf4a324060bb09060 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 18 Jul 2018 04:47:36 -0700
Subject: block: make bdev_ops->rw_page() take a REQ_OP instead of bool

c11f0c0b5bb9 ("block/mm: make bdev_ops->rw_page() take a bool for
read/write") replaced @op with boolean @is_write, which limited the
amount of information going into ->rw_page() and more importantly
page_endio(), which removed the need to expose block internals to mm.

Unfortunately, we want to track discards separately and @is_write
isn't enough information.  This patch updates bdev_ops->rw_page() to
take REQ_OP instead but leaves page_endio() to take bool @is_write.
This allows the block part of operations to have enough information
while not leaking it to mm.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/brd.c           | 14 +++++++-------
 drivers/block/zram/zram_drv.c | 16 ++++++++--------
 drivers/nvdimm/btt.c          | 12 ++++++------
 drivers/nvdimm/pmem.c         | 13 ++++++-------
 fs/block_dev.c                |  6 ++++--
 fs/mpage.c                    |  4 ++--
 include/linux/blkdev.h        |  2 +-
 7 files changed, 34 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index bb976598ee43..df8103dd40ac 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -254,20 +254,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
  * Process a single bvec of a bio.
  */
 static int brd_do_bvec(struct brd_device *brd, struct page *page,
-			unsigned int len, unsigned int off, bool is_write,
+			unsigned int len, unsigned int off, unsigned int op,
 			sector_t sector)
 {
 	void *mem;
 	int err = 0;
 
-	if (is_write) {
+	if (op_is_write(op)) {
 		err = copy_to_brd_setup(brd, sector, len);
 		if (err)
 			goto out;
 	}
 
 	mem = kmap_atomic(page);
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		copy_from_brd(mem + off, brd, sector, len);
 		flush_dcache_page(page);
 	} else {
@@ -296,7 +296,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
 		int err;
 
 		err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
-					op_is_write(bio_op(bio)), sector);
+				  bio_op(bio), sector);
 		if (err)
 			goto io_error;
 		sector += len >> SECTOR_SHIFT;
@@ -310,15 +310,15 @@ io_error:
 }
 
 static int brd_rw_page(struct block_device *bdev, sector_t sector,
-		       struct page *page, bool is_write)
+		       struct page *page, unsigned int op)
 {
 	struct brd_device *brd = bdev->bd_disk->private_data;
 	int err;
 
 	if (PageTransHuge(page))
 		return -ENOTSUPP;
-	err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
-	page_endio(page, is_write, err);
+	err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
+	page_endio(page, op_is_write(op), err);
 	return err;
 }
 
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 7436b2d27fa3..78c29044684a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1274,17 +1274,17 @@ static void zram_bio_discard(struct zram *zram, u32 index,
  * Returns 1 if IO request was successfully submitted.
  */
 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
-			int offset, bool is_write, struct bio *bio)
+			int offset, unsigned int op, struct bio *bio)
 {
 	unsigned long start_time = jiffies;
-	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
+	int rw_acct = op_is_write(op) ? REQ_OP_WRITE : REQ_OP_READ;
 	struct request_queue *q = zram->disk->queue;
 	int ret;
 
 	generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
 			&zram->disk->part0);
 
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		atomic64_inc(&zram->stats.num_reads);
 		ret = zram_bvec_read(zram, bvec, index, offset, bio);
 		flush_dcache_page(bvec->bv_page);
@@ -1300,7 +1300,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 	zram_slot_unlock(zram, index);
 
 	if (unlikely(ret < 0)) {
-		if (!is_write)
+		if (!op_is_write(op))
 			atomic64_inc(&zram->stats.failed_reads);
 		else
 			atomic64_inc(&zram->stats.failed_writes);
@@ -1338,7 +1338,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
 			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
 							unwritten);
 			if (zram_bvec_rw(zram, &bv, index, offset,
-					op_is_write(bio_op(bio)), bio) < 0)
+					 bio_op(bio), bio) < 0)
 				goto out;
 
 			bv.bv_offset += bv.bv_len;
@@ -1390,7 +1390,7 @@ static void zram_slot_free_notify(struct block_device *bdev,
 }
 
 static int zram_rw_page(struct block_device *bdev, sector_t sector,
-		       struct page *page, bool is_write)
+		       struct page *page, unsigned int op)
 {
 	int offset, ret;
 	u32 index;
@@ -1414,7 +1414,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
 	bv.bv_len = PAGE_SIZE;
 	bv.bv_offset = 0;
 
-	ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
+	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
 out:
 	/*
 	 * If I/O fails, just return error(ie, non-zero) without
@@ -1429,7 +1429,7 @@ out:
 
 	switch (ret) {
 	case 0:
-		page_endio(page, is_write, 0);
+		page_endio(page, op_is_write(op), 0);
 		break;
 	case 1:
 		ret = 0;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 85de8053aa34..0360c015f658 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1423,11 +1423,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 
 static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
 			struct page *page, unsigned int len, unsigned int off,
-			bool is_write, sector_t sector)
+			unsigned int op, sector_t sector)
 {
 	int ret;
 
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		ret = btt_read_pg(btt, bip, page, off, sector, len);
 		flush_dcache_page(page);
 	} else {
@@ -1464,7 +1464,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 		}
 
 		err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
-				  op_is_write(bio_op(bio)), iter.bi_sector);
+				  bio_op(bio), iter.bi_sector);
 		if (err) {
 			dev_err(&btt->nd_btt->dev,
 					"io error in %s sector %lld, len %d,\n",
@@ -1483,16 +1483,16 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 }
 
 static int btt_rw_page(struct block_device *bdev, sector_t sector,
-		struct page *page, bool is_write)
+		struct page *page, unsigned int op)
 {
 	struct btt *btt = bdev->bd_disk->private_data;
 	int rc;
 	unsigned int len;
 
 	len = hpage_nr_pages(page) * PAGE_SIZE;
-	rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector);
+	rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector);
 	if (rc == 0)
-		page_endio(page, is_write, 0);
+		page_endio(page, op_is_write(op), 0);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8b1fd7f1a224..dd17acd8fe68 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -120,7 +120,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
 }
 
 static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
-			unsigned int len, unsigned int off, bool is_write,
+			unsigned int len, unsigned int off, unsigned int op,
 			sector_t sector)
 {
 	blk_status_t rc = BLK_STS_OK;
@@ -131,7 +131,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
 		bad_pmem = true;
 
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		if (unlikely(bad_pmem))
 			rc = BLK_STS_IOERR;
 		else {
@@ -180,8 +180,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
 		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
-				bvec.bv_offset, op_is_write(bio_op(bio)),
-				iter.bi_sector);
+				bvec.bv_offset, bio_op(bio), iter.bi_sector);
 		if (rc) {
 			bio->bi_status = rc;
 			break;
@@ -198,13 +197,13 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 }
 
 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
-		       struct page *page, bool is_write)
+		       struct page *page, unsigned int op)
 {
 	struct pmem_device *pmem = bdev->bd_queue->queuedata;
 	blk_status_t rc;
 
 	rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
-			  0, is_write, sector);
+			  0, op, sector);
 
 	/*
 	 * The ->rw_page interface is subtle and tricky.  The core
@@ -213,7 +212,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	 * caused by double completion.
 	 */
 	if (rc == 0)
-		page_endio(page, is_write, 0);
+		page_endio(page, op_is_write(op), 0);
 
 	return blk_status_to_errno(rc);
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0dd87aaeb39a..496fb51a1e1a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -665,7 +665,8 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
 	result = blk_queue_enter(bdev->bd_queue, 0);
 	if (result)
 		return result;
-	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false);
+	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
+			      REQ_OP_READ);
 	blk_queue_exit(bdev->bd_queue);
 	return result;
 }
@@ -703,7 +704,8 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 		return result;
 
 	set_page_writeback(page);
-	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
+	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
+			      REQ_OP_WRITE);
 	if (result) {
 		end_page_writeback(page);
 	} else {
diff --git a/fs/mpage.c b/fs/mpage.c
index b7e7f570733a..b73638db9866 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -51,8 +51,8 @@ static void mpage_end_io(struct bio *bio)
 
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
-		page_endio(page, op_is_write(bio_op(bio)),
-				blk_status_to_errno(bio->bi_status));
+		page_endio(page, bio_op(bio),
+			   blk_status_to_errno(bio->bi_status));
 	}
 
 	bio_put(bio);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1939ed95f936..331a6cb8805f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1943,7 +1943,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 struct block_device_operations {
 	int (*open) (struct block_device *, fmode_t);
 	void (*release) (struct gendisk *, fmode_t);
-	int (*rw_page)(struct block_device *, sector_t, struct page *, bool);
+	int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	unsigned int (*check_events) (struct gendisk *disk,
-- 
cgit v1.2.3


From 59767fbd49d794b4499d30b314df6c0d4aca584b Mon Sep 17 00:00:00 2001
From: Michael Callahan <michaelcallahan@fb.com>
Date: Wed, 18 Jul 2018 04:47:37 -0700
Subject: block: Add part_stat_read_accum to read across field entries.

Add a part_stat_read_accum macro to genhd.h to read and sum across
field entries.  For example to sum up the number read and write
sectors completed.  In addition to being ar reasonable cleanup by
itself this will make it easier to add new stat fields in the future.

tj: Refreshed on top of v4.17.

Signed-off-by: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_receiver.c | 3 +--
 drivers/block/drbd/drbd_worker.c   | 4 +---
 drivers/md/md.c                    | 3 +--
 include/linux/genhd.h              | 4 ++++
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index a36a30795c43..75f6b47169e6 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2674,8 +2674,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
 	if (c_min_rate == 0)
 		return false;
 
-	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
-		      (int)part_stat_read(&disk->part0, sectors[1]) -
+	curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
 			atomic_read(&device->rs_sect_ev);
 
 	if (atomic_read(&device->ap_actlog_cnt)
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 5e793dd7adfb..b8f77e83d456 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1690,9 +1690,7 @@ void drbd_rs_controller_reset(struct drbd_device *device)
 	atomic_set(&device->rs_sect_in, 0);
 	atomic_set(&device->rs_sect_ev, 0);
 	device->rs_in_flight = 0;
-	device->rs_last_events =
-		(int)part_stat_read(&disk->part0, sectors[0]) +
-		(int)part_stat_read(&disk->part0, sectors[1]);
+	device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors);
 
 	/* Updating the RCU protected object in place is necessary since
 	   this function gets called from atomic context.
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 994aed2f9dff..dabe36723d60 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8046,8 +8046,7 @@ static int is_mddev_idle(struct mddev *mddev, int init)
 	rcu_read_lock();
 	rdev_for_each_rcu(rdev, mddev) {
 		struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
-		curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
-			      (int)part_stat_read(&disk->part0, sectors[1]) -
+		curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
 			      atomic_read(&disk->sync_io);
 		/* sync IO will cause sync_io to increase before the disk_stats
 		 * as sync_io is counted when a request starts, and
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6cb8a5789668..19f36fa10995 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -353,6 +353,10 @@ static inline void free_part_stats(struct hd_struct *part)
 
 #endif /* CONFIG_SMP */
 
+#define part_stat_read_accum(part, field)				\
+	(part_stat_read(part, field[0]) +				\
+	 part_stat_read(part, field[1]))
+
 #define part_stat_add(cpu, part, field, addnd)	do {			\
 	__part_stat_add((cpu), (part), field, addnd);			\
 	if ((part)->partno)						\
-- 
cgit v1.2.3


From dbae2c551377b6533a00c11fc7ede370100ab404 Mon Sep 17 00:00:00 2001
From: Michael Callahan <michaelcallahan@fb.com>
Date: Wed, 18 Jul 2018 04:47:38 -0700
Subject: block: Define and use STAT_READ and STAT_WRITE

Add defines for STAT_READ and STAT_WRITE for indexing the partition
stat entries. This clarifies some fs/ code which has hardcoded 1 for
STAT_WRITE and will make it easier to extend the stats with additional
fields.

tj: Refreshed on top of v4.17.

Signed-off-by: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c             | 16 ++++++++--------
 block/partition-generic.c | 16 ++++++++--------
 fs/ext4/super.c           |  5 +++--
 fs/ext4/sysfs.c           |  6 ++++--
 fs/f2fs/f2fs.h            |  2 +-
 fs/f2fs/super.c           |  3 ++-
 include/linux/blk_types.h |  7 +++++++
 include/linux/genhd.h     | 13 +++++++------
 8 files changed, 40 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index f1543a45e73b..0711a800d0d4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1337,14 +1337,14 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   "%u %lu %lu %lu %u %u %u %u\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
-			   part_stat_read(hd, ios[READ]),
-			   part_stat_read(hd, merges[READ]),
-			   part_stat_read(hd, sectors[READ]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
-			   part_stat_read(hd, ios[WRITE]),
-			   part_stat_read(hd, merges[WRITE]),
-			   part_stat_read(hd, sectors[WRITE]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
+			   part_stat_read(hd, ios[STAT_READ]),
+			   part_stat_read(hd, merges[STAT_READ]),
+			   part_stat_read(hd, sectors[STAT_READ]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_READ])),
+			   part_stat_read(hd, ios[STAT_WRITE]),
+			   part_stat_read(hd, merges[STAT_WRITE]),
+			   part_stat_read(hd, sectors[STAT_WRITE]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
 			   inflight[0],
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 3dcfd4ec0e11..0ddb06722162 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -132,14 +132,14 @@ ssize_t part_stat_show(struct device *dev,
 		"%8lu %8lu %8llu %8u "
 		"%8u %8u %8u"
 		"\n",
-		part_stat_read(p, ios[READ]),
-		part_stat_read(p, merges[READ]),
-		(unsigned long long)part_stat_read(p, sectors[READ]),
-		jiffies_to_msecs(part_stat_read(p, ticks[READ])),
-		part_stat_read(p, ios[WRITE]),
-		part_stat_read(p, merges[WRITE]),
-		(unsigned long long)part_stat_read(p, sectors[WRITE]),
-		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
+		part_stat_read(p, ios[STAT_READ]),
+		part_stat_read(p, merges[STAT_READ]),
+		(unsigned long long)part_stat_read(p, sectors[STAT_READ]),
+		jiffies_to_msecs(part_stat_read(p, ticks[STAT_READ])),
+		part_stat_read(p, ios[STAT_WRITE]),
+		part_stat_read(p, merges[STAT_WRITE]),
+		(unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
+		jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
 		inflight[0],
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
 		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba2396a7bd04..4b8aef989552 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3514,7 +3514,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_sb_block = sb_block;
 	if (sb->s_bdev->bd_part)
 		sbi->s_sectors_written_start =
-			part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+			part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]);
 
 	/* Cleanup superblock name */
 	strreplace(sb->s_id, '/', '!');
@@ -4824,7 +4824,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	if (sb->s_bdev->bd_part)
 		es->s_kbytes_written =
 			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
-			    ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
+			    ((part_stat_read(sb->s_bdev->bd_part,
+					     sectors[STAT_WRITE]) -
 			      EXT4_SB(sb)->s_sectors_written_start) >> 1));
 	else
 		es->s_kbytes_written =
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index f34da0bb8f17..2be9ad790017 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -56,7 +56,8 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
 	if (!sb->s_bdev->bd_part)
 		return snprintf(buf, PAGE_SIZE, "0\n");
 	return snprintf(buf, PAGE_SIZE, "%lu\n",
-			(part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
+			(part_stat_read(sb->s_bdev->bd_part,
+					sectors[STAT_WRITE]) -
 			 sbi->s_sectors_written_start) >> 1);
 }
 
@@ -68,7 +69,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
 		return snprintf(buf, PAGE_SIZE, "0\n");
 	return snprintf(buf, PAGE_SIZE, "%llu\n",
 			(unsigned long long)(sbi->s_kbytes_written +
-			((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
+			((part_stat_read(sb->s_bdev->bd_part,
+					 sectors[STAT_WRITE]) -
 			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4d8b1de83143..6799c3fc44e3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1304,7 +1304,7 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
  * and the return value is in kbytes. s is of struct f2fs_sb_info.
  */
 #define BD_PART_WRITTEN(s)						 \
-(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[1]) -		 \
+(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) -   \
 		(s)->sectors_written_start) >> 1)
 
 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3995e926ba3a..17bcff789c08 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2882,7 +2882,8 @@ try_onemore:
 	/* For write statistics */
 	if (sb->s_bdev->bd_part)
 		sbi->sectors_written_start =
-			(u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+			(u64)part_stat_read(sb->s_bdev->bd_part,
+					    sectors[STAT_WRITE]);
 
 	/* Read accumulated write IO statistics if exists */
 	seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index e13449a379a1..d2b44de56bc1 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -357,6 +357,13 @@ enum req_flag_bits {
 #define REQ_NOMERGE_FLAGS \
 	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)
 
+enum stat_group {
+	STAT_READ,
+	STAT_WRITE,
+
+	NR_STAT_GROUPS
+};
+
 #define bio_op(bio) \
 	((bio)->bi_opf & REQ_OP_MASK)
 #define req_op(req) \
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 19f36fa10995..a75445446974 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/percpu-refcount.h>
 #include <linux/uuid.h>
+#include <linux/blk_types.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -82,10 +83,10 @@ struct partition {
 } __attribute__((packed));
 
 struct disk_stats {
-	unsigned long sectors[2];	/* READs and WRITEs */
-	unsigned long ios[2];
-	unsigned long merges[2];
-	unsigned long ticks[2];
+	unsigned long sectors[NR_STAT_GROUPS];
+	unsigned long ios[NR_STAT_GROUPS];
+	unsigned long merges[NR_STAT_GROUPS];
+	unsigned long ticks[NR_STAT_GROUPS];
 	unsigned long io_ticks;
 	unsigned long time_in_queue;
 };
@@ -354,8 +355,8 @@ static inline void free_part_stats(struct hd_struct *part)
 #endif /* CONFIG_SMP */
 
 #define part_stat_read_accum(part, field)				\
-	(part_stat_read(part, field[0]) +				\
-	 part_stat_read(part, field[1]))
+	(part_stat_read(part, field[STAT_READ]) +			\
+	 part_stat_read(part, field[STAT_WRITE]))
 
 #define part_stat_add(cpu, part, field, addnd)	do {			\
 	__part_stat_add((cpu), (part), field, addnd);			\
-- 
cgit v1.2.3


From ddcf35d397976421a4ec1d0d00fbcc027a8cb034 Mon Sep 17 00:00:00 2001
From: Michael Callahan <michaelcallahan@fb.com>
Date: Wed, 18 Jul 2018 04:47:39 -0700
Subject: block: Add and use op_stat_group() for indexing disk_stat fields.

Add and use a new op_stat_group() function for indexing partition stat
fields rather than indexing them by rq_data_dir() or bio_data_dir().
This function works similarly to op_is_sync() in that it takes the
request::cmd_flags or bio::bi_opf flags and determines which stats
should et updated.

In addition, the second parameter to generic_start_io_acct() and
generic_end_io_acct() is now a REQ_OP rather than simply a read or
write bit and it uses op_stat_group() on the parameter to determine
the stat group.

Note that the partition in_flight counts are not part of the per-cpu
statistics and as such are not indexed via this function.  It's now
indexed by op_is_write().

tj: Refreshed on top of v4.17.  Updated to pass around REQ_OP.

Signed-off-by: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Joshua Morris <josh.h.morris@us.ibm.com>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Matias Bjorling <mb@lightnvm.io>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Alasdair Kergon <agk@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                   | 16 +++++++++-------
 block/blk-core.c              | 12 ++++++------
 drivers/block/drbd/drbd_req.c |  4 ++--
 drivers/block/rsxx/dev.c      |  6 +++---
 drivers/block/zram/zram_drv.c |  5 ++---
 drivers/lightnvm/pblk-cache.c |  5 +++--
 drivers/lightnvm/pblk-read.c  |  5 +++--
 drivers/md/bcache/request.c   | 13 +++++--------
 drivers/md/dm.c               |  6 ++++--
 drivers/md/md.c               |  5 +++--
 drivers/nvdimm/nd.h           |  7 +++----
 include/linux/bio.h           |  4 ++--
 include/linux/blk_types.h     |  5 +++++
 13 files changed, 50 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index f3536bfc8298..8ecc95615941 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1728,29 +1728,31 @@ void bio_check_pages_dirty(struct bio *bio)
 }
 EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
 
-void generic_start_io_acct(struct request_queue *q, int rw,
+void generic_start_io_acct(struct request_queue *q, int op,
 			   unsigned long sectors, struct hd_struct *part)
 {
+	const int sgrp = op_stat_group(op);
 	int cpu = part_stat_lock();
 
 	part_round_stats(q, cpu, part);
-	part_stat_inc(cpu, part, ios[rw]);
-	part_stat_add(cpu, part, sectors[rw], sectors);
-	part_inc_in_flight(q, part, rw);
+	part_stat_inc(cpu, part, ios[sgrp]);
+	part_stat_add(cpu, part, sectors[sgrp], sectors);
+	part_inc_in_flight(q, part, op_is_write(op));
 
 	part_stat_unlock();
 }
 EXPORT_SYMBOL(generic_start_io_acct);
 
-void generic_end_io_acct(struct request_queue *q, int rw,
+void generic_end_io_acct(struct request_queue *q, int req_op,
 			 struct hd_struct *part, unsigned long start_time)
 {
 	unsigned long duration = jiffies - start_time;
+	const int sgrp = op_stat_group(req_op);
 	int cpu = part_stat_lock();
 
-	part_stat_add(cpu, part, ticks[rw], duration);
+	part_stat_add(cpu, part, ticks[sgrp], duration);
 	part_round_stats(q, cpu, part);
-	part_dec_in_flight(q, part, rw);
+	part_dec_in_flight(q, part, op_is_write(req_op));
 
 	part_stat_unlock();
 }
diff --git a/block/blk-core.c b/block/blk-core.c
index c4b57d8806fe..03a4ea93a5f3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2702,13 +2702,13 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	if (blk_do_io_stat(req)) {
-		const int rw = rq_data_dir(req);
+		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 		int cpu;
 
 		cpu = part_stat_lock();
 		part = req->part;
-		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
+		part_stat_add(cpu, part, sectors[sgrp], bytes >> 9);
 		part_stat_unlock();
 	}
 }
@@ -2722,7 +2722,7 @@ void blk_account_io_done(struct request *req, u64 now)
 	 */
 	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
 		unsigned long duration;
-		const int rw = rq_data_dir(req);
+		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 		int cpu;
 
@@ -2730,10 +2730,10 @@ void blk_account_io_done(struct request *req, u64 now)
 		cpu = part_stat_lock();
 		part = req->part;
 
-		part_stat_inc(cpu, part, ios[rw]);
-		part_stat_add(cpu, part, ticks[rw], duration);
+		part_stat_inc(cpu, part, ios[sgrp]);
+		part_stat_add(cpu, part, ticks[sgrp], duration);
 		part_round_stats(req->q, cpu, part);
-		part_dec_in_flight(req->q, part, rw);
+		part_dec_in_flight(req->q, part, rq_data_dir(req));
 
 		hd_struct_put(part);
 		part_stat_unlock();
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index d146fedc38bb..19cac36e9737 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -38,7 +38,7 @@ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request
 {
 	struct request_queue *q = device->rq_queue;
 
-	generic_start_io_acct(q, bio_data_dir(req->master_bio),
+	generic_start_io_acct(q, bio_op(req->master_bio),
 				req->i.size >> 9, &device->vdisk->part0);
 }
 
@@ -47,7 +47,7 @@ static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *r
 {
 	struct request_queue *q = device->rq_queue;
 
-	generic_end_io_acct(q, bio_data_dir(req->master_bio),
+	generic_end_io_acct(q, bio_op(req->master_bio),
 			    &device->vdisk->part0, req->start_jif);
 }
 
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index dddb3f2490b6..1a92f9e65937 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -112,7 +112,7 @@ static const struct block_device_operations rsxx_fops = {
 
 static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio)
 {
-	generic_start_io_acct(card->queue, bio_data_dir(bio), bio_sectors(bio),
+	generic_start_io_acct(card->queue, bio_op(bio), bio_sectors(bio),
 			     &card->gendisk->part0);
 }
 
@@ -120,8 +120,8 @@ static void disk_stats_complete(struct rsxx_cardinfo *card,
 				struct bio *bio,
 				unsigned long start_time)
 {
-	generic_end_io_acct(card->queue, bio_data_dir(bio),
-				&card->gendisk->part0, start_time);
+	generic_end_io_acct(card->queue, bio_op(bio),
+			    &card->gendisk->part0, start_time);
 }
 
 static void bio_dma_done_cb(struct rsxx_cardinfo *card,
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 78c29044684a..2907a8156aaf 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1277,11 +1277,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 			int offset, unsigned int op, struct bio *bio)
 {
 	unsigned long start_time = jiffies;
-	int rw_acct = op_is_write(op) ? REQ_OP_WRITE : REQ_OP_READ;
 	struct request_queue *q = zram->disk->queue;
 	int ret;
 
-	generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
+	generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
 			&zram->disk->part0);
 
 	if (!op_is_write(op)) {
@@ -1293,7 +1292,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 		ret = zram_bvec_write(zram, bvec, index, offset, bio);
 	}
 
-	generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
+	generic_end_io_acct(q, op, &zram->disk->part0, start_time);
 
 	zram_slot_lock(zram, index);
 	zram_accessed(zram, index);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 77d811962818..f565a56b898a 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -27,7 +27,8 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
 	int nr_entries = pblk_get_secs(bio);
 	int i, ret;
 
-	generic_start_io_acct(q, WRITE, bio_sectors(bio), &pblk->disk->part0);
+	generic_start_io_acct(q, REQ_OP_WRITE, bio_sectors(bio),
+			      &pblk->disk->part0);
 
 	/* Update the write buffer head (mem) with the entries that we can
 	 * write. The write in itself cannot fail, so there is no need to
@@ -75,7 +76,7 @@ retry:
 	pblk_rl_inserted(&pblk->rl, nr_entries);
 
 out:
-	generic_end_io_acct(q, WRITE, &pblk->disk->part0, start_time);
+	generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time);
 	pblk_write_should_kick(pblk);
 	return ret;
 }
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 26d414ae25b6..5a46d7f9302f 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -199,7 +199,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
 	struct bio *int_bio = rqd->bio;
 	unsigned long start_time = r_ctx->start_time;
 
-	generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time);
+	generic_end_io_acct(dev->q, REQ_OP_READ, &pblk->disk->part0, start_time);
 
 	if (rqd->error)
 		pblk_log_read_err(pblk, rqd);
@@ -461,7 +461,8 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 		return NVM_IO_ERR;
 	}
 
-	generic_start_io_acct(q, READ, bio_sectors(bio), &pblk->disk->part0);
+	generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio),
+			      &pblk->disk->part0);
 
 	bitmap_zero(read_bitmap, nr_secs);
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index ae67f5fa8047..97707b0c54ce 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -667,8 +667,7 @@ static void backing_request_endio(struct bio *bio)
 static void bio_complete(struct search *s)
 {
 	if (s->orig_bio) {
-		generic_end_io_acct(s->d->disk->queue,
-				    bio_data_dir(s->orig_bio),
+		generic_end_io_acct(s->d->disk->queue, bio_op(s->orig_bio),
 				    &s->d->disk->part0, s->start_time);
 
 		trace_bcache_request_end(s->d, s->orig_bio);
@@ -1062,8 +1061,7 @@ static void detached_dev_end_io(struct bio *bio)
 	bio->bi_end_io = ddip->bi_end_io;
 	bio->bi_private = ddip->bi_private;
 
-	generic_end_io_acct(ddip->d->disk->queue,
-			    bio_data_dir(bio),
+	generic_end_io_acct(ddip->d->disk->queue, bio_op(bio),
 			    &ddip->d->disk->part0, ddip->start_time);
 
 	if (bio->bi_status) {
@@ -1120,7 +1118,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
 	}
 
 	atomic_set(&dc->backing_idle, 0);
-	generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
+	generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
 
 	bio_set_dev(bio, dc->bdev);
 	bio->bi_iter.bi_sector += dc->sb.data_offset;
@@ -1229,7 +1227,6 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
 	struct search *s;
 	struct closure *cl;
 	struct bcache_device *d = bio->bi_disk->private_data;
-	int rw = bio_data_dir(bio);
 
 	if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
 		bio->bi_status = BLK_STS_IOERR;
@@ -1237,7 +1234,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
 		return BLK_QC_T_NONE;
 	}
 
-	generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
+	generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
 
 	s = search_alloc(bio, d);
 	cl = &s->cl;
@@ -1254,7 +1251,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
 				      flash_dev_nodata,
 				      bcache_wq);
 		return BLK_QC_T_NONE;
-	} else if (rw) {
+	} else if (bio_data_dir(bio)) {
 		bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
 					&KEY(d->id, bio->bi_iter.bi_sector, 0),
 					&KEY(d->id, bio_end_sector(bio), 0));
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b0dd7027848b..20f7e4ef5342 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -609,7 +609,8 @@ static void start_io_acct(struct dm_io *io)
 
 	io->start_time = jiffies;
 
-	generic_start_io_acct(md->queue, rw, bio_sectors(bio), &dm_disk(md)->part0);
+	generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
+			      &dm_disk(md)->part0);
 
 	atomic_set(&dm_disk(md)->part0.in_flight[rw],
 		   atomic_inc_return(&md->pending[rw]));
@@ -628,7 +629,8 @@ static void end_io_acct(struct dm_io *io)
 	int pending;
 	int rw = bio_data_dir(bio);
 
-	generic_end_io_acct(md->queue, rw, &dm_disk(md)->part0, io->start_time);
+	generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
+			    io->start_time);
 
 	if (unlikely(dm_stats_used(&md->stats)))
 		dm_stats_account_io(&md->stats, bio_data_dir(bio),
diff --git a/drivers/md/md.c b/drivers/md/md.c
index dabe36723d60..f6e58dbca0d4 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -335,6 +335,7 @@ EXPORT_SYMBOL(md_handle_request);
 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
+	const int sgrp = op_stat_group(bio_op(bio));
 	struct mddev *mddev = q->queuedata;
 	unsigned int sectors;
 	int cpu;
@@ -363,8 +364,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	md_handle_request(mddev, bio);
 
 	cpu = part_stat_lock();
-	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
-	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[sgrp]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[sgrp], sectors);
 	part_stat_unlock();
 
 	return BLK_QC_T_NONE;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 32e0364b48b9..6ee7fd7e4bbd 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -396,16 +396,15 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
 		return false;
 
 	*start = jiffies;
-	generic_start_io_acct(disk->queue, bio_data_dir(bio),
-			      bio_sectors(bio), &disk->part0);
+	generic_start_io_acct(disk->queue, bio_op(bio), bio_sectors(bio),
+			      &disk->part0);
 	return true;
 }
 static inline void nd_iostat_end(struct bio *bio, unsigned long start)
 {
 	struct gendisk *disk = bio->bi_disk;
 
-	generic_end_io_acct(disk->queue, bio_data_dir(bio), &disk->part0,
-				start);
+	generic_end_io_acct(disk->queue, bio_op(bio), &disk->part0, start);
 }
 static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
 		unsigned int len)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a00dfff51aa5..ab221c517f4e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -496,9 +496,9 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 
-void generic_start_io_acct(struct request_queue *q, int rw,
+void generic_start_io_acct(struct request_queue *q, int op,
 				unsigned long sectors, struct hd_struct *part);
-void generic_end_io_acct(struct request_queue *q, int rw,
+void generic_end_io_acct(struct request_queue *q, int op,
 				struct hd_struct *part,
 				unsigned long start_time);
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d2b44de56bc1..2960a96d833c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -401,6 +401,11 @@ static inline bool op_is_sync(unsigned int op)
 		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
 }
 
+static inline int op_stat_group(unsigned int op)
+{
+	return op_is_write(op);
+}
+
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE		-1U
 #define BLK_QC_T_SHIFT		16
-- 
cgit v1.2.3


From bdca3c87fb7ad1cc61d231d37eb0d8f90d001e0c Mon Sep 17 00:00:00 2001
From: Michael Callahan <michaelcallahan@fb.com>
Date: Wed, 18 Jul 2018 04:47:40 -0700
Subject: block: Track DISCARD statistics and output them in stat and diskstat

Add tracking of REQ_OP_DISCARD ios to the partition statistics and
append them to the various stat files in /sys as well as
/proc/diskstats.  These are tracked with the same four stats as reads
and writes:

Number of discard ios completed.
Number of discard ios merged
Number of discard sectors completed
Milliseconds spent on discard requests

This is done via adding a new STAT_DISCARD define to genhd.h and then
using it to index that stat field for discard requests.

tj: Refreshed on top of v4.17 and other previous updates.

Signed-off-by: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andy Newell <newella@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/ABI/testing/procfs-diskstats | 10 ++++++++++
 Documentation/block/stat.txt               | 28 ++++++++++++++++------------
 Documentation/iostats.txt                  | 15 +++++++++++++++
 block/genhd.c                              | 13 ++++++++++---
 block/partition-generic.c                  |  9 +++++++--
 include/linux/blk_types.h                  |  8 ++++++++
 include/linux/genhd.h                      |  3 ++-
 7 files changed, 68 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
index f91a973a37fe..abac31d216de 100644
--- a/Documentation/ABI/testing/procfs-diskstats
+++ b/Documentation/ABI/testing/procfs-diskstats
@@ -5,6 +5,7 @@ Description:
 		The /proc/diskstats file displays the I/O statistics
 		of block devices. Each line contains the following 14
 		fields:
+
 		 1 - major number
 		 2 - minor mumber
 		 3 - device name
@@ -19,4 +20,13 @@ Description:
 		12 - I/Os currently in progress
 		13 - time spent doing I/Os (ms)
 		14 - weighted time spent doing I/Os (ms)
+
+		Kernel 4.18+ appends four more fields for discard
+		tracking putting the total at 18:
+
+		15 - discards completed successfully
+		16 - discards merged
+		17 - sectors discarded
+		18 - time spent discarding
+
 		For more details refer to Documentation/iostats.txt
diff --git a/Documentation/block/stat.txt b/Documentation/block/stat.txt
index 0dbc946de2ea..0aace9cc536c 100644
--- a/Documentation/block/stat.txt
+++ b/Documentation/block/stat.txt
@@ -31,28 +31,32 @@ write ticks     milliseconds  total wait time for write requests
 in_flight       requests      number of I/Os currently in flight
 io_ticks        milliseconds  total time this block device has been active
 time_in_queue   milliseconds  total wait time for all requests
+discard I/Os    requests      number of discard I/Os processed
+discard merges  requests      number of discard I/Os merged with in-queue I/O
+discard sectors sectors       number of sectors discarded
+discard ticks   milliseconds  total wait time for discard requests
 
-read I/Os, write I/Os
-=====================
+read I/Os, write I/Os, discard I/0s
+===================================
 
 These values increment when an I/O request completes.
 
-read merges, write merges
-=========================
+read merges, write merges, discard merges
+=========================================
 
 These values increment when an I/O request is merged with an
 already-queued I/O request.
 
-read sectors, write sectors
-===========================
+read sectors, write sectors, discard_sectors
+============================================
 
-These values count the number of sectors read from or written to this
-block device.  The "sectors" in question are the standard UNIX 512-byte
-sectors, not any device- or filesystem-specific block size.  The
-counters are incremented when the I/O completes.
+These values count the number of sectors read from, written to, or
+discarded from this block device.  The "sectors" in question are the
+standard UNIX 512-byte sectors, not any device- or filesystem-specific
+block size.  The counters are incremented when the I/O completes.
 
-read ticks, write ticks
-=======================
+read ticks, write ticks, discard ticks
+======================================
 
 These values count the number of milliseconds that I/O requests have
 waited on this block device.  If there are multiple I/O requests waiting,
diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt
index 04d394a2e06c..49df45f90e8a 100644
--- a/Documentation/iostats.txt
+++ b/Documentation/iostats.txt
@@ -31,6 +31,9 @@ Here are examples of these different formats::
       3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
       3    1   hda1 35486 38030 38030 38030
 
+   4.18+ diskstats:
+      3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 0 0 0 0
+
 On 2.4 you might execute ``grep 'hda ' /proc/partitions``. On 2.6+, you have
 a choice of ``cat /sys/block/hda/stat`` or ``grep 'hda ' /proc/diskstats``.
 
@@ -101,6 +104,18 @@ Field 11 -- weighted # of milliseconds spent doing I/Os
     last update of this field.  This can provide an easy measure of both
     I/O completion time and the backlog that may be accumulating.
 
+Field 12 -- # of discards completed
+    This is the total number of discards completed successfully.
+
+Field 13 -- # of discards merged
+    See the description of field 2
+
+Field 14 -- # of sectors discarded
+    This is the total number of sectors discarded successfully.
+
+Field 15 -- # of milliseconds spent discarding
+    This is the total number of milliseconds spent by all discards (as
+    measured from __make_request() to end_that_request_last()).
 
 To avoid introducing performance bottlenecks, no locks are held while
 modifying these counters.  This implies that minor inaccuracies may be
diff --git a/block/genhd.c b/block/genhd.c
index 0711a800d0d4..8cc719a37b32 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1333,8 +1333,11 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 		part_round_stats(gp->queue, cpu, hd);
 		part_stat_unlock();
 		part_in_flight(gp->queue, hd, inflight);
-		seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
-			   "%u %lu %lu %lu %u %u %u %u\n",
+		seq_printf(seqf, "%4d %7d %s "
+			   "%lu %lu %lu %u "
+			   "%lu %lu %lu %u "
+			   "%u %u %u "
+			   "%lu %lu %lu %u\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
 			   part_stat_read(hd, ios[STAT_READ]),
@@ -1347,7 +1350,11 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
 			   inflight[0],
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
-			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
+			   jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
+			   part_stat_read(hd, ios[STAT_DISCARD]),
+			   part_stat_read(hd, merges[STAT_DISCARD]),
+			   part_stat_read(hd, sectors[STAT_DISCARD]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_DISCARD]))
 			);
 	}
 	disk_part_iter_exit(&piter);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 0ddb06722162..5a8975a1201c 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -130,7 +130,8 @@ ssize_t part_stat_show(struct device *dev,
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
-		"%8u %8u %8u"
+		"%8u %8u %8u "
+		"%8lu %8lu %8llu %8u"
 		"\n",
 		part_stat_read(p, ios[STAT_READ]),
 		part_stat_read(p, merges[STAT_READ]),
@@ -142,7 +143,11 @@ ssize_t part_stat_show(struct device *dev,
 		jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
 		inflight[0],
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
-		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
+		jiffies_to_msecs(part_stat_read(p, time_in_queue)),
+		part_stat_read(p, ios[STAT_DISCARD]),
+		part_stat_read(p, merges[STAT_DISCARD]),
+		(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
+		jiffies_to_msecs(part_stat_read(p, ticks[STAT_DISCARD])));
 }
 
 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 2960a96d833c..f6dfb30737d8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -360,6 +360,7 @@ enum req_flag_bits {
 enum stat_group {
 	STAT_READ,
 	STAT_WRITE,
+	STAT_DISCARD,
 
 	NR_STAT_GROUPS
 };
@@ -401,8 +402,15 @@ static inline bool op_is_sync(unsigned int op)
 		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
 }
 
+static inline bool op_is_discard(unsigned int op)
+{
+	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
+}
+
 static inline int op_stat_group(unsigned int op)
 {
+	if (op_is_discard(op))
+		return STAT_DISCARD;
 	return op_is_write(op);
 }
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index a75445446974..57864422a2c8 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -356,7 +356,8 @@ static inline void free_part_stats(struct hd_struct *part)
 
 #define part_stat_read_accum(part, field)				\
 	(part_stat_read(part, field[STAT_READ]) +			\
-	 part_stat_read(part, field[STAT_WRITE]))
+	 part_stat_read(part, field[STAT_WRITE]) +			\
+	 part_stat_read(part, field[STAT_DISCARD]))
 
 #define part_stat_add(cpu, part, field, addnd)	do {			\
 	__part_stat_add((cpu), (part), field, addnd);			\
-- 
cgit v1.2.3


From 636620b66d5d4012c4a9c86206013964d3986c4f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 18 Jul 2018 04:47:41 -0700
Subject: blkcg: Track DISCARD statistics and output them in cgroup io.stat

Add tracking of REQ_OP_DISCARD ios to the per-cgroup io.stat.  Two
fields, dbytes and dios, to respectively count the total bytes and
number of discards are added.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andy Newell <newella@fb.com>
Cc: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/admin-guide/cgroup-v2.rst | 10 ++++++----
 block/blk-cgroup.c                      | 13 +++++++++----
 include/linux/blk-cgroup.h              |  5 ++++-
 3 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 569ce27b85e5..3afe10fa82bc 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1317,17 +1317,19 @@ IO Interface Files
 	Lines are keyed by $MAJ:$MIN device numbers and not ordered.
 	The following nested keys are defined.
 
-	  ======	===================
+	  ======	=====================
 	  rbytes	Bytes read
 	  wbytes	Bytes written
 	  rios		Number of read IOs
 	  wios		Number of write IOs
-	  ======	===================
+	  dbytes	Bytes discarded
+	  dios		Number of discard IOs
+	  ======	=====================
 
 	An example read output follows:
 
-	  8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353
-	  8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252
+	  8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 dbytes=0 dios=0
+	  8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021
 
   io.weight
 	A read-write flat-keyed file which exists on non-root cgroups.
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7e2c19ce1a08..1942357d7165 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -567,6 +567,7 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 		[BLKG_RWSTAT_WRITE]	= "Write",
 		[BLKG_RWSTAT_SYNC]	= "Sync",
 		[BLKG_RWSTAT_ASYNC]	= "Async",
+		[BLKG_RWSTAT_DISCARD]	= "Discard",
 	};
 	const char *dname = blkg_dev_name(pd->blkg);
 	u64 v;
@@ -580,7 +581,8 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 			   (unsigned long long)atomic64_read(&rwstat->aux_cnt[i]));
 
 	v = atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_READ]) +
-		atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]);
+		atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]) +
+		atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_DISCARD]);
 	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
 	return v;
 }
@@ -959,7 +961,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 		const char *dname;
 		char *buf;
 		struct blkg_rwstat rwstat;
-		u64 rbytes, wbytes, rios, wios;
+		u64 rbytes, wbytes, rios, wios, dbytes, dios;
 		size_t size = seq_get_buf(sf, &buf), off = 0;
 		int i;
 		bool has_stats = false;
@@ -982,19 +984,22 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 					offsetof(struct blkcg_gq, stat_bytes));
 		rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
 		wbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
+		dbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]);
 
 		rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
 					offsetof(struct blkcg_gq, stat_ios));
 		rios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
 		wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
+		dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]);
 
 		spin_unlock_irq(blkg->q->queue_lock);
 
 		if (rbytes || wbytes || rios || wios) {
 			has_stats = true;
 			off += scnprintf(buf+off, size-off,
-					 "rbytes=%llu wbytes=%llu rios=%llu wios=%llu",
-					 rbytes, wbytes, rios, wios);
+					 "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
+					 rbytes, wbytes, rios, wios,
+					 dbytes, dios);
 		}
 
 		if (!blkcg_debug_stats)
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index de57de4831d5..3bed5e02a873 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -35,6 +35,7 @@ enum blkg_rwstat_type {
 	BLKG_RWSTAT_WRITE,
 	BLKG_RWSTAT_SYNC,
 	BLKG_RWSTAT_ASYNC,
+	BLKG_RWSTAT_DISCARD,
 
 	BLKG_RWSTAT_NR,
 	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
@@ -649,7 +650,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 {
 	struct percpu_counter *cnt;
 
-	if (op_is_write(op))
+	if (op_is_discard(op))
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
+	else if (op_is_write(op))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
-- 
cgit v1.2.3


From bcf7eac3d97f49d8400ba52c71bee5934bf20093 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 17 Jul 2018 00:47:48 +0900
Subject: regmap: add SCCB support

This adds Serial Camera Control Bus (SCCB) support for regmap API that
is intended to be used by some of Omnivision sensor drivers.

The ov772x and ov9650 drivers are going to use this SCCB regmap API.

The ov772x driver was previously only worked with the i2c controller
drivers that support I2C_FUNC_PROTOCOL_MANGLING, because the ov772x
device doesn't support repeated starts.  After commit 0b964d183cbf
("media: ov772x: allow i2c controllers without
I2C_FUNC_PROTOCOL_MANGLING"), reading ov772x register is replaced with
issuing two separated i2c messages in order to avoid repeated start.
Using this SCCB regmap hides the implementation detail.

The ov9650 driver also issues two separated i2c messages to read the
registers as the device doesn't support repeated start.  So it can
make use of this SCCB regmap.

Cc: Mark Brown <broonie@kernel.org>
Cc: Peter Rosin <peda@axentia.se>
Cc: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Cc: Wolfram Sang <wsa@the-dreams.de>
Cc: Sylwester Nawrocki <s.nawrocki@samsung.com>
Cc: Jacopo Mondi <jacopo+renesas@jmondi.org>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Hans Verkuil <hans.verkuil@cisco.com>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/Kconfig       |   4 ++
 drivers/base/regmap/Makefile      |   1 +
 drivers/base/regmap/regmap-sccb.c | 128 ++++++++++++++++++++++++++++++++++++++
 include/linux/regmap.h            |  35 +++++++++++
 4 files changed, 168 insertions(+)
 create mode 100644 drivers/base/regmap/regmap-sccb.c

(limited to 'include/linux')

diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index aff34c0c2a3e..6ad5ef48b61e 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -45,3 +45,7 @@ config REGMAP_IRQ
 config REGMAP_SOUNDWIRE
 	tristate
 	depends on SOUNDWIRE_BUS
+
+config REGMAP_SCCB
+	tristate
+	depends on I2C
diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index 5ed0023fabda..f5b4e8851d00 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_REGMAP_MMIO) += regmap-mmio.o
 obj-$(CONFIG_REGMAP_IRQ) += regmap-irq.o
 obj-$(CONFIG_REGMAP_W1) += regmap-w1.o
 obj-$(CONFIG_REGMAP_SOUNDWIRE) += regmap-sdw.o
+obj-$(CONFIG_REGMAP_SCCB) += regmap-sccb.o
diff --git a/drivers/base/regmap/regmap-sccb.c b/drivers/base/regmap/regmap-sccb.c
new file mode 100644
index 000000000000..b6eb876ea819
--- /dev/null
+++ b/drivers/base/regmap/regmap-sccb.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+// Register map access API - SCCB support
+
+#include <linux/regmap.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+
+#include "internal.h"
+
+/**
+ * sccb_is_available - Check if the adapter supports SCCB protocol
+ * @adap: I2C adapter
+ *
+ * Return true if the I2C adapter is capable of using SCCB helper functions,
+ * false otherwise.
+ */
+static bool sccb_is_available(struct i2c_adapter *adap)
+{
+	u32 needed_funcs = I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_WRITE_BYTE_DATA;
+
+	/*
+	 * If we ever want support for hardware doing SCCB natively, we will
+	 * introduce a sccb_xfer() callback to struct i2c_algorithm and check
+	 * for it here.
+	 */
+
+	return (i2c_get_functionality(adap) & needed_funcs) == needed_funcs;
+}
+
+/**
+ * regmap_sccb_read - Read data from SCCB slave device
+ * @context: Device that will be interacted wit
+ * @reg: Register to be read from
+ * @val: Pointer to store read value
+ *
+ * This executes the 2-phase write transmission cycle that is followed by a
+ * 2-phase read transmission cycle, returning negative errno else zero on
+ * success.
+ */
+static int regmap_sccb_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct device *dev = context;
+	struct i2c_client *i2c = to_i2c_client(dev);
+	int ret;
+	union i2c_smbus_data data;
+
+	i2c_lock_bus(i2c->adapter, I2C_LOCK_SEGMENT);
+
+	ret = __i2c_smbus_xfer(i2c->adapter, i2c->addr, i2c->flags,
+			       I2C_SMBUS_WRITE, reg, I2C_SMBUS_BYTE, NULL);
+	if (ret < 0)
+		goto out;
+
+	ret = __i2c_smbus_xfer(i2c->adapter, i2c->addr, i2c->flags,
+			       I2C_SMBUS_READ, 0, I2C_SMBUS_BYTE, &data);
+	if (ret < 0)
+		goto out;
+
+	*val = data.byte;
+out:
+	i2c_unlock_bus(i2c->adapter, I2C_LOCK_SEGMENT);
+
+	return ret;
+}
+
+/**
+ * regmap_sccb_write - Write data to SCCB slave device
+ * @context: Device that will be interacted wit
+ * @reg: Register to write to
+ * @val: Value to be written
+ *
+ * This executes the SCCB 3-phase write transmission cycle, returning negative
+ * errno else zero on success.
+ */
+static int regmap_sccb_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct device *dev = context;
+	struct i2c_client *i2c = to_i2c_client(dev);
+
+	return i2c_smbus_write_byte_data(i2c, reg, val);
+}
+
+static struct regmap_bus regmap_sccb_bus = {
+	.reg_write = regmap_sccb_write,
+	.reg_read = regmap_sccb_read,
+};
+
+static const struct regmap_bus *regmap_get_sccb_bus(struct i2c_client *i2c,
+					const struct regmap_config *config)
+{
+	if (config->val_bits == 8 && config->reg_bits == 8 &&
+			sccb_is_available(i2c->adapter))
+		return &regmap_sccb_bus;
+
+	return ERR_PTR(-ENOTSUPP);
+}
+
+struct regmap *__regmap_init_sccb(struct i2c_client *i2c,
+				  const struct regmap_config *config,
+				  struct lock_class_key *lock_key,
+				  const char *lock_name)
+{
+	const struct regmap_bus *bus = regmap_get_sccb_bus(i2c, config);
+
+	if (IS_ERR(bus))
+		return ERR_CAST(bus);
+
+	return __regmap_init(&i2c->dev, bus, &i2c->dev, config,
+			     lock_key, lock_name);
+}
+EXPORT_SYMBOL_GPL(__regmap_init_sccb);
+
+struct regmap *__devm_regmap_init_sccb(struct i2c_client *i2c,
+				       const struct regmap_config *config,
+				       struct lock_class_key *lock_key,
+				       const char *lock_name)
+{
+	const struct regmap_bus *bus = regmap_get_sccb_bus(i2c, config);
+
+	if (IS_ERR(bus))
+		return ERR_CAST(bus);
+
+	return __devm_regmap_init(&i2c->dev, bus, &i2c->dev, config,
+				  lock_key, lock_name);
+}
+EXPORT_SYMBOL_GPL(__devm_regmap_init_sccb);
+
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 4f38068ffb71..52bf358e2c73 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -514,6 +514,10 @@ struct regmap *__regmap_init_i2c(struct i2c_client *i2c,
 				 const struct regmap_config *config,
 				 struct lock_class_key *lock_key,
 				 const char *lock_name);
+struct regmap *__regmap_init_sccb(struct i2c_client *i2c,
+				  const struct regmap_config *config,
+				  struct lock_class_key *lock_key,
+				  const char *lock_name);
 struct regmap *__regmap_init_slimbus(struct slim_device *slimbus,
 				 const struct regmap_config *config,
 				 struct lock_class_key *lock_key,
@@ -558,6 +562,10 @@ struct regmap *__devm_regmap_init_i2c(struct i2c_client *i2c,
 				      const struct regmap_config *config,
 				      struct lock_class_key *lock_key,
 				      const char *lock_name);
+struct regmap *__devm_regmap_init_sccb(struct i2c_client *i2c,
+				       const struct regmap_config *config,
+				       struct lock_class_key *lock_key,
+				       const char *lock_name);
 struct regmap *__devm_regmap_init_spi(struct spi_device *dev,
 				      const struct regmap_config *config,
 				      struct lock_class_key *lock_key,
@@ -645,6 +653,19 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 	__regmap_lockdep_wrapper(__regmap_init_i2c, #config,		\
 				i2c, config)
 
+/**
+ * regmap_init_sccb() - Initialise register map
+ *
+ * @i2c: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.
+ */
+#define regmap_init_sccb(i2c, config)					\
+	__regmap_lockdep_wrapper(__regmap_init_sccb, #config,		\
+				i2c, config)
+
 /**
  * regmap_init_slimbus() - Initialise register map
  *
@@ -797,6 +818,20 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 	__regmap_lockdep_wrapper(__devm_regmap_init_i2c, #config,	\
 				i2c, config)
 
+/**
+ * devm_regmap_init_sccb() - Initialise managed register map
+ *
+ * @i2c: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+#define devm_regmap_init_sccb(i2c, config)				\
+	__regmap_lockdep_wrapper(__devm_regmap_init_sccb, #config,	\
+				i2c, config)
+
 /**
  * devm_regmap_init_spi() - Initialise register map
  *
-- 
cgit v1.2.3


From 7c201ead1bfd19935f689fca69100c335028c047 Mon Sep 17 00:00:00 2001
From: David Lechner <david@lechnology.com>
Date: Mon, 16 Jul 2018 22:20:49 -0500
Subject: spi: spi-bitbang: change flags from u8 to u16

This changes the data type of the flags field in struct spi_bitbang from
u8 to u16. This matches the size of the mode field of struct spi_device
where these flags are also used.

This is done in preparation of adding a new SPI mode flag that will be
used with this field that would otherwise not fit in 8 bits.

Signed-off-by: David Lechner <david@lechnology.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi_bitbang.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index 51d8c060e513..c1e61641a7f1 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -8,7 +8,7 @@ struct spi_bitbang {
 	struct mutex		lock;
 	u8			busy;
 	u8			use_dma;
-	u8			flags;		/* extra spi->mode support */
+	u16			flags;		/* extra spi->mode support */
 
 	struct spi_master	*master;
 
-- 
cgit v1.2.3


From 2b6e68119c474a62396b387130a984ad8240dce2 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Wed, 18 Jul 2018 16:40:46 +0000
Subject: Input: gpio_keys - add missing include to gpio_keys.h

gpio_keys.h uses 'bool' - type which is defined in linux/types.h.
Include this header.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/gpio_keys.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index 7160df54a6fe..3f84aeb81e48 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -2,6 +2,8 @@
 #ifndef _GPIO_KEYS_H
 #define _GPIO_KEYS_H
 
+#include <linux/types.h>
+
 struct device;
 
 /**
-- 
cgit v1.2.3


From a5fb9fb023a1435f2b42bccd7f547560f3a21dc3 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Wed, 18 Jul 2018 15:40:26 -0500
Subject: PCI: OF: Fix I/O space page leak

When testing the R-Car PCIe driver on the Condor board, if the PCIe PHY
driver was left disabled, the kernel crashed with this BUG:

  kernel BUG at lib/ioremap.c:72!
  Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
  Modules linked in:
  CPU: 0 PID: 39 Comm: kworker/0:1 Not tainted 4.17.0-dirty #1092
  Hardware name: Renesas Condor board based on r8a77980 (DT)
  Workqueue: events deferred_probe_work_func
  pstate: 80000005 (Nzcv daif -PAN -UAO)
  pc : ioremap_page_range+0x370/0x3c8
  lr : ioremap_page_range+0x40/0x3c8
  sp : ffff000008da39e0
  x29: ffff000008da39e0 x28: 00e8000000000f07
  x27: ffff7dfffee00000 x26: 0140000000000000
  x25: ffff7dfffef00000 x24: 00000000000fe100
  x23: ffff80007b906000 x22: ffff000008ab8000
  x21: ffff000008bb1d58 x20: ffff7dfffef00000
  x19: ffff800009c30fb8 x18: 0000000000000001
  x17: 00000000000152d0 x16: 00000000014012d0
  x15: 0000000000000000 x14: 0720072007200720
  x13: 0720072007200720 x12: 0720072007200720
  x11: 0720072007300730 x10: 00000000000000ae
  x9 : 0000000000000000 x8 : ffff7dffff000000
  x7 : 0000000000000000 x6 : 0000000000000100
  x5 : 0000000000000000 x4 : 000000007b906000
  x3 : ffff80007c61a880 x2 : ffff7dfffeefffff
  x1 : 0000000040000000 x0 : 00e80000fe100f07
  Process kworker/0:1 (pid: 39, stack limit = 0x        (ptrval))
  Call trace:
   ioremap_page_range+0x370/0x3c8
   pci_remap_iospace+0x7c/0xac
   pci_parse_request_of_pci_ranges+0x13c/0x190
   rcar_pcie_probe+0x4c/0xb04
   platform_drv_probe+0x50/0xbc
   driver_probe_device+0x21c/0x308
   __device_attach_driver+0x98/0xc8
   bus_for_each_drv+0x54/0x94
   __device_attach+0xc4/0x12c
   device_initial_probe+0x10/0x18
   bus_probe_device+0x90/0x98
   deferred_probe_work_func+0xb0/0x150
   process_one_work+0x12c/0x29c
   worker_thread+0x200/0x3fc
   kthread+0x108/0x134
   ret_from_fork+0x10/0x18
  Code: f9004ba2 54000080 aa0003fb 17ffff48 (d4210000)

It turned out that pci_remap_iospace() wasn't undone when the driver's
probe failed, and since devm_phy_optional_get() returned -EPROBE_DEFER,
the probe was retried, finally causing the BUG due to trying to remap
already remapped pages.

Introduce the devm_pci_remap_iospace() managed API and replace the
pci_remap_iospace() call with it to fix the bug.

Fixes: dbf9826d5797 ("PCI: generic: Convert to DT resource parsing API")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
[lorenzo.pieralisi@arm.com: split commit/updated the commit log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pci/of.c    |  2 +-
 drivers/pci/pci.c   | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h |  2 ++
 3 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index d088c9147f10..69a60d6ebd73 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -612,7 +612,7 @@ int pci_parse_request_of_pci_ranges(struct device *dev,
 
 		switch (resource_type(res)) {
 		case IORESOURCE_IO:
-			err = pci_remap_iospace(res, iobase);
+			err = devm_pci_remap_iospace(dev, res, iobase);
 			if (err) {
 				dev_warn(dev, "error %d: failed to map resource %pR\n",
 					 err, res);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 97acba712e4e..316496e99da9 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3579,6 +3579,44 @@ void pci_unmap_iospace(struct resource *res)
 }
 EXPORT_SYMBOL(pci_unmap_iospace);
 
+static void devm_pci_unmap_iospace(struct device *dev, void *ptr)
+{
+	struct resource **res = ptr;
+
+	pci_unmap_iospace(*res);
+}
+
+/**
+ * devm_pci_remap_iospace - Managed pci_remap_iospace()
+ * @dev: Generic device to remap IO address for
+ * @res: Resource describing the I/O space
+ * @phys_addr: physical address of range to be mapped
+ *
+ * Managed pci_remap_iospace().  Map is automatically unmapped on driver
+ * detach.
+ */
+int devm_pci_remap_iospace(struct device *dev, const struct resource *res,
+			   phys_addr_t phys_addr)
+{
+	const struct resource **ptr;
+	int error;
+
+	ptr = devres_alloc(devm_pci_unmap_iospace, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	error = pci_remap_iospace(res, phys_addr);
+	if (error) {
+		devres_free(ptr);
+	} else	{
+		*ptr = res;
+		devres_add(dev, ptr);
+	}
+
+	return error;
+}
+EXPORT_SYMBOL(devm_pci_remap_iospace);
+
 /**
  * devm_pci_remap_cfgspace - Managed pci_remap_cfgspace()
  * @dev: Generic device to remap IO address for
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..abd5d5e17aee 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1240,6 +1240,8 @@ int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr,
 unsigned long pci_address_to_pio(phys_addr_t addr);
 phys_addr_t pci_pio_to_address(unsigned long pio);
 int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);
+int devm_pci_remap_iospace(struct device *dev, const struct resource *res,
+			   phys_addr_t phys_addr);
 void pci_unmap_iospace(struct resource *res);
 void __iomem *devm_pci_remap_cfgspace(struct device *dev,
 				      resource_size_t offset,
-- 
cgit v1.2.3


From a48d189ef53146a8df132a327a637c4182f50a16 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Tue, 17 Jul 2018 11:52:57 +0200
Subject: net: Move skb decrypted field, avoid explicity copy

Commit 784abe24c903 ("net: Add decrypted field to skb")
introduced a 'decrypted' field that is explicitly copied on skb
copy and clone.

Move it between headers_start[0] and headers_end[0], so that we
don't need to copy it explicitly as it's copied by the memcpy()
in __copy_skb_header().

While at it, drop the assignment in __skb_clone(), it was
already redundant.

This doesn't change the size of sk_buff or cacheline boundaries.

The 15-bits hole before tc_index becomes a 14-bits hole, and
will be again a 15-bits hole when this change is merged with
commit 8b7008620b84 ("net: Don't copy pfmemalloc flag in
__copy_skb_header()").

v2: as reported by kbuild test robot (oops, I forgot to build
    with CONFIG_TLS_DEVICE it seems), we can't use
    CHECK_SKB_FIELD() on a bit-field member. Just drop the
    check for the moment being, perhaps we could think of some
    magic to also check bit-field members one day.

Fixes: 784abe24c903 ("net: Add decrypted field to skb")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 9 ++++-----
 net/core/skbuff.c      | 6 ------
 2 files changed, 4 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3ceb8dcc54da..14bc9ebe30f2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -630,7 +630,6 @@ typedef unsigned char *sk_buff_data_t;
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@xmit_more: More SKBs are pending for this queue
- *	@decrypted: Decrypted SKB
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -641,6 +640,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
  *	@csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
  *	@dst_pending_confirm: need to confirm neighbour
+ *	@decrypted: Decrypted SKB
   *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
  *	@mark: Generic packet mark
@@ -737,11 +737,7 @@ struct sk_buff {
 				peeked:1,
 				head_frag:1,
 				xmit_more:1,
-#ifdef CONFIG_TLS_DEVICE
-				decrypted:1;
-#else
 				__unused:1;
-#endif
 
 	/* fields enclosed in headers_start/headers_end are copied
 	 * using a single memcpy() in __copy_skb_header()
@@ -797,6 +793,9 @@ struct sk_buff {
 	__u8			tc_redirected:1;
 	__u8			tc_from_ingress:1;
 #endif
+#ifdef CONFIG_TLS_DEVICE
+	__u8			decrypted:1;
+#endif
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cfd6c6f35f9c..c4e24ac27464 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -805,9 +805,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	 * It is not yet because we do not want to have a 16 bit hole
 	 */
 	new->queue_mapping = old->queue_mapping;
-#ifdef CONFIG_TLS_DEVICE
-	new->decrypted = old->decrypted;
-#endif
 
 	memcpy(&new->headers_start, &old->headers_start,
 	       offsetof(struct sk_buff, headers_end) -
@@ -868,9 +865,6 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(head_frag);
 	C(data);
 	C(truesize);
-#ifdef CONFIG_TLS_DEVICE
-	C(decrypted);
-#endif
 	refcount_set(&n->users, 1);
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
-- 
cgit v1.2.3


From eff8ea8f24eac76bc21c25e4ca4ac4ee2dade846 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Mon, 16 Jul 2018 18:35:30 -0700
Subject: net/mlx5: FW tracer, add hardware structures

This change adds the infrastructure to mlx5 core fw tracer.
It introduces the following 4 new registers:
MLX5_REG_MTRC_CAP  - Used to read tracer capabilities
MLX5_REG_MTRC_CONF - Used to set tracer configurations
MLX5_REG_MTRC_STDB - Used to query tracer strings database
MLX5_REG_MTRC_CTRL - Used to control the tracer

The capability of the tracing can be checked using mcam access
register, therefore, the mcam access register interface will expose
the tracer register.

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h   |  4 +++
 include/linux/mlx5/mlx5_ifc.h | 61 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 64 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 1cb1c0317b77..4a4125b4279d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -138,6 +138,10 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MLCR		 = 0x902b,
+	MLX5_REG_MTRC_CAP	 = 0x9040,
+	MLX5_REG_MTRC_CONF	 = 0x9041,
+	MLX5_REG_MTRC_STDB	 = 0x9042,
+	MLX5_REG_MTRC_CTRL	 = 0x9043,
 	MLX5_REG_MPCNT		 = 0x9051,
 	MLX5_REG_MTPPS		 = 0x9053,
 	MLX5_REG_MTPPSE		 = 0x9054,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1853e7fd6924..bd7b71f54d59 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8112,7 +8112,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
 	u8         mcqi[0x1];
 	u8         reserved_at_1f[0x1];
 
-	u8         regs_95_to_64[0x20];
+	u8         regs_95_to_68[0x1c];
+	u8         tracer_registers[0x4];
+
 	u8         regs_63_to_32[0x20];
 	u8         regs_31_to_0[0x20];
 };
@@ -9187,4 +9189,61 @@ struct mlx5_ifc_create_uctx_in_bits {
 	struct mlx5_ifc_uctx_bits                     uctx;
 };
 
+struct mlx5_ifc_mtrc_string_db_param_bits {
+	u8         string_db_base_address[0x20];
+
+	u8         reserved_at_20[0x8];
+	u8         string_db_size[0x18];
+};
+
+struct mlx5_ifc_mtrc_cap_bits {
+	u8         trace_owner[0x1];
+	u8         trace_to_memory[0x1];
+	u8         reserved_at_2[0x4];
+	u8         trc_ver[0x2];
+	u8         reserved_at_8[0x14];
+	u8         num_string_db[0x4];
+
+	u8         first_string_trace[0x8];
+	u8         num_string_trace[0x8];
+	u8         reserved_at_30[0x28];
+
+	u8         log_max_trace_buffer_size[0x8];
+
+	u8         reserved_at_60[0x20];
+
+	struct mlx5_ifc_mtrc_string_db_param_bits string_db_param[8];
+
+	u8         reserved_at_280[0x180];
+};
+
+struct mlx5_ifc_mtrc_conf_bits {
+	u8         reserved_at_0[0x1c];
+	u8         trace_mode[0x4];
+	u8         reserved_at_20[0x18];
+	u8         log_trace_buffer_size[0x8];
+	u8         trace_mkey[0x20];
+	u8         reserved_at_60[0x3a0];
+};
+
+struct mlx5_ifc_mtrc_stdb_bits {
+	u8         string_db_index[0x4];
+	u8         reserved_at_4[0x4];
+	u8         read_size[0x18];
+	u8         start_offset[0x20];
+	u8         string_db_data[0];
+};
+
+struct mlx5_ifc_mtrc_ctrl_bits {
+	u8         trace_status[0x2];
+	u8         reserved_at_2[0x2];
+	u8         arm_event[0x1];
+	u8         reserved_at_5[0xb];
+	u8         modify_field_select[0x10];
+	u8         reserved_at_20[0x2b];
+	u8         current_timestamp52_32[0x15];
+	u8         current_timestamp31_0[0x20];
+	u8         reserved_at_80[0x180];
+};
+
 #endif /* MLX5_IFC_H */
-- 
cgit v1.2.3


From 5e022dd353b74132bf216a77b169c43e39f5be9e Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 16 Jul 2018 18:35:31 -0700
Subject: net/mlx5: Expose MPEGC (Management PCIe General Configuration)
 structures

This patch exposes PRM layout for handling MPEGC (Management PCIe
General Configuration).

This will be used in the downstream patch for configuring MPEGC via the
driver.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 23 +++++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 4a4125b4279d..957199c20a0f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -145,6 +145,7 @@ enum {
 	MLX5_REG_MPCNT		 = 0x9051,
 	MLX5_REG_MTPPS		 = 0x9053,
 	MLX5_REG_MTPPSE		 = 0x9054,
+	MLX5_REG_MPEGC		 = 0x9056,
 	MLX5_REG_MCQI		 = 0x9061,
 	MLX5_REG_MCC		 = 0x9062,
 	MLX5_REG_MCDA		 = 0x9063,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index bd7b71f54d59..2de5feaeb74a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8049,6 +8049,19 @@ struct mlx5_ifc_peir_reg_bits {
 	u8         error_type[0x8];
 };
 
+struct mlx5_ifc_mpegc_reg_bits {
+	u8         reserved_at_0[0x30];
+	u8         field_select[0x10];
+
+	u8         tx_overflow_sense[0x1];
+	u8         mark_cqe[0x1];
+	u8         mark_cnp[0x1];
+	u8         reserved_at_43[0x1b];
+	u8         tx_lossy_overflow_oper[0x2];
+
+	u8         reserved_at_60[0x100];
+};
+
 struct mlx5_ifc_pcam_enhanced_features_bits {
 	u8         reserved_at_0[0x6d];
 	u8         rx_icrc_encapsulated_counter[0x1];
@@ -8097,7 +8110,11 @@ struct mlx5_ifc_pcam_reg_bits {
 };
 
 struct mlx5_ifc_mcam_enhanced_features_bits {
-	u8         reserved_at_0[0x7b];
+	u8         reserved_at_0[0x74];
+	u8         mark_tx_action_cnp[0x1];
+	u8         mark_tx_action_cqe[0x1];
+	u8         dynamic_tx_overflow[0x1];
+	u8         reserved_at_77[0x4];
 	u8         pcie_outbound_stalled[0x1];
 	u8         tx_overflow_buffer_pkt[0x1];
 	u8         mtpps_enh_out_per_adj[0x1];
@@ -8112,7 +8129,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
 	u8         mcqi[0x1];
 	u8         reserved_at_1f[0x1];
 
-	u8         regs_95_to_68[0x1c];
+	u8         regs_95_to_87[0x9];
+	u8         mpegc[0x1];
+	u8         regs_85_to_68[0x12];
 	u8         tracer_registers[0x4];
 
 	u8         regs_63_to_32[0x20];
-- 
cgit v1.2.3


From 8da6fe2a18505b9bd977e573d62d33f836c6903c Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@mellanox.com>
Date: Mon, 16 Jul 2018 18:35:32 -0700
Subject: net/mlx5: Add core support for double vlan push/pop steering action

As newer firmware supports double push/pop in a single FTE, we add
core bits and extend vlan action logic for it.

Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c   |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c             | 12 +++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c            |  4 +++-
 include/linux/mlx5/fs.h                                      |  4 +++-
 include/linux/mlx5/mlx5_ifc.h                                | 11 +++++++++--
 6 files changed, 29 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index 09f178a3fcab..0240aee9189e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -138,6 +138,8 @@ TRACE_EVENT(mlx5_fs_del_fg,
 	{MLX5_FLOW_CONTEXT_ACTION_MOD_HDR,	 "MOD_HDR"},\
 	{MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH,	 "VLAN_PUSH"},\
 	{MLX5_FLOW_CONTEXT_ACTION_VLAN_POP,	 "VLAN_POP"},\
+	{MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2,	 "VLAN_PUSH_2"},\
+	{MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2,	 "VLAN_POP_2"},\
 	{MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"}
 
 TRACE_EVENT(mlx5_fs_set_fte,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index cecd201f0b73..8f50ce80ff66 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -70,9 +70,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 		flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
 				     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 	else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
-		flow_act.vlan.ethtype = ntohs(attr->vlan_proto);
-		flow_act.vlan.vid = attr->vlan_vid;
-		flow_act.vlan.prio = attr->vlan_prio;
+		flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto);
+		flow_act.vlan[0].vid = attr->vlan_vid;
+		flow_act.vlan[0].prio = attr->vlan_prio;
 	}
 
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 5a00deff5457..6a62b84e57f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -349,9 +349,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 
 	vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
 
-	MLX5_SET(vlan, vlan, ethtype, fte->action.vlan.ethtype);
-	MLX5_SET(vlan, vlan, vid, fte->action.vlan.vid);
-	MLX5_SET(vlan, vlan, prio, fte->action.vlan.prio);
+	MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype);
+	MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid);
+	MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio);
+
+	vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2);
+
+	MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype);
+	MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid);
+	MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio);
 
 	in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
 				      match_value);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 49a75d31185e..05e7a5112b74 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1464,7 +1464,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
 			     MLX5_FLOW_CONTEXT_ACTION_DECAP |
 			     MLX5_FLOW_CONTEXT_ACTION_MOD_HDR  |
 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
-			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))
+			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 |
+			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
 		return true;
 
 	return false;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 757b4a30281e..c40f2fc68655 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -152,6 +152,8 @@ struct mlx5_fs_vlan {
         u8  prio;
 };
 
+#define MLX5_FS_VLAN_DEPTH	2
+
 struct mlx5_flow_act {
 	u32 action;
 	bool has_flow_tag;
@@ -159,7 +161,7 @@ struct mlx5_flow_act {
 	u32 encap_id;
 	u32 modify_id;
 	uintptr_t esp_id;
-	struct mlx5_fs_vlan vlan;
+	struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
 	struct ib_counters *counters;
 };
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 2de5feaeb74a..ae12120ef021 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -337,7 +337,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         reserved_at_9[0x1];
 	u8         pop_vlan[0x1];
 	u8         push_vlan[0x1];
-	u8         reserved_at_c[0x14];
+	u8         reserved_at_c[0x1];
+	u8         pop_vlan_2[0x1];
+	u8         push_vlan_2[0x1];
+	u8         reserved_at_f[0x11];
 
 	u8         reserved_at_20[0x2];
 	u8         log_max_ft_size[0x6];
@@ -2386,6 +2389,8 @@ enum {
 	MLX5_FLOW_CONTEXT_ACTION_MOD_HDR   = 0x40,
 	MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  = 0x80,
 	MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH = 0x100,
+	MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2  = 0x400,
+	MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800,
 };
 
 struct mlx5_ifc_vlan_bits {
@@ -2416,7 +2421,9 @@ struct mlx5_ifc_flow_context_bits {
 
 	u8         modify_header_id[0x20];
 
-	u8         reserved_at_100[0x100];
+	struct mlx5_ifc_vlan_bits push_vlan_2;
+
+	u8         reserved_at_120[0xe0];
 
 	struct mlx5_ifc_fte_match_param_bits match_value;
 
-- 
cgit v1.2.3


From e2abdcf1d2dd59d7164cf975c50e1587b96b9d04 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Mon, 16 Jul 2018 18:35:36 -0700
Subject: net/mlx5: Better return types for CQE API

Reduce sizes of return types.
Use bool for binary indication.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index f8671c0a43aa..0566c6a94805 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -750,7 +750,7 @@ enum {
 
 #define MLX5_MINI_CQE_ARRAY_SIZE 8
 
-static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
+static inline u8 mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
 {
 	return (cqe->op_own >> 2) & 0x3;
 }
@@ -770,14 +770,14 @@ static inline u8 get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
 	return (cqe->l4_l3_hdr_type >> 2) & 0x3;
 }
 
-static inline u8 cqe_is_tunneled(struct mlx5_cqe64 *cqe)
+static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe)
 {
 	return cqe->outer_l3_tunneled & 0x1;
 }
 
-static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe)
+static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe)
 {
-	return !!(cqe->l4_l3_hdr_type & 0x1);
+	return cqe->l4_l3_hdr_type & 0x1;
 }
 
 static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe)
-- 
cgit v1.2.3


From d7037ad73daa9598b8caa7d5fdf41e8ceee6ef73 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Sun, 8 Jul 2018 12:14:59 +0300
Subject: net/mlx5: Fix QP fragmented buffer allocation

Fix bad alignment of SQ buffer in fragmented QP allocation.
It should start directly after RQ buffer ends.

Take special care of the end case where the RQ buffer does not occupy
a whole page. RQ size is a power of two, so would be the case only for
small RQ sizes (RQ size < PAGE_SIZE).

Fix wrong assignments for sqb->size (mistakenly assigned RQ size),
and for npages value of RQ and SQ.

Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/alloc.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/wq.c    | 34 ++++++++++++++++---------
 include/linux/mlx5/driver.h                     | 18 ++++++++++---
 3 files changed, 38 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 323ffe8bf7e4..456f30007ad6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -123,7 +123,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 	int i;
 
 	buf->size = size;
-	buf->npages = 1 << get_order(size);
+	buf->npages = DIV_ROUND_UP(size, PAGE_SIZE);
 	buf->page_shift = PAGE_SHIFT;
 	buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list),
 			     GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index b97bb72b4db4..86478a6b99c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -113,35 +113,45 @@ err_db_free:
 	return err;
 }
 
-static void mlx5e_qp_set_frag_buf(struct mlx5_frag_buf *buf,
-				  struct mlx5_wq_qp *qp)
+static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf,
+				 struct mlx5_wq_qp *qp)
 {
+	struct mlx5_frag_buf_ctrl *sq_fbc;
 	struct mlx5_frag_buf *rqb, *sqb;
 
-	rqb = &qp->rq.fbc.frag_buf;
+	rqb  = &qp->rq.fbc.frag_buf;
 	*rqb = *buf;
 	rqb->size   = mlx5_wq_cyc_get_byte_size(&qp->rq);
-	rqb->npages = 1 << get_order(rqb->size);
+	rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE);
 
-	sqb = &qp->sq.fbc.frag_buf;
-	*sqb = *buf;
-	sqb->size   = mlx5_wq_cyc_get_byte_size(&qp->rq);
-	sqb->npages = 1 << get_order(sqb->size);
+	sq_fbc = &qp->sq.fbc;
+	sqb    = &sq_fbc->frag_buf;
+	*sqb   = *buf;
+	sqb->size   = mlx5_wq_cyc_get_byte_size(&qp->sq);
+	sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE);
 	sqb->frags += rqb->npages; /* first part is for the rq */
+	if (sq_fbc->strides_offset)
+		sqb->frags--;
 }
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
+	u32 sq_strides_offset;
 	int err;
 
 	mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4,
 		      MLX5_GET(qpc, qpc, log_rq_size),
 		      &wq->rq.fbc);
-	mlx5_fill_fbc(ilog2(MLX5_SEND_WQE_BB),
-		      MLX5_GET(qpc, qpc, log_sq_size),
-		      &wq->sq.fbc);
+
+	sq_strides_offset =
+		((wq->rq.fbc.frag_sz_m1 + 1) % PAGE_SIZE) / MLX5_SEND_WQE_BB;
+
+	mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB),
+			     MLX5_GET(qpc, qpc, log_sq_size),
+			     sq_strides_offset,
+			     &wq->sq.fbc);
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
@@ -156,7 +166,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		goto err_db_free;
 	}
 
-	mlx5e_qp_set_frag_buf(&wq_ctrl->buf, wq);
+	mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq);
 
 	wq->rq.db  = &wq_ctrl->db.db[MLX5_RCV_DBR];
 	wq->sq.db  = &wq_ctrl->db.db[MLX5_SND_DBR];
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 80cbb7fdce4a..83957920653a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -358,6 +358,7 @@ struct mlx5_frag_buf_ctrl {
 	struct mlx5_frag_buf	frag_buf;
 	u32			sz_m1;
 	u32			frag_sz_m1;
+	u32			strides_offset;
 	u8			log_sz;
 	u8			log_stride;
 	u8			log_frag_strides;
@@ -983,14 +984,22 @@ static inline u32 mlx5_base_mkey(const u32 key)
 	return key & 0xffffff00u;
 }
 
-static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
-				 struct mlx5_frag_buf_ctrl *fbc)
+static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
+					u32 strides_offset,
+					struct mlx5_frag_buf_ctrl *fbc)
 {
 	fbc->log_stride = log_stride;
 	fbc->log_sz     = log_sz;
 	fbc->sz_m1	= (1 << fbc->log_sz) - 1;
 	fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
 	fbc->frag_sz_m1	= (1 << fbc->log_frag_strides) - 1;
+	fbc->strides_offset = strides_offset;
+}
+
+static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
+				 struct mlx5_frag_buf_ctrl *fbc)
+{
+	mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc);
 }
 
 static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
@@ -1004,7 +1013,10 @@ static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
 static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
 					  u32 ix)
 {
-	unsigned int frag = (ix >> fbc->log_frag_strides);
+	unsigned int frag;
+
+	ix  += fbc->strides_offset;
+	frag = ix >> fbc->log_frag_strides;
 
 	return fbc->frag_buf.frags[frag].buf +
 		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
-- 
cgit v1.2.3


From 8963106eabdc56911e9b65258eb5e9a6b7b3dfda Mon Sep 17 00:00:00 2001
From: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Date: Thu, 19 Jul 2018 10:32:12 +0200
Subject: PCI: endpoint: Add MSI-X interfaces

Add PCI_EPC_IRQ_MSIX type.

Add MSI-X callbacks signatures to the ops structure.

Add sysfs interface for set/get MSI-X capability maximum number.

Update documentation accordingly.

Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 .../PCI/endpoint/function/binding/pci-test.txt     |  2 +
 drivers/pci/endpoint/pci-ep-cfs.c                  | 24 +++++++++
 drivers/pci/endpoint/pci-epc-core.c                | 57 ++++++++++++++++++++++
 include/linux/pci-epc.h                            |  9 ++++
 include/linux/pci-epf.h                            |  1 +
 5 files changed, 93 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/PCI/endpoint/function/binding/pci-test.txt b/Documentation/PCI/endpoint/function/binding/pci-test.txt
index 3b68b955fb50..cd76ba47394b 100644
--- a/Documentation/PCI/endpoint/function/binding/pci-test.txt
+++ b/Documentation/PCI/endpoint/function/binding/pci-test.txt
@@ -15,3 +15,5 @@ subsys_id	 : don't care
 interrupt_pin	 : Should be 1 - INTA, 2 - INTB, 3 - INTC, 4 -INTD
 msi_interrupts	 : Should be 1 to 32 depending on the number of MSI interrupts
 		   to test
+msix_interrupts	 : Should be 1 to 2048 depending on the number of MSI-X
+		   interrupts to test
diff --git a/drivers/pci/endpoint/pci-ep-cfs.c b/drivers/pci/endpoint/pci-ep-cfs.c
index 018ea3433cb5..d1288a0bd530 100644
--- a/drivers/pci/endpoint/pci-ep-cfs.c
+++ b/drivers/pci/endpoint/pci-ep-cfs.c
@@ -286,6 +286,28 @@ static ssize_t pci_epf_msi_interrupts_show(struct config_item *item,
 		       to_pci_epf_group(item)->epf->msi_interrupts);
 }
 
+static ssize_t pci_epf_msix_interrupts_store(struct config_item *item,
+					     const char *page, size_t len)
+{
+	u16 val;
+	int ret;
+
+	ret = kstrtou16(page, 0, &val);
+	if (ret)
+		return ret;
+
+	to_pci_epf_group(item)->epf->msix_interrupts = val;
+
+	return len;
+}
+
+static ssize_t pci_epf_msix_interrupts_show(struct config_item *item,
+					    char *page)
+{
+	return sprintf(page, "%d\n",
+		       to_pci_epf_group(item)->epf->msix_interrupts);
+}
+
 PCI_EPF_HEADER_R(vendorid)
 PCI_EPF_HEADER_W_u16(vendorid)
 
@@ -327,6 +349,7 @@ CONFIGFS_ATTR(pci_epf_, subsys_vendor_id);
 CONFIGFS_ATTR(pci_epf_, subsys_id);
 CONFIGFS_ATTR(pci_epf_, interrupt_pin);
 CONFIGFS_ATTR(pci_epf_, msi_interrupts);
+CONFIGFS_ATTR(pci_epf_, msix_interrupts);
 
 static struct configfs_attribute *pci_epf_attrs[] = {
 	&pci_epf_attr_vendorid,
@@ -340,6 +363,7 @@ static struct configfs_attribute *pci_epf_attrs[] = {
 	&pci_epf_attr_subsys_id,
 	&pci_epf_attr_interrupt_pin,
 	&pci_epf_attr_msi_interrupts,
+	&pci_epf_attr_msix_interrupts,
 	NULL,
 };
 
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index b0ee42739c3c..7d77bd0e5d4a 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -217,6 +217,63 @@ int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts)
 }
 EXPORT_SYMBOL_GPL(pci_epc_set_msi);
 
+/**
+ * pci_epc_get_msix() - get the number of MSI-X interrupt numbers allocated
+ * @epc: the EPC device to which MSI-X interrupts was requested
+ * @func_no: the endpoint function number in the EPC device
+ *
+ * Invoke to get the number of MSI-X interrupts allocated by the RC
+ */
+int pci_epc_get_msix(struct pci_epc *epc, u8 func_no)
+{
+	int interrupt;
+	unsigned long flags;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+		return 0;
+
+	if (!epc->ops->get_msix)
+		return 0;
+
+	spin_lock_irqsave(&epc->lock, flags);
+	interrupt = epc->ops->get_msix(epc, func_no);
+	spin_unlock_irqrestore(&epc->lock, flags);
+
+	if (interrupt < 0)
+		return 0;
+
+	return interrupt + 1;
+}
+EXPORT_SYMBOL_GPL(pci_epc_get_msix);
+
+/**
+ * pci_epc_set_msix() - set the number of MSI-X interrupt numbers required
+ * @epc: the EPC device on which MSI-X has to be configured
+ * @func_no: the endpoint function number in the EPC device
+ * @interrupts: number of MSI-X interrupts required by the EPF
+ *
+ * Invoke to set the required number of MSI-X interrupts.
+ */
+int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts)
+{
+	int ret;
+	unsigned long flags;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
+	    interrupts < 1 || interrupts > 2048)
+		return -EINVAL;
+
+	if (!epc->ops->set_msix)
+		return 0;
+
+	spin_lock_irqsave(&epc->lock, flags);
+	ret = epc->ops->set_msix(epc, func_no, interrupts - 1);
+	spin_unlock_irqrestore(&epc->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_set_msix);
+
 /**
  * pci_epc_unmap_addr() - unmap CPU address from PCI address
  * @epc: the EPC device on which address is allocated
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 243eaa5a66ff..89f079f582df 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -17,6 +17,7 @@ enum pci_epc_irq_type {
 	PCI_EPC_IRQ_UNKNOWN,
 	PCI_EPC_IRQ_LEGACY,
 	PCI_EPC_IRQ_MSI,
+	PCI_EPC_IRQ_MSIX,
 };
 
 /**
@@ -30,6 +31,10 @@ enum pci_epc_irq_type {
  *	     capability register
  * @get_msi: ops to get the number of MSI interrupts allocated by the RC from
  *	     the MSI capability register
+ * @set_msix: ops to set the requested number of MSI-X interrupts in the
+ *	     MSI-X capability register
+ * @get_msix: ops to get the number of MSI-X interrupts allocated by the RC
+ *	     from the MSI-X capability register
  * @raise_irq: ops to raise a legacy or MSI interrupt
  * @start: ops to start the PCI link
  * @stop: ops to stop the PCI link
@@ -48,6 +53,8 @@ struct pci_epc_ops {
 			      phys_addr_t addr);
 	int	(*set_msi)(struct pci_epc *epc, u8 func_no, u8 interrupts);
 	int	(*get_msi)(struct pci_epc *epc, u8 func_no);
+	int	(*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts);
+	int	(*get_msix)(struct pci_epc *epc, u8 func_no);
 	int	(*raise_irq)(struct pci_epc *epc, u8 func_no,
 			     enum pci_epc_irq_type type, u8 interrupt_num);
 	int	(*start)(struct pci_epc *epc);
@@ -144,6 +151,8 @@ void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no,
 			phys_addr_t phys_addr);
 int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts);
 int pci_epc_get_msi(struct pci_epc *epc, u8 func_no);
+int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts);
+int pci_epc_get_msix(struct pci_epc *epc, u8 func_no);
 int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
 		      enum pci_epc_irq_type type, u8 interrupt_num);
 int pci_epc_start(struct pci_epc *epc);
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index 4e7764935fa8..ec02f58758c8 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -119,6 +119,7 @@ struct pci_epf {
 	struct pci_epf_header	*header;
 	struct pci_epf_bar	bar[6];
 	u8			msi_interrupts;
+	u16			msix_interrupts;
 	u8			func_no;
 
 	struct pci_epc		*epc;
-- 
cgit v1.2.3


From d3c70a98d7d63cae02d50ebfafea04264a767401 Mon Sep 17 00:00:00 2001
From: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Date: Thu, 19 Jul 2018 10:32:13 +0200
Subject: PCI: Update xxx_pcie_ep_raise_irq() and pci_epc_raise_irq()
 signatures

Change {cdns, dra7xx, artpec6, dw, rockchip}_pcie_ep_raise_irq() and
pci_epc_raise_irq() signature, namely the interrupt_num variable type
from u8 to u16 to accommodate 2048 maximum MSI-X interrupts.

Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Alan Douglas <adouglas@cadence.com>
Acked-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Joao Pinto <jpinto@synopsys.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/pci/controller/dwc/pci-dra7xx.c           | 2 +-
 drivers/pci/controller/dwc/pcie-artpec6.c         | 2 +-
 drivers/pci/controller/dwc/pcie-designware-ep.c   | 2 +-
 drivers/pci/controller/dwc/pcie-designware-plat.c | 2 +-
 drivers/pci/controller/dwc/pcie-designware.h      | 2 +-
 drivers/pci/controller/pcie-cadence-ep.c          | 3 ++-
 drivers/pci/controller/pcie-rockchip-ep.c         | 2 +-
 drivers/pci/endpoint/pci-epc-core.c               | 8 ++++----
 include/linux/pci-epc.h                           | 6 +++---
 9 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
index 345aab56ce8b..ce9224a36f62 100644
--- a/drivers/pci/controller/dwc/pci-dra7xx.c
+++ b/drivers/pci/controller/dwc/pci-dra7xx.c
@@ -370,7 +370,7 @@ static void dra7xx_pcie_raise_msi_irq(struct dra7xx_pcie *dra7xx,
 }
 
 static int dra7xx_pcie_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
-				 enum pci_epc_irq_type type, u8 interrupt_num)
+				 enum pci_epc_irq_type type, u16 interrupt_num)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
diff --git a/drivers/pci/controller/dwc/pcie-artpec6.c b/drivers/pci/controller/dwc/pcie-artpec6.c
index 128b182648b3..dba83abfe764 100644
--- a/drivers/pci/controller/dwc/pcie-artpec6.c
+++ b/drivers/pci/controller/dwc/pcie-artpec6.c
@@ -427,7 +427,7 @@ static void artpec6_pcie_ep_init(struct dw_pcie_ep *ep)
 }
 
 static int artpec6_pcie_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
-				  enum pci_epc_irq_type type, u8 interrupt_num)
+				  enum pci_epc_irq_type type, u16 interrupt_num)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 04092a7aba89..69d039de2af6 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -242,7 +242,7 @@ static int dw_pcie_ep_set_msi(struct pci_epc *epc, u8 func_no, u8 encode_int)
 }
 
 static int dw_pcie_ep_raise_irq(struct pci_epc *epc, u8 func_no,
-				enum pci_epc_irq_type type, u8 interrupt_num)
+				enum pci_epc_irq_type type, u16 interrupt_num)
 {
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
 
diff --git a/drivers/pci/controller/dwc/pcie-designware-plat.c b/drivers/pci/controller/dwc/pcie-designware-plat.c
index a37dc92a03c7..cca69ac63319 100644
--- a/drivers/pci/controller/dwc/pcie-designware-plat.c
+++ b/drivers/pci/controller/dwc/pcie-designware-plat.c
@@ -81,7 +81,7 @@ static void dw_plat_pcie_ep_init(struct dw_pcie_ep *ep)
 
 static int dw_plat_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
 				     enum pci_epc_irq_type type,
-				     u8 interrupt_num)
+				     u16 interrupt_num)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index bee4e2535a61..9d581c077329 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -191,7 +191,7 @@ enum dw_pcie_as_type {
 struct dw_pcie_ep_ops {
 	void	(*ep_init)(struct dw_pcie_ep *ep);
 	int	(*raise_irq)(struct dw_pcie_ep *ep, u8 func_no,
-			     enum pci_epc_irq_type type, u8 interrupt_num);
+			     enum pci_epc_irq_type type, u16 interrupt_num);
 };
 
 struct dw_pcie_ep {
diff --git a/drivers/pci/controller/pcie-cadence-ep.c b/drivers/pci/controller/pcie-cadence-ep.c
index e3fe4124e3af..208d11f27d5d 100644
--- a/drivers/pci/controller/pcie-cadence-ep.c
+++ b/drivers/pci/controller/pcie-cadence-ep.c
@@ -363,7 +363,8 @@ static int cdns_pcie_ep_send_msi_irq(struct cdns_pcie_ep *ep, u8 fn,
 }
 
 static int cdns_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn,
-				  enum pci_epc_irq_type type, u8 interrupt_num)
+				  enum pci_epc_irq_type type,
+				  u16 interrupt_num)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
 
diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c
index 6beba8ed7b84..b8163c56a142 100644
--- a/drivers/pci/controller/pcie-rockchip-ep.c
+++ b/drivers/pci/controller/pcie-rockchip-ep.c
@@ -472,7 +472,7 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn,
 
 static int rockchip_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn,
 				      enum pci_epc_irq_type type,
-				      u8 interrupt_num)
+				      u16 interrupt_num)
 {
 	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
 
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index 7d77bd0e5d4a..c72e656e7e29 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -131,13 +131,13 @@ EXPORT_SYMBOL_GPL(pci_epc_start);
  * pci_epc_raise_irq() - interrupt the host system
  * @epc: the EPC device which has to interrupt the host
  * @func_no: the endpoint function number in the EPC device
- * @type: specify the type of interrupt; legacy or MSI
- * @interrupt_num: the MSI interrupt number
+ * @type: specify the type of interrupt; legacy, MSI or MSI-X
+ * @interrupt_num: the MSI or MSI-X interrupt number
  *
- * Invoke to raise an MSI or legacy interrupt
+ * Invoke to raise an legacy, MSI or MSI-X interrupt
  */
 int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
-		      enum pci_epc_irq_type type, u8 interrupt_num)
+		      enum pci_epc_irq_type type, u16 interrupt_num)
 {
 	int ret;
 	unsigned long flags;
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 89f079f582df..bb2395b56f13 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -35,7 +35,7 @@ enum pci_epc_irq_type {
  *	     MSI-X capability register
  * @get_msix: ops to get the number of MSI-X interrupts allocated by the RC
  *	     from the MSI-X capability register
- * @raise_irq: ops to raise a legacy or MSI interrupt
+ * @raise_irq: ops to raise a legacy, MSI or MSI-X interrupt
  * @start: ops to start the PCI link
  * @stop: ops to stop the PCI link
  * @owner: the module owner containing the ops
@@ -56,7 +56,7 @@ struct pci_epc_ops {
 	int	(*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts);
 	int	(*get_msix)(struct pci_epc *epc, u8 func_no);
 	int	(*raise_irq)(struct pci_epc *epc, u8 func_no,
-			     enum pci_epc_irq_type type, u8 interrupt_num);
+			     enum pci_epc_irq_type type, u16 interrupt_num);
 	int	(*start)(struct pci_epc *epc);
 	void	(*stop)(struct pci_epc *epc);
 	struct module *owner;
@@ -154,7 +154,7 @@ int pci_epc_get_msi(struct pci_epc *epc, u8 func_no);
 int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts);
 int pci_epc_get_msix(struct pci_epc *epc, u8 func_no);
 int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
-		      enum pci_epc_irq_type type, u8 interrupt_num);
+		      enum pci_epc_irq_type type, u16 interrupt_num);
 int pci_epc_start(struct pci_epc *epc);
 void pci_epc_stop(struct pci_epc *epc);
 struct pci_epc *pci_epc_get(const char *epc_name);
-- 
cgit v1.2.3


From c2e00e31087e58f6c49b90b4702fc3df4fad6a83 Mon Sep 17 00:00:00 2001
From: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Date: Thu, 19 Jul 2018 10:32:19 +0200
Subject: pci-epf-test/pci_endpoint_test: Add MSI-X support

Add MSI-X support and update driver documentation accordingly.

Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 Documentation/PCI/endpoint/pci-endpoint.txt       |  4 ++--
 Documentation/PCI/endpoint/pci-test-function.txt  |  4 +++-
 Documentation/PCI/endpoint/pci-test-howto.txt     | 22 ++++++++++++++---
 Documentation/ioctl/ioctl-number.txt              |  1 +
 Documentation/misc-devices/pci-endpoint-test.txt  |  3 +++
 drivers/misc/pci_endpoint_test.c                  | 29 ++++++++++++++++-------
 drivers/pci/controller/dwc/pcie-designware-plat.c |  1 +
 drivers/pci/endpoint/functions/pci-epf-test.c     | 29 +++++++++++++++++++++--
 include/linux/pci-epc.h                           |  1 +
 include/uapi/linux/pcitest.h                      |  1 +
 10 files changed, 79 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/PCI/endpoint/pci-endpoint.txt b/Documentation/PCI/endpoint/pci-endpoint.txt
index 9b1d66829290..e86a96b66a6a 100644
--- a/Documentation/PCI/endpoint/pci-endpoint.txt
+++ b/Documentation/PCI/endpoint/pci-endpoint.txt
@@ -44,7 +44,7 @@ by the PCI controller driver.
 	 * clear_bar: ops to reset the BAR
 	 * alloc_addr_space: ops to allocate in PCI controller address space
 	 * free_addr_space: ops to free the allocated address space
-	 * raise_irq: ops to raise a legacy or MSI interrupt
+	 * raise_irq: ops to raise a legacy, MSI or MSI-X interrupt
 	 * start: ops to start the PCI link
 	 * stop: ops to stop the PCI link
 
@@ -96,7 +96,7 @@ by the PCI endpoint function driver.
 *) pci_epc_raise_irq()
 
    The PCI endpoint function driver should use pci_epc_raise_irq() to raise
-   Legacy Interrupt or MSI Interrupt.
+   Legacy Interrupt, MSI or MSI-X Interrupt.
 
 *) pci_epc_mem_alloc_addr()
 
diff --git a/Documentation/PCI/endpoint/pci-test-function.txt b/Documentation/PCI/endpoint/pci-test-function.txt
index bf4b5cf6fee6..5916f1f592bb 100644
--- a/Documentation/PCI/endpoint/pci-test-function.txt
+++ b/Documentation/PCI/endpoint/pci-test-function.txt
@@ -36,7 +36,7 @@ that the endpoint device must perform.
 Bitfield Description:
   Bit 0		: raise legacy IRQ
   Bit 1		: raise MSI IRQ
-  Bit 2		: raise MSI-X IRQ (reserved for future implementation)
+  Bit 2		: raise MSI-X IRQ
   Bit 3		: read command (read data from RC buffer)
   Bit 4		: write command (write data to RC buffer)
   Bit 5		: copy command (copy data from one RC buffer to another
@@ -75,6 +75,7 @@ for the READ/WRITE/COPY and raise IRQ (Legacy/MSI) commands.
 Possible types:
  - Legacy	: 0
  - MSI		: 1
+ - MSI-X	: 2
 
 *) PCI_ENDPOINT_TEST_IRQ_NUMBER
 
@@ -83,3 +84,4 @@ This register contains the triggered ID interrupt.
 Admissible values:
  - Legacy	: 0
  - MSI		: [1 .. 32]
+ - MSI-X	: [1 .. 2048]
diff --git a/Documentation/PCI/endpoint/pci-test-howto.txt b/Documentation/PCI/endpoint/pci-test-howto.txt
index 75f48c3bb191..65f1a137e35c 100644
--- a/Documentation/PCI/endpoint/pci-test-howto.txt
+++ b/Documentation/PCI/endpoint/pci-test-howto.txt
@@ -45,9 +45,9 @@ The PCI endpoint framework populates the directory with the following
 configurable fields.
 
 	# ls functions/pci_epf_test/func1
-	  baseclass_code	interrupt_pin	revid		subsys_vendor_id
-	  cache_line_size	msi_interrupts	subclass_code	vendorid
-	  deviceid          	progif_code	subsys_id
+	  baseclass_code	interrupt_pin	progif_code	subsys_id
+	  cache_line_size	msi_interrupts	revid		subsys_vendorid
+	  deviceid          	msix_interrupts	subclass_code	vendorid
 
 The PCI endpoint function driver populates these entries with default values
 when the device is bound to the driver. The pci-epf-test driver populates
@@ -67,6 +67,7 @@ device, the following commands can be used.
 	# echo 0x104c > functions/pci_epf_test/func1/vendorid
 	# echo 0xb500 > functions/pci_epf_test/func1/deviceid
 	# echo 16 > functions/pci_epf_test/func1/msi_interrupts
+	# echo 8 > functions/pci_epf_test/func1/msix_interrupts
 
 1.5 Binding pci-epf-test Device to EP Controller
 
@@ -153,6 +154,21 @@ following commands.
 	MSI30:          NOT OKAY
 	MSI31:          NOT OKAY
 	MSI32:          NOT OKAY
+	MSIX1:          OKAY
+	MSIX2:          OKAY
+	MSIX3:          OKAY
+	MSIX4:          OKAY
+	MSIX5:          OKAY
+	MSIX6:          OKAY
+	MSIX7:          OKAY
+	MSIX8:          OKAY
+	MSIX9:          NOT OKAY
+	MSIX10:         NOT OKAY
+	MSIX11:         NOT OKAY
+	MSIX12:         NOT OKAY
+	MSIX13:         NOT OKAY
+	[...]
+	MSIX2048:       NOT OKAY
 
 	Read Tests
 
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 480c8609dc58..65259d459fd1 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -166,6 +166,7 @@ Code  Seq#(hex)	Include File		Comments
 'P'	all	linux/soundcard.h	conflict!
 'P'	60-6F	sound/sscape_ioctl.h	conflict!
 'P'	00-0F	drivers/usb/class/usblp.c	conflict!
+'P'	01-07	drivers/misc/pci_endpoint_test.c	conflict!
 'Q'	all	linux/soundcard.h
 'R'	00-1F	linux/random.h		conflict!
 'R'	01	linux/rfkill.h		conflict!
diff --git a/Documentation/misc-devices/pci-endpoint-test.txt b/Documentation/misc-devices/pci-endpoint-test.txt
index 4ebc3594b32c..fdfa0f66d3d0 100644
--- a/Documentation/misc-devices/pci-endpoint-test.txt
+++ b/Documentation/misc-devices/pci-endpoint-test.txt
@@ -10,6 +10,7 @@ The PCI driver for the test device performs the following tests
 	*) verifying addresses programmed in BAR
 	*) raise legacy IRQ
 	*) raise MSI IRQ
+	*) raise MSI-X IRQ
 	*) read data
 	*) write data
 	*) copy data
@@ -25,6 +26,8 @@ ioctl
  PCITEST_LEGACY_IRQ: Tests legacy IRQ
  PCITEST_MSI: Tests message signalled interrupts. The MSI number
 	      to be tested should be passed as argument.
+ PCITEST_MSIX: Tests message signalled interrupts. The MSI-X number
+	      to be tested should be passed as argument.
  PCITEST_WRITE: Perform write tests. The size of the buffer should be passed
 		as argument.
  PCITEST_READ: Perform read tests. The size of the buffer should be passed
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index 349794cbe1f3..f4fef108caff 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -39,13 +39,14 @@
 
 #define IRQ_TYPE_LEGACY				0
 #define IRQ_TYPE_MSI				1
+#define IRQ_TYPE_MSIX				2
 
 #define PCI_ENDPOINT_TEST_MAGIC			0x0
 
 #define PCI_ENDPOINT_TEST_COMMAND		0x4
 #define COMMAND_RAISE_LEGACY_IRQ		BIT(0)
 #define COMMAND_RAISE_MSI_IRQ			BIT(1)
-/* BIT(2) is reserved for raising MSI-X IRQ command */
+#define COMMAND_RAISE_MSIX_IRQ			BIT(2)
 #define COMMAND_READ				BIT(3)
 #define COMMAND_WRITE				BIT(4)
 #define COMMAND_COPY				BIT(5)
@@ -84,7 +85,7 @@ MODULE_PARM_DESC(no_msi, "Disable MSI interrupt in pci_endpoint_test");
 
 static int irq_type = IRQ_TYPE_MSI;
 module_param(irq_type, int, 0444);
-MODULE_PARM_DESC(irq_type, "IRQ mode selection in pci_endpoint_test (0 - Legacy, 1 - MSI)");
+MODULE_PARM_DESC(irq_type, "IRQ mode selection in pci_endpoint_test (0 - Legacy, 1 - MSI, 2 - MSI-X)");
 
 enum pci_barno {
 	BAR_0,
@@ -202,16 +203,18 @@ static bool pci_endpoint_test_legacy_irq(struct pci_endpoint_test *test)
 }
 
 static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test,
-				      u8 msi_num)
+				       u16 msi_num, bool msix)
 {
 	u32 val;
 	struct pci_dev *pdev = test->pdev;
 
 	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE,
-				 IRQ_TYPE_MSI);
+				 msix == false ? IRQ_TYPE_MSI :
+				 IRQ_TYPE_MSIX);
 	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, msi_num);
 	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND,
-				 COMMAND_RAISE_MSI_IRQ);
+				 msix == false ? COMMAND_RAISE_MSI_IRQ :
+				 COMMAND_RAISE_MSIX_IRQ);
 	val = wait_for_completion_timeout(&test->irq_raised,
 					  msecs_to_jiffies(1000));
 	if (!val)
@@ -456,7 +459,8 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd,
 		ret = pci_endpoint_test_legacy_irq(test);
 		break;
 	case PCITEST_MSI:
-		ret = pci_endpoint_test_msi_irq(test, arg);
+	case PCITEST_MSIX:
+		ret = pci_endpoint_test_msi_irq(test, arg, cmd == PCITEST_MSIX);
 		break;
 	case PCITEST_WRITE:
 		ret = pci_endpoint_test_write(test, arg);
@@ -542,6 +546,12 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
 			dev_err(dev, "Failed to get MSI interrupts\n");
 		test->num_irqs = irq;
 		break;
+	case IRQ_TYPE_MSIX:
+		irq = pci_alloc_irq_vectors(pdev, 1, 2048, PCI_IRQ_MSIX);
+		if (irq < 0)
+			dev_err(dev, "Failed to get MSI-X interrupts\n");
+		test->num_irqs = irq;
+		break;
 	default:
 		dev_err(dev, "Invalid IRQ type selected\n");
 	}
@@ -558,8 +568,9 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
 				       pci_endpoint_test_irqhandler,
 				       IRQF_SHARED, DRV_MODULE_NAME, test);
 		if (err)
-			dev_err(dev, "failed to request IRQ %d for MSI %d\n",
-				pci_irq_vector(pdev, i), i + 1);
+			dev_err(dev, "Failed to request IRQ %d for MSI%s %d\n",
+				pci_irq_vector(pdev, i),
+				irq_type == IRQ_TYPE_MSIX ? "-X" : "", i + 1);
 	}
 
 	for (bar = BAR_0; bar <= BAR_5; bar++) {
@@ -625,6 +636,7 @@ err_iounmap:
 
 err_disable_msi:
 	pci_disable_msi(pdev);
+	pci_disable_msix(pdev);
 	pci_release_regions(pdev);
 
 err_disable_pdev:
@@ -656,6 +668,7 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev)
 	for (i = 0; i < test->num_irqs; i++)
 		devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), test);
 	pci_disable_msi(pdev);
+	pci_disable_msix(pdev);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
 }
diff --git a/drivers/pci/controller/dwc/pcie-designware-plat.c b/drivers/pci/controller/dwc/pcie-designware-plat.c
index 3f8a3aa3a91e..c12bf794d69c 100644
--- a/drivers/pci/controller/dwc/pcie-designware-plat.c
+++ b/drivers/pci/controller/dwc/pcie-designware-plat.c
@@ -77,6 +77,7 @@ static void dw_plat_pcie_ep_init(struct dw_pcie_ep *ep)
 		dw_pcie_ep_reset_bar(pci, bar);
 
 	epc->features |= EPC_FEATURE_NO_LINKUP_NOTIFIER;
+	epc->features |= EPC_FEATURE_MSIX_AVAILABLE;
 }
 
 static int dw_plat_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index db4b23672004..3e86fa3c7da3 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -20,10 +20,11 @@
 
 #define IRQ_TYPE_LEGACY			0
 #define IRQ_TYPE_MSI			1
+#define IRQ_TYPE_MSIX			2
 
 #define COMMAND_RAISE_LEGACY_IRQ	BIT(0)
 #define COMMAND_RAISE_MSI_IRQ		BIT(1)
-/* BIT(2) is reserved for raising MSI-X IRQ command */
+#define COMMAND_RAISE_MSIX_IRQ		BIT(2)
 #define COMMAND_READ			BIT(3)
 #define COMMAND_WRITE			BIT(4)
 #define COMMAND_COPY			BIT(5)
@@ -47,6 +48,7 @@ struct pci_epf_test {
 	struct pci_epf		*epf;
 	enum pci_barno		test_reg_bar;
 	bool			linkup_notifier;
+	bool			msix_available;
 	struct delayed_work	cmd_handler;
 };
 
@@ -266,6 +268,9 @@ static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, u8 irq_type,
 	case IRQ_TYPE_MSI:
 		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSI, irq);
 		break;
+	case IRQ_TYPE_MSIX:
+		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSIX, irq);
+		break;
 	default:
 		dev_err(dev, "Failed to raise IRQ, unknown type\n");
 		break;
@@ -292,7 +297,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 	reg->command = 0;
 	reg->status = 0;
 
-	if (reg->irq_type > IRQ_TYPE_MSI) {
+	if (reg->irq_type > IRQ_TYPE_MSIX) {
 		dev_err(dev, "Failed to detect IRQ type\n");
 		goto reset_handler;
 	}
@@ -346,6 +351,16 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 		goto reset_handler;
 	}
 
+	if (command & COMMAND_RAISE_MSIX_IRQ) {
+		count = pci_epc_get_msix(epc, epf->func_no);
+		if (reg->irq_number > count || count <= 0)
+			goto reset_handler;
+		reg->status = STATUS_IRQ_RAISED;
+		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSIX,
+				  reg->irq_number);
+		goto reset_handler;
+	}
+
 reset_handler:
 	queue_delayed_work(kpcitest_workqueue, &epf_test->cmd_handler,
 			   msecs_to_jiffies(1));
@@ -459,6 +474,8 @@ static int pci_epf_test_bind(struct pci_epf *epf)
 	else
 		epf_test->linkup_notifier = true;
 
+	epf_test->msix_available = epc->features & EPC_FEATURE_MSIX_AVAILABLE;
+
 	epf_test->test_reg_bar = EPC_FEATURE_GET_BAR(epc->features);
 
 	ret = pci_epc_write_header(epc, epf->func_no, header);
@@ -481,6 +498,14 @@ static int pci_epf_test_bind(struct pci_epf *epf)
 		return ret;
 	}
 
+	if (epf_test->msix_available) {
+		ret = pci_epc_set_msix(epc, epf->func_no, epf->msix_interrupts);
+		if (ret) {
+			dev_err(dev, "MSI-X configuration failed\n");
+			return ret;
+		}
+	}
+
 	if (!epf_test->linkup_notifier)
 		queue_work(kpcitest_workqueue, &epf_test->cmd_handler.work);
 
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index bb2395b56f13..37dab8116901 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -102,6 +102,7 @@ struct pci_epc {
 
 #define EPC_FEATURE_NO_LINKUP_NOTIFIER		BIT(0)
 #define EPC_FEATURE_BAR_MASK			(BIT(1) | BIT(2) | BIT(3))
+#define EPC_FEATURE_MSIX_AVAILABLE		BIT(4)
 #define EPC_FEATURE_SET_BAR(features, bar)	\
 		(features |= (EPC_FEATURE_BAR_MASK & (bar << 1)))
 #define EPC_FEATURE_GET_BAR(features)		\
diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h
index 953cf036cb26..d746fb159dcd 100644
--- a/include/uapi/linux/pcitest.h
+++ b/include/uapi/linux/pcitest.h
@@ -16,5 +16,6 @@
 #define PCITEST_WRITE		_IOW('P', 0x4, unsigned long)
 #define PCITEST_READ		_IOW('P', 0x5, unsigned long)
 #define PCITEST_COPY		_IOW('P', 0x6, unsigned long)
+#define PCITEST_MSIX		_IOW('P', 0x7, int)
 
 #endif /* __UAPI_LINUX_PCITEST_H */
-- 
cgit v1.2.3


From d70420bcd447e5400a0116b20b9b6088bb32448e Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 18 Jul 2018 22:33:02 +0200
Subject: mtd: adapt misleading comment in mtd_oob_ops structure

A comment in the kernel doc of the mtd_oob_ops structure tells that it
is not possible to write more than one page with OOB. This is actually
true for only a few MTD devices like 'onenand' but it is definitely not
a general limitation. While this would benefit to be handled elsewhere
either by the MTD layer or by the limited drivers, let's update this
comment to reflect the reality.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 include/linux/mtd/mtd.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a86c4fa93115..cd0be91bdefa 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -67,9 +67,11 @@ struct mtd_erase_region_info {
  * @datbuf:	data buffer - if NULL only oob data are read/written
  * @oobbuf:	oob data buffer
  *
- * Note, it is allowed to read more than one OOB area at one go, but not write.
- * The interface assumes that the OOB write requests program only one page's
- * OOB area.
+ * Note, some MTD drivers do not allow you to write more than one OOB area at
+ * one go. If you try to do that on such an MTD device, -EINVAL will be
+ * returned. If you want to make your implementation portable on all kind of MTD
+ * devices you should split the write request into several sub-requests when the
+ * request crosses a page boundary.
  */
 struct mtd_oob_ops {
 	unsigned int	mode;
-- 
cgit v1.2.3


From 0d6030ac041f6835974deb88a1a9c299b4adc3ad Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Wed, 18 Jul 2018 10:42:17 +0200
Subject: mtd: rawnand: Expose _notsupp() helpers for raw page accessors

Some implementations simply can't disable their ECC engine. Expose
helpers returning -ENOTSUPP so that the caller knows that raw accesses
are not supported instead of silently falling back to non-raw
accessors.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/mtd/rawnand.h      |  4 ++++
 2 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 4fa5e20d9690..c4b74630f4c5 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -2966,6 +2966,23 @@ int nand_check_erased_ecc_chunk(void *data, int datalen,
 }
 EXPORT_SYMBOL(nand_check_erased_ecc_chunk);
 
+/**
+ * nand_read_page_raw_notsupp - dummy read raw page function
+ * @mtd: mtd info structure
+ * @chip: nand chip info structure
+ * @buf: buffer to store read data
+ * @oob_required: caller requires OOB data read to chip->oob_poi
+ * @page: page number to read
+ *
+ * Returns -ENOTSUPP unconditionally.
+ */
+int nand_read_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
+			       u8 *buf, int oob_required, int page)
+{
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL(nand_read_page_raw_notsupp);
+
 /**
  * nand_read_page_raw - [INTERN] read raw page data without ecc
  * @mtd: mtd info structure
@@ -3960,6 +3977,22 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
 	return ret;
 }
 
+/**
+ * nand_write_page_raw_notsupp - dummy raw page write function
+ * @mtd: mtd info structure
+ * @chip: nand chip info structure
+ * @buf: data buffer
+ * @oob_required: must write chip->oob_poi to OOB
+ * @page: page number to write
+ *
+ * Returns -ENOTSUPP unconditionally.
+ */
+int nand_write_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
+				const u8 *buf, int oob_required, int page)
+{
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL(nand_write_page_raw_notsupp);
 
 /**
  * nand_write_page_raw - [INTERN] raw page write function
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 11c2426fc363..f60fad29eae6 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1681,10 +1681,14 @@ int nand_get_set_features_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
 /* Default read_page_raw implementation */
 int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 		       uint8_t *buf, int oob_required, int page);
+int nand_read_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
+			       u8 *buf, int oob_required, int page);
 
 /* Default write_page_raw implementation */
 int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 			const uint8_t *buf, int oob_required, int page);
+int nand_write_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
+				const u8 *buf, int oob_required, int page);
 
 /* Reset and initialize a NAND device */
 int nand_reset(struct nand_chip *chip, int chipnr);
-- 
cgit v1.2.3


From 60ed982a4e78ff938824a750dbac8a10e5b472ef Mon Sep 17 00:00:00 2001
From: Rajat Jain <rajatja@google.com>
Date: Thu, 21 Jun 2018 16:48:26 -0700
Subject: PCI/AER: Move internal declarations to drivers/pci/pci.h

Since pci_aer_init() and pci_no_aer() are used only internally, move their
declarations to the PCI internal header file.  Also, no one cares about
return value of pci_aer_init(), so make it void.

Signed-off-by: Rajat Jain <rajatja@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h      | 8 ++++++++
 drivers/pci/pcie/aer.c | 4 ++--
 include/linux/pci.h    | 4 ----
 3 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4f723442f602..52bc5b350dfb 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -480,4 +480,12 @@ static inline int devm_of_pci_get_host_bridge_resources(struct device *dev,
 }
 #endif
 
+#ifdef CONFIG_PCIEAER
+void pci_no_aer(void);
+void pci_aer_init(struct pci_dev *dev);
+#else
+static inline void pci_no_aer(void) { }
+static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; }
+#endif
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index e6d5255d718c..0c6fe22eaf75 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -382,10 +382,10 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
 	return 0;
 }
 
-int pci_aer_init(struct pci_dev *dev)
+void pci_aer_init(struct pci_dev *dev)
 {
 	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
-	return pci_cleanup_aer_error_status_regs(dev);
+	pci_cleanup_aer_error_status_regs(dev);
 }
 
 #define AER_AGENT_RECEIVER		0
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..b4ffea05c999 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1468,13 +1468,9 @@ static inline bool pcie_aspm_support_enabled(void) { return false; }
 #endif
 
 #ifdef CONFIG_PCIEAER
-void pci_no_aer(void);
 bool pci_aer_available(void);
-int pci_aer_init(struct pci_dev *dev);
 #else
-static inline void pci_no_aer(void) { }
 static inline bool pci_aer_available(void) { return false; }
-static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; }
 #endif
 
 #ifdef CONFIG_PCIE_ECRC
-- 
cgit v1.2.3


From db89ccbe52c7885644ba578c7771e57620f879b1 Mon Sep 17 00:00:00 2001
From: Rajat Jain <rajatja@google.com>
Date: Sat, 30 Jun 2018 15:07:17 -0500
Subject: PCI/AER: Define aer_stats structure for AER capable devices

Define a structure to hold the AER statistics.  There are 2 groups of
statistics: dev_* counters that are to be collected for all AER capable
devices and rootport_* counters that are collected for all (AER capable)
rootports only.  Allocate and free this structure when device is added or
released (thus counters survive the lifetime of the device).

Signed-off-by: Rajat Jain <rajatja@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h      |  2 ++
 drivers/pci/pcie/aer.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++--
 drivers/pci/probe.c    |  1 +
 include/linux/pci.h    |  1 +
 4 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 52bc5b350dfb..1877a14e06a9 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -483,9 +483,11 @@ static inline int devm_of_pci_get_host_bridge_resources(struct device *dev,
 #ifdef CONFIG_PCIEAER
 void pci_no_aer(void);
 void pci_aer_init(struct pci_dev *dev);
+void pci_aer_exit(struct pci_dev *dev);
 #else
 static inline void pci_no_aer(void) { }
 static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; }
+static inline void pci_aer_exit(struct pci_dev *d) { }
 #endif
 
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 0c6fe22eaf75..fe1b9d22a331 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -32,6 +32,9 @@
 
 #define AER_ERROR_SOURCES_MAX		100
 
+#define AER_MAX_TYPEOF_COR_ERRS		16	/* as per PCI_ERR_COR_STATUS */
+#define AER_MAX_TYPEOF_UNCOR_ERRS	26	/* as per PCI_ERR_UNCOR_STATUS*/
+
 struct aer_err_source {
 	unsigned int status;
 	unsigned int id;
@@ -56,6 +59,42 @@ struct aer_rpc {
 					 */
 };
 
+/* AER stats for the device */
+struct aer_stats {
+
+	/*
+	 * Fields for all AER capable devices. They indicate the errors
+	 * "as seen by this device". Note that this may mean that if an
+	 * end point is causing problems, the AER counters may increment
+	 * at its link partner (e.g. root port) because the errors will be
+	 * "seen" by the link partner and not the the problematic end point
+	 * itself (which may report all counters as 0 as it never saw any
+	 * problems).
+	 */
+	/* Counters for different type of correctable errors */
+	u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
+	/* Counters for different type of fatal uncorrectable errors */
+	u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
+	/* Counters for different type of nonfatal uncorrectable errors */
+	u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
+	/* Total number of ERR_COR sent by this device */
+	u64 dev_total_cor_errs;
+	/* Total number of ERR_FATAL sent by this device */
+	u64 dev_total_fatal_errs;
+	/* Total number of ERR_NONFATAL sent by this device */
+	u64 dev_total_nonfatal_errs;
+
+	/*
+	 * Fields for Root ports & root complex event collectors only, these
+	 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
+	 * messages received by the root port / event collector, INCLUDING the
+	 * ones that are generated internally (by the rootport itself)
+	 */
+	u64 rootport_total_cor_errs;
+	u64 rootport_total_fatal_errs;
+	u64 rootport_total_nonfatal_errs;
+};
+
 #define AER_LOG_TLP_MASKS		(PCI_ERR_UNC_POISON_TLP|	\
 					PCI_ERR_UNC_ECRC|		\
 					PCI_ERR_UNC_UNSUP|		\
@@ -385,9 +424,19 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
 void pci_aer_init(struct pci_dev *dev)
 {
 	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+
+	if (dev->aer_cap)
+		dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
+
 	pci_cleanup_aer_error_status_regs(dev);
 }
 
+void pci_aer_exit(struct pci_dev *dev)
+{
+	kfree(dev->aer_stats);
+	dev->aer_stats = NULL;
+}
+
 #define AER_AGENT_RECEIVER		0
 #define AER_AGENT_REQUESTER		1
 #define AER_AGENT_COMPLETER		2
@@ -438,7 +487,7 @@ static const char *aer_error_layer[] = {
 	"Transaction Layer"
 };
 
-static const char *aer_correctable_error_string[] = {
+static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
 	"RxErr",			/* Bit Position 0	*/
 	NULL,
 	NULL,
@@ -457,7 +506,7 @@ static const char *aer_correctable_error_string[] = {
 	"HeaderOF",			/* Bit Position 15	*/
 };
 
-static const char *aer_uncorrectable_error_string[] = {
+static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
 	"Undefined",			/* Bit Position 0	*/
 	NULL,
 	NULL,
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac876e32de4b..48edd0c9e4bc 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2064,6 +2064,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
 static void pci_release_capabilities(struct pci_dev *dev)
 {
+	pci_aer_exit(dev);
 	pci_vpd_release(dev);
 	pci_iov_release(dev);
 	pci_free_cap_save_buffers(dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b4ffea05c999..6bc0aa0fc33f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -299,6 +299,7 @@ struct pci_dev {
 	u8		hdr_type;	/* PCI header type (`multi' flag masked out) */
 #ifdef CONFIG_PCIEAER
 	u16		aer_cap;	/* AER capability offset */
+	struct aer_stats *aer_stats;	/* AER stats for this device */
 #endif
 	u8		pcie_cap;	/* PCIe capability offset */
 	u8		msi_cap;	/* MSI capability offset */
-- 
cgit v1.2.3


From 3eca993740b8eb40f514b90b1877a4dbcf0a6710 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Thu, 19 Jul 2018 16:55:34 -0400
Subject: timekeeping: Replace read_boot_clock64() with
 read_persistent_wall_and_boot_offset()

If architecture does not support exact boot time, it is challenging to
estimate boot time without having a reference to the current persistent
clock value. Yet, it cannot read the persistent clock time again, because
this may lead to math discrepancies with the caller of read_boot_clock64()
who have read the persistent clock at a different time.

This is why it is better to provide two values simultaneously: the
persistent clock value, and the boot time.

Replace read_boot_clock64() with:
read_persistent_wall_and_boot_offset(wall_time, boot_offset)

Where wall_time is returned by read_persistent_clock() And boot_offset is
wall_time - boot time, which defaults to 0.

Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: steven.sistare@oracle.com
Cc: daniel.m.jordan@oracle.com
Cc: linux@armlinux.org.uk
Cc: schwidefsky@de.ibm.com
Cc: heiko.carstens@de.ibm.com
Cc: john.stultz@linaro.org
Cc: sboyd@codeaurora.org
Cc: hpa@zytor.com
Cc: douly.fnst@cn.fujitsu.com
Cc: peterz@infradead.org
Cc: prarit@redhat.com
Cc: feng.tang@intel.com
Cc: pmladek@suse.com
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: linux-s390@vger.kernel.org
Cc: boris.ostrovsky@oracle.com
Cc: jgross@suse.com
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/20180719205545.16512-16-pasha.tatashin@oracle.com
---
 include/linux/timekeeping.h |  3 ++-
 kernel/time/timekeeping.c   | 59 +++++++++++++++++++++++----------------------
 2 files changed, 32 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 86bc2026efce..686bc27acef0 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -243,7 +243,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
 extern int persistent_clock_is_local;
 
 extern void read_persistent_clock64(struct timespec64 *ts);
-extern void read_boot_clock64(struct timespec64 *ts);
+void read_persistent_clock_and_boot_offset(struct timespec64 *wall_clock,
+					   struct timespec64 *boot_offset);
 extern int update_persistent_clock64(struct timespec64 now);
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4786df904c22..cb738f825c12 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -17,6 +17,7 @@
 #include <linux/nmi.h>
 #include <linux/sched.h>
 #include <linux/sched/loadavg.h>
+#include <linux/sched/clock.h>
 #include <linux/syscore_ops.h>
 #include <linux/clocksource.h>
 #include <linux/jiffies.h>
@@ -1496,18 +1497,20 @@ void __weak read_persistent_clock64(struct timespec64 *ts64)
 }
 
 /**
- * read_boot_clock64 -  Return time of the system start.
+ * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
+ *                                        from the boot.
  *
  * Weak dummy function for arches that do not yet support it.
- * Function to read the exact time the system has been started.
- * Returns a timespec64 with tv_sec=0 and tv_nsec=0 if unsupported.
- *
- *  XXX - Do be sure to remove it once all arches implement it.
+ * wall_time	- current time as returned by persistent clock
+ * boot_offset	- offset that is defined as wall_time - boot_time
+ *		  default to 0.
  */
-void __weak read_boot_clock64(struct timespec64 *ts)
+void __weak __init
+read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
+				     struct timespec64 *boot_offset)
 {
-	ts->tv_sec = 0;
-	ts->tv_nsec = 0;
+	read_persistent_clock64(wall_time);
+	*boot_offset = (struct timespec64){0};
 }
 
 /* Flag for if timekeeping_resume() has injected sleeptime */
@@ -1521,28 +1524,29 @@ static bool persistent_clock_exists;
  */
 void __init timekeeping_init(void)
 {
+	struct timespec64 wall_time, boot_offset, wall_to_mono;
 	struct timekeeper *tk = &tk_core.timekeeper;
 	struct clocksource *clock;
 	unsigned long flags;
-	struct timespec64 now, boot, tmp;
-
-	read_persistent_clock64(&now);
-	if (!timespec64_valid_strict(&now)) {
-		pr_warn("WARNING: Persistent clock returned invalid value!\n"
-			"         Check your CMOS/BIOS settings.\n");
-		now.tv_sec = 0;
-		now.tv_nsec = 0;
-	} else if (now.tv_sec || now.tv_nsec)
-		persistent_clock_exists = true;
 
-	read_boot_clock64(&boot);
-	if (!timespec64_valid_strict(&boot)) {
-		pr_warn("WARNING: Boot clock returned invalid value!\n"
-			"         Check your CMOS/BIOS settings.\n");
-		boot.tv_sec = 0;
-		boot.tv_nsec = 0;
+	read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
+	if (timespec64_valid_strict(&wall_time) &&
+	    timespec64_to_ns(&wall_time) > 0) {
+		persistent_clock_exists = true;
+	} else {
+		pr_warn("Persistent clock returned invalid value");
+		wall_time = (struct timespec64){0};
 	}
 
+	if (timespec64_compare(&wall_time, &boot_offset) < 0)
+		boot_offset = (struct timespec64){0};
+
+	/*
+	 * We want set wall_to_mono, so the following is true:
+	 * wall time + wall_to_mono = boot time
+	 */
+	wall_to_mono = timespec64_sub(boot_offset, wall_time);
+
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	write_seqcount_begin(&tk_core.seq);
 	ntp_init();
@@ -1552,13 +1556,10 @@ void __init timekeeping_init(void)
 		clock->enable(clock);
 	tk_setup_internals(tk, clock);
 
-	tk_set_xtime(tk, &now);
+	tk_set_xtime(tk, &wall_time);
 	tk->raw_sec = 0;
-	if (boot.tv_sec == 0 && boot.tv_nsec == 0)
-		boot = tk_xtime(tk);
 
-	set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
-	tk_set_wall_to_mono(tk, tmp);
+	tk_set_wall_to_mono(tk, wall_to_mono);
 
 	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 
-- 
cgit v1.2.3


From 5d2a4e91a541cb04d20d11602f0f9340291322ac Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Thu, 19 Jul 2018 16:55:41 -0400
Subject: sched/clock: Move sched clock initialization and merge with generic
 clock

sched_clock_postinit() initializes a generic clock on systems where no
other clock is provided. This function may be called only after
timekeeping_init().

Rename sched_clock_postinit to generic_clock_inti() and call it from
sched_clock_init(). Move the call for sched_clock_init() until after
time_init().

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: steven.sistare@oracle.com
Cc: daniel.m.jordan@oracle.com
Cc: linux@armlinux.org.uk
Cc: schwidefsky@de.ibm.com
Cc: heiko.carstens@de.ibm.com
Cc: john.stultz@linaro.org
Cc: sboyd@codeaurora.org
Cc: hpa@zytor.com
Cc: douly.fnst@cn.fujitsu.com
Cc: prarit@redhat.com
Cc: feng.tang@intel.com
Cc: pmladek@suse.com
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: linux-s390@vger.kernel.org
Cc: boris.ostrovsky@oracle.com
Cc: jgross@suse.com
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/20180719205545.16512-23-pasha.tatashin@oracle.com
---
 include/linux/sched_clock.h |  5 ++---
 init/main.c                 |  4 ++--
 kernel/sched/clock.c        | 27 +++++++++++++++++----------
 kernel/sched/core.c         |  1 -
 kernel/time/sched_clock.c   |  2 +-
 5 files changed, 22 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index 411b52e424e1..abe28d5cb3f4 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -9,17 +9,16 @@
 #define LINUX_SCHED_CLOCK
 
 #ifdef CONFIG_GENERIC_SCHED_CLOCK
-extern void sched_clock_postinit(void);
+extern void generic_sched_clock_init(void);
 
 extern void sched_clock_register(u64 (*read)(void), int bits,
 				 unsigned long rate);
 #else
-static inline void sched_clock_postinit(void) { }
+static inline void generic_sched_clock_init(void) { }
 
 static inline void sched_clock_register(u64 (*read)(void), int bits,
 					unsigned long rate)
 {
-	;
 }
 #endif
 
diff --git a/init/main.c b/init/main.c
index 3b4ada11ed52..162d931c9511 100644
--- a/init/main.c
+++ b/init/main.c
@@ -79,7 +79,7 @@
 #include <linux/pti.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
-#include <linux/sched_clock.h>
+#include <linux/sched/clock.h>
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/context_tracking.h>
@@ -642,7 +642,7 @@ asmlinkage __visible void __init start_kernel(void)
 	softirq_init();
 	timekeeping_init();
 	time_init();
-	sched_clock_postinit();
+	sched_clock_init();
 	printk_safe_init();
 	perf_event_init();
 	profile_init();
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index 10c83e73837a..0e9dbb2d9aea 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -53,6 +53,7 @@
  *
  */
 #include "sched.h"
+#include <linux/sched_clock.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -68,11 +69,6 @@ EXPORT_SYMBOL_GPL(sched_clock);
 
 __read_mostly int sched_clock_running;
 
-void sched_clock_init(void)
-{
-	sched_clock_running = 1;
-}
-
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 /*
  * We must start with !__sched_clock_stable because the unstable -> stable
@@ -199,6 +195,15 @@ void clear_sched_clock_stable(void)
 		__clear_sched_clock_stable();
 }
 
+static void __sched_clock_gtod_offset(void)
+{
+	__gtod_offset = (sched_clock() + __sched_clock_offset) - ktime_get_ns();
+}
+
+void __init sched_clock_init(void)
+{
+	sched_clock_running = 1;
+}
 /*
  * We run this as late_initcall() such that it runs after all built-in drivers,
  * notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
@@ -385,8 +390,6 @@ void sched_clock_tick(void)
 
 void sched_clock_tick_stable(void)
 {
-	u64 gtod, clock;
-
 	if (!sched_clock_stable())
 		return;
 
@@ -398,9 +401,7 @@ void sched_clock_tick_stable(void)
 	 * TSC to be unstable, any computation will be computing crap.
 	 */
 	local_irq_disable();
-	gtod = ktime_get_ns();
-	clock = sched_clock();
-	__gtod_offset = (clock + __sched_clock_offset) - gtod;
+	__sched_clock_gtod_offset();
 	local_irq_enable();
 }
 
@@ -434,6 +435,12 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
+void __init sched_clock_init(void)
+{
+	sched_clock_running = 1;
+	generic_sched_clock_init();
+}
+
 u64 sched_clock_cpu(int cpu)
 {
 	if (unlikely(!sched_clock_running))
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe365c9a08e9..552406e9713b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5954,7 +5954,6 @@ void __init sched_init(void)
 	int i, j;
 	unsigned long alloc_size = 0, ptr;
 
-	sched_clock_init();
 	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 2d8f05aad442..cbc72c2c1fca 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -237,7 +237,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
-void __init sched_clock_postinit(void)
+void __init generic_sched_clock_init(void)
 {
 	/*
 	 * If no sched_clock() function has been provided at that point,
-- 
cgit v1.2.3


From 381634cad15b711e033a2638d558232b60f753f6 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Thu, 19 Jul 2018 18:04:11 -0500
Subject: PCI: Hide pci_reset_bridge_secondary_bus() from drivers

Rename pci_reset_bridge_secondary_bus() to pci_bridge_secondary_bus_reset()
and move the declaration from linux/pci.h to drivers/pci.h to be used
internally in PCI directory only.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/hotplug/pciehp_hpc.c |  2 +-
 drivers/pci/pci.c                | 11 +++++------
 drivers/pci/pci.h                |  1 +
 drivers/pci/pcie/aer.c           |  2 +-
 drivers/pci/pcie/err.c           |  2 +-
 include/linux/pci.h              |  1 -
 6 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index bbaa2114f953..8dae23221344 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -746,7 +746,7 @@ int pciehp_reset_slot(struct slot *slot, int probe)
 	if (pciehp_poll_mode)
 		del_timer_sync(&ctrl->poll_timer);
 
-	rc = pci_reset_bridge_secondary_bus(ctrl->pcie->port);
+	rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port);
 
 	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask);
 	pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 98d149070205..236220cb0f77 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4224,19 +4224,18 @@ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
 }
 
 /**
- * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge.
+ * pci_bridge_secondary_bus_reset - Reset the secondary bus on a PCI bridge.
  * @dev: Bridge device
  *
  * Use the bridge control register to assert reset on the secondary bus.
  * Devices on the secondary bus are left in power-on state.
  */
-int pci_reset_bridge_secondary_bus(struct pci_dev *dev)
+int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
 {
 	pcibios_reset_secondary_bus(dev);
 
 	return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS);
 }
-EXPORT_SYMBOL_GPL(pci_reset_bridge_secondary_bus);
 
 static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
 {
@@ -4253,7 +4252,7 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
 	if (probe)
 		return 0;
 
-	return pci_reset_bridge_secondary_bus(dev->bus->self);
+	return pci_bridge_secondary_bus_reset(dev->bus->self);
 }
 
 static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, int probe)
@@ -4860,7 +4859,7 @@ static int pci_bus_reset(struct pci_bus *bus, int probe)
 
 	might_sleep();
 
-	ret = pci_reset_bridge_secondary_bus(bus->self);
+	ret = pci_bridge_secondary_bus_reset(bus->self);
 
 	pci_bus_unlock(bus);
 
@@ -4924,7 +4923,7 @@ int pci_try_reset_bus(struct pci_bus *bus)
 
 	if (pci_bus_trylock(bus)) {
 		might_sleep();
-		rc = pci_reset_bridge_secondary_bus(bus->self);
+		rc = pci_bridge_secondary_bus_reset(bus->self);
 		pci_bus_unlock(bus);
 	} else
 		rc = -EAGAIN;
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index c358e7a07f3f..f784263ad587 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -33,6 +33,7 @@ int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai,
 		  enum pci_mmap_api mmap_api);
 
 int pci_probe_reset_function(struct pci_dev *dev);
+int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 
 /**
  * struct pci_platform_pm_ops - Firmware PM callbacks
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index bde723db3d65..8c12efca9259 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1314,7 +1314,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
 	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
 
-	rc = pci_reset_bridge_secondary_bus(dev);
+	rc = pci_bridge_secondary_bus_reset(dev);
 	pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n");
 
 	/* Clear Root Error Status */
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 03075cff86f4..ae72f88d3ca2 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -177,7 +177,7 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev)
 {
 	int rc;
 
-	rc = pci_reset_bridge_secondary_bus(dev);
+	rc = pci_bridge_secondary_bus_reset(dev);
 	pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
 	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6ba818449095..03520ff23d73 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1103,7 +1103,6 @@ int pci_reset_bus(struct pci_bus *bus);
 int pci_try_reset_bus(struct pci_bus *bus);
 void pci_reset_secondary_bus(struct pci_dev *dev);
 void pcibios_reset_secondary_bus(struct pci_dev *dev);
-int pci_reset_bridge_secondary_bus(struct pci_dev *dev);
 void pci_update_resource(struct pci_dev *dev, int resno);
 int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align);
-- 
cgit v1.2.3


From 811c5cb37df46b0cd714dbd053d19cdb97d08cff Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Thu, 19 Jul 2018 18:04:12 -0500
Subject: PCI: Unify try slot and bus reset API

Drivers are expected to call pci_try_reset_slot() or pci_try_reset_bus() by
querying if a system supports hotplug or not.  A survey showed that most
drivers don't do this and we are leaking hotplug capability to the user.

Hide pci_try_slot_reset() from drivers and embed into pci_try_bus_reset().
Change pci_try_reset_bus() parameter from struct pci_bus to struct pci_dev.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/infiniband/hw/hfi1/pcie.c |  2 +-
 drivers/pci/pci.c                 | 21 ++++++++++++++++-----
 drivers/vfio/pci/vfio_pci.c       |  6 ++----
 include/linux/pci.h               |  3 +--
 4 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 4570c4dc93d9..df4f2d390be8 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -905,7 +905,7 @@ static int trigger_sbr(struct hfi1_devdata *dd)
 	 * delay after a reset is required.  Per spec requirements,
 	 * the link is either working or not after that point.
 	 */
-	return pci_try_reset_bus(dev->bus);
+	return pci_try_reset_bus(dev);
 }
 
 /*
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 236220cb0f77..a31e6dbf21c3 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4817,12 +4817,12 @@ int pci_reset_slot(struct pci_slot *slot)
 EXPORT_SYMBOL_GPL(pci_reset_slot);
 
 /**
- * pci_try_reset_slot - Try to reset a PCI slot
+ * __pci_try_reset_slot - Try to reset a PCI slot
  * @slot: PCI slot to reset
  *
  * Same as above except return -EAGAIN if the slot cannot be locked
  */
-int pci_try_reset_slot(struct pci_slot *slot)
+static int __pci_try_reset_slot(struct pci_slot *slot)
 {
 	int rc;
 
@@ -4843,7 +4843,6 @@ int pci_try_reset_slot(struct pci_slot *slot)
 
 	return rc;
 }
-EXPORT_SYMBOL_GPL(pci_try_reset_slot);
 
 static int pci_bus_reset(struct pci_bus *bus, int probe)
 {
@@ -4906,12 +4905,12 @@ int pci_reset_bus(struct pci_bus *bus)
 EXPORT_SYMBOL_GPL(pci_reset_bus);
 
 /**
- * pci_try_reset_bus - Try to reset a PCI bus
+ * __pci_try_reset_bus - Try to reset a PCI bus
  * @bus: top level PCI bus to reset
  *
  * Same as above except return -EAGAIN if the bus cannot be locked
  */
-int pci_try_reset_bus(struct pci_bus *bus)
+static int __pci_try_reset_bus(struct pci_bus *bus)
 {
 	int rc;
 
@@ -4932,6 +4931,18 @@ int pci_try_reset_bus(struct pci_bus *bus)
 
 	return rc;
 }
+
+/**
+ * pci_try_reset_bus - Try to reset a PCI bus
+ * @pdev: top level PCI device to reset via slot/bus
+ *
+ * Same as above except return -EAGAIN if the bus cannot be locked
+ */
+int pci_try_reset_bus(struct pci_dev *pdev)
+{
+	return pci_probe_reset_slot(pdev->slot) ?
+	    __pci_try_reset_slot(pdev->slot) : __pci_try_reset_bus(pdev->bus);
+}
 EXPORT_SYMBOL_GPL(pci_try_reset_bus);
 
 /**
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index b423a309a6e0..71018ec9065b 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1010,8 +1010,7 @@ reset_info_exit:
 						    &info, slot);
 		if (!ret)
 			/* User has access, do the reset */
-			ret = slot ? pci_try_reset_slot(vdev->pdev->slot) :
-				     pci_try_reset_bus(vdev->pdev->bus);
+			ret = pci_try_reset_bus(vdev->pdev);
 
 hot_reset_release:
 		for (i--; i >= 0; i--)
@@ -1373,8 +1372,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 	}
 
 	if (needs_reset)
-		ret = slot ? pci_try_reset_slot(vdev->pdev->slot) :
-			     pci_try_reset_bus(vdev->pdev->bus);
+		ret = pci_try_reset_bus(vdev->pdev);
 
 put_devs:
 	for (i = 0; i < devs.cur_index; i++) {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 03520ff23d73..19fb82559145 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1097,10 +1097,9 @@ int pci_reset_function_locked(struct pci_dev *dev);
 int pci_try_reset_function(struct pci_dev *dev);
 int pci_probe_reset_slot(struct pci_slot *slot);
 int pci_reset_slot(struct pci_slot *slot);
-int pci_try_reset_slot(struct pci_slot *slot);
 int pci_probe_reset_bus(struct pci_bus *bus);
 int pci_reset_bus(struct pci_bus *bus);
-int pci_try_reset_bus(struct pci_bus *bus);
+int pci_try_reset_bus(struct pci_dev *dev);
 void pci_reset_secondary_bus(struct pci_dev *dev);
 void pcibios_reset_secondary_bus(struct pci_dev *dev);
 void pci_update_resource(struct pci_dev *dev, int resno);
-- 
cgit v1.2.3


From fe32e2fa656c29d5d25f959f8e6168ac405d9ab4 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Thu, 19 Jul 2018 18:04:14 -0500
Subject: PCI: Deprecate pci_reset_bus() and pci_reset_slot() functions

pci_reset_bus() and pci_reset_slot() functions are not being used by any
code.  Remove them from the kernel in favor of pci_try_reset_bus() and
pci_try_reset_slot() functions.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.c   | 53 +----------------------------------------------------
 include/linux/pci.h |  2 --
 2 files changed, 1 insertion(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index a31e6dbf21c3..8ee9f386c1ee 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4784,7 +4784,7 @@ int pci_probe_reset_slot(struct pci_slot *slot)
 EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
 
 /**
- * pci_reset_slot - reset a PCI slot
+ * __pci_try_reset_slot - Try to reset a PCI slot
  * @slot: PCI slot to reset
  *
  * A PCI bus may host multiple slots, each slot may support a reset mechanism
@@ -4796,30 +4796,6 @@ EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
  * through this function.  PCI config space of all devices in the slot and
  * behind the slot is saved before and restored after reset.
  *
- * Return 0 on success, non-zero on error.
- */
-int pci_reset_slot(struct pci_slot *slot)
-{
-	int rc;
-
-	rc = pci_slot_reset(slot, 1);
-	if (rc)
-		return rc;
-
-	pci_slot_save_and_disable(slot);
-
-	rc = pci_slot_reset(slot, 0);
-
-	pci_slot_restore(slot);
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(pci_reset_slot);
-
-/**
- * __pci_try_reset_slot - Try to reset a PCI slot
- * @slot: PCI slot to reset
- *
  * Same as above except return -EAGAIN if the slot cannot be locked
  */
 static int __pci_try_reset_slot(struct pci_slot *slot)
@@ -4877,33 +4853,6 @@ int pci_probe_reset_bus(struct pci_bus *bus)
 }
 EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
 
-/**
- * pci_reset_bus - reset a PCI bus
- * @bus: top level PCI bus to reset
- *
- * Do a bus reset on the given bus and any subordinate buses, saving
- * and restoring state of all devices.
- *
- * Return 0 on success, non-zero on error.
- */
-int pci_reset_bus(struct pci_bus *bus)
-{
-	int rc;
-
-	rc = pci_bus_reset(bus, 1);
-	if (rc)
-		return rc;
-
-	pci_bus_save_and_disable(bus);
-
-	rc = pci_bus_reset(bus, 0);
-
-	pci_bus_restore(bus);
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(pci_reset_bus);
-
 /**
  * __pci_try_reset_bus - Try to reset a PCI bus
  * @bus: top level PCI bus to reset
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 19fb82559145..cf53ecb50789 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1096,9 +1096,7 @@ int pci_reset_function(struct pci_dev *dev);
 int pci_reset_function_locked(struct pci_dev *dev);
 int pci_try_reset_function(struct pci_dev *dev);
 int pci_probe_reset_slot(struct pci_slot *slot);
-int pci_reset_slot(struct pci_slot *slot);
 int pci_probe_reset_bus(struct pci_bus *bus);
-int pci_reset_bus(struct pci_bus *bus);
 int pci_try_reset_bus(struct pci_dev *dev);
 void pci_reset_secondary_bus(struct pci_dev *dev);
 void pcibios_reset_secondary_bus(struct pci_dev *dev);
-- 
cgit v1.2.3


From c6a44ba950d147e15fe6dab6455a52f91d8fe625 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Thu, 19 Jul 2018 18:04:15 -0500
Subject: PCI: Rename pci_try_reset_bus() to pci_reset_bus()

Now that the old implementation of pci_reset_bus() is gone, replace
pci_try_reset_bus() with pci_reset_bus().

Compared to the old implementation, new code will fail immmediately with
-EAGAIN if object lock cannot be obtained.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/infiniband/hw/hfi1/pcie.c |  2 +-
 drivers/pci/pci.c                 | 16 ++++++++--------
 drivers/vfio/pci/vfio_pci.c       |  4 ++--
 include/linux/pci.h               |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index df4f2d390be8..baf7c324f7b8 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -905,7 +905,7 @@ static int trigger_sbr(struct hfi1_devdata *dd)
 	 * delay after a reset is required.  Per spec requirements,
 	 * the link is either working or not after that point.
 	 */
-	return pci_try_reset_bus(dev);
+	return pci_reset_bus(dev);
 }
 
 /*
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 8ee9f386c1ee..d123c2b173da 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4784,7 +4784,7 @@ int pci_probe_reset_slot(struct pci_slot *slot)
 EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
 
 /**
- * __pci_try_reset_slot - Try to reset a PCI slot
+ * __pci_reset_slot - Try to reset a PCI slot
  * @slot: PCI slot to reset
  *
  * A PCI bus may host multiple slots, each slot may support a reset mechanism
@@ -4798,7 +4798,7 @@ EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
  *
  * Same as above except return -EAGAIN if the slot cannot be locked
  */
-static int __pci_try_reset_slot(struct pci_slot *slot)
+static int __pci_reset_slot(struct pci_slot *slot)
 {
 	int rc;
 
@@ -4854,12 +4854,12 @@ int pci_probe_reset_bus(struct pci_bus *bus)
 EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
 
 /**
- * __pci_try_reset_bus - Try to reset a PCI bus
+ * __pci_reset_bus - Try to reset a PCI bus
  * @bus: top level PCI bus to reset
  *
  * Same as above except return -EAGAIN if the bus cannot be locked
  */
-static int __pci_try_reset_bus(struct pci_bus *bus)
+static int __pci_reset_bus(struct pci_bus *bus)
 {
 	int rc;
 
@@ -4882,17 +4882,17 @@ static int __pci_try_reset_bus(struct pci_bus *bus)
 }
 
 /**
- * pci_try_reset_bus - Try to reset a PCI bus
+ * pci_reset_bus - Try to reset a PCI bus
  * @pdev: top level PCI device to reset via slot/bus
  *
  * Same as above except return -EAGAIN if the bus cannot be locked
  */
-int pci_try_reset_bus(struct pci_dev *pdev)
+int pci_reset_bus(struct pci_dev *pdev)
 {
 	return pci_probe_reset_slot(pdev->slot) ?
-	    __pci_try_reset_slot(pdev->slot) : __pci_try_reset_bus(pdev->bus);
+	    __pci_reset_slot(pdev->slot) : __pci_reset_bus(pdev->bus);
 }
-EXPORT_SYMBOL_GPL(pci_try_reset_bus);
+EXPORT_SYMBOL_GPL(pci_reset_bus);
 
 /**
  * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 71018ec9065b..345c0dc8a6dc 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1010,7 +1010,7 @@ reset_info_exit:
 						    &info, slot);
 		if (!ret)
 			/* User has access, do the reset */
-			ret = pci_try_reset_bus(vdev->pdev);
+			ret = pci_reset_bus(vdev->pdev);
 
 hot_reset_release:
 		for (i--; i >= 0; i--)
@@ -1372,7 +1372,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 	}
 
 	if (needs_reset)
-		ret = pci_try_reset_bus(vdev->pdev);
+		ret = pci_reset_bus(vdev->pdev);
 
 put_devs:
 	for (i = 0; i < devs.cur_index; i++) {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cf53ecb50789..307b336496f6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1097,7 +1097,7 @@ int pci_reset_function_locked(struct pci_dev *dev);
 int pci_try_reset_function(struct pci_dev *dev);
 int pci_probe_reset_slot(struct pci_slot *slot);
 int pci_probe_reset_bus(struct pci_bus *bus);
-int pci_try_reset_bus(struct pci_dev *dev);
+int pci_reset_bus(struct pci_dev *dev);
 void pci_reset_secondary_bus(struct pci_dev *dev);
 void pcibios_reset_secondary_bus(struct pci_dev *dev);
 void pci_update_resource(struct pci_dev *dev, int resno);
-- 
cgit v1.2.3


From b976690f5db26fbc7c2be413bfa0fbd270547a94 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 18 Jul 2018 11:41:06 +0200
Subject: x86/mm/pti: Introduce pti_finalize()

Introduce a new function to finalize the kernel mappings for the userspace
page-table after all ro/nx protections have been applied to the kernel
mappings.

Also move the call to pti_clone_kernel_text() to that function so that it
will run on 32 bit kernels too.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Pavel Machek <pavel@ucw.cz>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: linux-mm@kvack.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Waiman Long <llong@redhat.com>
Cc: "David H . Gutteridge" <dhgutteridge@sympatico.ca>
Cc: joro@8bytes.org
Link: https://lkml.kernel.org/r/1531906876-13451-30-git-send-email-joro@8bytes.org
---
 arch/x86/include/asm/pti.h |  3 +--
 arch/x86/mm/init_64.c      |  6 ------
 arch/x86/mm/pti.c          | 14 +++++++++++++-
 include/linux/pti.h        |  1 +
 init/main.c                |  7 +++++++
 5 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 38a17f1d5c9d..5df09a0b80b8 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -6,10 +6,9 @@
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 extern void pti_init(void);
 extern void pti_check_boottime_disable(void);
-extern void pti_clone_kernel_text(void);
+extern void pti_finalize(void);
 #else
 static inline void pti_check_boottime_disable(void) { }
-static inline void pti_clone_kernel_text(void) { }
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a688617c727e..9b19f9a8948e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1291,12 +1291,6 @@ void mark_rodata_ro(void)
 			(unsigned long) __va(__pa_symbol(_sdata)));
 
 	debug_checkwx();
-
-	/*
-	 * Do this after all of the manipulation of the
-	 * kernel text page tables are complete.
-	 */
-	pti_clone_kernel_text();
 }
 
 int kern_addr_valid(unsigned long addr)
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index e41ee93c430d..fcfb815d420f 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -462,7 +462,7 @@ static inline bool pti_kernel_image_global_ok(void)
  * For some configurations, map all of kernel text into the user page
  * tables.  This reduces TLB misses, especially on non-PCID systems.
  */
-void pti_clone_kernel_text(void)
+static void pti_clone_kernel_text(void)
 {
 	/*
 	 * rodata is part of the kernel image and is normally
@@ -526,3 +526,15 @@ void __init pti_init(void)
 	pti_setup_espfix64();
 	pti_setup_vsyscall();
 }
+
+/*
+ * Finalize the kernel mappings in the userspace page-table.
+ */
+void pti_finalize(void)
+{
+	/*
+	 * Do this after all of the manipulation of the
+	 * kernel text page tables are complete.
+	 */
+	pti_clone_kernel_text();
+}
diff --git a/include/linux/pti.h b/include/linux/pti.h
index 0174883a935a..1a941efcaa62 100644
--- a/include/linux/pti.h
+++ b/include/linux/pti.h
@@ -6,6 +6,7 @@
 #include <asm/pti.h>
 #else
 static inline void pti_init(void) { }
+static inline void pti_finalize(void) { }
 #endif
 
 #endif
diff --git a/init/main.c b/init/main.c
index 3b4ada11ed52..fcfef46c935a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1065,6 +1065,13 @@ static int __ref kernel_init(void *unused)
 	jump_label_invalidate_initmem();
 	free_initmem();
 	mark_readonly();
+
+	/*
+	 * Kernel mappings are now finalized - update the userspace page-table
+	 * to finalize PTI.
+	 */
+	pti_finalize();
+
 	system_state = SYSTEM_RUNNING;
 	numa_default_policy();
 
-- 
cgit v1.2.3


From 985e695074d35768cb04d65f58bca45f7bf1a99d Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Fri, 13 Jul 2018 14:06:42 +0200
Subject: timekeeping/ntp: Constify some function arguments

Add 'const' to some function arguments and variables to make it easier
to read the code.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
[jstultz: Also fixup pre-existing checkpatch warnings for
 prototype arguments with no variable name]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/timekeeping.h        |  2 +-
 kernel/time/ntp.c                  |  6 +++---
 kernel/time/ntp_internal.h         |  4 ++--
 kernel/time/timekeeping.c          | 29 +++++++++++++++--------------
 kernel/time/timekeeping_debug.c    |  2 +-
 kernel/time/timekeeping_internal.h |  2 +-
 6 files changed, 23 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 86bc2026efce..edace6b656e9 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -177,7 +177,7 @@ static inline time64_t ktime_get_clocktai_seconds(void)
 extern bool timekeeping_rtc_skipsuspend(void);
 extern bool timekeeping_rtc_skipresume(void);
 
-extern void timekeeping_inject_sleeptime64(struct timespec64 *delta);
+extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
 
 /*
  * struct system_time_snapshot - simultaneous raw/real time capture with
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index a627cae8baab..c5e0cba3b39c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -642,7 +642,7 @@ void ntp_notify_cmos_timer(void)
 /*
  * Propagate a new txc->status value into the NTP state:
  */
-static inline void process_adj_status(struct timex *txc)
+static inline void process_adj_status(const struct timex *txc)
 {
 	if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
 		time_state = TIME_OK;
@@ -665,7 +665,7 @@ static inline void process_adj_status(struct timex *txc)
 }
 
 
-static inline void process_adjtimex_modes(struct timex *txc, s32 *time_tai)
+static inline void process_adjtimex_modes(const struct timex *txc, s32 *time_tai)
 {
 	if (txc->modes & ADJ_STATUS)
 		process_adj_status(txc);
@@ -716,7 +716,7 @@ static inline void process_adjtimex_modes(struct timex *txc, s32 *time_tai)
  * adjtimex mainly allows reading (and writing, if superuser) of
  * kernel time-keeping variables. used by xntpd.
  */
-int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai)
+int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai)
 {
 	int result;
 
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 909bd1f1bfb1..c24b0e13f011 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -8,6 +8,6 @@ extern void ntp_clear(void);
 extern u64 ntp_tick_length(void);
 extern ktime_t ntp_get_next_leap(void);
 extern int second_overflow(time64_t secs);
-extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
-extern void __hardpps(const struct timespec64 *, const struct timespec64 *);
+extern int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai);
+extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index ad779c2ec53a..7033ac1fee65 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -105,7 +105,7 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
 	}
 }
 
-static inline struct timespec64 tk_xtime(struct timekeeper *tk)
+static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
 {
 	struct timespec64 ts;
 
@@ -162,7 +162,7 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
  * a read of the fast-timekeeper tkrs (which is protected by its own locking
  * and update logic).
  */
-static inline u64 tk_clock_read(struct tk_read_base *tkr)
+static inline u64 tk_clock_read(const struct tk_read_base *tkr)
 {
 	struct clocksource *clock = READ_ONCE(tkr->clock);
 
@@ -211,7 +211,7 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
 	}
 }
 
-static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	u64 now, last, mask, max, delta;
@@ -255,7 +255,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
 {
 }
-static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
 {
 	u64 cycle_now, delta;
 
@@ -352,7 +352,7 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
 static inline u32 arch_gettimeoffset(void) { return 0; }
 #endif
 
-static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, u64 delta)
+static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta)
 {
 	u64 nsec;
 
@@ -363,7 +363,7 @@ static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, u64 delta)
 	return nsec + arch_gettimeoffset();
 }
 
-static inline u64 timekeeping_get_ns(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
 {
 	u64 delta;
 
@@ -371,7 +371,7 @@ static inline u64 timekeeping_get_ns(struct tk_read_base *tkr)
 	return timekeeping_delta_to_ns(tkr, delta);
 }
 
-static inline u64 timekeeping_cycles_to_ns(struct tk_read_base *tkr, u64 cycles)
+static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
 {
 	u64 delta;
 
@@ -394,7 +394,8 @@ static inline u64 timekeeping_cycles_to_ns(struct tk_read_base *tkr, u64 cycles)
  * slightly wrong timestamp (a few nanoseconds). See
  * @ktime_get_mono_fast_ns.
  */
-static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
+static void update_fast_timekeeper(const struct tk_read_base *tkr,
+				   struct tk_fast *tkf)
 {
 	struct tk_read_base *base = tkf->base;
 
@@ -549,10 +550,10 @@ EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
  * number of cycles every time until timekeeping is resumed at which time the
  * proper readout base for the fast timekeeper will be restored automatically.
  */
-static void halt_fast_timekeeper(struct timekeeper *tk)
+static void halt_fast_timekeeper(const struct timekeeper *tk)
 {
 	static struct tk_read_base tkr_dummy;
-	struct tk_read_base *tkr = &tk->tkr_mono;
+	const struct tk_read_base *tkr = &tk->tkr_mono;
 
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
 	cycles_at_suspend = tk_clock_read(tkr);
@@ -1277,7 +1278,7 @@ EXPORT_SYMBOL(do_settimeofday64);
  *
  * Adds or subtracts an offset value from the current time.
  */
-static int timekeeping_inject_offset(struct timespec64 *ts)
+static int timekeeping_inject_offset(const struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long flags;
@@ -1585,7 +1586,7 @@ static struct timespec64 timekeeping_suspend_time;
  * adds the sleep offset to the timekeeping variables.
  */
 static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
-					   struct timespec64 *delta)
+					   const struct timespec64 *delta)
 {
 	if (!timespec64_valid_strict(delta)) {
 		printk_deferred(KERN_WARNING
@@ -1646,7 +1647,7 @@ bool timekeeping_rtc_skipsuspend(void)
  * This function should only be called by rtc_resume(), and allows
  * a suspend offset to be injected into the timekeeping values.
  */
-void timekeeping_inject_sleeptime64(struct timespec64 *delta)
+void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long flags;
@@ -2240,7 +2241,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
 /**
  * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
  */
-static int timekeeping_validate_timex(struct timex *txc)
+static int timekeeping_validate_timex(const struct timex *txc)
 {
 	if (txc->modes & ADJ_ADJTIME) {
 		/* singleshot must not be used with any other mode bits */
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index 0754cadfa9e6..238e4be60229 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -70,7 +70,7 @@ static int __init tk_debug_sleep_time_init(void)
 }
 late_initcall(tk_debug_sleep_time_init);
 
-void tk_debug_account_sleep_time(struct timespec64 *t)
+void tk_debug_account_sleep_time(const struct timespec64 *t)
 {
 	/* Cap bin index so we don't overflow the array */
 	int bin = min(fls(t->tv_sec), NUM_BINS-1);
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index cf5c0828ee31..bcbb52db2256 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -8,7 +8,7 @@
 #include <linux/time.h>
 
 #ifdef CONFIG_DEBUG_FS
-extern void tk_debug_account_sleep_time(struct timespec64 *t);
+extern void tk_debug_account_sleep_time(const struct timespec64 *t);
 #else
 #define tk_debug_account_sleep_time(x)
 #endif
-- 
cgit v1.2.3


From 39232ed5a1793f67b11430c43ed8a9ed6e96c6eb Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Tue, 17 Jul 2018 15:55:16 +0800
Subject: time: Introduce one suspend clocksource to compensate the suspend
 time

On some hardware with multiple clocksources, we have coarse grained
clocksources that support the CLOCK_SOURCE_SUSPEND_NONSTOP flag, but
which are less than ideal for timekeeping whereas other clocksources
can be better candidates but halt on suspend.

Currently, the timekeeping core only supports timing suspend using
CLOCK_SOURCE_SUSPEND_NONSTOP clocksources if that clocksource is the
current clocksource for timekeeping.

As a result, some architectures try to implement read_persistent_clock64()
using those non-stop clocksources, but isn't really ideal, which will
introduce more duplicate code. To fix this, provide logic to allow a
registered SUSPEND_NONSTOP clocksource, which isn't the current
clocksource, to be used to calculate the suspend time.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
[jstultz: minor tweaks to merge with previous resume changes]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/clocksource.h |   3 +
 kernel/time/clocksource.c   | 149 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/time/timekeeping.c   |  22 ++++---
 3 files changed, 166 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7dff1963c185..308918928767 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -194,6 +194,9 @@ extern void clocksource_suspend(void);
 extern void clocksource_resume(void);
 extern struct clocksource * __init clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
+extern void
+clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles);
+extern u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 now);
 
 extern u64
 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index f89a78e2792b..f74fb00d8064 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -94,6 +94,8 @@ EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
 /*[Clocksource internal variables]---------
  * curr_clocksource:
  *	currently selected clocksource.
+ * suspend_clocksource:
+ *	used to calculate the suspend time.
  * clocksource_list:
  *	linked list with the registered clocksources
  * clocksource_mutex:
@@ -102,10 +104,12 @@ EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
  *	Name of the user-specified clocksource.
  */
 static struct clocksource *curr_clocksource;
+static struct clocksource *suspend_clocksource;
 static LIST_HEAD(clocksource_list);
 static DEFINE_MUTEX(clocksource_mutex);
 static char override_name[CS_NAME_LEN];
 static int finished_booting;
+static u64 suspend_start;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
@@ -447,6 +451,140 @@ static inline void clocksource_watchdog_unlock(unsigned long *flags) { }
 
 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
 
+static bool clocksource_is_suspend(struct clocksource *cs)
+{
+	return cs == suspend_clocksource;
+}
+
+static void __clocksource_suspend_select(struct clocksource *cs)
+{
+	/*
+	 * Skip the clocksource which will be stopped in suspend state.
+	 */
+	if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
+		return;
+
+	/*
+	 * The nonstop clocksource can be selected as the suspend clocksource to
+	 * calculate the suspend time, so it should not supply suspend/resume
+	 * interfaces to suspend the nonstop clocksource when system suspends.
+	 */
+	if (cs->suspend || cs->resume) {
+		pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
+			cs->name);
+	}
+
+	/* Pick the best rating. */
+	if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
+		suspend_clocksource = cs;
+}
+
+/**
+ * clocksource_suspend_select - Select the best clocksource for suspend timing
+ * @fallback:	if select a fallback clocksource
+ */
+static void clocksource_suspend_select(bool fallback)
+{
+	struct clocksource *cs, *old_suspend;
+
+	old_suspend = suspend_clocksource;
+	if (fallback)
+		suspend_clocksource = NULL;
+
+	list_for_each_entry(cs, &clocksource_list, list) {
+		/* Skip current if we were requested for a fallback. */
+		if (fallback && cs == old_suspend)
+			continue;
+
+		__clocksource_suspend_select(cs);
+	}
+}
+
+/**
+ * clocksource_start_suspend_timing - Start measuring the suspend timing
+ * @cs:			current clocksource from timekeeping
+ * @start_cycles:	current cycles from timekeeping
+ *
+ * This function will save the start cycle values of suspend timer to calculate
+ * the suspend time when resuming system.
+ *
+ * This function is called late in the suspend process from timekeeping_suspend(),
+ * that means processes are freezed, non-boot cpus and interrupts are disabled
+ * now. It is therefore possible to start the suspend timer without taking the
+ * clocksource mutex.
+ */
+void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
+{
+	if (!suspend_clocksource)
+		return;
+
+	/*
+	 * If current clocksource is the suspend timer, we should use the
+	 * tkr_mono.cycle_last value as suspend_start to avoid same reading
+	 * from suspend timer.
+	 */
+	if (clocksource_is_suspend(cs)) {
+		suspend_start = start_cycles;
+		return;
+	}
+
+	if (suspend_clocksource->enable &&
+	    suspend_clocksource->enable(suspend_clocksource)) {
+		pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
+		return;
+	}
+
+	suspend_start = suspend_clocksource->read(suspend_clocksource);
+}
+
+/**
+ * clocksource_stop_suspend_timing - Stop measuring the suspend timing
+ * @cs:		current clocksource from timekeeping
+ * @cycle_now:	current cycles from timekeeping
+ *
+ * This function will calculate the suspend time from suspend timer.
+ *
+ * Returns nanoseconds since suspend started, 0 if no usable suspend clocksource.
+ *
+ * This function is called early in the resume process from timekeeping_resume(),
+ * that means there is only one cpu, no processes are running and the interrupts
+ * are disabled. It is therefore possible to stop the suspend timer without
+ * taking the clocksource mutex.
+ */
+u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
+{
+	u64 now, delta, nsec = 0;
+
+	if (!suspend_clocksource)
+		return 0;
+
+	/*
+	 * If current clocksource is the suspend timer, we should use the
+	 * tkr_mono.cycle_last value from timekeeping as current cycle to
+	 * avoid same reading from suspend timer.
+	 */
+	if (clocksource_is_suspend(cs))
+		now = cycle_now;
+	else
+		now = suspend_clocksource->read(suspend_clocksource);
+
+	if (now > suspend_start) {
+		delta = clocksource_delta(now, suspend_start,
+					  suspend_clocksource->mask);
+		nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
+				       suspend_clocksource->shift);
+	}
+
+	/*
+	 * Disable the suspend timer to save power if current clocksource is
+	 * not the suspend timer.
+	 */
+	if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
+		suspend_clocksource->disable(suspend_clocksource);
+
+	return nsec;
+}
+
 /**
  * clocksource_suspend - suspend the clocksource(s)
  */
@@ -792,6 +930,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 
 	clocksource_select();
 	clocksource_select_watchdog(false);
+	__clocksource_suspend_select(cs);
 	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
@@ -820,6 +959,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
 
 	clocksource_select();
 	clocksource_select_watchdog(false);
+	clocksource_suspend_select(false);
 	mutex_unlock(&clocksource_mutex);
 }
 EXPORT_SYMBOL(clocksource_change_rating);
@@ -845,6 +985,15 @@ static int clocksource_unbind(struct clocksource *cs)
 			return -EBUSY;
 	}
 
+	if (clocksource_is_suspend(cs)) {
+		/*
+		 * Select and try to install a replacement suspend clocksource.
+		 * If no replacement suspend clocksource, we will just let the
+		 * clocksource go and have no suspend clocksource.
+		 */
+		clocksource_suspend_select(true);
+	}
+
 	clocksource_watchdog_lock(&flags);
 	clocksource_dequeue_watchdog(cs);
 	list_del_init(&cs->list);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 19414b16cf9e..d9e659a12c76 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1692,7 +1692,7 @@ void timekeeping_resume(void)
 	struct clocksource *clock = tk->tkr_mono.clock;
 	unsigned long flags;
 	struct timespec64 ts_new, ts_delta;
-	u64 cycle_now;
+	u64 cycle_now, nsec;
 	bool inject_sleeptime = false;
 
 	read_persistent_clock64(&ts_new);
@@ -1716,13 +1716,8 @@ void timekeeping_resume(void)
 	 * usable source. The rtc part is handled separately in rtc core code.
 	 */
 	cycle_now = tk_clock_read(&tk->tkr_mono);
-	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
-		cycle_now > tk->tkr_mono.cycle_last) {
-		u64 nsec, cyc_delta;
-
-		cyc_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
-					      tk->tkr_mono.mask);
-		nsec = mul_u64_u32_shr(cyc_delta, clock->mult, clock->shift);
+	nsec = clocksource_stop_suspend_timing(clock, cycle_now);
+	if (nsec > 0) {
 		ts_delta = ns_to_timespec64(nsec);
 		inject_sleeptime = true;
 	} else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
@@ -1757,6 +1752,8 @@ int timekeeping_suspend(void)
 	unsigned long flags;
 	struct timespec64		delta, delta_delta;
 	static struct timespec64	old_delta;
+	struct clocksource *curr_clock;
+	u64 cycle_now;
 
 	read_persistent_clock64(&timekeeping_suspend_time);
 
@@ -1775,6 +1772,15 @@ int timekeeping_suspend(void)
 	timekeeping_forward_now(tk);
 	timekeeping_suspended = 1;
 
+	/*
+	 * Since we've called forward_now, cycle_last stores the value
+	 * just read from the current clocksource. Save this to potentially
+	 * use in suspend timing.
+	 */
+	curr_clock = tk->tkr_mono.clock;
+	cycle_now = tk->tkr_mono.cycle_last;
+	clocksource_start_suspend_timing(curr_clock, cycle_now);
+
 	if (persistent_clock_exists) {
 		/*
 		 * To avoid drift caused by repeated suspend/resumes,
-- 
cgit v1.2.3


From b51dab46c6adfbb7e80cd0f59ae17b8a30d94b1a Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Wed, 18 Jul 2018 06:27:22 -0700
Subject: qed: Add qed APIs for PHY module query.

This patch adds qed APIs for reading the PHY module.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h  | 16 ++++++++++
 drivers/net/ethernet/qlogic/qed/qed_main.c | 23 ++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 49 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  | 16 ++++++++++
 include/linux/qed/qed_if.h                 | 15 +++++++++
 5 files changed, 119 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index bee10c1781fb..8faceb691657 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12444,6 +12444,8 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_STATS_TYPE_ISCSI           3
 #define DRV_MSG_CODE_STATS_TYPE_RDMA            4
 
+#define DRV_MSG_CODE_TRANSCEIVER_READ           0x00160000
+
 #define DRV_MSG_CODE_MASK_PARITIES              0x001a0000
 
 #define DRV_MSG_CODE_BIST_TEST			0x001e0000
@@ -12543,6 +12545,15 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_SET_LED_MODE_ON		0x1
 #define DRV_MB_PARAM_SET_LED_MODE_OFF		0x2
 
+#define DRV_MB_PARAM_TRANSCEIVER_PORT_OFFSET		0
+#define DRV_MB_PARAM_TRANSCEIVER_PORT_MASK		0x00000003
+#define DRV_MB_PARAM_TRANSCEIVER_SIZE_OFFSET		2
+#define DRV_MB_PARAM_TRANSCEIVER_SIZE_MASK		0x000000FC
+#define DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_OFFSET	8
+#define DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK	0x0000FF00
+#define DRV_MB_PARAM_TRANSCEIVER_OFFSET_OFFSET		16
+#define DRV_MB_PARAM_TRANSCEIVER_OFFSET_MASK		0xFFFF0000
+
 	/* Resource Allocation params - Driver version support */
 #define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK	0xFFFF0000
 #define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT	16
@@ -12596,6 +12607,9 @@ struct public_drv_mb {
 #define FW_MSG_CODE_PHY_OK			0x00110000
 #define FW_MSG_CODE_OK				0x00160000
 #define FW_MSG_CODE_ERROR			0x00170000
+#define FW_MSG_CODE_TRANSCEIVER_DIAG_OK		0x00160000
+#define FW_MSG_CODE_TRANSCEIVER_DIAG_ERROR	0x00170000
+#define FW_MSG_CODE_TRANSCEIVER_NOT_PRESENT	0x00020000
 
 #define FW_MSG_CODE_OS_WOL_SUPPORTED            0x00800000
 #define FW_MSG_CODE_OS_WOL_NOT_SUPPORTED        0x00810000
@@ -12687,6 +12701,8 @@ struct mcp_public_data {
 	struct public_func func[MCP_GLOB_FUNC_MAX];
 };
 
+#define MAX_I2C_TRANSACTION_SIZE	16
+
 /* OCBB definitions */
 enum tlvs {
 	/* Category 1: Device Properties */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 0cbc74d6ca8b..158944aa6097 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2102,6 +2102,28 @@ out:
 	return status;
 }
 
+static int qed_read_module_eeprom(struct qed_dev *cdev, char *buf,
+				  u8 dev_addr, u32 offset, u32 len)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt;
+	int rc = 0;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	rc = qed_mcp_phy_sfp_read(hwfn, ptt, MFW_PORT(hwfn), dev_addr,
+				  offset, len, buf);
+
+	qed_ptt_release(hwfn, ptt);
+
+	return rc;
+}
+
 static struct qed_selftest_ops qed_selftest_ops_pass = {
 	.selftest_memory = &qed_selftest_memory,
 	.selftest_interrupt = &qed_selftest_interrupt,
@@ -2144,6 +2166,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.update_mac = &qed_update_mac,
 	.update_mtu = &qed_update_mtu,
 	.update_wol = &qed_update_wol,
+	.read_module_eeprom = &qed_read_module_eeprom,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 4e0b443c9519..62a220fce6e1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -2463,6 +2463,55 @@ out:
 	return rc;
 }
 
+int qed_mcp_phy_sfp_read(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			 u32 port, u32 addr, u32 offset, u32 len, u8 *p_buf)
+{
+	u32 bytes_left, bytes_to_copy, buf_size, nvm_offset = 0;
+	u32 resp, param;
+	int rc;
+
+	nvm_offset |= (port << DRV_MB_PARAM_TRANSCEIVER_PORT_OFFSET) &
+		       DRV_MB_PARAM_TRANSCEIVER_PORT_MASK;
+	nvm_offset |= (addr << DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_OFFSET) &
+		       DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK;
+
+	addr = offset;
+	offset = 0;
+	bytes_left = len;
+	while (bytes_left > 0) {
+		bytes_to_copy = min_t(u32, bytes_left,
+				      MAX_I2C_TRANSACTION_SIZE);
+		nvm_offset &= (DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK |
+			       DRV_MB_PARAM_TRANSCEIVER_PORT_MASK);
+		nvm_offset |= ((addr + offset) <<
+			       DRV_MB_PARAM_TRANSCEIVER_OFFSET_OFFSET) &
+			       DRV_MB_PARAM_TRANSCEIVER_OFFSET_MASK;
+		nvm_offset |= (bytes_to_copy <<
+			       DRV_MB_PARAM_TRANSCEIVER_SIZE_OFFSET) &
+			       DRV_MB_PARAM_TRANSCEIVER_SIZE_MASK;
+		rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+					DRV_MSG_CODE_TRANSCEIVER_READ,
+					nvm_offset, &resp, &param, &buf_size,
+					(u32 *)(p_buf + offset));
+		if (rc) {
+			DP_NOTICE(p_hwfn,
+				  "Failed to send a transceiver read command to the MFW. rc = %d.\n",
+				  rc);
+			return rc;
+		}
+
+		if (resp == FW_MSG_CODE_TRANSCEIVER_NOT_PRESENT)
+			return -ENODEV;
+		else if (resp != FW_MSG_CODE_TRANSCEIVER_DIAG_OK)
+			return -EINVAL;
+
+		offset += buf_size;
+		bytes_left -= buf_size;
+	}
+
+	return 0;
+}
+
 int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 drv_mb_param = 0, rsp, param;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 632a838f1fe3..047976d5c6e9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -839,6 +839,22 @@ int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
 		       u32 *o_mcp_resp,
 		       u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf);
 
+/**
+ * @brief Read from sfp
+ *
+ *  @param p_hwfn - hw function
+ *  @param p_ptt  - PTT required for register access
+ *  @param port   - transceiver port
+ *  @param addr   - I2C address
+ *  @param offset - offset in sfp
+ *  @param len    - buffer length
+ *  @param p_buf  - buffer to read into
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_phy_sfp_read(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			 u32 port, u32 addr, u32 offset, u32 len, u8 *p_buf);
+
 /**
  * @brief indicates whether the MFW objects [under mcp_info] are accessible
  *
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index b4040023cbfb..8cd34645e892 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -759,6 +759,9 @@ struct qed_generic_tlvs {
 	u8 mac[QED_TLV_MAC_COUNT][ETH_ALEN];
 };
 
+#define QED_I2C_DEV_ADDR_A0 0xA0
+#define QED_I2C_DEV_ADDR_A2 0xA2
+
 #define QED_NVM_SIGNATURE 0x12435687
 
 enum qed_nvm_flash_cmd {
@@ -1026,6 +1029,18 @@ struct qed_common_ops {
  * @param enabled - true iff WoL should be enabled.
  */
 	int (*update_wol) (struct qed_dev *cdev, bool enabled);
+
+/**
+ * @brief read_module_eeprom
+ *
+ * @param cdev
+ * @param buf - buffer
+ * @param dev_addr - PHY device memory region
+ * @param offset - offset into eeprom contents to be read
+ * @param len - buffer length, i.e., max bytes to be read
+ */
+	int (*read_module_eeprom)(struct qed_dev *cdev,
+				  char *buf, u8 dev_addr, u32 offset, u32 len);
 };
 
 #define MASK_FIELD(_name, _value) \
-- 
cgit v1.2.3


From 2db1581e1f432ac6b4efe152c57fdfb4de85c154 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sun, 8 Jul 2018 14:23:21 +0800
Subject: Revert "iommu/vt-d: Clean up pasid quirk for pre-production devices"

This reverts commit ab96746aaa344fb720a198245a837e266fad3b62.

The commit ab96746aaa34 ("iommu/vt-d: Clean up pasid quirk for
pre-production devices") triggers ECS mode on some platforms
which have broken ECS support. As the result, graphic device
will be inoperable on boot.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107017

Cc: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 32 ++++++++++++++++++++++++++++++--
 include/linux/intel-iommu.h |  1 +
 2 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index b344a883f116..115ff26e9ced 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -484,14 +484,37 @@ static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int intel_iommu_ecs = 1;
+static int intel_iommu_pasid28;
 static int iommu_identity_mapping;
 
 #define IDENTMAP_ALL		1
 #define IDENTMAP_GFX		2
 #define IDENTMAP_AZALIA		4
 
-#define ecs_enabled(iommu)	(intel_iommu_ecs && ecap_ecs(iommu->ecap))
-#define pasid_enabled(iommu)	(ecs_enabled(iommu) && ecap_pasid(iommu->ecap))
+/* Broadwell and Skylake have broken ECS support — normal so-called "second
+ * level" translation of DMA requests-without-PASID doesn't actually happen
+ * unless you also set the NESTE bit in an extended context-entry. Which of
+ * course means that SVM doesn't work because it's trying to do nested
+ * translation of the physical addresses it finds in the process page tables,
+ * through the IOVA->phys mapping found in the "second level" page tables.
+ *
+ * The VT-d specification was retroactively changed to change the definition
+ * of the capability bits and pretend that Broadwell/Skylake never happened...
+ * but unfortunately the wrong bit was changed. It's ECS which is broken, but
+ * for some reason it was the PASID capability bit which was redefined (from
+ * bit 28 on BDW/SKL to bit 40 in future).
+ *
+ * So our test for ECS needs to eschew those implementations which set the old
+ * PASID capabiity bit 28, since those are the ones on which ECS is broken.
+ * Unless we are working around the 'pasid28' limitations, that is, by putting
+ * the device into passthrough mode for normal DMA and thus masking the bug.
+ */
+#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
+			    (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
+/* PASID support is thus enabled if ECS is enabled and *either* of the old
+ * or new capability bits are set. */
+#define pasid_enabled(iommu) (ecs_enabled(iommu) &&			\
+			      (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
 
 int intel_iommu_gfx_mapped;
 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@ -554,6 +577,11 @@ static int __init intel_iommu_setup(char *str)
 			printk(KERN_INFO
 				"Intel-IOMMU: disable extended context table support\n");
 			intel_iommu_ecs = 0;
+		} else if (!strncmp(str, "pasid28", 7)) {
+			printk(KERN_INFO
+				"Intel-IOMMU: enable pre-production PASID support\n");
+			intel_iommu_pasid28 = 1;
+			iommu_identity_mapping |= IDENTMAP_GFX;
 		} else if (!strncmp(str, "tboot_noforce", 13)) {
 			printk(KERN_INFO
 				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 1df940196ab2..ef169d67df92 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -121,6 +121,7 @@
 #define ecap_srs(e)		((e >> 31) & 0x1)
 #define ecap_ers(e)		((e >> 30) & 0x1)
 #define ecap_prs(e)		((e >> 29) & 0x1)
+#define ecap_broken_pasid(e)	((e >> 28) & 0x1)
 #define ecap_dis(e)		((e >> 27) & 0x1)
 #define ecap_nest(e)		((e >> 26) & 0x1)
 #define ecap_mts(e)		((e >> 25) & 0x1)
-- 
cgit v1.2.3


From 51261aac51a05c791ef880a100ac2ceed201ef72 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:55 +0800
Subject: iommu/vt-d: Avoid using idr_for_each_entry()

idr_for_each_entry() is used to iteratte over idr elements
of a given type. It isn't suitable for the globle pasid idr
since the pasid idr consumer could specify different types
of pointers to bind with a pasid.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-svm.c   | 14 ++++++++++----
 include/linux/intel-iommu.h |  1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 2cc0aac93201..36cc1d1b8afc 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -298,6 +298,7 @@ static const struct mmu_notifier_ops intel_mmuops = {
 };
 
 static DEFINE_MUTEX(pasid_mutex);
+static LIST_HEAD(global_svm_list);
 
 int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
 {
@@ -329,13 +330,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 
 	mutex_lock(&pasid_mutex);
 	if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
-		int i;
+		struct intel_svm *t;
 
-		idr_for_each_entry(&iommu->pasid_idr, svm, i) {
-			if (svm->mm != mm ||
-			    (svm->flags & SVM_FLAG_PRIVATE_PASID))
+		list_for_each_entry(t, &global_svm_list, list) {
+			if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID))
 				continue;
 
+			svm = t;
 			if (svm->pasid >= pasid_max) {
 				dev_warn(dev,
 					 "Limited PASID width. Cannot use existing PASID %d\n",
@@ -404,6 +405,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		svm->mm = mm;
 		svm->flags = flags;
 		INIT_LIST_HEAD_RCU(&svm->devs);
+		INIT_LIST_HEAD(&svm->list);
 		ret = -ENOMEM;
 		if (mm) {
 			ret = mmu_notifier_register(&svm->notifier, mm);
@@ -430,6 +432,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		 */
 		if (cap_caching_mode(iommu->cap))
 			intel_flush_pasid_dev(svm, sdev, svm->pasid);
+
+		list_add_tail(&svm->list, &global_svm_list);
 	}
 	list_add_rcu(&sdev->list, &svm->devs);
 
@@ -485,6 +489,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 					if (svm->mm)
 						mmu_notifier_unregister(&svm->notifier, svm->mm);
 
+					list_del(&svm->list);
+
 					/* We mandate that no page faults may be outstanding
 					 * for the PASID when intel_svm_unbind_mm() is called.
 					 * If that is not obeyed, subtle errors will happen.
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 6692b40ca814..e1e193855581 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -487,6 +487,7 @@ struct intel_svm {
 	int flags;
 	int pasid;
 	struct list_head devs;
+	struct list_head list;
 };
 
 extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev);
-- 
cgit v1.2.3


From af39507305fb83a5d3c475c2851f4d59545d8a18 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:56 +0800
Subject: iommu/vt-d: Apply global PASID in SVA

This patch applies the global pasid name space in the shared
virtual address (SVA) implementation.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-svm.c   | 22 +++++++++++-----------
 include/linux/intel-iommu.h |  1 -
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 36cc1d1b8afc..56e65d0b1871 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -26,6 +26,8 @@
 #include <linux/interrupt.h>
 #include <asm/page.h>
 
+#include "intel-pasid.h"
+
 #define PASID_ENTRY_P		BIT_ULL(0)
 #define PASID_ENTRY_FLPM_5LP	BIT_ULL(9)
 #define PASID_ENTRY_SRE		BIT_ULL(11)
@@ -85,8 +87,6 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
 				iommu->name);
 	}
 
-	idr_init(&iommu->pasid_idr);
-
 	return 0;
 }
 
@@ -102,7 +102,7 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
 		free_pages((unsigned long)iommu->pasid_state_table, order);
 		iommu->pasid_state_table = NULL;
 	}
-	idr_destroy(&iommu->pasid_idr);
+
 	return 0;
 }
 
@@ -392,9 +392,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 			pasid_max = iommu->pasid_max;
 
 		/* Do not use PASID 0 in caching mode (virtualised IOMMU) */
-		ret = idr_alloc(&iommu->pasid_idr, svm,
-				!!cap_caching_mode(iommu->cap),
-				pasid_max - 1, GFP_KERNEL);
+		ret = intel_pasid_alloc_id(svm,
+					   !!cap_caching_mode(iommu->cap),
+					   pasid_max - 1, GFP_KERNEL);
 		if (ret < 0) {
 			kfree(svm);
 			kfree(sdev);
@@ -410,7 +410,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		if (mm) {
 			ret = mmu_notifier_register(&svm->notifier, mm);
 			if (ret) {
-				idr_remove(&svm->iommu->pasid_idr, svm->pasid);
+				intel_pasid_free_id(svm->pasid);
 				kfree(svm);
 				kfree(sdev);
 				goto out;
@@ -460,7 +460,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 	if (!iommu || !iommu->pasid_table)
 		goto out;
 
-	svm = idr_find(&iommu->pasid_idr, pasid);
+	svm = intel_pasid_lookup_id(pasid);
 	if (!svm)
 		goto out;
 
@@ -485,7 +485,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 					svm->iommu->pasid_table[svm->pasid].val = 0;
 					wmb();
 
-					idr_remove(&svm->iommu->pasid_idr, svm->pasid);
+					intel_pasid_free_id(svm->pasid);
 					if (svm->mm)
 						mmu_notifier_unregister(&svm->notifier, svm->mm);
 
@@ -520,7 +520,7 @@ int intel_svm_is_pasid_valid(struct device *dev, int pasid)
 	if (!iommu || !iommu->pasid_table)
 		goto out;
 
-	svm = idr_find(&iommu->pasid_idr, pasid);
+	svm = intel_pasid_lookup_id(pasid);
 	if (!svm)
 		goto out;
 
@@ -618,7 +618,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 
 		if (!svm || svm->pasid != req->pasid) {
 			rcu_read_lock();
-			svm = idr_find(&iommu->pasid_idr, req->pasid);
+			svm = intel_pasid_lookup_id(req->pasid);
 			/* It *can't* go away, because the driver is not permitted
 			 * to unbind the mm while any page faults are outstanding.
 			 * So we only need RCU to protect the internal idr code. */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e1e193855581..2ff15195f73d 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -420,7 +420,6 @@ struct intel_iommu {
 	struct pasid_state_entry *pasid_state_table;
 	struct page_req_dsc *prq;
 	unsigned char prq_name[16];    /* Name for PRQ interrupt */
-	struct idr pasid_idr;
 	u32 pasid_max;
 #endif
 	struct q_inval  *qi;            /* Queued invalidation info */
-- 
cgit v1.2.3


From 9ddbfb42138d84bb326023616c40a3dc30ea2837 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:57 +0800
Subject: iommu/vt-d: Move device_domain_info to header

This allows the per device iommu data and some helpers to be
used in other files.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 63 +++------------------------------------------
 include/linux/intel-iommu.h | 61 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index fa15ed036ddc..bb7f2b2a37ee 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -381,61 +381,6 @@ static int hw_pass_through = 1;
 	for (idx = 0; idx < g_num_of_iommus; idx++)		\
 		if (domain->iommu_refcnt[idx])
 
-struct dmar_domain {
-	int	nid;			/* node id */
-
-	unsigned	iommu_refcnt[DMAR_UNITS_SUPPORTED];
-					/* Refcount of devices per iommu */
-
-
-	u16		iommu_did[DMAR_UNITS_SUPPORTED];
-					/* Domain ids per IOMMU. Use u16 since
-					 * domain ids are 16 bit wide according
-					 * to VT-d spec, section 9.3 */
-
-	bool has_iotlb_device;
-	struct list_head devices;	/* all devices' list */
-	struct iova_domain iovad;	/* iova's that belong to this domain */
-
-	struct dma_pte	*pgd;		/* virtual address */
-	int		gaw;		/* max guest address width */
-
-	/* adjusted guest address width, 0 is level 2 30-bit */
-	int		agaw;
-
-	int		flags;		/* flags to find out type of domain */
-
-	int		iommu_coherency;/* indicate coherency of iommu access */
-	int		iommu_snooping; /* indicate snooping control feature*/
-	int		iommu_count;	/* reference count of iommu */
-	int		iommu_superpage;/* Level of superpages supported:
-					   0 == 4KiB (no superpages), 1 == 2MiB,
-					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
-	u64		max_addr;	/* maximum mapped address */
-
-	struct iommu_domain domain;	/* generic domain data structure for
-					   iommu core */
-};
-
-/* PCI domain-device relationship */
-struct device_domain_info {
-	struct list_head link;	/* link to domain siblings */
-	struct list_head global; /* link to global list */
-	u8 bus;			/* PCI bus number */
-	u8 devfn;		/* PCI devfn number */
-	u16 pfsid;		/* SRIOV physical function source ID */
-	u8 pasid_supported:3;
-	u8 pasid_enabled:1;
-	u8 pri_supported:1;
-	u8 pri_enabled:1;
-	u8 ats_supported:1;
-	u8 ats_enabled:1;
-	u8 ats_qdep;
-	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
-	struct intel_iommu *iommu; /* IOMMU used by this device */
-	struct dmar_domain *domain; /* pointer to domain */
-};
-
 struct dmar_rmrr_unit {
 	struct list_head list;		/* list of rmrr units	*/
 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
@@ -604,7 +549,7 @@ static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
 		domains[did & 0xff] = domain;
 }
 
-static inline void *alloc_pgtable_page(int node)
+void *alloc_pgtable_page(int node)
 {
 	struct page *page;
 	void *vaddr = NULL;
@@ -615,7 +560,7 @@ static inline void *alloc_pgtable_page(int node)
 	return vaddr;
 }
 
-static inline void free_pgtable_page(void *vaddr)
+void free_pgtable_page(void *vaddr)
 {
 	free_page((unsigned long)vaddr);
 }
@@ -698,7 +643,7 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
 }
 
 /* This functionin only returns single iommu in a domain */
-static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
+struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
 	int iommu_id;
 
@@ -3528,7 +3473,7 @@ static unsigned long intel_alloc_iova(struct device *dev,
 	return iova_pfn;
 }
 
-static struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
+struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
 {
 	struct dmar_domain *domain, *tmp;
 	struct dmar_rmrr_unit *rmrr;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 2ff15195f73d..2e1fbde020ca 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -31,6 +31,7 @@
 #include <linux/list.h>
 #include <linux/iommu.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmar.h>
 
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -387,6 +388,42 @@ struct pasid_entry;
 struct pasid_state_entry;
 struct page_req_dsc;
 
+struct dmar_domain {
+	int	nid;			/* node id */
+
+	unsigned	iommu_refcnt[DMAR_UNITS_SUPPORTED];
+					/* Refcount of devices per iommu */
+
+
+	u16		iommu_did[DMAR_UNITS_SUPPORTED];
+					/* Domain ids per IOMMU. Use u16 since
+					 * domain ids are 16 bit wide according
+					 * to VT-d spec, section 9.3 */
+
+	bool has_iotlb_device;
+	struct list_head devices;	/* all devices' list */
+	struct iova_domain iovad;	/* iova's that belong to this domain */
+
+	struct dma_pte	*pgd;		/* virtual address */
+	int		gaw;		/* max guest address width */
+
+	/* adjusted guest address width, 0 is level 2 30-bit */
+	int		agaw;
+
+	int		flags;		/* flags to find out type of domain */
+
+	int		iommu_coherency;/* indicate coherency of iommu access */
+	int		iommu_snooping; /* indicate snooping control feature*/
+	int		iommu_count;	/* reference count of iommu */
+	int		iommu_superpage;/* Level of superpages supported:
+					   0 == 4KiB (no superpages), 1 == 2MiB,
+					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
+	u64		max_addr;	/* maximum mapped address */
+
+	struct iommu_domain domain;	/* generic domain data structure for
+					   iommu core */
+};
+
 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64 		reg_phys; /* physical address of hw register set */
@@ -435,6 +472,25 @@ struct intel_iommu {
 	u32		flags;      /* Software defined flags */
 };
 
+/* PCI domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus number */
+	u8 devfn;		/* PCI devfn number */
+	u16 pfsid;		/* SRIOV physical function source ID */
+	u8 pasid_supported:3;
+	u8 pasid_enabled:1;
+	u8 pri_supported:1;
+	u8 pri_enabled:1;
+	u8 ats_supported:1;
+	u8 ats_enabled:1;
+	u8 ats_qdep;
+	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
+	struct intel_iommu *iommu; /* IOMMU used by this device */
+	struct dmar_domain *domain; /* pointer to domain */
+};
+
 static inline void __iommu_flush_cache(
 	struct intel_iommu *iommu, void *addr, int size)
 {
@@ -460,6 +516,11 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
 extern int dmar_ir_support(void);
 
+struct dmar_domain *get_valid_domain_for_dev(struct device *dev);
+void *alloc_pgtable_page(int node);
+void free_pgtable_page(void *vaddr);
+struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
+
 #ifdef CONFIG_INTEL_IOMMU_SVM
 extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
 extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu);
-- 
cgit v1.2.3


From 85319dcc8955f8f31828dc8bafff29f6aa011d93 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:58 +0800
Subject: iommu/vt-d: Add for_each_device_domain() helper

This adds a helper named for_each_device_domain() to iterate
over the elements in device_domain_list and invoke a callback
against each element. This allows to search the device_domain
list in other source files.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 21 +++++++++++++++++++++
 include/linux/intel-iommu.h |  2 ++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index bb7f2b2a37ee..f020da439ace 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -448,6 +448,27 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 static DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
+/*
+ * Iterate over elements in device_domain_list and call the specified
+ * callback @fn against each element. This helper should only be used
+ * in the context where the device_domain_lock has already been holden.
+ */
+int for_each_device_domain(int (*fn)(struct device_domain_info *info,
+				     void *data), void *data)
+{
+	int ret = 0;
+	struct device_domain_info *info;
+
+	assert_spin_locked(&device_domain_lock);
+	list_for_each_entry(info, &device_domain_list, global) {
+		ret = fn(info, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 const struct iommu_ops intel_iommu_ops;
 
 static bool translation_pre_enabled(struct intel_iommu *iommu)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 2e1fbde020ca..4fd4c6fee93e 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -520,6 +520,8 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev);
 void *alloc_pgtable_page(int node);
 void free_pgtable_page(void *vaddr);
 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
+int for_each_device_domain(int (*fn)(struct device_domain_info *info,
+				     void *data), void *data);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
-- 
cgit v1.2.3


From cc580e41260dbf1a46269235f1f2b572137d9d03 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:59 +0800
Subject: iommu/vt-d: Per PCI device pasid table interfaces

This patch adds the interfaces for per PCI device pasid
table management. Currently we allocate one pasid table
for all PCI devices under the scope of an IOMMU. It's
insecure in some cases where multiple devices under one
single IOMMU unit support PASID features. With per PCI
device pasid table, we can achieve finer protection and
isolation granularity.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Suggested-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c |   1 +
 drivers/iommu/intel-pasid.c | 179 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/intel-pasid.h |  18 +++++
 drivers/iommu/intel-svm.c   |   4 -
 include/linux/intel-iommu.h |   2 +
 5 files changed, 200 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f020da439ace..211925a75fb4 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2451,6 +2451,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 	info->dev = dev;
 	info->domain = domain;
 	info->iommu = iommu;
+	info->pasid_table = NULL;
 
 	if (dev && dev_is_pci(dev)) {
 		struct pci_dev *pdev = to_pci_dev(info->dev);
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index e918fe01ce7f..fe95c9bd4d33 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -13,6 +13,8 @@
 #include <linux/intel-iommu.h>
 #include <linux/iommu.h>
 #include <linux/memory.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
 #include <linux/spinlock.h>
 
 #include "intel-pasid.h"
@@ -58,3 +60,180 @@ void *intel_pasid_lookup_id(int pasid)
 
 	return p;
 }
+
+/*
+ * Per device pasid table management:
+ */
+static inline void
+device_attach_pasid_table(struct device_domain_info *info,
+			  struct pasid_table *pasid_table)
+{
+	info->pasid_table = pasid_table;
+	list_add(&info->table, &pasid_table->dev);
+}
+
+static inline void
+device_detach_pasid_table(struct device_domain_info *info,
+			  struct pasid_table *pasid_table)
+{
+	info->pasid_table = NULL;
+	list_del(&info->table);
+}
+
+struct pasid_table_opaque {
+	struct pasid_table	**pasid_table;
+	int			segment;
+	int			bus;
+	int			devfn;
+};
+
+static int search_pasid_table(struct device_domain_info *info, void *opaque)
+{
+	struct pasid_table_opaque *data = opaque;
+
+	if (info->iommu->segment == data->segment &&
+	    info->bus == data->bus &&
+	    info->devfn == data->devfn &&
+	    info->pasid_table) {
+		*data->pasid_table = info->pasid_table;
+		return 1;
+	}
+
+	return 0;
+}
+
+static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+	struct pasid_table_opaque *data = opaque;
+
+	data->segment = pci_domain_nr(pdev->bus);
+	data->bus = PCI_BUS_NUM(alias);
+	data->devfn = alias & 0xff;
+
+	return for_each_device_domain(&search_pasid_table, data);
+}
+
+/*
+ * Allocate a pasid table for @dev. It should be called in a
+ * single-thread context.
+ */
+int intel_pasid_alloc_table(struct device *dev)
+{
+	struct device_domain_info *info;
+	struct pasid_table *pasid_table;
+	struct pasid_table_opaque data;
+	struct page *pages;
+	size_t size, count;
+	int ret, order;
+
+	info = dev->archdata.iommu;
+	if (WARN_ON(!info || !dev_is_pci(dev) ||
+		    !info->pasid_supported || info->pasid_table))
+		return -EINVAL;
+
+	/* DMA alias device already has a pasid table, use it: */
+	data.pasid_table = &pasid_table;
+	ret = pci_for_each_dma_alias(to_pci_dev(dev),
+				     &get_alias_pasid_table, &data);
+	if (ret)
+		goto attach_out;
+
+	pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC);
+	if (!pasid_table)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&pasid_table->dev);
+
+	size = sizeof(struct pasid_entry);
+	count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id);
+	order = get_order(size * count);
+	pages = alloc_pages_node(info->iommu->node,
+				 GFP_ATOMIC | __GFP_ZERO,
+				 order);
+	if (!pages)
+		return -ENOMEM;
+
+	pasid_table->table = page_address(pages);
+	pasid_table->order = order;
+	pasid_table->max_pasid = count;
+
+attach_out:
+	device_attach_pasid_table(info, pasid_table);
+
+	return 0;
+}
+
+void intel_pasid_free_table(struct device *dev)
+{
+	struct device_domain_info *info;
+	struct pasid_table *pasid_table;
+
+	info = dev->archdata.iommu;
+	if (!info || !dev_is_pci(dev) ||
+	    !info->pasid_supported || !info->pasid_table)
+		return;
+
+	pasid_table = info->pasid_table;
+	device_detach_pasid_table(info, pasid_table);
+
+	if (!list_empty(&pasid_table->dev))
+		return;
+
+	free_pages((unsigned long)pasid_table->table, pasid_table->order);
+	kfree(pasid_table);
+}
+
+struct pasid_table *intel_pasid_get_table(struct device *dev)
+{
+	struct device_domain_info *info;
+
+	info = dev->archdata.iommu;
+	if (!info)
+		return NULL;
+
+	return info->pasid_table;
+}
+
+int intel_pasid_get_dev_max_id(struct device *dev)
+{
+	struct device_domain_info *info;
+
+	info = dev->archdata.iommu;
+	if (!info || !info->pasid_table)
+		return 0;
+
+	return info->pasid_table->max_pasid;
+}
+
+struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
+{
+	struct pasid_table *pasid_table;
+	struct pasid_entry *entries;
+
+	pasid_table = intel_pasid_get_table(dev);
+	if (WARN_ON(!pasid_table || pasid < 0 ||
+		    pasid >= intel_pasid_get_dev_max_id(dev)))
+		return NULL;
+
+	entries = pasid_table->table;
+
+	return &entries[pasid];
+}
+
+/*
+ * Interfaces for PASID table entry manipulation:
+ */
+static inline void pasid_clear_entry(struct pasid_entry *pe)
+{
+	WRITE_ONCE(pe->val, 0);
+}
+
+void intel_pasid_clear_entry(struct device *dev, int pasid)
+{
+	struct pasid_entry *pe;
+
+	pe = intel_pasid_get_entry(dev, pasid);
+	if (WARN_ON(!pe))
+		return;
+
+	pasid_clear_entry(pe);
+}
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index b1c5296290e5..1c05ed6fc5a5 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -13,9 +13,27 @@
 #define PASID_MIN			0x1
 #define PASID_MAX			0x100000
 
+struct pasid_entry {
+	u64 val;
+};
+
+/* The representative of a PASID table */
+struct pasid_table {
+	void			*table;		/* pasid table pointer */
+	int			order;		/* page order of pasid table */
+	int			max_pasid;	/* max pasid */
+	struct list_head	dev;		/* device list */
+};
+
 extern u32 intel_pasid_max_id;
 int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp);
 void intel_pasid_free_id(int pasid);
 void *intel_pasid_lookup_id(int pasid);
+int intel_pasid_alloc_table(struct device *dev);
+void intel_pasid_free_table(struct device *dev);
+struct pasid_table *intel_pasid_get_table(struct device *dev);
+int intel_pasid_get_dev_max_id(struct device *dev);
+struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid);
+void intel_pasid_clear_entry(struct device *dev, int pasid);
 
 #endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 56e65d0b1871..9b5dc7262677 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -34,10 +34,6 @@
 
 static irqreturn_t prq_event_thread(int irq, void *d);
 
-struct pasid_entry {
-	u64 val;
-};
-
 struct pasid_state_entry {
 	u64 val;
 };
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4fd4c6fee93e..e7901d402337 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -476,6 +476,7 @@ struct intel_iommu {
 struct device_domain_info {
 	struct list_head link;	/* link to domain siblings */
 	struct list_head global; /* link to global list */
+	struct list_head table;	/* link to pasid table */
 	u8 bus;			/* PCI bus number */
 	u8 devfn;		/* PCI devfn number */
 	u16 pfsid;		/* SRIOV physical function source ID */
@@ -489,6 +490,7 @@ struct device_domain_info {
 	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
 	struct intel_iommu *iommu; /* IOMMU used by this device */
 	struct dmar_domain *domain; /* pointer to domain */
+	struct pasid_table *pasid_table; /* pasid table */
 };
 
 static inline void __iommu_flush_cache(
-- 
cgit v1.2.3


From d9737953d85131436b09668b5e8d3389c37c1f28 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:47:02 +0800
Subject: iommu/vt-d: Remove the obsolete per iommu pasid tables

The obsolete per iommu pasid tables are no longer used. Hence,
clean up them.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c |  6 +++---
 drivers/iommu/intel-svm.c   | 17 ++---------------
 include/linux/intel-iommu.h |  5 ++---
 3 files changed, 7 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f1a3c5e5aff0..7ed0221dccf8 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1756,7 +1756,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
 	if (pasid_enabled(iommu)) {
 		if (ecap_prs(iommu->ecap))
 			intel_svm_finish_prq(iommu);
-		intel_svm_free_pasid_tables(iommu);
+		intel_svm_exit(iommu);
 	}
 #endif
 }
@@ -3337,7 +3337,7 @@ static int __init init_dmars(void)
 			hw_pass_through = 0;
 #ifdef CONFIG_INTEL_IOMMU_SVM
 		if (pasid_enabled(iommu))
-			intel_svm_alloc_pasid_tables(iommu);
+			intel_svm_init(iommu);
 #endif
 	}
 
@@ -4300,7 +4300,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 	if (pasid_enabled(iommu))
-		intel_svm_alloc_pasid_tables(iommu);
+		intel_svm_init(iommu);
 #endif
 
 	if (dmaru->ignored) {
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index a253cdeabd61..eb308363e541 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -38,7 +38,7 @@ struct pasid_state_entry {
 	u64 val;
 };
 
-int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
+int intel_svm_init(struct intel_iommu *iommu)
 {
 	struct page *pages;
 	int order;
@@ -63,15 +63,6 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
 		iommu->pasid_max = 0x20000;
 
 	order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
-	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
-	if (!pages) {
-		pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
-			iommu->name);
-		return -ENOMEM;
-	}
-	iommu->pasid_table = page_address(pages);
-	pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
-
 	if (ecap_dis(iommu->ecap)) {
 		/* Just making it explicit... */
 		BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
@@ -86,14 +77,10 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
 	return 0;
 }
 
-int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
+int intel_svm_exit(struct intel_iommu *iommu)
 {
 	int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
 
-	if (iommu->pasid_table) {
-		free_pages((unsigned long)iommu->pasid_table, order);
-		iommu->pasid_table = NULL;
-	}
 	if (iommu->pasid_state_table) {
 		free_pages((unsigned long)iommu->pasid_state_table, order);
 		iommu->pasid_state_table = NULL;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e7901d402337..3c43882d3b77 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -453,7 +453,6 @@ struct intel_iommu {
 	 * devices away to userspace processes (e.g. for DPDK) and don't
 	 * want to trust that userspace will use *only* the PASID it was
 	 * told to. But while it's all driver-arbitrated, we're fine. */
-	struct pasid_entry *pasid_table;
 	struct pasid_state_entry *pasid_state_table;
 	struct page_req_dsc *prq;
 	unsigned char prq_name[16];    /* Name for PRQ interrupt */
@@ -526,8 +525,8 @@ int for_each_device_domain(int (*fn)(struct device_domain_info *info,
 				     void *data), void *data);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
-extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
-extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu);
+int intel_svm_init(struct intel_iommu *iommu);
+int intel_svm_exit(struct intel_iommu *iommu);
 extern int intel_svm_enable_prq(struct intel_iommu *iommu);
 extern int intel_svm_finish_prq(struct intel_iommu *iommu);
 
-- 
cgit v1.2.3


From 226ab561075f6f8f3cd5f7b3b7544f3997aab51f Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:49:34 -0700
Subject: device-dax: Convert to vmf_insert_mixed and vm_fault_t

Use new return type vm_fault_t for fault and huge_fault handler. For
now, this is just documenting that the function returns a VM_FAULT value
rather than an errno.  Once all instances are converted, vm_fault_t will
become a distinct type.

Commit 1c8f422059ae ("mm: change return type to vm_fault_t")

Previously vm_insert_mixed() returned an error code which driver mapped into
VM_FAULT_* type. The new function vmf_insert_mixed() will replace this
inefficiency by returning VM_FAULT_* type.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/dax/device.c    | 26 +++++++++++---------------
 include/linux/huge_mm.h |  5 +++--
 mm/huge_memory.c        |  4 ++--
 3 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index de2f8297a210..ad5e7b4a15dc 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -244,11 +244,11 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
 	return -1;
 }
 
-static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
+				struct vm_fault *vmf)
 {
 	struct device *dev = &dev_dax->dev;
 	struct dax_region *dax_region;
-	int rc = VM_FAULT_SIGBUS;
 	phys_addr_t phys;
 	pfn_t pfn;
 	unsigned int fault_size = PAGE_SIZE;
@@ -274,17 +274,11 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	rc = vm_insert_mixed(vmf->vma, vmf->address, pfn);
-
-	if (rc == -ENOMEM)
-		return VM_FAULT_OOM;
-	if (rc < 0 && rc != -EBUSY)
-		return VM_FAULT_SIGBUS;
-
-	return VM_FAULT_NOPAGE;
+	return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
 }
 
-static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
+				struct vm_fault *vmf)
 {
 	unsigned long pmd_addr = vmf->address & PMD_MASK;
 	struct device *dev = &dev_dax->dev;
@@ -334,7 +328,8 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
+				struct vm_fault *vmf)
 {
 	unsigned long pud_addr = vmf->address & PUD_MASK;
 	struct device *dev = &dev_dax->dev;
@@ -384,13 +379,14 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 			vmf->flags & FAULT_FLAG_WRITE);
 }
 #else
-static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
+				struct vm_fault *vmf)
 {
 	return VM_FAULT_FALLBACK;
 }
 #endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
-static int dev_dax_huge_fault(struct vm_fault *vmf,
+static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
 	int rc, id;
@@ -420,7 +416,7 @@ static int dev_dax_huge_fault(struct vm_fault *vmf,
 	return rc;
 }
 
-static int dev_dax_fault(struct vm_fault *vmf)
+static vm_fault_t dev_dax_fault(struct vm_fault *vmf)
 {
 	return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
 }
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a8a126259bc4..d3bbf6bea9e9 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -3,6 +3,7 @@
 #define _LINUX_HUGE_MM_H
 
 #include <linux/sched/coredump.h>
+#include <linux/mm_types.h>
 
 #include <linux/fs.h> /* only for vma_is_dax() */
 
@@ -46,9 +47,9 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
 			int prot_numa);
-int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 			pmd_t *pmd, pfn_t pfn, bool write);
-int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
 			pud_t *pud, pfn_t pfn, bool write);
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1cd7c1a57a14..feba371169ca 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -752,7 +752,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 	spin_unlock(ptl);
 }
 
-int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 			pmd_t *pmd, pfn_t pfn, bool write)
 {
 	pgprot_t pgprot = vma->vm_page_prot;
@@ -812,7 +812,7 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
 	spin_unlock(ptl);
 }
 
-int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
 			pud_t *pud, pfn_t pfn, bool write)
 {
 	pgprot_t pgprot = vma->vm_page_prot;
-- 
cgit v1.2.3


From eef5ba1aa148ca5e6deb1e0aa1de797fa4e12cb7 Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Wed, 20 Jun 2018 10:51:53 +0200
Subject: i2c: smbus: add unlocked __i2c_smbus_xfer variant

Removes all locking from i2c_smbus_xfer and renames it to __i2c_smbus_xfer,
then adds a new i2c_smbus_xfer function that simply grabs the lock while
calling the unlocked variant.

This is not perfectly equivalent, since i2c_smbus_xfer was callable from
atomic/irq context if you happened to end up emulating SMBus with an I2C
transfer, and that is no longer the case with this patch. It is unknown
(to me) if anything depends on that quirk, but it seems fragile enough to
simply break those cases and require them to call i2c_transfer directly
instead.

While at it, for consistency rename the 2nd to last argument (size) of
the i2c_smbus_xfer declaration to protocol and remove the surplus extern
marker.

Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-smbus.c | 28 ++++++++++++++++++++--------
 include/linux/i2c.h          | 11 ++++++++---
 2 files changed, 28 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c
index 51970bae3c4a..9cd66cabb84f 100644
--- a/drivers/i2c/i2c-core-smbus.c
+++ b/drivers/i2c/i2c-core-smbus.c
@@ -463,7 +463,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
 			msg[num-1].len++;
 	}
 
-	status = i2c_transfer(adapter, msg, num);
+	status = __i2c_transfer(adapter, msg, num);
 	if (status < 0)
 		goto cleanup;
 	if (status != num) {
@@ -524,9 +524,24 @@ cleanup:
  * This executes an SMBus protocol operation, and returns a negative
  * errno code else zero on success.
  */
-s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
-		   char read_write, u8 command, int protocol,
-		   union i2c_smbus_data *data)
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		   unsigned short flags, char read_write,
+		   u8 command, int protocol, union i2c_smbus_data *data)
+{
+	s32 res;
+
+	i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
+	res = __i2c_smbus_xfer(adapter, addr, flags, read_write,
+			       command, protocol, data);
+	i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
+
+	return res;
+}
+EXPORT_SYMBOL(i2c_smbus_xfer);
+
+s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		     unsigned short flags, char read_write,
+		     u8 command, int protocol, union i2c_smbus_data *data)
 {
 	unsigned long orig_jiffies;
 	int try;
@@ -543,8 +558,6 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
 	flags &= I2C_M_TEN | I2C_CLIENT_PEC | I2C_CLIENT_SCCB;
 
 	if (adapter->algo->smbus_xfer) {
-		i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
-
 		/* Retry automatically on arbitration loss */
 		orig_jiffies = jiffies;
 		for (res = 0, try = 0; try <= adapter->retries; try++) {
@@ -557,7 +570,6 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
 				       orig_jiffies + adapter->timeout))
 				break;
 		}
-		i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
 
 		if (res != -EOPNOTSUPP || !adapter->algo->master_xfer)
 			goto trace;
@@ -579,7 +591,7 @@ trace:
 
 	return res;
 }
-EXPORT_SYMBOL(i2c_smbus_xfer);
+EXPORT_SYMBOL(__i2c_smbus_xfer);
 
 /**
  * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 254cd34eeae2..465afb092fa7 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -140,9 +140,14 @@ extern int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
    and probably just as fast.
    Note that we use i2c_adapter here, because you do not need a specific
    smbus adapter to call this function. */
-extern s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
-			  unsigned short flags, char read_write, u8 command,
-			  int size, union i2c_smbus_data *data);
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		   unsigned short flags, char read_write, u8 command,
+		   int protocol, union i2c_smbus_data *data);
+
+/* Unlocked flavor */
+s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		     unsigned short flags, char read_write, u8 command,
+		     int protocol, union i2c_smbus_data *data);
 
 /* Now follow the 'nice' access routines. These also document the calling
    conventions of i2c_smbus_xfer. */
-- 
cgit v1.2.3


From 488dee96bb62f0b3d9e678cf42574034d5b033a5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 20 Jul 2018 21:56:47 +0000
Subject: kernfs: allow creating kernfs objects with arbitrary uid/gid

This change allows creating kernfs files and directories with arbitrary
uid/gid instead of always using GLOBAL_ROOT_UID/GID by extending
kernfs_create_dir_ns() and kernfs_create_file_ns() with uid/gid arguments.
The "simple" kernfs_create_file() and kernfs_create_dir() are left alone
and always create objects belonging to the global root.

When creating symlinks ownership (uid/gid) is taken from the target kernfs
object.

Co-Developed-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c |  4 +++-
 fs/kernfs/dir.c                          | 29 ++++++++++++++++++++++++++---
 fs/kernfs/file.c                         |  8 ++++++--
 fs/kernfs/inode.c                        |  2 +-
 fs/kernfs/kernfs-internal.h              |  2 ++
 fs/kernfs/symlink.c                      | 11 ++++++++++-
 fs/sysfs/dir.c                           |  4 +++-
 fs/sysfs/file.c                          |  5 +++--
 include/linux/kernfs.h                   | 28 +++++++++++++++++++---------
 kernel/cgroup/cgroup.c                   |  4 +++-
 10 files changed, 76 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 749856a2e736..9af1a21265d3 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -146,6 +146,7 @@ static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 	int ret;
 
 	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 				  0, rft->kf_ops, rft, NULL, NULL);
 	if (IS_ERR(kn))
 		return PTR_ERR(kn);
@@ -1503,7 +1504,8 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
 	struct kernfs_node *kn;
 	int ret = 0;
 
-	kn = __kernfs_create_file(parent_kn, name, 0444, 0,
+	kn = __kernfs_create_file(parent_kn, name, 0444,
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
 				  &kf_mondata_ops, priv, NULL, NULL);
 	if (IS_ERR(kn))
 		return PTR_ERR(kn);
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index d66cc0777303..4ca0b5c18192 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -619,6 +619,7 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
 
 static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 					     const char *name, umode_t mode,
+					     kuid_t uid, kgid_t gid,
 					     unsigned flags)
 {
 	struct kernfs_node *kn;
@@ -661,8 +662,22 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 	kn->mode = mode;
 	kn->flags = flags;
 
+	if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
+		struct iattr iattr = {
+			.ia_valid = ATTR_UID | ATTR_GID,
+			.ia_uid = uid,
+			.ia_gid = gid,
+		};
+
+		ret = __kernfs_setattr(kn, &iattr);
+		if (ret < 0)
+			goto err_out3;
+	}
+
 	return kn;
 
+ err_out3:
+	idr_remove(&root->ino_idr, kn->id.ino);
  err_out2:
 	kmem_cache_free(kernfs_node_cache, kn);
  err_out1:
@@ -672,11 +687,13 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 
 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 				    const char *name, umode_t mode,
+				    kuid_t uid, kgid_t gid,
 				    unsigned flags)
 {
 	struct kernfs_node *kn;
 
-	kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
+	kn = __kernfs_new_node(kernfs_root(parent),
+			       name, mode, uid, gid, flags);
 	if (kn) {
 		kernfs_get(parent);
 		kn->parent = parent;
@@ -946,6 +963,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 	root->next_generation = 1;
 
 	kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
+			       GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 			       KERNFS_DIR);
 	if (!kn) {
 		idr_destroy(&root->ino_idr);
@@ -984,6 +1002,8 @@ void kernfs_destroy_root(struct kernfs_root *root)
  * @parent: parent in which to create a new directory
  * @name: name of the new directory
  * @mode: mode of the new directory
+ * @uid: uid of the new directory
+ * @gid: gid of the new directory
  * @priv: opaque data associated with the new directory
  * @ns: optional namespace tag of the directory
  *
@@ -991,13 +1011,15 @@ void kernfs_destroy_root(struct kernfs_root *root)
  */
 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 					 const char *name, umode_t mode,
+					 kuid_t uid, kgid_t gid,
 					 void *priv, const void *ns)
 {
 	struct kernfs_node *kn;
 	int rc;
 
 	/* allocate */
-	kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
+	kn = kernfs_new_node(parent, name, mode | S_IFDIR,
+			     uid, gid, KERNFS_DIR);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
@@ -1028,7 +1050,8 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
 	int rc;
 
 	/* allocate */
-	kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
+	kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR,
+			     GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 2015d8c45e4a..dbf5bc250bfd 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -965,6 +965,8 @@ const struct file_operations kernfs_file_fops = {
  * @parent: directory to create the file in
  * @name: name of the file
  * @mode: mode of the file
+ * @uid: uid of the file
+ * @gid: gid of the file
  * @size: size of the file
  * @ops: kernfs operations for the file
  * @priv: private data for the file
@@ -975,7 +977,8 @@ const struct file_operations kernfs_file_fops = {
  */
 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
 					 const char *name,
-					 umode_t mode, loff_t size,
+					 umode_t mode, kuid_t uid, kgid_t gid,
+					 loff_t size,
 					 const struct kernfs_ops *ops,
 					 void *priv, const void *ns,
 					 struct lock_class_key *key)
@@ -986,7 +989,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
 
 	flags = KERNFS_FILE;
 
-	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags);
+	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
+			     uid, gid, flags);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 3d73fe9d56e2..80cebcd94c90 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -63,7 +63,7 @@ out_unlock:
 	return ret;
 }
 
-static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
+int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
 {
 	struct kernfs_iattrs *attrs;
 	struct iattr *iattrs;
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 0f260dcca177..3d83b114bb08 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -90,6 +90,7 @@ int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr);
 int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
 		       u32 request_mask, unsigned int query_flags);
 ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
+int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
 
 /*
  * dir.c
@@ -104,6 +105,7 @@ void kernfs_put_active(struct kernfs_node *kn);
 int kernfs_add_one(struct kernfs_node *kn);
 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 				    const char *name, umode_t mode,
+				    kuid_t uid, kgid_t gid,
 				    unsigned flags);
 struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
 						    unsigned int ino);
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 08ccabd7047f..5ffed48f3d0e 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -21,6 +21,7 @@
  * @target: target node for the symlink to point to
  *
  * Returns the created node on success, ERR_PTR() value on error.
+ * Ownership of the link matches ownership of the target.
  */
 struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
 				       const char *name,
@@ -28,8 +29,16 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
 {
 	struct kernfs_node *kn;
 	int error;
+	kuid_t uid = GLOBAL_ROOT_UID;
+	kgid_t gid = GLOBAL_ROOT_GID;
 
-	kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK);
+	if (target->iattr) {
+		uid = target->iattr->ia_iattr.ia_uid;
+		gid = target->iattr->ia_iattr.ia_gid;
+	}
+
+	kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, uid, gid,
+			     KERNFS_LINK);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 58eba92a0e41..e39b884f0867 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -52,7 +52,9 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 		return -ENOENT;
 
 	kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
-				  S_IRWXU | S_IRUGO | S_IXUGO, kobj, ns);
+				  S_IRWXU | S_IRUGO | S_IXUGO,
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  kobj, ns);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
 			sysfs_warn_dup(parent, kobject_name(kobj));
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 5c13f29bfcdb..513fa691ecbd 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -302,8 +302,9 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 	if (!attr->ignore_lockdep)
 		key = attr->key ?: (struct lock_class_key *)&attr->skey;
 #endif
-	kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops,
-				  (void *)attr, ns, key);
+	kn = __kernfs_create_file(parent, attr->name,
+				  mode & 0777, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  size, ops, (void *)attr, ns, key);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
 			sysfs_warn_dup(parent, attr->name);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index ab25c8b6d9e3..814643f7ee52 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -15,6 +15,7 @@
 #include <linux/lockdep.h>
 #include <linux/rbtree.h>
 #include <linux/atomic.h>
+#include <linux/uidgid.h>
 #include <linux/wait.h>
 
 struct file;
@@ -325,12 +326,14 @@ void kernfs_destroy_root(struct kernfs_root *root);
 
 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 					 const char *name, umode_t mode,
+					 kuid_t uid, kgid_t gid,
 					 void *priv, const void *ns);
 struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
 					    const char *name);
 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
-					 const char *name,
-					 umode_t mode, loff_t size,
+					 const char *name, umode_t mode,
+					 kuid_t uid, kgid_t gid,
+					 loff_t size,
 					 const struct kernfs_ops *ops,
 					 void *priv, const void *ns,
 					 struct lock_class_key *key);
@@ -415,12 +418,14 @@ static inline void kernfs_destroy_root(struct kernfs_root *root) { }
 
 static inline struct kernfs_node *
 kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
-		     umode_t mode, void *priv, const void *ns)
+		     umode_t mode, kuid_t uid, kgid_t gid,
+		     void *priv, const void *ns)
 { return ERR_PTR(-ENOSYS); }
 
 static inline struct kernfs_node *
 __kernfs_create_file(struct kernfs_node *parent, const char *name,
-		     umode_t mode, loff_t size, const struct kernfs_ops *ops,
+		     umode_t mode, kuid_t uid, kgid_t gid,
+		     loff_t size, const struct kernfs_ops *ops,
 		     void *priv, const void *ns, struct lock_class_key *key)
 { return ERR_PTR(-ENOSYS); }
 
@@ -498,12 +503,15 @@ static inline struct kernfs_node *
 kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
 		  void *priv)
 {
-	return kernfs_create_dir_ns(parent, name, mode, priv, NULL);
+	return kernfs_create_dir_ns(parent, name, mode,
+				    GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				    priv, NULL);
 }
 
 static inline struct kernfs_node *
 kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
-		      umode_t mode, loff_t size, const struct kernfs_ops *ops,
+		      umode_t mode, kuid_t uid, kgid_t gid,
+		      loff_t size, const struct kernfs_ops *ops,
 		      void *priv, const void *ns)
 {
 	struct lock_class_key *key = NULL;
@@ -511,15 +519,17 @@ kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	key = (struct lock_class_key *)&ops->lockdep_key;
 #endif
-	return __kernfs_create_file(parent, name, mode, size, ops, priv, ns,
-				    key);
+	return __kernfs_create_file(parent, name, mode, uid, gid,
+				    size, ops, priv, ns, key);
 }
 
 static inline struct kernfs_node *
 kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
 		   loff_t size, const struct kernfs_ops *ops, void *priv)
 {
-	return kernfs_create_file_ns(parent, name, mode, size, ops, priv, NULL);
+	return kernfs_create_file_ns(parent, name, mode,
+				     GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				     size, ops, priv, NULL);
 }
 
 static inline int kernfs_remove_by_name(struct kernfs_node *parent,
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 077370bf8964..35cf3d71f8aa 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3557,7 +3557,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 	key = &cft->lockdep_key;
 #endif
 	kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
-				  cgroup_file_mode(cft), 0, cft->kf_ops, cft,
+				  cgroup_file_mode(cft),
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  0, cft->kf_ops, cft,
 				  NULL, key);
 	if (IS_ERR(kn))
 		return PTR_ERR(kn);
-- 
cgit v1.2.3


From 5f81880d5204ee2388fd9a75bb850ccd526885b7 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 20 Jul 2018 21:56:48 +0000
Subject: sysfs, kobject: allow creating kobject belonging to arbitrary users

Normally kobjects and their sysfs representation belong to global root,
however it is not necessarily the case for objects in separate namespaces.
For example, objects in separate network namespace logically belong to the
container's root and not global root.

This change lays groundwork for allowing network namespace objects
ownership to be transferred to container's root user by defining
get_ownership() callback in ktype structure and using it in sysfs code to
retrieve desired uid/gid when creating sysfs objects for given kobject.

Co-Developed-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/sysfs/dir.c          |  7 +++++--
 fs/sysfs/file.c         | 32 ++++++++++++++++++++------------
 fs/sysfs/group.c        | 23 +++++++++++++++++------
 fs/sysfs/sysfs.h        |  5 ++---
 include/linux/kobject.h |  4 ++++
 lib/kobject.c           | 19 +++++++++++++++++++
 6 files changed, 67 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e39b884f0867..feeae8081c22 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -40,6 +40,8 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
 int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 {
 	struct kernfs_node *parent, *kn;
+	kuid_t uid;
+	kgid_t gid;
 
 	BUG_ON(!kobj);
 
@@ -51,9 +53,10 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 	if (!parent)
 		return -ENOENT;
 
+	kobject_get_ownership(kobj, &uid, &gid);
+
 	kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
-				  S_IRWXU | S_IRUGO | S_IXUGO,
-				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  S_IRWXU | S_IRUGO | S_IXUGO, uid, gid,
 				  kobj, ns);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 513fa691ecbd..fa46216523cf 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -245,7 +245,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = {
 
 int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 			   const struct attribute *attr, bool is_bin,
-			   umode_t mode, const void *ns)
+			   umode_t mode, kuid_t uid, kgid_t gid, const void *ns)
 {
 	struct lock_class_key *key = NULL;
 	const struct kernfs_ops *ops;
@@ -302,8 +302,8 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 	if (!attr->ignore_lockdep)
 		key = attr->key ?: (struct lock_class_key *)&attr->skey;
 #endif
-	kn = __kernfs_create_file(parent, attr->name,
-				  mode & 0777, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+
+	kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid,
 				  size, ops, (void *)attr, ns, key);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
@@ -313,12 +313,6 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 	return 0;
 }
 
-int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
-		   bool is_bin)
-{
-	return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL);
-}
-
 /**
  * sysfs_create_file_ns - create an attribute file for an object with custom ns
  * @kobj: object we're creating for
@@ -328,9 +322,14 @@ int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
 int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
 			 const void *ns)
 {
+	kuid_t uid;
+	kgid_t gid;
+
 	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
+	kobject_get_ownership(kobj, &uid, &gid);
+	return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode,
+				      uid, gid, ns);
 
 }
 EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
@@ -359,6 +358,8 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 		const struct attribute *attr, const char *group)
 {
 	struct kernfs_node *parent;
+	kuid_t uid;
+	kgid_t gid;
 	int error;
 
 	if (group) {
@@ -371,7 +372,9 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 	if (!parent)
 		return -ENOENT;
 
-	error = sysfs_add_file(parent, attr, false);
+	kobject_get_ownership(kobj, &uid, &gid);
+	error = sysfs_add_file_mode_ns(kobj->sd, attr, false,
+				       attr->mode, uid, gid, NULL);
 	kernfs_put(parent);
 
 	return error;
@@ -487,9 +490,14 @@ EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
 int sysfs_create_bin_file(struct kobject *kobj,
 			  const struct bin_attribute *attr)
 {
+	kuid_t uid;
+	kgid_t gid;
+
 	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file(kobj->sd, &attr->attr, true);
+	kobject_get_ownership(kobj, &uid, &gid);
+	return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true,
+				      attr->attr.mode, uid, gid, NULL);
 }
 EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
 
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 4802ec0e1e3a..c7a716c4acc9 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -31,6 +31,7 @@ static void remove_files(struct kernfs_node *parent,
 }
 
 static int create_files(struct kernfs_node *parent, struct kobject *kobj,
+			kuid_t uid, kgid_t gid,
 			const struct attribute_group *grp, int update)
 {
 	struct attribute *const *attr;
@@ -60,7 +61,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
 
 			mode &= SYSFS_PREALLOC | 0664;
 			error = sysfs_add_file_mode_ns(parent, *attr, false,
-						       mode, NULL);
+						       mode, uid, gid, NULL);
 			if (unlikely(error))
 				break;
 		}
@@ -90,7 +91,8 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
 			mode &= SYSFS_PREALLOC | 0664;
 			error = sysfs_add_file_mode_ns(parent,
 					&(*bin_attr)->attr, true,
-					mode, NULL);
+					mode,
+					uid, gid, NULL);
 			if (error)
 				break;
 		}
@@ -106,6 +108,8 @@ static int internal_create_group(struct kobject *kobj, int update,
 				 const struct attribute_group *grp)
 {
 	struct kernfs_node *kn;
+	kuid_t uid;
+	kgid_t gid;
 	int error;
 
 	BUG_ON(!kobj || (!update && !kobj->sd));
@@ -118,9 +122,11 @@ static int internal_create_group(struct kobject *kobj, int update,
 			kobj->name, grp->name ?: "");
 		return -EINVAL;
 	}
+	kobject_get_ownership(kobj, &uid, &gid);
 	if (grp->name) {
-		kn = kernfs_create_dir(kobj->sd, grp->name,
-				       S_IRWXU | S_IRUGO | S_IXUGO, kobj);
+		kn = kernfs_create_dir_ns(kobj->sd, grp->name,
+					  S_IRWXU | S_IRUGO | S_IXUGO,
+					  uid, gid, kobj, NULL);
 		if (IS_ERR(kn)) {
 			if (PTR_ERR(kn) == -EEXIST)
 				sysfs_warn_dup(kobj->sd, grp->name);
@@ -129,7 +135,7 @@ static int internal_create_group(struct kobject *kobj, int update,
 	} else
 		kn = kobj->sd;
 	kernfs_get(kn);
-	error = create_files(kn, kobj, grp, update);
+	error = create_files(kn, kobj, uid, gid, grp, update);
 	if (error) {
 		if (grp->name)
 			kernfs_remove(kn);
@@ -281,6 +287,8 @@ int sysfs_merge_group(struct kobject *kobj,
 		       const struct attribute_group *grp)
 {
 	struct kernfs_node *parent;
+	kuid_t uid;
+	kgid_t gid;
 	int error = 0;
 	struct attribute *const *attr;
 	int i;
@@ -289,8 +297,11 @@ int sysfs_merge_group(struct kobject *kobj,
 	if (!parent)
 		return -ENOENT;
 
+	kobject_get_ownership(kobj, &uid, &gid);
+
 	for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
-		error = sysfs_add_file(parent, *attr, false);
+		error = sysfs_add_file_mode_ns(parent, *attr, false,
+					       (*attr)->mode, uid, gid, NULL);
 	if (error) {
 		while (--i >= 0)
 			kernfs_remove_by_name(parent, (*--attr)->name);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d098e015fcc9..0050cc0c0236 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -27,11 +27,10 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name);
 /*
  * file.c
  */
-int sysfs_add_file(struct kernfs_node *parent,
-		   const struct attribute *attr, bool is_bin);
 int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 			   const struct attribute *attr, bool is_bin,
-			   umode_t amode, const void *ns);
+			   umode_t amode, kuid_t uid, kgid_t gid,
+			   const void *ns);
 
 /*
  * symlink.c
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 7f6f93c3df9c..b49ff230beba 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -26,6 +26,7 @@
 #include <linux/wait.h>
 #include <linux/atomic.h>
 #include <linux/workqueue.h>
+#include <linux/uidgid.h>
 
 #define UEVENT_HELPER_PATH_LEN		256
 #define UEVENT_NUM_ENVP			32	/* number of env pointers */
@@ -114,6 +115,8 @@ extern struct kobject * __must_check kobject_get_unless_zero(
 extern void kobject_put(struct kobject *kobj);
 
 extern const void *kobject_namespace(struct kobject *kobj);
+extern void kobject_get_ownership(struct kobject *kobj,
+				  kuid_t *uid, kgid_t *gid);
 extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
 
 struct kobj_type {
@@ -122,6 +125,7 @@ struct kobj_type {
 	struct attribute **default_attrs;
 	const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
 	const void *(*namespace)(struct kobject *kobj);
+	void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid);
 };
 
 struct kobj_uevent_env {
diff --git a/lib/kobject.c b/lib/kobject.c
index 18989b5b3b56..f2dc1f756007 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -35,6 +35,25 @@ const void *kobject_namespace(struct kobject *kobj)
 	return kobj->ktype->namespace(kobj);
 }
 
+/**
+ * kobject_get_ownership - get sysfs ownership data for @kobj
+ * @kobj: kobject in question
+ * @uid: kernel user ID for sysfs objects
+ * @gid: kernel group ID for sysfs objects
+ *
+ * Returns initial uid/gid pair that should be used when creating sysfs
+ * representation of given kobject. Normally used to adjust ownership of
+ * objects in a container.
+ */
+void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+{
+	*uid = GLOBAL_ROOT_UID;
+	*gid = GLOBAL_ROOT_GID;
+
+	if (kobj->ktype->get_ownership)
+		kobj->ktype->get_ownership(kobj, uid, gid);
+}
+
 /*
  * populate_dir - populate directory with attributes.
  * @kobj: object we're working on.
-- 
cgit v1.2.3


From 9944e894c1266dc8515c82d1ff752d681215526b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 20 Jul 2018 21:56:50 +0000
Subject: driver core: set up ownership of class devices in sysfs

Plumb in get_ownership() callback for devices belonging to a class so that
they can be created with uid/gid different from global root. This will
allow network devices in a container to belong to container's root and not
global root.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/base/core.c    | 9 +++++++++
 include/linux/device.h | 5 +++++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index df3e1a44707a..276c7e3f754c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -866,10 +866,19 @@ static const void *device_namespace(struct kobject *kobj)
 	return ns;
 }
 
+static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+{
+	struct device *dev = kobj_to_dev(kobj);
+
+	if (dev->class && dev->class->get_ownership)
+		dev->class->get_ownership(dev, uid, gid);
+}
+
 static struct kobj_type device_ktype = {
 	.release	= device_release,
 	.sysfs_ops	= &dev_sysfs_ops,
 	.namespace	= device_namespace,
+	.get_ownership	= device_get_ownership,
 };
 
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..fe6ccb6dc119 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -384,6 +384,9 @@ int subsys_virtual_register(struct bus_type *subsys,
  * @shutdown_pre: Called at shut-down time before driver shutdown.
  * @ns_type:	Callbacks so sysfs can detemine namespaces.
  * @namespace:	Namespace of the device belongs to this class.
+ * @get_ownership: Allows class to specify uid/gid of the sysfs directories
+ *		for the devices belonging to the class. Usually tied to
+ *		device's namespace.
  * @pm:		The default device power management operations of this class.
  * @p:		The private data of the driver core, no one other than the
  *		driver core can touch this.
@@ -413,6 +416,8 @@ struct class {
 	const struct kobj_ns_type_operations *ns_type;
 	const void *(*namespace)(struct device *dev);
 
+	void (*get_ownership)(struct device *dev, kuid_t *uid, kgid_t *gid);
+
 	const struct dev_pm_ops *pm;
 
 	struct subsys_private *p;
-- 
cgit v1.2.3


From 9bfdc2611d417be453c3deb7a7ef2ffc718febfa Mon Sep 17 00:00:00 2001
From: Adam Borowski <kilobyte@angband.pl>
Date: Wed, 18 Jul 2018 04:10:44 +0200
Subject: vt: selection: take screen contents from uniscr if available

This preserves whatever was written even if we can't currently display the
given glyph.  Mouse paste won't corrupt any character of wcwidth() == 1
anymore.

Note that for now uniscr doesn't get allocated until something reads
/dev/vcsuN for that console, making this code dormant for most users.

Signed-off-by: Adam Borowski <kilobyte@angband.pl>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/selection.c | 11 +++++++----
 drivers/tty/vt/vt.c        | 10 ++++++++++
 include/linux/selection.h  |  1 +
 3 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c
index 69ca337d3220..07496c711d7d 100644
--- a/drivers/tty/vt/selection.c
+++ b/drivers/tty/vt/selection.c
@@ -57,11 +57,13 @@ static inline void highlight_pointer(const int where)
 	complement_pos(sel_cons, where);
 }
 
-static u16
+static u32
 sel_pos(int n)
 {
+	if (use_unicode)
+		return screen_glyph_unicode(sel_cons, n / 2);
 	return inverse_translate(sel_cons, screen_glyph(sel_cons, n),
-				use_unicode);
+				0);
 }
 
 /**
@@ -90,7 +92,8 @@ static u32 inwordLut[]={
   0x07FFFFFE, /* lowercase         */
 };
 
-static inline int inword(const u16 c) {
+static inline int inword(const u32 c)
+{
 	return c > 0x7f || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1);
 }
 
@@ -167,7 +170,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t
 	struct tiocl_selection v;
 	char *bp, *obp;
 	int i, ps, pe, multiplier;
-	u16 c;
+	u32 c;
 	int mode;
 
 	poke_blanked_console();
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 846dfedb657d..7adcb6dcc50c 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -4543,6 +4543,16 @@ u16 screen_glyph(struct vc_data *vc, int offset)
 }
 EXPORT_SYMBOL_GPL(screen_glyph);
 
+u32 screen_glyph_unicode(struct vc_data *vc, int n)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr)
+		return uniscr->lines[n / vc->vc_cols][n % vc->vc_cols];
+	return inverse_translate(vc, screen_glyph(vc, n * 2), 1);
+}
+EXPORT_SYMBOL_GPL(screen_glyph_unicode);
+
 /* used by vcs - note the word offset */
 unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed)
 {
diff --git a/include/linux/selection.h b/include/linux/selection.h
index 067d2e99c79f..a8f5b97b216f 100644
--- a/include/linux/selection.h
+++ b/include/linux/selection.h
@@ -32,6 +32,7 @@ extern unsigned char default_blu[];
 
 extern unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed);
 extern u16 screen_glyph(struct vc_data *vc, int offset);
+extern u32 screen_glyph_unicode(struct vc_data *vc, int offset);
 extern void complement_pos(struct vc_data *vc, int offset);
 extern void invert_screen(struct vc_data *vc, int offset, int count, int shift);
 
-- 
cgit v1.2.3


From f4c6fbc96eb187d6432fbba1f9c0c2db598cdf29 Mon Sep 17 00:00:00 2001
From: Adam Borowski <kilobyte@angband.pl>
Date: Wed, 18 Jul 2018 05:03:22 +0200
Subject: vt: drop unused struct vt_struct

Hasn't been ever used within historic (ie, git) times.

Signed-off-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/console_struct.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 2c8d3239899b..fea64f2692a0 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -17,7 +17,6 @@
 #include <linux/vt.h>
 #include <linux/workqueue.h>
 
-struct vt_struct;
 struct uni_pagedir;
 struct uni_screen;
 
@@ -150,7 +149,7 @@ struct vc {
 	struct vc_data *d;
 	struct work_struct SAK_work;
 
-	/* might add  scrmem, vt_struct, kbd  at some time,
+	/* might add  scrmem, kbd  at some time,
 	   to have everything in one place - the disadvantage
 	   would be that vc_cons etc can no longer be static */
 };
-- 
cgit v1.2.3


From a2dca217dae29c4ff6420e8c78d56b3f61ae0797 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Mon, 16 Jul 2018 15:06:18 +0200
Subject: KVM: arm/arm64: vgic: Define GICD_IIDR fields for GICv2 and GIv3

Instead of hardcoding the shifts and masks in the GICD_IIDR register
emulation, let's add the definition of these fields to the GIC header
files and use them.

This will make things more obvious when we're going to bump the revision
in the IIDR when we'll make guest-visible changes to the implementation.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 10 ++++++++++
 include/linux/irqchip/arm-gic.h    | 10 ++++++++++
 virt/kvm/arm/vgic/vgic-mmio-v2.c   |  3 ++-
 virt/kvm/arm/vgic/vgic-mmio-v3.c   |  3 ++-
 4 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index cbb872c1b607..b22f9dfa61af 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -61,6 +61,16 @@
 #define GICD_CTLR_ENABLE_G1A		(1U << 1)
 #define GICD_CTLR_ENABLE_G1		(1U << 0)
 
+#define GICD_IIDR_IMPLEMENTER_SHIFT	0
+#define GICD_IIDR_IMPLEMENTER_MASK	(0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT	12
+#define GICD_IIDR_REVISION_MASK		(0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT		16
+#define GICD_IIDR_VARIANT_MASK		(0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT	24
+#define GICD_IIDR_PRODUCT_ID_MASK	(0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
 /*
  * In systems with a single security state (what we emulate in KVM)
  * the meaning of the interrupt group enable bits is slightly different
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 68d8b1f73682..484f5bfa9f3d 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -71,6 +71,16 @@
 					(GICD_INT_DEF_PRI << 8) |\
 					GICD_INT_DEF_PRI)
 
+#define GICD_IIDR_IMPLEMENTER_SHIFT	0
+#define GICD_IIDR_IMPLEMENTER_MASK	(0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT	12
+#define GICD_IIDR_REVISION_MASK		(0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT		16
+#define GICD_IIDR_VARIANT_MASK		(0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT	24
+#define GICD_IIDR_PRODUCT_ID_MASK	(0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
 #define GICH_HCR			0x0
 #define GICH_VTR			0x4
 #define GICH_VMCR			0x8
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index ffc587bf4742..af44e569373a 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -37,7 +37,8 @@ static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
 		value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
 		break;
 	case GIC_DIST_IIDR:
-		value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+		value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+			(IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
 		break;
 	default:
 		return 0;
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 287784095b5b..c03f42409b98 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -81,7 +81,8 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
 		}
 		break;
 	case GICD_IIDR:
-		value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+		value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+			(IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
 		break;
 	default:
 		return 0;
-- 
cgit v1.2.3


From 87322099052b6a534cecd3d303fc097d4560b7d0 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Mon, 16 Jul 2018 15:06:22 +0200
Subject: KVM: arm/arm64: vgic: Signal IRQs using their configured group

Now when we have a group configuration on the struct IRQ, use this state
when populating the LR and signaling interrupts as either group 0 or
group 1 to the VM.  Depending on the model of the emulated GIC, and the
guest's configuration of the VMCR, interrupts may be signaled as IRQs or
FIQs to the VM.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic.h | 1 +
 virt/kvm/arm/vgic/vgic-v2.c     | 3 +++
 virt/kvm/arm/vgic/vgic-v3.c     | 6 +-----
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 484f5bfa9f3d..6c4aaf04046c 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -104,6 +104,7 @@
 #define GICH_LR_PENDING_BIT		(1 << 28)
 #define GICH_LR_ACTIVE_BIT		(1 << 29)
 #define GICH_LR_EOI			(1 << 19)
+#define GICH_LR_GROUP1			(1 << 30)
 #define GICH_LR_HW			(1 << 31)
 
 #define GICH_VMCR_ENABLE_GRP0_SHIFT	0
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index a5f2e44f1c33..df5e6a6e3186 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -159,6 +159,9 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 		}
 	}
 
+	if (irq->group)
+		val |= GICH_LR_GROUP1;
+
 	if (irq->hw) {
 		val |= GICH_LR_HW;
 		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index cdce653e3c47..530b8491c892 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -197,11 +197,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 	if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
 		irq->line_level = false;
 
-	/*
-	 * We currently only support Group1 interrupts, which is a
-	 * known defect. This needs to be addressed at some point.
-	 */
-	if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
+	if (irq->group)
 		val |= ICH_LR_GROUP;
 
 	val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
-- 
cgit v1.2.3


From 1fb53567a3633740aac8761eb7023dc5671f0edb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 5 May 2017 13:45:14 -0500
Subject: pids: Move task_pid_type into sched/signal.h

The function is general and inline so there is no need
to hide it inside of exit.c

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 8 ++++++++
 kernel/exit.c                | 8 --------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 113d1ad1ced7..d8ef0a3d2e7e 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -556,6 +556,14 @@ extern bool current_is_single_threaded(void);
 typedef int (*proc_visitor)(struct task_struct *p, void *data);
 void walk_process_tree(struct task_struct *top, proc_visitor, void *);
 
+static inline
+struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
+{
+	if (type != PIDTYPE_PID)
+		task = task->group_leader;
+	return task->pids[type].pid;
+}
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
diff --git a/kernel/exit.c b/kernel/exit.c
index c3c7ac560114..16432428fc6c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1001,14 +1001,6 @@ struct wait_opts {
 	int			notask_error;
 };
 
-static inline
-struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
-{
-	if (type != PIDTYPE_PID)
-		task = task->group_leader;
-	return task->pids[type].pid;
-}
-
 static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
 {
 	return	wo->wo_type == PIDTYPE_MAX ||
-- 
cgit v1.2.3


From 7a36094d61bfe9843de5484ff0140227983ac5d5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 26 Sep 2017 12:45:33 -0500
Subject: pids: Compute task_tgid using signal->leader_pid

The cost is the the same and this removes the need
to worry about complications that come from de_thread
and group_leader changing.

__task_pid_nr_ns has been updated to take advantage of this change.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/ia64/kernel/asm-offsets.c       |  2 +-
 arch/ia64/kernel/fsys.S              |  8 ++++----
 drivers/platform/x86/thinkpad_acpi.c |  1 +
 fs/fuse/file.c                       |  1 +
 fs/notify/fanotify/fanotify.c        |  1 +
 include/linux/sched.h                |  5 -----
 include/linux/sched/signal.h         |  5 +++++
 include/net/scm.h                    |  1 +
 kernel/pid.c                         | 15 ++++++++-------
 9 files changed, 22 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index f4db2168d1b8..f5433bb7f04a 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -50,7 +50,6 @@ void foo(void)
 
 	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
-	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
 	DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
 	DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
 	DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
@@ -68,6 +67,7 @@ void foo(void)
 	DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
 							     group_stop_count));
 	DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
+	DEFINE(IA64_SIGNAL_LEADER_PID_OFFSET, offsetof (struct signal_struct, leader_pid));
 
 	BLANK();
 
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index fe742ffafc7a..eaf5a0d6f3e0 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -62,16 +62,16 @@ ENTRY(fsys_getpid)
 	.prologue
 	.altrp b6
 	.body
-	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+	add r17=IA64_TASK_SIGNAL_OFFSET,r16
 	;;
-	ld8 r17=[r17]				// r17 = current->group_leader
+	ld8 r17=[r17]				// r17 = current->signal
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
 	ld4 r9=[r9]
-	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
+	add r17=IA64_SIGNAL_LEADER_PID_OFFSET,r17
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
+	ld8 r17=[r17]				// r17 = current->signal->leader_pid
 	;;
 	add r8=IA64_PID_LEVEL_OFFSET,r17
 	;;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index cae9b0595692..d556e95c532c 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -57,6 +57,7 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/delay.h>
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a201fb0ac64f..b00a3f126a89 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/module.h>
 #include <linux/compat.h>
 #include <linux/swap.h>
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f90842efea13..6e828cb82e5e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -8,6 +8,7 @@
 #include <linux/mount.h>
 #include <linux/sched.h>
 #include <linux/sched/user.h>
+#include <linux/sched/signal.h>
 #include <linux/types.h>
 #include <linux/wait.h>
 #include <linux/audit.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 87bf02d93a27..a461ff89a3af 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1202,11 +1202,6 @@ static inline struct pid *task_pid(struct task_struct *task)
 	return task->pids[PIDTYPE_PID].pid;
 }
 
-static inline struct pid *task_tgid(struct task_struct *task)
-{
-	return task->group_leader->pids[PIDTYPE_PID].pid;
-}
-
 /*
  * Without tasklist or RCU lock it is not safe to dereference
  * the result of task_pgrp/task_session even if task == current,
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index d8ef0a3d2e7e..b95a272c1ab5 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -564,6 +564,11 @@ struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 	return task->pids[type].pid;
 }
 
+static inline struct pid *task_tgid(struct task_struct *task)
+{
+	return task->signal->leader_pid;
+}
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
diff --git a/include/net/scm.h b/include/net/scm.h
index 903771c8d4e3..1ce365f4c256 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -8,6 +8,7 @@
 #include <linux/security.h>
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
+#include <linux/sched/signal.h>
 
 /* Well, we should have at least one descriptor open
  * to accept passed FDs 8)
diff --git a/kernel/pid.c b/kernel/pid.c
index 157fe4b19971..d0de2b59f86f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -421,13 +421,14 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
 	if (!ns)
 		ns = task_active_pid_ns(current);
 	if (likely(pid_alive(task))) {
-		if (type != PIDTYPE_PID) {
-			if (type == __PIDTYPE_TGID)
-				type = PIDTYPE_PID;
-
-			task = task->group_leader;
-		}
-		nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
+		struct pid *pid;
+		if (type == PIDTYPE_PID)
+			pid = task_pid(task);
+		else if (type == __PIDTYPE_TGID)
+			pid = task_tgid(task);
+		else
+			pid = rcu_dereference(task->group_leader->pids[type].pid);
+		nr = pid_nr_ns(pid, ns);
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From 2c4704756cab7cfa031ada4dab361562f0e357c0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 26 Sep 2017 13:06:43 -0500
Subject: pids: Move the pgrp and session pid pointers from task_struct to
 signal_struct

To access these fields the code always has to go to group leader so
going to signal struct is no loss and is actually a fundamental simplification.

This saves a little bit of memory by only allocating the pid pointer array
once instead of once for every thread, and even better this removes a
few potential races caused by the fact that group_leader can be changed
by de_thread, while signal_struct can not.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/ia64/kernel/asm-offsets.c |  2 +-
 arch/ia64/kernel/fsys.S        |  4 ++--
 fs/autofs/autofs_i.h           |  1 +
 include/linux/init_task.h      |  9 ---------
 include/linux/pid.h            |  8 +-------
 include/linux/sched.h          | 22 ++++-----------------
 include/linux/sched/signal.h   | 26 +++++++++++++++++++++---
 init/init_task.c               | 11 ++++++-----
 kernel/fork.c                  | 23 ++++++++++++++++-----
 kernel/pid.c                   | 45 +++++++++++++++++++++---------------------
 10 files changed, 78 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index f5433bb7f04a..c1f8a57855af 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -50,7 +50,7 @@ void foo(void)
 
 	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
-	DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+	DEFINE(IA64_TASK_THREAD_PID_OFFSET,offsetof (struct task_struct, thread_pid));
 	DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
 	DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
 	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index eaf5a0d6f3e0..e85ebdac678b 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -96,11 +96,11 @@ ENTRY(fsys_set_tid_address)
 	.altrp b6
 	.body
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
-	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
+	add r17=IA64_TASK_THREAD_PID_OFFSET,r16
 	;;
 	ld4 r9=[r9]
 	tnat.z p6,p7=r32		// check argument register for being NaT
-	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
+	ld8 r17=[r17]				// r17 = current->thread_pid
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
 	add r8=IA64_PID_LEVEL_OFFSET,r17
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 9400a9f6318a..502812289850 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -18,6 +18,7 @@
 #include <linux/string.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/uaccess.h>
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a454b8aeb938..a7083a45a26c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -46,15 +46,6 @@ extern struct cred init_cred;
 #define INIT_CPU_TIMERS(s)
 #endif
 
-#define INIT_PID_LINK(type) 					\
-{								\
-	.node = {						\
-		.next = NULL,					\
-		.pprev = NULL,					\
-	},							\
-	.pid = &init_struct_pid,				\
-}
-
 #define INIT_TASK_COMM "swapper"
 
 /* Attach to the init_task data structure for proper alignment */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 7633d55d9a24..3d4c504dcc8c 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -67,12 +67,6 @@ struct pid
 
 extern struct pid init_struct_pid;
 
-struct pid_link
-{
-	struct hlist_node node;
-	struct pid *pid;
-};
-
 static inline struct pid *get_pid(struct pid *pid)
 {
 	if (pid)
@@ -177,7 +171,7 @@ pid_t pid_vnr(struct pid *pid);
 	do {								\
 		if ((pid) != NULL)					\
 			hlist_for_each_entry_rcu((task),		\
-				&(pid)->tasks[type], pids[type].node) {
+				&(pid)->tasks[type], pid_links[type]) {
 
 			/*
 			 * Both old and new leaders may be attached to
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a461ff89a3af..445bdf5b1f64 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -775,7 +775,8 @@ struct task_struct {
 	struct list_head		ptrace_entry;
 
 	/* PID/PID hash table linkage. */
-	struct pid_link			pids[PIDTYPE_MAX];
+	struct pid			*thread_pid;
+	struct hlist_node		pid_links[PIDTYPE_MAX];
 	struct list_head		thread_group;
 	struct list_head		thread_node;
 
@@ -1199,22 +1200,7 @@ struct task_struct {
 
 static inline struct pid *task_pid(struct task_struct *task)
 {
-	return task->pids[PIDTYPE_PID].pid;
-}
-
-/*
- * Without tasklist or RCU lock it is not safe to dereference
- * the result of task_pgrp/task_session even if task == current,
- * we can race with another thread doing sys_setsid/sys_setpgid.
- */
-static inline struct pid *task_pgrp(struct task_struct *task)
-{
-	return task->group_leader->pids[PIDTYPE_PGID].pid;
-}
-
-static inline struct pid *task_session(struct task_struct *task)
-{
-	return task->group_leader->pids[PIDTYPE_SID].pid;
+	return task->thread_pid;
 }
 
 /*
@@ -1263,7 +1249,7 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk)
  */
 static inline int pid_alive(const struct task_struct *p)
 {
-	return p->pids[PIDTYPE_PID].pid != NULL;
+	return p->thread_pid != NULL;
 }
 
 static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b95a272c1ab5..2dcded16eb1e 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -146,7 +146,9 @@ struct signal_struct {
 
 #endif
 
+	/* PID/PID hash table linkage. */
 	struct pid *leader_pid;
+	struct pid *pids[PIDTYPE_MAX];
 
 #ifdef CONFIG_NO_HZ_FULL
 	atomic_t tick_dep_mask;
@@ -559,9 +561,12 @@ void walk_process_tree(struct task_struct *top, proc_visitor, void *);
 static inline
 struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 {
-	if (type != PIDTYPE_PID)
-		task = task->group_leader;
-	return task->pids[type].pid;
+	struct pid *pid;
+	if (type == PIDTYPE_PID)
+		pid = task_pid(task);
+	else
+		pid = task->signal->pids[type];
+	return pid;
 }
 
 static inline struct pid *task_tgid(struct task_struct *task)
@@ -569,6 +574,21 @@ static inline struct pid *task_tgid(struct task_struct *task)
 	return task->signal->leader_pid;
 }
 
+/*
+ * Without tasklist or RCU lock it is not safe to dereference
+ * the result of task_pgrp/task_session even if task == current,
+ * we can race with another thread doing sys_setsid/sys_setpgid.
+ */
+static inline struct pid *task_pgrp(struct task_struct *task)
+{
+	return task->signal->pids[PIDTYPE_PGID];
+}
+
+static inline struct pid *task_session(struct task_struct *task)
+{
+	return task->signal->pids[PIDTYPE_SID];
+}
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
diff --git a/init/init_task.c b/init/init_task.c
index 7914ffb8dc73..db12a61259f1 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -34,6 +34,11 @@ static struct signal_struct init_signals = {
 #endif
 	INIT_CPU_TIMERS(init_signals)
 	.leader_pid = &init_struct_pid,
+	.pids = {
+		[PIDTYPE_PID]	= &init_struct_pid,
+		[PIDTYPE_PGID]	= &init_struct_pid,
+		[PIDTYPE_SID]	= &init_struct_pid,
+	},
 	INIT_PREV_CPUTIME(init_signals)
 };
 
@@ -112,11 +117,7 @@ struct task_struct init_task
 	INIT_CPU_TIMERS(init_task)
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
 	.timer_slack_ns = 50000, /* 50 usec default slack */
-	.pids = {
-		[PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),
-		[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),
-		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),
-	},
+	.thread_pid	= &init_struct_pid,
 	.thread_group	= LIST_HEAD_INIT(init_task.thread_group),
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
 #ifdef CONFIG_AUDITSYSCALL
diff --git a/kernel/fork.c b/kernel/fork.c
index 9440d61b925c..d2952162399b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1549,10 +1549,22 @@ static void posix_cpu_timers_init(struct task_struct *tsk)
 static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
 #endif
 
+static inline void init_task_pid_links(struct task_struct *task)
+{
+	enum pid_type type;
+
+	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
+		INIT_HLIST_NODE(&task->pid_links[type]);
+	}
+}
+
 static inline void
 init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
 {
-	 task->pids[type].pid = pid;
+	if (type == PIDTYPE_PID)
+		task->thread_pid = pid;
+	else
+		task->signal->pids[type] = pid;
 }
 
 static inline void rcu_copy_process(struct task_struct *p)
@@ -1928,6 +1940,7 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_cancel_cgroup;
 	}
 
+	init_task_pid_links(p);
 	if (likely(p->pid)) {
 		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
 
@@ -2036,13 +2049,13 @@ fork_out:
 	return ERR_PTR(retval);
 }
 
-static inline void init_idle_pids(struct pid_link *links)
+static inline void init_idle_pids(struct task_struct *idle)
 {
 	enum pid_type type;
 
 	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
-		INIT_HLIST_NODE(&links[type].node); /* not really needed */
-		links[type].pid = &init_struct_pid;
+		INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */
+		init_task_pid(idle, type, &init_struct_pid);
 	}
 }
 
@@ -2052,7 +2065,7 @@ struct task_struct *fork_idle(int cpu)
 	task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
 			    cpu_to_node(cpu));
 	if (!IS_ERR(task)) {
-		init_idle_pids(task->pids);
+		init_idle_pids(task);
 		init_idle(task, cpu);
 	}
 
diff --git a/kernel/pid.c b/kernel/pid.c
index d0de2b59f86f..f8486d2e2346 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -265,27 +265,35 @@ struct pid *find_vpid(int nr)
 }
 EXPORT_SYMBOL_GPL(find_vpid);
 
+static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
+{
+	return (type == PIDTYPE_PID) ?
+		&task->thread_pid :
+		(type == __PIDTYPE_TGID) ?
+		&task->signal->leader_pid :
+		&task->signal->pids[type];
+}
+
 /*
  * attach_pid() must be called with the tasklist_lock write-held.
  */
 void attach_pid(struct task_struct *task, enum pid_type type)
 {
-	struct pid_link *link = &task->pids[type];
-	hlist_add_head_rcu(&link->node, &link->pid->tasks[type]);
+	struct pid *pid = *task_pid_ptr(task, type);
+	hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
 }
 
 static void __change_pid(struct task_struct *task, enum pid_type type,
 			struct pid *new)
 {
-	struct pid_link *link;
+	struct pid **pid_ptr = task_pid_ptr(task, type);
 	struct pid *pid;
 	int tmp;
 
-	link = &task->pids[type];
-	pid = link->pid;
+	pid = *pid_ptr;
 
-	hlist_del_rcu(&link->node);
-	link->pid = new;
+	hlist_del_rcu(&task->pid_links[type]);
+	*pid_ptr = new;
 
 	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
 		if (!hlist_empty(&pid->tasks[tmp]))
@@ -310,8 +318,9 @@ void change_pid(struct task_struct *task, enum pid_type type,
 void transfer_pid(struct task_struct *old, struct task_struct *new,
 			   enum pid_type type)
 {
-	new->pids[type].pid = old->pids[type].pid;
-	hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
+	if (type == PIDTYPE_PID)
+		new->thread_pid = old->thread_pid;
+	hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
 }
 
 struct task_struct *pid_task(struct pid *pid, enum pid_type type)
@@ -322,7 +331,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
 		first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
 					      lockdep_tasklist_lock_is_held());
 		if (first)
-			result = hlist_entry(first, struct task_struct, pids[(type)].node);
+			result = hlist_entry(first, struct task_struct, pid_links[(type)]);
 	}
 	return result;
 }
@@ -360,9 +369,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
 {
 	struct pid *pid;
 	rcu_read_lock();
-	if (type != PIDTYPE_PID)
-		task = task->group_leader;
-	pid = get_pid(rcu_dereference(task->pids[type].pid));
+	pid = get_pid(rcu_dereference(*task_pid_ptr(task, type)));
 	rcu_read_unlock();
 	return pid;
 }
@@ -420,16 +427,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
 	rcu_read_lock();
 	if (!ns)
 		ns = task_active_pid_ns(current);
-	if (likely(pid_alive(task))) {
-		struct pid *pid;
-		if (type == PIDTYPE_PID)
-			pid = task_pid(task);
-		else if (type == __PIDTYPE_TGID)
-			pid = task_tgid(task);
-		else
-			pid = rcu_dereference(task->group_leader->pids[type].pid);
-		nr = pid_nr_ns(pid, ns);
-	}
+	if (likely(pid_alive(task)))
+		nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
 	rcu_read_unlock();
 
 	return nr;
-- 
cgit v1.2.3


From 6883f81aac6f44e7df70a6af189b3689ff52cbfb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 4 Jun 2017 04:32:13 -0500
Subject: pid: Implement PIDTYPE_TGID

Everywhere except in the pid array we distinguish between a tasks pid and
a tasks tgid (thread group id).  Even in the enumeration we want that
distinction sometimes so we have added __PIDTYPE_TGID.  With leader_pid
we almost have an implementation of PIDTYPE_TGID in struct signal_struct.

Add PIDTYPE_TGID as a first class member of the pid_type enumeration and
into the pids array.  Then remove the __PIDTYPE_TGID special case and the
leader_pid in signal_struct.

The net size increase is just an extra pointer added to struct pid and
an extra pair of pointers of an hlist_node added to task_struct.

The effect on code maintenance is the removal of a number of special
cases today and the potential to remove many more special cases as
PIDTYPE_TGID gets used to it's fullest.  The long term potential
is allowing zombie thread group leaders to exit, which will remove
a lot more special cases in the code.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/ia64/kernel/asm-offsets.c  | 2 +-
 arch/ia64/kernel/fsys.S         | 4 ++--
 arch/s390/kernel/perf_cpum_sf.c | 2 +-
 fs/exec.c                       | 1 +
 include/linux/pid.h             | 3 +--
 include/linux/sched.h           | 4 ++--
 include/linux/sched/signal.h    | 5 ++---
 init/init_task.c                | 2 +-
 kernel/events/core.c            | 2 +-
 kernel/exit.c                   | 1 +
 kernel/fork.c                   | 3 ++-
 kernel/pid.c                    | 2 --
 kernel/time/itimer.c            | 5 +++--
 kernel/time/posix-cpu-timers.c  | 2 +-
 14 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index c1f8a57855af..00e8e2a1eb19 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -67,7 +67,7 @@ void foo(void)
 	DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
 							     group_stop_count));
 	DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
-	DEFINE(IA64_SIGNAL_LEADER_PID_OFFSET, offsetof (struct signal_struct, leader_pid));
+	DEFINE(IA64_SIGNAL_PIDS_TGID_OFFSET, offsetof (struct signal_struct, pids[PIDTYPE_TGID]));
 
 	BLANK();
 
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index e85ebdac678b..d80c99a5f55d 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -68,10 +68,10 @@ ENTRY(fsys_getpid)
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
 	ld4 r9=[r9]
-	add r17=IA64_SIGNAL_LEADER_PID_OFFSET,r17
+	add r17=IA64_SIGNAL_PIDS_TGID_OFFSET,r17
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-	ld8 r17=[r17]				// r17 = current->signal->leader_pid
+	ld8 r17=[r17]				// r17 = current->signal->pids[PIDTYPE_TGID]
 	;;
 	add r8=IA64_PID_LEVEL_OFFSET,r17
 	;;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 0292d68e7dde..ca0b7ae894bb 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -665,7 +665,7 @@ static void cpumsf_output_event_pid(struct perf_event *event,
 		goto out;
 
 	/* Update the process ID (see also kernel/events/core.c) */
-	data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID);
+	data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID);
 	data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
 
 	perf_output_sample(&handle, &header, data, event);
diff --git a/fs/exec.c b/fs/exec.c
index 2d4e0075bd24..79a11fbded7a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1146,6 +1146,7 @@ static int de_thread(struct task_struct *tsk)
 		 */
 		tsk->pid = leader->pid;
 		change_pid(tsk, PIDTYPE_PID, task_pid(leader));
+		transfer_pid(leader, tsk, PIDTYPE_TGID);
 		transfer_pid(leader, tsk, PIDTYPE_PGID);
 		transfer_pid(leader, tsk, PIDTYPE_SID);
 
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 3d4c504dcc8c..14a9a39da9c7 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -7,11 +7,10 @@
 enum pid_type
 {
 	PIDTYPE_PID,
+	PIDTYPE_TGID,
 	PIDTYPE_PGID,
 	PIDTYPE_SID,
 	PIDTYPE_MAX,
-	/* only valid to __task_pid_nr_ns() */
-	__PIDTYPE_TGID
 };
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 445bdf5b1f64..06b4e3bda93a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1275,12 +1275,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk)
 
 static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
 {
-	return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns);
+	return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
 }
 
 static inline pid_t task_tgid_vnr(struct task_struct *tsk)
 {
-	return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL);
+	return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL);
 }
 
 static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 2dcded16eb1e..ee30a5ba475f 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -147,7 +147,6 @@ struct signal_struct {
 #endif
 
 	/* PID/PID hash table linkage. */
-	struct pid *leader_pid;
 	struct pid *pids[PIDTYPE_MAX];
 
 #ifdef CONFIG_NO_HZ_FULL
@@ -571,7 +570,7 @@ struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 
 static inline struct pid *task_tgid(struct task_struct *task)
 {
-	return task->signal->leader_pid;
+	return task->signal->pids[PIDTYPE_TGID];
 }
 
 /*
@@ -607,7 +606,7 @@ static inline bool thread_group_leader(struct task_struct *p)
  */
 static inline bool has_group_leader_pid(struct task_struct *p)
 {
-	return task_pid(p) == p->signal->leader_pid;
+	return task_pid(p) == task_tgid(p);
 }
 
 static inline
diff --git a/init/init_task.c b/init/init_task.c
index db12a61259f1..4f97846256d7 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -33,9 +33,9 @@ static struct signal_struct init_signals = {
 	},
 #endif
 	INIT_CPU_TIMERS(init_signals)
-	.leader_pid = &init_struct_pid,
 	.pids = {
 		[PIDTYPE_PID]	= &init_struct_pid,
+		[PIDTYPE_TGID]	= &init_struct_pid,
 		[PIDTYPE_PGID]	= &init_struct_pid,
 		[PIDTYPE_SID]	= &init_struct_pid,
 	},
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 80cca2b30c4f..9025b1796ca8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1334,7 +1334,7 @@ static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p,
 
 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
 {
-	return perf_event_pid_type(event, p, __PIDTYPE_TGID);
+	return perf_event_pid_type(event, p, PIDTYPE_TGID);
 }
 
 static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
diff --git a/kernel/exit.c b/kernel/exit.c
index 16432428fc6c..25582b442955 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -73,6 +73,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
 	nr_threads--;
 	detach_pid(p, PIDTYPE_PID);
 	if (group_dead) {
+		detach_pid(p, PIDTYPE_TGID);
 		detach_pid(p, PIDTYPE_PGID);
 		detach_pid(p, PIDTYPE_SID);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index d2952162399b..cc5be0d01ce6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1946,6 +1946,7 @@ static __latent_entropy struct task_struct *copy_process(
 
 		init_task_pid(p, PIDTYPE_PID, pid);
 		if (thread_group_leader(p)) {
+			init_task_pid(p, PIDTYPE_TGID, pid);
 			init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
 			init_task_pid(p, PIDTYPE_SID, task_session(current));
 
@@ -1954,7 +1955,6 @@ static __latent_entropy struct task_struct *copy_process(
 				p->signal->flags |= SIGNAL_UNKILLABLE;
 			}
 
-			p->signal->leader_pid = pid;
 			p->signal->tty = tty_kref_get(current->signal->tty);
 			/*
 			 * Inherit has_child_subreaper flag under the same
@@ -1965,6 +1965,7 @@ static __latent_entropy struct task_struct *copy_process(
 							 p->real_parent->signal->is_child_subreaper;
 			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
+			attach_pid(p, PIDTYPE_TGID);
 			attach_pid(p, PIDTYPE_PGID);
 			attach_pid(p, PIDTYPE_SID);
 			__this_cpu_inc(process_counts);
diff --git a/kernel/pid.c b/kernel/pid.c
index f8486d2e2346..de1cfc4f75a2 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -269,8 +269,6 @@ static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
 {
 	return (type == PIDTYPE_PID) ?
 		&task->thread_pid :
-		(type == __PIDTYPE_TGID) ?
-		&task->signal->leader_pid :
 		&task->signal->pids[type];
 }
 
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index f26acef5d7b4..9a65713c8309 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -139,9 +139,10 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
 {
 	struct signal_struct *sig =
 		container_of(timer, struct signal_struct, real_timer);
+	struct pid *leader_pid = sig->pids[PIDTYPE_TGID];
 
-	trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
-	kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
+	trace_itimer_expire(ITIMER_REAL, leader_pid, 0);
+	kill_pid_info(SIGALRM, SEND_SIG_PRIV, leader_pid);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 5a6251ac6f7a..40e6fae46cec 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -895,7 +895,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 
 		trace_itimer_expire(signo == SIGPROF ?
 				    ITIMER_PROF : ITIMER_VIRTUAL,
-				    tsk->signal->leader_pid, cur_time);
+				    task_tgid(tsk), cur_time);
 		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 	}
 
-- 
cgit v1.2.3


From 24122c7f4969adeeaeca3fb1656a31569e9aa59b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 20 Jul 2018 14:30:23 -0500
Subject: signal: Pass pid and pid type into send_sigqueue

Make the code more maintainable by performing more of the signal
related work in send_sigqueue.

A quick inspection of do_timer_create will show that this code path
does not lookup a thread group by a thread's pid.  Making it safe
to find the task pointed to by it_pid with "pid_task(it_pid, type)";

This supports the changes needed in fork to tell if a signal was sent
to a single process or a group of processes.

Having the pid to task transition in signal.c will also make it easier
to sort out races with de_thread and and the thread group leader
exiting when it comes time to address that.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  2 +-
 kernel/signal.c              | 14 +++++++++-----
 kernel/time/posix-timers.c   | 13 ++++---------
 3 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ee30a5ba475f..94558ffa82ab 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -330,7 +330,7 @@ extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
-extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
+extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 
 static inline int restart_syscall(void)
diff --git a/kernel/signal.c b/kernel/signal.c
index 8d8a940422a8..40feb14e276d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1664,17 +1664,20 @@ void sigqueue_free(struct sigqueue *q)
 		__sigqueue_free(q);
 }
 
-int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
+int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
 {
 	int sig = q->info.si_signo;
 	struct sigpending *pending;
+	struct task_struct *t;
 	unsigned long flags;
 	int ret, result;
 
 	BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
 
 	ret = -1;
-	if (!likely(lock_task_sighand(t, &flags)))
+	rcu_read_lock();
+	t = pid_task(pid, type);
+	if (!t || !likely(lock_task_sighand(t, &flags)))
 		goto ret;
 
 	ret = 1; /* the signal is ignored */
@@ -1696,15 +1699,16 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
 	q->info.si_overrun = 0;
 
 	signalfd_notify(t, sig);
-	pending = group ? &t->signal->shared_pending : &t->pending;
+	pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
 	list_add_tail(&q->list, &pending->list);
 	sigaddset(&pending->signal, sig);
-	complete_signal(sig, t, group);
+	complete_signal(sig, t, type != PIDTYPE_PID);
 	result = TRACE_SIGNAL_DELIVERED;
 out:
-	trace_signal_generate(sig, &q->info, t, group, result);
+	trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result);
 	unlock_task_sighand(t, &flags);
 ret:
+	rcu_read_unlock();
 	return ret;
 }
 
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 2bdf08a2bae9..2d2e739fbc57 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -332,8 +332,8 @@ void posixtimer_rearm(struct siginfo *info)
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
-	struct task_struct *task;
-	int shared, ret = -1;
+	enum pid_type type;
+	int ret = -1;
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->posixtimer_rearm().
@@ -347,13 +347,8 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	 */
 	timr->sigq->info.si_sys_private = si_private;
 
-	rcu_read_lock();
-	task = pid_task(timr->it_pid, PIDTYPE_PID);
-	if (task) {
-		shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
-		ret = send_sigqueue(timr->sigq, task, shared);
-	}
-	rcu_read_unlock();
+	type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID;
+	ret = send_sigqueue(timr->sigq, timr->it_pid, type);
 	/* If we failed to send the signal the timer stops. */
 	return ret > 0;
 }
-- 
cgit v1.2.3


From 0102498083d58d8b17759642c602b525215e1a54 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 13 Jul 2018 18:40:57 -0500
Subject: signal: Pass pid type into group_send_sig_info

This passes the information we already have at the call sight
into group_send_sig_info.  Ultimatelly allowing for to better handle
signals sent to a group of processes.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/signal.h |  4 +++-
 kernel/exit.c          |  3 ++-
 kernel/signal.c        | 10 ++++++----
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3c5200137b24..d8f2bf3d41e6 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -254,11 +254,13 @@ static inline int valid_signal(unsigned long sig)
 
 struct timespec;
 struct pt_regs;
+enum pid_type;
 
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int do_send_sig_info(int sig, struct siginfo *info,
 				struct task_struct *p, bool group);
-extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
+extern int group_send_sig_info(int sig, struct siginfo *info,
+			       struct task_struct *p, enum pid_type type);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern void set_current_blocked(sigset_t *);
diff --git a/kernel/exit.c b/kernel/exit.c
index 25582b442955..0e21e6d21f35 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -681,7 +681,8 @@ static void forget_original_parent(struct task_struct *father,
 				t->parent = t->real_parent;
 			if (t->pdeath_signal)
 				group_send_sig_info(t->pdeath_signal,
-						    SEND_SIG_NOINFO, t);
+						    SEND_SIG_NOINFO, t,
+						    PIDTYPE_TGID);
 		}
 		/*
 		 * If this is a threaded reparent there is no need to
diff --git a/kernel/signal.c b/kernel/signal.c
index 40feb14e276d..c7527338fe9d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1274,7 +1274,8 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 /*
  * send signal info to all the members of a group
  */
-int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
+int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+			enum pid_type type)
 {
 	int ret;
 
@@ -1301,7 +1302,7 @@ int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
 	success = 0;
 	retval = -ESRCH;
 	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
-		int err = group_send_sig_info(sig, info, p);
+		int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID);
 		success |= !err;
 		retval = err;
 	} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
@@ -1317,7 +1318,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
 		rcu_read_lock();
 		p = pid_task(pid, PIDTYPE_PID);
 		if (p)
-			error = group_send_sig_info(sig, info, p);
+			error = group_send_sig_info(sig, info, p, PIDTYPE_TGID);
 		rcu_read_unlock();
 		if (likely(!p || error != -ESRCH))
 			return error;
@@ -1420,7 +1421,8 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
 		for_each_process(p) {
 			if (task_pid_vnr(p) > 1 &&
 					!same_thread_group(p, current)) {
-				int err = group_send_sig_info(sig, info, p);
+				int err = group_send_sig_info(sig, info, p,
+							      PIDTYPE_MAX);
 				++count;
 				if (err != -EPERM)
 					retval = err;
-- 
cgit v1.2.3


From 40b3b02535621027f56d248139e0e467573c3098 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 21 Jul 2018 10:45:15 -0500
Subject: signal: Pass pid type into do_send_sig_info

This passes the information we already have at the call sight into
do_send_sig_info.  Ultimately allowing for better handling of signals
sent to a group of processes during fork.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 drivers/tty/sysrq.c    |  2 +-
 fs/fcntl.c             |  6 +++---
 include/linux/signal.h |  2 +-
 kernel/signal.c        | 10 +++++-----
 mm/oom_kill.c          |  4 ++--
 5 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 6364890575ec..06ed20dd01ba 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -348,7 +348,7 @@ static void send_sig_all(int sig)
 		if (is_global_init(p))
 			continue;
 
-		do_send_sig_info(sig, SEND_SIG_FORCED, p, true);
+		do_send_sig_info(sig, SEND_SIG_FORCED, p, PIDTYPE_MAX);
 	}
 	read_unlock(&tasklist_lock);
 }
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 5d596a00f40b..a04accf6847f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -767,11 +767,11 @@ static void send_sigio_to_task(struct task_struct *p,
 			else
 				si.si_band = mangle_poll(band_table[reason - POLL_IN]);
 			si.si_fd    = fd;
-			if (!do_send_sig_info(signum, &si, p, type != PIDTYPE_PID))
+			if (!do_send_sig_info(signum, &si, p, type))
 				break;
 		/* fall-through: fall back on the old plain SIGIO signal */
 		case 0:
-			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type != PIDTYPE_PID);
+			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
 	}
 }
 
@@ -808,7 +808,7 @@ static void send_sigurg_to_task(struct task_struct *p,
 				struct fown_struct *fown, enum pid_type type)
 {
 	if (sigio_perm(p, fown, SIGURG))
-		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type != PIDTYPE_PID);
+		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
 }
 
 int send_sigurg(struct fown_struct *fown)
diff --git a/include/linux/signal.h b/include/linux/signal.h
index d8f2bf3d41e6..fe125b0335f7 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -258,7 +258,7 @@ enum pid_type;
 
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int do_send_sig_info(int sig, struct siginfo *info,
-				struct task_struct *p, bool group);
+				struct task_struct *p, enum pid_type type);
 extern int group_send_sig_info(int sig, struct siginfo *info,
 			       struct task_struct *p, enum pid_type type);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
diff --git a/kernel/signal.c b/kernel/signal.c
index c7527338fe9d..2c09e6143dd8 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1161,13 +1161,13 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 }
 
 int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
-			bool group)
+			enum pid_type type)
 {
 	unsigned long flags;
 	int ret = -ESRCH;
 
 	if (lock_task_sighand(p, &flags)) {
-		ret = send_signal(sig, info, p, group);
+		ret = send_signal(sig, info, p, type != PIDTYPE_PID);
 		unlock_task_sighand(p, &flags);
 	}
 
@@ -1284,7 +1284,7 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
 	rcu_read_unlock();
 
 	if (!ret && sig)
-		ret = do_send_sig_info(sig, info, p, true);
+		ret = do_send_sig_info(sig, info, p, type);
 
 	return ret;
 }
@@ -1448,7 +1448,7 @@ int send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 	if (!valid_signal(sig))
 		return -EINVAL;
 
-	return do_send_sig_info(sig, info, p, false);
+	return do_send_sig_info(sig, info, p, PIDTYPE_PID);
 }
 
 #define __si_special(priv) \
@@ -3199,7 +3199,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 		 * probe.  No signal is actually delivered.
 		 */
 		if (!error && sig) {
-			error = do_send_sig_info(sig, info, p, false);
+			error = do_send_sig_info(sig, info, p, PIDTYPE_PID);
 			/*
 			 * If lock_task_sighand() failed we pretend the task
 			 * dies after receiving the signal. The window is tiny,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 84081e77bc51..2cc9b238368f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -920,7 +920,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	 * in order to prevent the OOM victim from depleting the memory
 	 * reserves from the user space under its control.
 	 */
-	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
+	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, PIDTYPE_TGID);
 	mark_oom_victim(victim);
 	pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
 		task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
@@ -958,7 +958,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 		 */
 		if (unlikely(p->flags & PF_KTHREAD))
 			continue;
-		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
+		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, PIDTYPE_TGID);
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From a3134fb09e0bc5bee76e13bf863173b86f21cf87 Mon Sep 17 00:00:00 2001
From: Rishabh Bhatnagar <rishabhb@codeaurora.org>
Date: Wed, 23 May 2018 17:35:21 -0700
Subject: drivers: soc: Add LLCC driver

LLCC (Last Level Cache Controller) provides additional cache memory
in the system. LLCC is partitioned into multiple slices and each
slice gets its own priority, size, ID and other config parameters.
LLCC driver programs these parameters for each slice. Clients that
are assigned to use LLCC need to get information such size & ID of the
slice they get and activate or deactivate the slice as needed. LLCC driver
provides API for the clients to perform these operations.

Signed-off-by: Channagoud Kadabi <ckadabi@codeaurora.org>
Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
Reviewed-by: Evan Green <evgreen@chromium.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/soc/qcom/Kconfig           |  17 ++
 drivers/soc/qcom/Makefile          |   2 +
 drivers/soc/qcom/llcc-sdm845.c     |  94 +++++++++++
 drivers/soc/qcom/llcc-slice.c      | 335 +++++++++++++++++++++++++++++++++++++
 include/linux/soc/qcom/llcc-qcom.h | 180 ++++++++++++++++++++
 5 files changed, 628 insertions(+)
 create mode 100644 drivers/soc/qcom/llcc-sdm845.c
 create mode 100644 drivers/soc/qcom/llcc-slice.c
 create mode 100644 include/linux/soc/qcom/llcc-qcom.h

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 9dc02f390ba3..d1a41852ae7a 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -39,6 +39,23 @@ config QCOM_GSBI
           functions for connecting the underlying serial UART, SPI, and I2C
           devices to the output pins.
 
+config QCOM_LLCC
+	tristate "Qualcomm Technologies, Inc. LLCC driver"
+	depends on ARCH_QCOM
+	help
+	  Qualcomm Technologies, Inc. platform specific
+	  Last Level Cache Controller(LLCC) driver. This provides interfaces
+	  to clients that use the LLCC. Say yes here to enable LLCC slice
+	  driver.
+
+config QCOM_SDM845_LLCC
+	tristate "Qualcomm Technologies, Inc. SDM845 LLCC driver"
+	depends on QCOM_LLCC
+	help
+	  Say yes here to enable the LLCC driver for SDM845. This provides
+	  data required to configure LLCC so that clients can start using the
+	  LLCC slices.
+
 config QCOM_MDT_LOADER
 	tristate
 	select QCOM_SCM
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 19dcf957cb3a..5921454c4ddd 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -15,3 +15,5 @@ obj-$(CONFIG_QCOM_SMP2P)	+= smp2p.o
 obj-$(CONFIG_QCOM_SMSM)	+= smsm.o
 obj-$(CONFIG_QCOM_WCNSS_CTRL) += wcnss_ctrl.o
 obj-$(CONFIG_QCOM_APR) += apr.o
+obj-$(CONFIG_QCOM_LLCC) += llcc-slice.o
+obj-$(CONFIG_QCOM_SDM845_LLCC) += llcc-sdm845.o
diff --git a/drivers/soc/qcom/llcc-sdm845.c b/drivers/soc/qcom/llcc-sdm845.c
new file mode 100644
index 000000000000..2e1e4f0a5db8
--- /dev/null
+++ b/drivers/soc/qcom/llcc-sdm845.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+
+/*
+ * SCT(System Cache Table) entry contains of the following members:
+ * usecase_id: Unique id for the client's use case
+ * slice_id: llcc slice id for each client
+ * max_cap: The maximum capacity of the cache slice provided in KB
+ * priority: Priority of the client used to select victim line for replacement
+ * fixed_size: Boolean indicating if the slice has a fixed capacity
+ * bonus_ways: Bonus ways are additional ways to be used for any slice,
+ *		if client ends up using more than reserved cache ways. Bonus
+ *		ways are allocated only if they are not reserved for some
+ *		other client.
+ * res_ways: Reserved ways for the cache slice, the reserved ways cannot
+ *		be used by any other client than the one its assigned to.
+ * cache_mode: Each slice operates as a cache, this controls the mode of the
+ *             slice: normal or TCM(Tightly Coupled Memory)
+ * probe_target_ways: Determines what ways to probe for access hit. When
+ *                    configured to 1 only bonus and reserved ways are probed.
+ *                    When configured to 0 all ways in llcc are probed.
+ * dis_cap_alloc: Disable capacity based allocation for a client
+ * retain_on_pc: If this bit is set and client has maintained active vote
+ *               then the ways assigned to this client are not flushed on power
+ *               collapse.
+ * activate_on_init: Activate the slice immediately after the SCT is programmed
+ */
+#define SCT_ENTRY(uid, sid, mc, p, fs, bway, rway, cmod, ptw, dca, rp, a) \
+	{					\
+		.usecase_id = uid,		\
+		.slice_id = sid,		\
+		.max_cap = mc,			\
+		.priority = p,			\
+		.fixed_size = fs,		\
+		.bonus_ways = bway,		\
+		.res_ways = rway,		\
+		.cache_mode = cmod,		\
+		.probe_target_ways = ptw,	\
+		.dis_cap_alloc = dca,		\
+		.retain_on_pc = rp,		\
+		.activate_on_init = a,		\
+	}
+
+static struct llcc_slice_config sdm845_data[] =  {
+	SCT_ENTRY(LLCC_CPUSS,    1,  2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 1),
+	SCT_ENTRY(LLCC_VIDSC0,   2,  512,  2, 1, 0x0,   0x0f0, 0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_VIDSC1,   3,  512,  2, 1, 0x0,   0x0f0, 0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_ROTATOR,  4,  563,  2, 1, 0x0,   0x00e, 2, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_VOICE,    5,  2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_AUDIO,    6,  2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_MDMHPGRW, 7,  1024, 2, 0, 0xfc,  0xf00, 0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_MDM,      8,  2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_CMPT,     10, 2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_GPUHTW,   11, 512,  1, 1, 0xc,   0x0,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_GPU,      12, 2304, 1, 0, 0xff0, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_MMUHWT,   13, 256,  2, 0, 0x0,   0x1,   0, 0, 1, 0, 1),
+	SCT_ENTRY(LLCC_CMPTDMA,  15, 2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_DISP,     16, 2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_VIDFW,    17, 2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_MDMHPFX,  20, 1024, 2, 1, 0x0,   0xf00, 0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_MDMPNG,   21, 1024, 0, 1, 0x1e,  0x0,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_AUDHW,    22, 1024, 1, 1, 0xffc, 0x2,   0, 0, 1, 1, 0),
+};
+
+static int sdm845_qcom_llcc_probe(struct platform_device *pdev)
+{
+	return qcom_llcc_probe(pdev, sdm845_data, ARRAY_SIZE(sdm845_data));
+}
+
+static const struct of_device_id sdm845_qcom_llcc_of_match[] = {
+	{ .compatible = "qcom,sdm845-llcc", },
+	{ }
+};
+
+static struct platform_driver sdm845_qcom_llcc_driver = {
+	.driver = {
+		.name = "sdm845-llcc",
+		.of_match_table = sdm845_qcom_llcc_of_match,
+	},
+	.probe = sdm845_qcom_llcc_probe,
+};
+module_platform_driver(sdm845_qcom_llcc_driver);
+
+MODULE_DESCRIPTION("QCOM sdm845 LLCC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/soc/qcom/llcc-slice.c b/drivers/soc/qcom/llcc-slice.c
new file mode 100644
index 000000000000..fcaad1a4b41d
--- /dev/null
+++ b/drivers/soc/qcom/llcc-slice.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
+ *
+ */
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+
+#define ACTIVATE                      BIT(0)
+#define DEACTIVATE                    BIT(1)
+#define ACT_CTRL_OPCODE_ACTIVATE      BIT(0)
+#define ACT_CTRL_OPCODE_DEACTIVATE    BIT(1)
+#define ACT_CTRL_ACT_TRIG             BIT(0)
+#define ACT_CTRL_OPCODE_SHIFT         0x01
+#define ATTR1_PROBE_TARGET_WAYS_SHIFT 0x02
+#define ATTR1_FIXED_SIZE_SHIFT        0x03
+#define ATTR1_PRIORITY_SHIFT          0x04
+#define ATTR1_MAX_CAP_SHIFT           0x10
+#define ATTR0_RES_WAYS_MASK           GENMASK(11, 0)
+#define ATTR0_BONUS_WAYS_MASK         GENMASK(27, 16)
+#define ATTR0_BONUS_WAYS_SHIFT        0x10
+#define LLCC_STATUS_READ_DELAY        100
+
+#define CACHE_LINE_SIZE_SHIFT         6
+
+#define LLCC_COMMON_STATUS0           0x0003000c
+#define LLCC_LB_CNT_MASK              GENMASK(31, 28)
+#define LLCC_LB_CNT_SHIFT             28
+
+#define MAX_CAP_TO_BYTES(n)           (n * SZ_1K)
+#define LLCC_TRP_ACT_CTRLn(n)         (n * SZ_4K)
+#define LLCC_TRP_STATUSn(n)           (4 + n * SZ_4K)
+#define LLCC_TRP_ATTR0_CFGn(n)        (0x21000 + SZ_8 * n)
+#define LLCC_TRP_ATTR1_CFGn(n)        (0x21004 + SZ_8 * n)
+
+#define BANK_OFFSET_STRIDE	      0x80000
+
+static struct llcc_drv_data *drv_data;
+
+static const struct regmap_config llcc_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.fast_io = true,
+};
+
+/**
+ * llcc_slice_getd - get llcc slice descriptor
+ * @uid: usecase_id for the client
+ *
+ * A pointer to llcc slice descriptor will be returned on success and
+ * and error pointer is returned on failure
+ */
+struct llcc_slice_desc *llcc_slice_getd(u32 uid)
+{
+	const struct llcc_slice_config *cfg;
+	struct llcc_slice_desc *desc;
+	u32 sz, count;
+
+	cfg = drv_data->cfg;
+	sz = drv_data->cfg_size;
+
+	for (count = 0; cfg && count < sz; count++, cfg++)
+		if (cfg->usecase_id == uid)
+			break;
+
+	if (count == sz || !cfg)
+		return ERR_PTR(-ENODEV);
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return ERR_PTR(-ENOMEM);
+
+	desc->slice_id = cfg->slice_id;
+	desc->slice_size = cfg->max_cap;
+
+	return desc;
+}
+EXPORT_SYMBOL_GPL(llcc_slice_getd);
+
+/**
+ * llcc_slice_putd - llcc slice descritpor
+ * @desc: Pointer to llcc slice descriptor
+ */
+void llcc_slice_putd(struct llcc_slice_desc *desc)
+{
+	kfree(desc);
+}
+EXPORT_SYMBOL_GPL(llcc_slice_putd);
+
+static int llcc_update_act_ctrl(u32 sid,
+				u32 act_ctrl_reg_val, u32 status)
+{
+	u32 act_ctrl_reg;
+	u32 status_reg;
+	u32 slice_status;
+	int ret;
+
+	act_ctrl_reg = drv_data->bcast_off + LLCC_TRP_ACT_CTRLn(sid);
+	status_reg = drv_data->bcast_off + LLCC_TRP_STATUSn(sid);
+
+	/* Set the ACTIVE trigger */
+	act_ctrl_reg_val |= ACT_CTRL_ACT_TRIG;
+	ret = regmap_write(drv_data->regmap, act_ctrl_reg, act_ctrl_reg_val);
+	if (ret)
+		return ret;
+
+	/* Clear the ACTIVE trigger */
+	act_ctrl_reg_val &= ~ACT_CTRL_ACT_TRIG;
+	ret = regmap_write(drv_data->regmap, act_ctrl_reg, act_ctrl_reg_val);
+	if (ret)
+		return ret;
+
+	ret = regmap_read_poll_timeout(drv_data->regmap, status_reg,
+				      slice_status, !(slice_status & status),
+				      0, LLCC_STATUS_READ_DELAY);
+	return ret;
+}
+
+/**
+ * llcc_slice_activate - Activate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ *
+ * A value of zero will be returned on success and a negative errno will
+ * be returned in error cases
+ */
+int llcc_slice_activate(struct llcc_slice_desc *desc)
+{
+	int ret;
+	u32 act_ctrl_val;
+
+	mutex_lock(&drv_data->lock);
+	if (test_bit(desc->slice_id, drv_data->bitmap)) {
+		mutex_unlock(&drv_data->lock);
+		return 0;
+	}
+
+	act_ctrl_val = ACT_CTRL_OPCODE_ACTIVATE << ACT_CTRL_OPCODE_SHIFT;
+
+	ret = llcc_update_act_ctrl(desc->slice_id, act_ctrl_val,
+				  DEACTIVATE);
+	if (ret) {
+		mutex_unlock(&drv_data->lock);
+		return ret;
+	}
+
+	__set_bit(desc->slice_id, drv_data->bitmap);
+	mutex_unlock(&drv_data->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(llcc_slice_activate);
+
+/**
+ * llcc_slice_deactivate - Deactivate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ *
+ * A value of zero will be returned on success and a negative errno will
+ * be returned in error cases
+ */
+int llcc_slice_deactivate(struct llcc_slice_desc *desc)
+{
+	u32 act_ctrl_val;
+	int ret;
+
+	mutex_lock(&drv_data->lock);
+	if (!test_bit(desc->slice_id, drv_data->bitmap)) {
+		mutex_unlock(&drv_data->lock);
+		return 0;
+	}
+	act_ctrl_val = ACT_CTRL_OPCODE_DEACTIVATE << ACT_CTRL_OPCODE_SHIFT;
+
+	ret = llcc_update_act_ctrl(desc->slice_id, act_ctrl_val,
+				  ACTIVATE);
+	if (ret) {
+		mutex_unlock(&drv_data->lock);
+		return ret;
+	}
+
+	__clear_bit(desc->slice_id, drv_data->bitmap);
+	mutex_unlock(&drv_data->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(llcc_slice_deactivate);
+
+/**
+ * llcc_get_slice_id - return the slice id
+ * @desc: Pointer to llcc slice descriptor
+ */
+int llcc_get_slice_id(struct llcc_slice_desc *desc)
+{
+	return desc->slice_id;
+}
+EXPORT_SYMBOL_GPL(llcc_get_slice_id);
+
+/**
+ * llcc_get_slice_size - return the slice id
+ * @desc: Pointer to llcc slice descriptor
+ */
+size_t llcc_get_slice_size(struct llcc_slice_desc *desc)
+{
+	return desc->slice_size;
+}
+EXPORT_SYMBOL_GPL(llcc_get_slice_size);
+
+static int qcom_llcc_cfg_program(struct platform_device *pdev)
+{
+	int i;
+	u32 attr1_cfg;
+	u32 attr0_cfg;
+	u32 attr1_val;
+	u32 attr0_val;
+	u32 max_cap_cacheline;
+	u32 sz;
+	int ret;
+	const struct llcc_slice_config *llcc_table;
+	struct llcc_slice_desc desc;
+	u32 bcast_off = drv_data->bcast_off;
+
+	sz = drv_data->cfg_size;
+	llcc_table = drv_data->cfg;
+
+	for (i = 0; i < sz; i++) {
+		attr1_cfg = bcast_off +
+				LLCC_TRP_ATTR1_CFGn(llcc_table[i].slice_id);
+		attr0_cfg = bcast_off +
+				LLCC_TRP_ATTR0_CFGn(llcc_table[i].slice_id);
+
+		attr1_val = llcc_table[i].cache_mode;
+		attr1_val |= llcc_table[i].probe_target_ways <<
+				ATTR1_PROBE_TARGET_WAYS_SHIFT;
+		attr1_val |= llcc_table[i].fixed_size <<
+				ATTR1_FIXED_SIZE_SHIFT;
+		attr1_val |= llcc_table[i].priority <<
+				ATTR1_PRIORITY_SHIFT;
+
+		max_cap_cacheline = MAX_CAP_TO_BYTES(llcc_table[i].max_cap);
+
+		/* LLCC instances can vary for each target.
+		 * The SW writes to broadcast register which gets propagated
+		 * to each llcc instace (llcc0,.. llccN).
+		 * Since the size of the memory is divided equally amongst the
+		 * llcc instances, we need to configure the max cap accordingly.
+		 */
+		max_cap_cacheline = max_cap_cacheline / drv_data->num_banks;
+		max_cap_cacheline >>= CACHE_LINE_SIZE_SHIFT;
+		attr1_val |= max_cap_cacheline << ATTR1_MAX_CAP_SHIFT;
+
+		attr0_val = llcc_table[i].res_ways & ATTR0_RES_WAYS_MASK;
+		attr0_val |= llcc_table[i].bonus_ways << ATTR0_BONUS_WAYS_SHIFT;
+
+		ret = regmap_write(drv_data->regmap, attr1_cfg, attr1_val);
+		if (ret)
+			return ret;
+		ret = regmap_write(drv_data->regmap, attr0_cfg, attr0_val);
+		if (ret)
+			return ret;
+		if (llcc_table[i].activate_on_init) {
+			desc.slice_id = llcc_table[i].slice_id;
+			ret = llcc_slice_activate(&desc);
+		}
+	}
+	return ret;
+}
+
+int qcom_llcc_probe(struct platform_device *pdev,
+		      const struct llcc_slice_config *llcc_cfg, u32 sz)
+{
+	u32 num_banks;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	void __iomem *base;
+	int ret, i;
+
+	drv_data = devm_kzalloc(dev, sizeof(*drv_data), GFP_KERNEL);
+	if (!drv_data)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drv_data->regmap = devm_regmap_init_mmio(dev, base,
+					&llcc_regmap_config);
+	if (IS_ERR(drv_data->regmap))
+		return PTR_ERR(drv_data->regmap);
+
+	ret = regmap_read(drv_data->regmap, LLCC_COMMON_STATUS0,
+						&num_banks);
+	if (ret)
+		return ret;
+
+	num_banks &= LLCC_LB_CNT_MASK;
+	num_banks >>= LLCC_LB_CNT_SHIFT;
+	drv_data->num_banks = num_banks;
+
+	for (i = 0; i < sz; i++)
+		if (llcc_cfg[i].slice_id > drv_data->max_slices)
+			drv_data->max_slices = llcc_cfg[i].slice_id;
+
+	drv_data->offsets = devm_kcalloc(dev, num_banks, sizeof(u32),
+							GFP_KERNEL);
+	if (!drv_data->offsets)
+		return -ENOMEM;
+
+	for (i = 0; i < num_banks; i++)
+		drv_data->offsets[i] = i * BANK_OFFSET_STRIDE;
+
+	drv_data->bcast_off = num_banks * BANK_OFFSET_STRIDE;
+
+	drv_data->bitmap = devm_kcalloc(dev,
+	BITS_TO_LONGS(drv_data->max_slices), sizeof(unsigned long),
+						GFP_KERNEL);
+	if (!drv_data->bitmap)
+		return -ENOMEM;
+
+	drv_data->cfg = llcc_cfg;
+	drv_data->cfg_size = sz;
+	mutex_init(&drv_data->lock);
+	platform_set_drvdata(pdev, drv_data);
+
+	return qcom_llcc_cfg_program(pdev);
+}
+EXPORT_SYMBOL_GPL(qcom_llcc_probe);
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
new file mode 100644
index 000000000000..7e3b9c605ab2
--- /dev/null
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
+ *
+ */
+
+#include <linux/platform_device.h>
+#ifndef __LLCC_QCOM__
+#define __LLCC_QCOM__
+
+#define LLCC_CPUSS       1
+#define LLCC_VIDSC0      2
+#define LLCC_VIDSC1      3
+#define LLCC_ROTATOR     4
+#define LLCC_VOICE       5
+#define LLCC_AUDIO       6
+#define LLCC_MDMHPGRW    7
+#define LLCC_MDM         8
+#define LLCC_CMPT        10
+#define LLCC_GPUHTW      11
+#define LLCC_GPU         12
+#define LLCC_MMUHWT      13
+#define LLCC_CMPTDMA     15
+#define LLCC_DISP        16
+#define LLCC_VIDFW       17
+#define LLCC_MDMHPFX     20
+#define LLCC_MDMPNG      21
+#define LLCC_AUDHW       22
+
+/**
+ * llcc_slice_desc - Cache slice descriptor
+ * @slice_id: llcc slice id
+ * @slice_size: Size allocated for the llcc slice
+ */
+struct llcc_slice_desc {
+	u32 slice_id;
+	size_t slice_size;
+};
+
+/**
+ * llcc_slice_config - Data associated with the llcc slice
+ * @usecase_id: usecase id for which the llcc slice is used
+ * @slice_id: llcc slice id assigned to each slice
+ * @max_cap: maximum capacity of the llcc slice
+ * @priority: priority of the llcc slice
+ * @fixed_size: whether the llcc slice can grow beyond its size
+ * @bonus_ways: bonus ways associated with llcc slice
+ * @res_ways: reserved ways associated with llcc slice
+ * @cache_mode: mode of the llcc slice
+ * @probe_target_ways: Probe only reserved and bonus ways on a cache miss
+ * @dis_cap_alloc: Disable capacity based allocation
+ * @retain_on_pc: Retain through power collapse
+ * @activate_on_init: activate the slice on init
+ */
+struct llcc_slice_config {
+	u32 usecase_id;
+	u32 slice_id;
+	u32 max_cap;
+	u32 priority;
+	bool fixed_size;
+	u32 bonus_ways;
+	u32 res_ways;
+	u32 cache_mode;
+	u32 probe_target_ways;
+	bool dis_cap_alloc;
+	bool retain_on_pc;
+	bool activate_on_init;
+};
+
+/**
+ * llcc_drv_data - Data associated with the llcc driver
+ * @regmap: regmap associated with the llcc device
+ * @cfg: pointer to the data structure for slice configuration
+ * @lock: mutex associated with each slice
+ * @cfg_size: size of the config data table
+ * @max_slices: max slices as read from device tree
+ * @bcast_off: Offset of the broadcast bank
+ * @num_banks: Number of llcc banks
+ * @bitmap: Bit map to track the active slice ids
+ * @offsets: Pointer to the bank offsets array
+ */
+struct llcc_drv_data {
+	struct regmap *regmap;
+	const struct llcc_slice_config *cfg;
+	struct mutex lock;
+	u32 cfg_size;
+	u32 max_slices;
+	u32 bcast_off;
+	u32 num_banks;
+	unsigned long *bitmap;
+	u32 *offsets;
+};
+
+#if IS_ENABLED(CONFIG_QCOM_LLCC)
+/**
+ * llcc_slice_getd - get llcc slice descriptor
+ * @uid: usecase_id of the client
+ */
+struct llcc_slice_desc *llcc_slice_getd(u32 uid);
+
+/**
+ * llcc_slice_putd - llcc slice descritpor
+ * @desc: Pointer to llcc slice descriptor
+ */
+void llcc_slice_putd(struct llcc_slice_desc *desc);
+
+/**
+ * llcc_get_slice_id - get slice id
+ * @desc: Pointer to llcc slice descriptor
+ */
+int llcc_get_slice_id(struct llcc_slice_desc *desc);
+
+/**
+ * llcc_get_slice_size - llcc slice size
+ * @desc: Pointer to llcc slice descriptor
+ */
+size_t llcc_get_slice_size(struct llcc_slice_desc *desc);
+
+/**
+ * llcc_slice_activate - Activate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ */
+int llcc_slice_activate(struct llcc_slice_desc *desc);
+
+/**
+ * llcc_slice_deactivate - Deactivate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ */
+int llcc_slice_deactivate(struct llcc_slice_desc *desc);
+
+/**
+ * qcom_llcc_probe - program the sct table
+ * @pdev: platform device pointer
+ * @table: soc sct table
+ * @sz: Size of the config table
+ */
+int qcom_llcc_probe(struct platform_device *pdev,
+		      const struct llcc_slice_config *table, u32 sz);
+#else
+static inline struct llcc_slice_desc *llcc_slice_getd(u32 uid)
+{
+	return NULL;
+}
+
+static inline void llcc_slice_putd(struct llcc_slice_desc *desc)
+{
+
+};
+
+static inline int llcc_get_slice_id(struct llcc_slice_desc *desc)
+{
+	return -EINVAL;
+}
+
+static inline size_t llcc_get_slice_size(struct llcc_slice_desc *desc)
+{
+	return 0;
+}
+static inline int llcc_slice_activate(struct llcc_slice_desc *desc)
+{
+	return -EINVAL;
+}
+
+static inline int llcc_slice_deactivate(struct llcc_slice_desc *desc)
+{
+	return -EINVAL;
+}
+static inline int qcom_llcc_probe(struct platform_device *pdev,
+		      const struct llcc_slice_config *table, u32 sz)
+{
+	return -ENODEV;
+}
+
+static inline int qcom_llcc_remove(struct platform_device *pdev)
+{
+	return -ENODEV;
+}
+#endif
+
+#endif
-- 
cgit v1.2.3


From a0b1561f846191a1967e8f5a26d8ef59c1b9162a Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@linaro.org>
Date: Tue, 12 Jun 2018 15:23:13 +0200
Subject: firmware: qcom: scm: add a dummy qcom_scm_assign_mem()

Add a dummy qcom_scm_assign_mem() to enable building drivers when
CONFIG_COMPILE_TEST=y && CONFIG_QCOM_SCM=n.

All other qcom_scm_* functions already have a dummy version.

Signed-off-by: Niklas Cassel <niklas.cassel@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 include/linux/qcom_scm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index b401b962afff..5d65521260b3 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -87,6 +87,10 @@ static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr,
 static inline int
 qcom_scm_pas_auth_and_reset(u32 peripheral) { return -ENODEV; }
 static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; }
+static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
+				      unsigned int *src,
+				      struct qcom_scm_vmperm *newvm,
+				      int dest_cnt) { return -ENODEV; }
 static inline void qcom_scm_cpu_power_down(u32 flags) {}
 static inline u32 qcom_scm_get_version(void) { return 0; }
 static inline u32
-- 
cgit v1.2.3


From 3928d4f5ee37cdc523894f6e549e6aae521d8980 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 21 Jul 2018 13:48:51 -0700
Subject: mm: use helper functions for allocating and freeing vm_area structs

The vm_area_struct is one of the most fundamental memory management
objects, but the management of it is entirely open-coded evertwhere,
ranging from allocation and freeing (using kmem_cache_[z]alloc and
kmem_cache_free) to initializing all the fields.

We want to unify this in order to end up having some unified
initialization of the vmas, and the first step to this is to at least
have basic allocation functions.

Right now those functions are literally just wrappers around the
kmem_cache_*() calls.  This is a purely mechanical conversion:

    # new vma:
    kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL) -> vm_area_alloc()

    # copy old vma
    kmem_cache_alloc(vm_area_cachep, GFP_KERNEL) -> vm_area_dup(old)

    # free vma
    kmem_cache_free(vm_area_cachep, vma) -> vm_area_free(vma)

to the point where the old vma passed in to the vm_area_dup() function
isn't even used yet (because I've left all the old manual initialization
alone).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/perfmon.c |  4 ++--
 arch/ia64/mm/init.c        |  8 ++++----
 fs/exec.c                  |  4 ++--
 include/linux/mm.h         |  4 +++-
 kernel/fork.c              | 21 ++++++++++++++++++---
 mm/mmap.c                  | 22 +++++++++++-----------
 mm/nommu.c                 |  8 ++++----
 7 files changed, 44 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 3b38c717008a..e859246badca 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2278,7 +2278,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	DPRINT(("smpl_buf @%p\n", smpl_buf));
 
 	/* allocate vma */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (!vma) {
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
@@ -2346,7 +2346,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	return 0;
 
 error:
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 error_kmem:
 	pfm_rvfree(smpl_buf, size);
 
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 18278b448530..3f2321bffb72 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -114,7 +114,7 @@ ia64_init_addr_space (void)
 	 * the problem.  When the process attempts to write to the register backing store
 	 * for the first time, it will get a SEGFAULT in this case.
 	 */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (vma) {
 		INIT_LIST_HEAD(&vma->anon_vma_chain);
 		vma->vm_mm = current->mm;
@@ -125,7 +125,7 @@ ia64_init_addr_space (void)
 		down_write(&current->mm->mmap_sem);
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, vma);
+			vm_area_free(vma);
 			return;
 		}
 		up_write(&current->mm->mmap_sem);
@@ -133,7 +133,7 @@ ia64_init_addr_space (void)
 
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
-		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+		vma = vm_area_alloc();
 		if (vma) {
 			INIT_LIST_HEAD(&vma->anon_vma_chain);
 			vma->vm_mm = current->mm;
@@ -144,7 +144,7 @@ ia64_init_addr_space (void)
 			down_write(&current->mm->mmap_sem);
 			if (insert_vm_struct(current->mm, vma)) {
 				up_write(&current->mm->mmap_sem);
-				kmem_cache_free(vm_area_cachep, vma);
+				vm_area_free(vma);
 				return;
 			}
 			up_write(&current->mm->mmap_sem);
diff --git a/fs/exec.c b/fs/exec.c
index 2d4e0075bd24..9bd83989ea25 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -290,7 +290,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	struct vm_area_struct *vma = NULL;
 	struct mm_struct *mm = bprm->mm;
 
-	bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	bprm->vma = vma = vm_area_alloc();
 	if (!vma)
 		return -ENOMEM;
 
@@ -326,7 +326,7 @@ err:
 	up_write(&mm->mmap_sem);
 err_free:
 	bprm->vma = NULL;
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 	return err;
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3982c83fdcbf..de2fd86c6154 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -155,7 +155,9 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
  * mmap() functions).
  */
 
-extern struct kmem_cache *vm_area_cachep;
+struct vm_area_struct *vm_area_alloc(void);
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *);
+void vm_area_free(struct vm_area_struct *);
 
 #ifndef CONFIG_MMU
 extern struct rb_root nommu_region_tree;
diff --git a/kernel/fork.c b/kernel/fork.c
index 9440d61b925c..0e23deb5acfc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -303,11 +303,26 @@ struct kmem_cache *files_cachep;
 struct kmem_cache *fs_cachep;
 
 /* SLAB cache for vm_area_struct structures */
-struct kmem_cache *vm_area_cachep;
+static struct kmem_cache *vm_area_cachep;
 
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+struct vm_area_struct *vm_area_alloc(void)
+{
+	return kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+}
+
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
+{
+	return kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+}
+
+void vm_area_free(struct vm_area_struct *vma)
+{
+	kmem_cache_free(vm_area_cachep, vma);
+}
+
 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
 	void *stack = task_stack_page(tsk);
@@ -455,7 +470,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 				goto fail_nomem;
 			charge = len;
 		}
-		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+		tmp = vm_area_dup(mpnt);
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
@@ -539,7 +554,7 @@ fail_uprobe_end:
 fail_nomem_anon_vma_fork:
 	mpol_put(vma_policy(tmp));
 fail_nomem_policy:
-	kmem_cache_free(vm_area_cachep, tmp);
+	vm_area_free(tmp);
 fail_nomem:
 	retval = -ENOMEM;
 	vm_unacct_memory(charge);
diff --git a/mm/mmap.c b/mm/mmap.c
index 5801b5f0a634..4286ad2dd1f5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -182,7 +182,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 	return next;
 }
 
@@ -911,7 +911,7 @@ again:
 			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
-		kmem_cache_free(vm_area_cachep, next);
+		vm_area_free(next);
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
 		 * we must remove another next too. It would clutter
@@ -1729,7 +1729,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	 * specific mapper. the address has already been validated, but
 	 * not unmapped, but the maps are removed from the list.
 	 */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (!vma) {
 		error = -ENOMEM;
 		goto unacct_error;
@@ -1832,7 +1832,7 @@ allow_write_and_free_vma:
 	if (vm_flags & VM_DENYWRITE)
 		allow_write_access(file);
 free_vma:
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 unacct_error:
 	if (charged)
 		vm_unacct_memory(charged);
@@ -2620,7 +2620,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 			return err;
 	}
 
-	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	new = vm_area_dup(vma);
 	if (!new)
 		return -ENOMEM;
 
@@ -2669,7 +2669,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  out_free_mpol:
 	mpol_put(vma_policy(new));
  out_free_vma:
-	kmem_cache_free(vm_area_cachep, new);
+	vm_area_free(new);
 	return err;
 }
 
@@ -2984,7 +2984,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 	/*
 	 * create a vma struct for an anonymous mapping
 	 */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (!vma) {
 		vm_unacct_memory(len >> PAGE_SHIFT);
 		return -ENOMEM;
@@ -3202,7 +3202,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		}
 		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
 	} else {
-		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+		new_vma = vm_area_dup(vma);
 		if (!new_vma)
 			goto out;
 		*new_vma = *vma;
@@ -3226,7 +3226,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 out_free_mempol:
 	mpol_put(vma_policy(new_vma));
 out_free_vma:
-	kmem_cache_free(vm_area_cachep, new_vma);
+	vm_area_free(new_vma);
 out:
 	return NULL;
 }
@@ -3350,7 +3350,7 @@ static struct vm_area_struct *__install_special_mapping(
 	int ret;
 	struct vm_area_struct *vma;
 
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (unlikely(vma == NULL))
 		return ERR_PTR(-ENOMEM);
 
@@ -3376,7 +3376,7 @@ static struct vm_area_struct *__install_special_mapping(
 	return vma;
 
 out:
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 	return ERR_PTR(ret);
 }
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 4452d8bd9ae4..006e3fe65017 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -769,7 +769,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	put_nommu_region(vma->vm_region);
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 }
 
 /*
@@ -1204,7 +1204,7 @@ unsigned long do_mmap(struct file *file,
 	if (!region)
 		goto error_getting_region;
 
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (!vma)
 		goto error_getting_vma;
 
@@ -1368,7 +1368,7 @@ error:
 	kmem_cache_free(vm_region_jar, region);
 	if (vma->vm_file)
 		fput(vma->vm_file);
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 	return ret;
 
 sharing_violation:
@@ -1469,7 +1469,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!region)
 		return -ENOMEM;
 
-	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	new = vm_area_dup(vma);
 	if (!new) {
 		kmem_cache_free(vm_region_jar, region);
 		return -ENOMEM;
-- 
cgit v1.2.3


From 490fc053865c9cc40f1085ef8a5504f5341f79d2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 21 Jul 2018 15:24:03 -0700
Subject: mm: make vm_area_alloc() initialize core fields

Like vm_area_dup(), it initializes the anon_vma_chain head, and the
basic mm pointer.

The rest of the fields end up being different for different users,
although the plan is to also initialize the 'vm_ops' field to a dummy
entry.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/perfmon.c |  4 +---
 arch/ia64/mm/init.c        |  8 ++------
 fs/exec.c                  |  4 +---
 include/linux/mm.h         |  2 +-
 kernel/fork.c              | 10 ++++++++--
 mm/mmap.c                  | 12 +++---------
 mm/nommu.c                 |  3 +--
 7 files changed, 17 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index e859246badca..46bff1661836 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2278,17 +2278,15 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	DPRINT(("smpl_buf @%p\n", smpl_buf));
 
 	/* allocate vma */
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(mm);
 	if (!vma) {
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
 	}
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	/*
 	 * partially initialize the vma for the sampling buffer
 	 */
-	vma->vm_mm	     = mm;
 	vma->vm_file	     = get_file(filp);
 	vma->vm_flags	     = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP;
 	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 3f2321bffb72..bdb14a369137 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -114,10 +114,8 @@ ia64_init_addr_space (void)
 	 * the problem.  When the process attempts to write to the register backing store
 	 * for the first time, it will get a SEGFAULT in this case.
 	 */
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(current->mm);
 	if (vma) {
-		INIT_LIST_HEAD(&vma->anon_vma_chain);
-		vma->vm_mm = current->mm;
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
 		vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
@@ -133,10 +131,8 @@ ia64_init_addr_space (void)
 
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
-		vma = vm_area_alloc();
+		vma = vm_area_alloc(current->mm);
 		if (vma) {
-			INIT_LIST_HEAD(&vma->anon_vma_chain);
-			vma->vm_mm = current->mm;
 			vma->vm_end = PAGE_SIZE;
 			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
 			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO |
diff --git a/fs/exec.c b/fs/exec.c
index 9bd83989ea25..72e961a62adb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -290,7 +290,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	struct vm_area_struct *vma = NULL;
 	struct mm_struct *mm = bprm->mm;
 
-	bprm->vma = vma = vm_area_alloc();
+	bprm->vma = vma = vm_area_alloc(mm);
 	if (!vma)
 		return -ENOMEM;
 
@@ -298,7 +298,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 		err = -EINTR;
 		goto err_free;
 	}
-	vma->vm_mm = mm;
 
 	/*
 	 * Place the stack at the largest stack address the architecture
@@ -311,7 +310,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
 	vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	err = insert_vm_struct(mm, vma);
 	if (err)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index de2fd86c6154..d3a3842316b8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -155,7 +155,7 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
  * mmap() functions).
  */
 
-struct vm_area_struct *vm_area_alloc(void);
+struct vm_area_struct *vm_area_alloc(struct mm_struct *);
 struct vm_area_struct *vm_area_dup(struct vm_area_struct *);
 void vm_area_free(struct vm_area_struct *);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 67253e41bfb0..a191c05e757d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -308,9 +308,15 @@ static struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
-struct vm_area_struct *vm_area_alloc(void)
+struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+
+	if (vma) {
+		vma->vm_mm = mm;
+		INIT_LIST_HEAD(&vma->anon_vma_chain);
+	}
+	return vma;
 }
 
 struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
diff --git a/mm/mmap.c b/mm/mmap.c
index b0ed8ce1b67e..ff1944d8d458 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1729,19 +1729,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	 * specific mapper. the address has already been validated, but
 	 * not unmapped, but the maps are removed from the list.
 	 */
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(mm);
 	if (!vma) {
 		error = -ENOMEM;
 		goto unacct_error;
 	}
 
-	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	if (file) {
 		if (vm_flags & VM_DENYWRITE) {
@@ -2979,14 +2977,12 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 	/*
 	 * create a vma struct for an anonymous mapping
 	 */
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(mm);
 	if (!vma) {
 		vm_unacct_memory(len >> PAGE_SHIFT);
 		return -ENOMEM;
 	}
 
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
-	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_pgoff = pgoff;
@@ -3343,12 +3339,10 @@ static struct vm_area_struct *__install_special_mapping(
 	int ret;
 	struct vm_area_struct *vma;
 
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(mm);
 	if (unlikely(vma == NULL))
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
-	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 
diff --git a/mm/nommu.c b/mm/nommu.c
index c2560e9cc803..1d22fdbf7d7c 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1204,7 +1204,7 @@ unsigned long do_mmap(struct file *file,
 	if (!region)
 		goto error_getting_region;
 
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(current->mm);
 	if (!vma)
 		goto error_getting_vma;
 
@@ -1212,7 +1212,6 @@ unsigned long do_mmap(struct file *file,
 	region->vm_flags = vm_flags;
 	region->vm_pgoff = pgoff;
 
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_flags = vm_flags;
 	vma->vm_pgoff = pgoff;
 
-- 
cgit v1.2.3


From f95de8aa9f824d96421cb7ca81552b4ad8768a31 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 19 Jul 2018 15:56:59 +0800
Subject: bpfilter: Fix mismatch in function argument types

Fix following warning:
net/ipv4/bpfilter/sockopt.c:28:5: error: symbol 'bpfilter_ip_set_sockopt' redeclared with different type
net/ipv4/bpfilter/sockopt.c:34:5: error: symbol 'bpfilter_ip_get_sockopt' redeclared with different type

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpfilter.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
index 687b1760bb9f..f02cee0225d4 100644
--- a/include/linux/bpfilter.h
+++ b/include/linux/bpfilter.h
@@ -5,10 +5,10 @@
 #include <uapi/linux/bpfilter.h>
 
 struct sock;
-int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval,
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
 			    unsigned int optlen);
-int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval,
-			    int *optlen);
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
+			    int __user *optlen);
 extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
 				       char __user *optval,
 				       unsigned int optlen, bool is_set);
-- 
cgit v1.2.3


From c4db9c1e8c70bc60e392da8a485bcfb035d559c2 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 20 Jul 2018 10:47:23 +0900
Subject: efi: Deduplicate efi_open_volume()

There's one ARM, one x86_32 and one x86_64 version of efi_open_volume()
which can be folded into a single shared version by masking their
differences with the efi_call_proto() macro introduced by commit:

  3552fdf29f01 ("efi: Allow bitness-agnostic protocol calls").

To be able to dereference the device_handle attribute from the
efi_loaded_image_t table in an arch- and bitness-agnostic manner,
introduce the efi_table_attr() macro (which already exists for x86)
to arm and arm64.

No functional change intended.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20180720014726.24031-7-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/include/asm/efi.h                     |  3 ++
 arch/arm64/include/asm/efi.h                   |  3 ++
 arch/x86/boot/compressed/eboot.c               | 63 --------------------------
 drivers/firmware/efi/libstub/arm-stub.c        | 25 ----------
 drivers/firmware/efi/libstub/efi-stub-helper.c | 31 ++++++++++++-
 drivers/firmware/efi/libstub/efistub.h         |  3 --
 include/linux/efi.h                            | 10 ++++
 7 files changed, 45 insertions(+), 93 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index 17f1f1a814ff..38badaae8d9d 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -58,6 +58,9 @@ void efi_virtmap_unload(void);
 #define efi_call_runtime(f, ...)	sys_table_arg->runtime->f(__VA_ARGS__)
 #define efi_is_64bit()			(false)
 
+#define efi_table_attr(table, attr, instance)				\
+	((table##_t *)instance)->attr
+
 #define efi_call_proto(protocol, f, instance, ...)			\
 	((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
 
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 192d791f1103..7ed320895d1f 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -87,6 +87,9 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base,
 #define efi_call_runtime(f, ...)	sys_table_arg->runtime->f(__VA_ARGS__)
 #define efi_is_64bit()			(true)
 
+#define efi_table_attr(table, attr, instance)				\
+	((table##_t *)instance)->attr
+
 #define efi_call_proto(protocol, f, instance, ...)			\
 	((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
 
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 92b573fd239c..915c64edbe8e 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -41,69 +41,6 @@ static void setup_boot_services##bits(struct efi_config *c)		\
 BOOT_SERVICES(32);
 BOOT_SERVICES(64);
 
-static inline efi_status_t __open_volume32(void *__image, void **__fh)
-{
-	efi_file_io_interface_t *io;
-	efi_loaded_image_32_t *image = __image;
-	efi_file_handle_32_t *fh;
-	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
-	efi_status_t status;
-	void *handle = (void *)(unsigned long)image->device_handle;
-	unsigned long func;
-
-	status = efi_call_early(handle_protocol, handle,
-				&fs_proto, (void **)&io);
-	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to handle fs_proto\n");
-		return status;
-	}
-
-	func = (unsigned long)io->open_volume;
-	status = efi_early->call(func, io, &fh);
-	if (status != EFI_SUCCESS)
-		efi_printk(sys_table, "Failed to open volume\n");
-
-	*__fh = fh;
-
-	return status;
-}
-
-static inline efi_status_t __open_volume64(void *__image, void **__fh)
-{
-	efi_file_io_interface_t *io;
-	efi_loaded_image_64_t *image = __image;
-	efi_file_handle_64_t *fh;
-	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
-	efi_status_t status;
-	void *handle = (void *)(unsigned long)image->device_handle;
-	unsigned long func;
-
-	status = efi_call_early(handle_protocol, handle,
-				&fs_proto, (void **)&io);
-	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to handle fs_proto\n");
-		return status;
-	}
-
-	func = (unsigned long)io->open_volume;
-	status = efi_early->call(func, io, &fh);
-	if (status != EFI_SUCCESS)
-		efi_printk(sys_table, "Failed to open volume\n");
-
-	*__fh = fh;
-
-	return status;
-}
-
-efi_status_t
-efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh)
-{
-	if (efi_early->is64)
-		return __open_volume64(__image, __fh);
-
-	return __open_volume32(__image, __fh);
-}
-
 void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
 {
 	efi_call_proto(efi_simple_text_output_protocol, output_string,
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index c98b1856fc3d..6920033de6d4 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -40,31 +40,6 @@
 
 static u64 virtmap_base = EFI_RT_VIRTUAL_BASE;
 
-efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg,
-			     void *__image, void **__fh)
-{
-	efi_file_io_interface_t *io;
-	efi_loaded_image_t *image = __image;
-	efi_file_handle_t *fh;
-	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
-	efi_status_t status;
-	void *handle = (void *)(unsigned long)image->device_handle;
-
-	status = sys_table_arg->boottime->handle_protocol(handle,
-				 &fs_proto, (void **)&io);
-	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table_arg, "Failed to handle fs_proto\n");
-		return status;
-	}
-
-	status = io->open_volume(io, &fh);
-	if (status != EFI_SUCCESS)
-		efi_printk(sys_table_arg, "Failed to open volume\n");
-
-	*__fh = fh;
-	return status;
-}
-
 void efi_char16_printk(efi_system_table_t *sys_table_arg,
 			      efi_char16_t *str)
 {
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 50a9cab5a834..e94975f4655b 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -413,6 +413,34 @@ static efi_status_t efi_file_close(void *handle)
 	return efi_call_proto(efi_file_handle, close, handle);
 }
 
+static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg,
+				    efi_loaded_image_t *image,
+				    efi_file_handle_t **__fh)
+{
+	efi_file_io_interface_t *io;
+	efi_file_handle_t *fh;
+	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
+	efi_status_t status;
+	void *handle = (void *)(unsigned long)efi_table_attr(efi_loaded_image,
+							     device_handle,
+							     image);
+
+	status = efi_call_early(handle_protocol, handle,
+				&fs_proto, (void **)&io);
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table_arg, "Failed to handle fs_proto\n");
+		return status;
+	}
+
+	status = efi_call_proto(efi_file_io_interface, open_volume, io, &fh);
+	if (status != EFI_SUCCESS)
+		efi_printk(sys_table_arg, "Failed to open volume\n");
+	else
+		*__fh = fh;
+
+	return status;
+}
+
 /*
  * Parse the ASCII string 'cmdline' for EFI options, denoted by the efi=
  * option, e.g. efi=nochunk.
@@ -563,8 +591,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 
 		/* Only open the volume once. */
 		if (!i) {
-			status = efi_open_volume(sys_table_arg, image,
-						 (void **)&fh);
+			status = efi_open_volume(sys_table_arg, image, &fh);
 			if (status != EFI_SUCCESS)
 				goto free_files;
 		}
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index f59564b72ddc..32799cf039ef 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -36,9 +36,6 @@ extern int __pure is_quiet(void);
 
 void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
 
-efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image,
-			     void **__fh);
-
 unsigned long get_dram_base(efi_system_table_t *sys_table_arg);
 
 efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index e190652f5ef9..401e4b254e30 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -894,6 +894,16 @@ typedef struct _efi_file_handle {
 	void *flush;
 } efi_file_handle_t;
 
+typedef struct {
+	u64 revision;
+	u32 open_volume;
+} efi_file_io_interface_32_t;
+
+typedef struct {
+	u64 revision;
+	u64 open_volume;
+} efi_file_io_interface_64_t;
+
 typedef struct _efi_file_io_interface {
 	u64 revision;
 	int (*open_volume)(struct _efi_file_io_interface *,
-- 
cgit v1.2.3


From f88a333b44318643282b8acc92af90deda441f5e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Sun, 22 Jul 2018 15:07:11 +0100
Subject: alpha: fix osf_wait4() breakage

kernel_wait4() expects a userland address for status - it's only
rusage that goes as a kernel one (and needs a copyout afterwards)

[ Also, fix the prototype of kernel_wait4() to have that __user
  annotation   - Linus ]

Fixes: 92ebce5ac55d ("osf_wait4: switch to kernel_wait4()")
Cc: stable@kernel.org # v4.13+
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/osf_sys.c | 5 +----
 include/linux/sched/task.h  | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 6e921754c8fc..c210a25dd6da 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1180,13 +1180,10 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
 		struct rusage32 __user *, ur)
 {
-	unsigned int status = 0;
 	struct rusage r;
-	long err = kernel_wait4(pid, &status, options, &r);
+	long err = kernel_wait4(pid, ustatus, options, &r);
 	if (err <= 0)
 		return err;
-	if (put_user(status, ustatus))
-		return -EFAULT;
 	if (!ur)
 		return err;
 	if (put_tv_to_tv32(&ur->ru_utime, &r.ru_utime))
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 5be31eb7b266..108ede99e533 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -75,7 +75,7 @@ extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *,
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-extern long kernel_wait4(pid_t, int *, int, struct rusage *);
+extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
 
 extern void free_task(struct task_struct *tsk);
 
-- 
cgit v1.2.3


From d5e838275c80aeb96d4c0b81442a851dfef2e948 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 5 Jun 2018 13:21:26 +1000
Subject: devres: Add devm_of_iomap()

There are still quite a few cases where a device might want
to get to a different node of the device-tree, obtain the
resources and map them.

We have of_iomap() and of_io_request_and_map() but they both
have shortcomings, such as not returning the size of the
resource found (which can be useful) and not being "managed".

This adds a devm_of_iomap() that provides all of these and
should probably replace uses of the above in most drivers.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>
---
 include/linux/device.h |  4 ++++
 lib/devres.c           | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..6aa8d51eabe9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -696,6 +696,10 @@ extern void devm_free_pages(struct device *dev, unsigned long addr);
 
 void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
 
+void __iomem *devm_of_iomap(struct device *dev,
+			    struct device_node *node, int index,
+			    resource_size_t *size);
+
 /* allows to add/remove a custom action to devres stack */
 int devm_add_action(struct device *dev, void (*action)(void *), void *data);
 void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
diff --git a/lib/devres.c b/lib/devres.c
index 5bec1120b392..faccf1a037d0 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -4,6 +4,7 @@
 #include <linux/io.h>
 #include <linux/gfp.h>
 #include <linux/export.h>
+#include <linux/of_address.h>
 
 enum devm_ioremap_type {
 	DEVM_IOREMAP = 0,
@@ -162,6 +163,41 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
 }
 EXPORT_SYMBOL(devm_ioremap_resource);
 
+/*
+ * devm_of_iomap - Requests a resource and maps the memory mapped IO
+ *		   for a given device_node managed by a given device
+ *
+ * Checks that a resource is a valid memory region, requests the memory
+ * region and ioremaps it. All operations are managed and will be undone
+ * on driver detach of the device.
+ *
+ * This is to be used when a device requests/maps resources described
+ * by other device tree nodes (children or otherwise).
+ *
+ * @dev:	The device "managing" the resource
+ * @node:       The device-tree node where the resource resides
+ * @index:	index of the MMIO range in the "reg" property
+ * @size:	Returns the size of the resource (pass NULL if not needed)
+ * Returns a pointer to the requested and mapped memory or an ERR_PTR() encoded
+ * error code on failure. Usage example:
+ *
+ *	base = devm_of_iomap(&pdev->dev, node, 0, NULL);
+ *	if (IS_ERR(base))
+ *		return PTR_ERR(base);
+ */
+void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index,
+			    resource_size_t *size)
+{
+	struct resource res;
+
+	if (of_address_to_resource(node, index, &res))
+		return IOMEM_ERR_PTR(-EINVAL);
+	if (size)
+		*size = resource_size(&res);
+	return devm_ioremap_resource(dev, &res);
+}
+EXPORT_SYMBOL(devm_of_iomap);
+
 #ifdef CONFIG_HAS_IOPORT_MAP
 /*
  * Generic iomap devres
-- 
cgit v1.2.3


From 40c6f9c28ef03f2f2c3ee58c2447a6e6b9a713f2 Mon Sep 17 00:00:00 2001
From: Revanth Rajashekar <revanth.rajashekar@intel.com>
Date: Fri, 15 Jun 2018 12:39:27 -0600
Subject: nvme.h: resync with nvme-cli

Added some feature ids present in nvme-cli but not kernel.

Signed-off-by: Revanth Rajashekar <revanth.rajashekar@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2950ce957656..80dfedcf0bf7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -749,6 +749,11 @@ enum {
 	NVME_FEAT_HOST_MEM_BUF	= 0x0d,
 	NVME_FEAT_TIMESTAMP	= 0x0e,
 	NVME_FEAT_KATO		= 0x0f,
+	NVME_FEAT_HCTM		= 0x10,
+	NVME_FEAT_NOPSC		= 0x11,
+	NVME_FEAT_RRL		= 0x12,
+	NVME_FEAT_PLM_CONFIG	= 0x13,
+	NVME_FEAT_PLM_WINDOW	= 0x14,
 	NVME_FEAT_SW_PROGRESS	= 0x80,
 	NVME_FEAT_HOST_ID	= 0x81,
 	NVME_FEAT_RESV_MASK	= 0x82,
-- 
cgit v1.2.3


From 1a8861f117608e269647fb09658ce77bd5899d42 Mon Sep 17 00:00:00 2001
From: "Daniel M. Lambea" <dmlambea@gmail.com>
Date: Tue, 17 Jul 2018 22:35:36 +0100
Subject: HID: cougar: make compare_device_paths reusable

The function compare_device_paths from wacom_sys.c is generic
and useful for other drivers. Move the function to hid-core and
rename it as hid_compare_device_paths.

Signed-off-by: Daniel M. Lambea <dmlambea@gmail.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c  | 23 +++++++++++++++++++++++
 drivers/hid/wacom_sys.c | 18 +++---------------
 include/linux/hid.h     |  2 ++
 3 files changed, 28 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 3942ee61bd1c..402ad974b31c 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1939,6 +1939,29 @@ static int hid_bus_match(struct device *dev, struct device_driver *drv)
 	return hid_match_device(hdev, hdrv) != NULL;
 }
 
+/**
+ * hid_compare_device_paths - check if both devices share the same path
+ * @hdev_a: hid device
+ * @hdev_b: hid device
+ * @separator: char to use as separator
+ *
+ * Check if two devices share the same path up to the last occurrence of
+ * the separator char. Both paths must exist (i.e., zero-length paths
+ * don't match).
+ */
+bool hid_compare_device_paths(struct hid_device *hdev_a,
+			      struct hid_device *hdev_b, char separator)
+{
+	int n1 = strrchr(hdev_a->phys, separator) - hdev_a->phys;
+	int n2 = strrchr(hdev_b->phys, separator) - hdev_b->phys;
+
+	if (n1 != n2 || n1 <= 0 || n2 <= 0)
+		return false;
+
+	return !strncmp(hdev_a->phys, hdev_b->phys, n1);
+}
+EXPORT_SYMBOL_GPL(hid_compare_device_paths);
+
 static int hid_device_probe(struct device *dev)
 {
 	struct hid_driver *hdrv = to_hid_driver(dev->driver);
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index d6797535fff9..ffe59a19b3a3 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -703,18 +703,6 @@ struct wacom_hdev_data {
 static LIST_HEAD(wacom_udev_list);
 static DEFINE_MUTEX(wacom_udev_list_lock);
 
-static bool compare_device_paths(struct hid_device *hdev_a,
-		struct hid_device *hdev_b, char separator)
-{
-	int n1 = strrchr(hdev_a->phys, separator) - hdev_a->phys;
-	int n2 = strrchr(hdev_b->phys, separator) - hdev_b->phys;
-
-	if (n1 != n2 || n1 <= 0 || n2 <= 0)
-		return false;
-
-	return !strncmp(hdev_a->phys, hdev_b->phys, n1);
-}
-
 static bool wacom_are_sibling(struct hid_device *hdev,
 		struct hid_device *sibling)
 {
@@ -737,10 +725,10 @@ static bool wacom_are_sibling(struct hid_device *hdev,
 	 * the same physical parent device path.
 	 */
 	if (hdev->vendor == sibling->vendor && hdev->product == sibling->product) {
-		if (!compare_device_paths(hdev, sibling, '/'))
+		if (!hid_compare_device_paths(hdev, sibling, '/'))
 			return false;
 	} else {
-		if (!compare_device_paths(hdev, sibling, '.'))
+		if (!hid_compare_device_paths(hdev, sibling, '.'))
 			return false;
 	}
 
@@ -787,7 +775,7 @@ static struct wacom_hdev_data *wacom_get_hdev_data(struct hid_device *hdev)
 
 	/* Try to find an already-probed interface from the same device */
 	list_for_each_entry(data, &wacom_udev_list, list) {
-		if (compare_device_paths(hdev, data->dev, '/')) {
+		if (hid_compare_device_paths(hdev, data->dev, '/')) {
 			kref_get(&data->kref);
 			return data;
 		}
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 773bcb1d4044..938d9ba6d7cd 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -894,6 +894,8 @@ const struct hid_device_id *hid_match_id(const struct hid_device *hdev,
 					 const struct hid_device_id *id);
 const struct hid_device_id *hid_match_device(struct hid_device *hdev,
 					     struct hid_driver *hdrv);
+bool hid_compare_device_paths(struct hid_device *hdev_a,
+			      struct hid_device *hdev_b, char separator);
 s32 hid_snto32(__u32 value, unsigned n);
 __u32 hid_field_extract(const struct hid_device *hid, __u8 *report,
 		     unsigned offset, unsigned n);
-- 
cgit v1.2.3


From 977d5ad39f3ea12ac0bd51d75020cea5ecdca235 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 17 Jul 2018 17:19:11 +0300
Subject: ACPI: Convert ACPI reference args to generic fwnode reference args

Convert all users of struct acpi_reference_args to more generic
fwnode_reference_args. This will

 1) avoid an ACPI specific references to device nodes with integer
    arguments as well as

 2) allow making references to nodes other than device nodes in ACPI.

As a by-product, convert the fwnode interger arguments to u64. The
arguments were 64-bit integers on ACPI but the fwnode arguments were
just 32-bit.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c                           | 36 ++++++++---------------
 drivers/gpio/gpiolib-acpi.c                       | 11 ++++---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c        | 10 +++----
 drivers/media/v4l2-core/v4l2-fwnode.c             |  2 +-
 drivers/net/ethernet/apm/xgene/xgene_enet_hw.c    |  6 ++--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c |  6 ++--
 drivers/net/ethernet/hisilicon/hns/hns_enet.c     |  8 +++--
 include/linux/acpi.h                              | 17 ++++-------
 include/linux/fwnode.h                            |  2 +-
 9 files changed, 43 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 5815356ea6ad..3fa40010fd67 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -579,7 +579,7 @@ static int acpi_data_get_property_array(const struct acpi_device_data *data,
  */
 int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 	const char *propname, size_t index, size_t num_args,
-	struct acpi_reference_args *args)
+	struct fwnode_reference_args *args)
 {
 	const union acpi_object *element, *end;
 	const union acpi_object *obj;
@@ -607,7 +607,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 		if (ret)
 			return ret == -ENODEV ? -EINVAL : ret;
 
-		args->adev = device;
+		args->fwnode = acpi_fwnode_handle(device);
 		args->nargs = 0;
 		return 0;
 	}
@@ -653,11 +653,11 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 					return -EINVAL;
 			}
 
-			if (nargs > MAX_ACPI_REFERENCE_ARGS)
+			if (nargs > NR_FWNODE_REFERENCE_ARGS)
 				return -EINVAL;
 
 			if (idx == index) {
-				args->adev = device;
+				args->fwnode = acpi_fwnode_handle(device);
 				args->nargs = nargs;
 				for (i = 0; i < nargs; i++)
 					args->args[i] = element[i].integer.value;
@@ -1089,7 +1089,7 @@ int acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode,
 {
 	struct fwnode_handle *fwnode;
 	unsigned int port_nr, endpoint_nr;
-	struct acpi_reference_args args;
+	struct fwnode_reference_args args;
 	int ret;
 
 	memset(&args, 0, sizeof(args));
@@ -1098,6 +1098,10 @@ int acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode,
 	if (ret)
 		return ret;
 
+	/* Ensure this is a device node. */
+	if (!is_acpi_device_node(args.fwnode))
+		return -ENODEV;
+
 	/*
 	 * Always require two arguments with the reference: port and
 	 * endpoint indices.
@@ -1105,7 +1109,7 @@ int acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode,
 	if (args.nargs != 2)
 		return -EPROTO;
 
-	fwnode = acpi_fwnode_handle(args.adev);
+	fwnode = args.fwnode;
 	port_nr = args.args[0];
 	endpoint_nr = args.args[1];
 
@@ -1209,24 +1213,8 @@ acpi_fwnode_get_reference_args(const struct fwnode_handle *fwnode,
 			       unsigned int args_count, unsigned int index,
 			       struct fwnode_reference_args *args)
 {
-	struct acpi_reference_args acpi_args;
-	unsigned int i;
-	int ret;
-
-	ret = __acpi_node_get_property_reference(fwnode, prop, index,
-						 args_count, &acpi_args);
-	if (ret < 0)
-		return ret;
-	if (!args)
-		return 0;
-
-	args->nargs = acpi_args.nargs;
-	args->fwnode = acpi_fwnode_handle(acpi_args.adev);
-
-	for (i = 0; i < NR_FWNODE_REFERENCE_ARGS; i++)
-		args->args[i] = i < acpi_args.nargs ? acpi_args.args[i] : 0;
-
-	return 0;
+	return __acpi_node_get_property_reference(fwnode, prop, index,
+						  args_count, args);
 }
 
 static struct fwnode_handle *
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index e2232cbcec8b..4e8fdae1cde4 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -353,7 +353,7 @@ EXPORT_SYMBOL_GPL(devm_acpi_dev_remove_driver_gpios);
 
 static bool acpi_get_driver_gpio_data(struct acpi_device *adev,
 				      const char *name, int index,
-				      struct acpi_reference_args *args,
+				      struct fwnode_reference_args *args,
 				      unsigned int *quirks)
 {
 	const struct acpi_gpio_mapping *gm;
@@ -365,7 +365,7 @@ static bool acpi_get_driver_gpio_data(struct acpi_device *adev,
 		if (!strcmp(name, gm->name) && gm->data && index < gm->size) {
 			const struct acpi_gpio_params *par = gm->data + index;
 
-			args->adev = adev;
+			args->fwnode = acpi_fwnode_handle(adev);
 			args->args[0] = par->crs_entry_index;
 			args->args[1] = par->line_index;
 			args->args[2] = par->active_low;
@@ -528,7 +528,7 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode,
 				     const char *propname, int index,
 				     struct acpi_gpio_lookup *lookup)
 {
-	struct acpi_reference_args args;
+	struct fwnode_reference_args args;
 	unsigned int quirks = 0;
 	int ret;
 
@@ -549,6 +549,8 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode,
 	 * The property was found and resolved, so need to lookup the GPIO based
 	 * on returned args.
 	 */
+	if (!to_acpi_device_node(args.fwnode))
+		return -EINVAL;
 	if (args.nargs != 3)
 		return -EPROTO;
 
@@ -556,8 +558,9 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode,
 	lookup->pin_index = args.args[1];
 	lookup->active_low = !!args.args[2];
 
-	lookup->info.adev = args.adev;
+	lookup->info.adev = to_acpi_device_node(args.fwnode);
 	lookup->info.quirks = quirks;
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 8013d69c5ac4..8444234ed092 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1435,7 +1435,7 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
 		}
 		fwnode = &dsaf_node->fwnode;
 	} else if (is_acpi_device_node(dev->fwnode)) {
-		struct acpi_reference_args args;
+		struct fwnode_reference_args args;
 
 		ret = acpi_node_get_property_reference(dev->fwnode,
 						       "dsaf-handle", 0, &args);
@@ -1443,7 +1443,7 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
 			dev_err(dev, "could not find dsaf-handle\n");
 			return ret;
 		}
-		fwnode = acpi_fwnode_handle(args.adev);
+		fwnode = args.fwnode;
 	} else {
 		dev_err(dev, "cannot read data from DT or ACPI\n");
 		return -ENXIO;
@@ -4835,16 +4835,14 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
 				continue;
 			pdev = of_find_device_by_node(net_node);
 		} else if (is_acpi_device_node(dev->fwnode)) {
-			struct acpi_reference_args args;
-			struct fwnode_handle *fwnode;
+			struct fwnode_reference_args args;
 
 			ret = acpi_node_get_property_reference(dev->fwnode,
 							       "eth-handle",
 							       i, &args);
 			if (ret)
 				continue;
-			fwnode = acpi_fwnode_handle(args.adev);
-			pdev = hns_roce_find_pdev(fwnode);
+			pdev = hns_roce_find_pdev(args.fwnode);
 		} else {
 			dev_err(dev, "cannot read data from DT or ACPI\n");
 			return -ENXIO;
diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c
index 3f77aa318035..82595cebc0b8 100644
--- a/drivers/media/v4l2-core/v4l2-fwnode.c
+++ b/drivers/media/v4l2-core/v4l2-fwnode.c
@@ -739,7 +739,7 @@ static struct fwnode_handle *v4l2_fwnode_reference_get_int_prop(
 	const char * const *props, unsigned int nprops)
 {
 	struct fwnode_reference_args fwnode_args;
-	unsigned int *args = fwnode_args.args;
+	u64 *args = fwnode_args.args;
 	struct fwnode_handle *child;
 	int ret;
 
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 3188f553da35..078a04dc1182 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -836,19 +836,19 @@ static void xgene_enet_adjust_link(struct net_device *ndev)
 #ifdef CONFIG_ACPI
 static struct acpi_device *acpi_phy_find_device(struct device *dev)
 {
-	struct acpi_reference_args args;
+	struct fwnode_reference_args args;
 	struct fwnode_handle *fw_node;
 	int status;
 
 	fw_node = acpi_fwnode_handle(ACPI_COMPANION(dev));
 	status = acpi_node_get_property_reference(fw_node, "phy-handle", 0,
 						  &args);
-	if (ACPI_FAILURE(status)) {
+	if (ACPI_FAILURE(status) || !is_acpi_device_node(args.fwnode)) {
 		dev_dbg(dev, "No matching phy in ACPI table\n");
 		return NULL;
 	}
 
-	return args.adev;
+	return to_acpi_device_node(args.fwnode);
 }
 #endif
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 9dcc5765f11f..794516718d9d 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -708,7 +708,7 @@ hns_mac_register_phydev(struct mii_bus *mdio, struct hns_mac_cb *mac_cb,
 
 static int hns_mac_register_phy(struct hns_mac_cb *mac_cb)
 {
-	struct acpi_reference_args args;
+	struct fwnode_reference_args args;
 	struct platform_device *pdev;
 	struct mii_bus *mii_bus;
 	int rc;
@@ -722,13 +722,15 @@ static int hns_mac_register_phy(struct hns_mac_cb *mac_cb)
 			mac_cb->fw_port, "mdio-node", 0, &args);
 	if (rc)
 		return rc;
+	if (!is_acpi_device_node(args.fwnode))
+		return -EINVAL;
 
 	addr = hns_mac_phy_parse_addr(mac_cb->dev, mac_cb->fw_port);
 	if (addr < 0)
 		return addr;
 
 	/* dev address in adev */
-	pdev = hns_dsaf_find_platform_device(acpi_fwnode_handle(args.adev));
+	pdev = hns_dsaf_find_platform_device(args.fwnode);
 	if (!pdev) {
 		dev_err(mac_cb->dev, "mac%d mdio pdev is NULL\n",
 			mac_cb->mac_id);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index ef9ef703d13a..5608f807d7ba 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2377,7 +2377,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 		}
 		priv->fwnode = &ae_node->fwnode;
 	} else if (is_acpi_node(dev->fwnode)) {
-		struct acpi_reference_args args;
+		struct fwnode_reference_args args;
 
 		if (acpi_dev_found(hns_enet_acpi_match[0].id))
 			priv->enet_ver = AE_VERSION_1;
@@ -2393,7 +2393,11 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 			dev_err(dev, "not find ae-handle\n");
 			goto out_read_prop_fail;
 		}
-		priv->fwnode = acpi_fwnode_handle(args.adev);
+		if (!is_acpi_device_node(args.fwnode)) {
+			ret = -EINVAL;
+			goto out_read_prop_fail;
+		}
+		priv->fwnode = args.fwnode;
 	} else {
 		dev_err(dev, "cannot read cfg data from OF or acpi\n");
 		return -ENXIO;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e54f40974eb0..11cf39719fd8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1058,27 +1058,20 @@ static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
 
 /* Device properties */
 
-#define MAX_ACPI_REFERENCE_ARGS	8
-struct acpi_reference_args {
-	struct acpi_device *adev;
-	size_t nargs;
-	u64 args[MAX_ACPI_REFERENCE_ARGS];
-};
-
 #ifdef CONFIG_ACPI
 int acpi_dev_get_property(const struct acpi_device *adev, const char *name,
 			  acpi_object_type type, const union acpi_object **obj);
 int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 				const char *name, size_t index, size_t num_args,
-				struct acpi_reference_args *args);
+				struct fwnode_reference_args *args);
 
 static inline int acpi_node_get_property_reference(
 				const struct fwnode_handle *fwnode,
 				const char *name, size_t index,
-				struct acpi_reference_args *args)
+				struct fwnode_reference_args *args)
 {
 	return __acpi_node_get_property_reference(fwnode, name, index,
-		MAX_ACPI_REFERENCE_ARGS, args);
+		NR_FWNODE_REFERENCE_ARGS, args);
 }
 
 int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname,
@@ -1169,7 +1162,7 @@ static inline int acpi_dev_get_property(struct acpi_device *adev,
 static inline int
 __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 				const char *name, size_t index, size_t num_args,
-				struct acpi_reference_args *args)
+				struct fwnode_reference_args *args)
 {
 	return -ENXIO;
 }
@@ -1177,7 +1170,7 @@ __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 static inline int
 acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 				 const char *name, size_t index,
-				 struct acpi_reference_args *args)
+				 struct fwnode_reference_args *args)
 {
 	return -ENXIO;
 }
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 4fe8f289b3f6..faebf0ca0686 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -45,7 +45,7 @@ struct fwnode_endpoint {
 struct fwnode_reference_args {
 	struct fwnode_handle *fwnode;
 	unsigned int nargs;
-	unsigned int args[NR_FWNODE_REFERENCE_ARGS];
+	u64 args[NR_FWNODE_REFERENCE_ARGS];
 };
 
 /**
-- 
cgit v1.2.3


From 0ef7478639c5165a672f01b4024aacfffa951813 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 17 Jul 2018 17:19:14 +0300
Subject: ACPI: property: Make the ACPI graph API private

The fwnode graph API is preferred over the ACPI graph API. Therefore
make the ACPI graph API private, and use it as a back-end for the
fwnode graph API only.

Unused functionality is removed while the functionality actually used
remains the same.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 83 ++++++++++---------------------------------------
 include/linux/acpi.h    |  8 -----
 2 files changed, 16 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 5878d3678b38..19bdada64435 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1033,10 +1033,10 @@ struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode)
  * @prev: Previous endpoint node or %NULL to get the first
  *
  * Looks up next endpoint ACPI firmware node below a given @fwnode. Returns
- * %NULL if there is no next endpoint, ERR_PTR() in case of error. In case
- * of success the next endpoint is returned.
+ * %NULL if there is no next endpoint or in case of error. In case of success
+ * the next endpoint is returned.
  */
-struct fwnode_handle *acpi_graph_get_next_endpoint(
+static struct fwnode_handle *acpi_graph_get_next_endpoint(
 	const struct fwnode_handle *fwnode, struct fwnode_handle *prev)
 {
 	struct fwnode_handle *port = NULL;
@@ -1065,11 +1065,9 @@ struct fwnode_handle *acpi_graph_get_next_endpoint(
 			endpoint = fwnode_get_next_child_node(port, NULL);
 	}
 
-	if (endpoint) {
-		/* Endpoints must have "endpoint" property */
-		if (!fwnode_property_present(endpoint, "endpoint"))
-			return ERR_PTR(-EPROTO);
-	}
+	/* Endpoints must have "endpoint" property */
+	if (!fwnode_property_present(endpoint, "endpoint"))
+		return NULL;
 
 	return endpoint;
 }
@@ -1106,18 +1104,12 @@ static struct fwnode_handle *acpi_graph_get_child_prop_value(
 /**
  * acpi_graph_get_remote_enpoint - Parses and returns remote end of an endpoint
  * @fwnode: Endpoint firmware node pointing to a remote device
- * @parent: Firmware node of remote port parent is filled here if not %NULL
- * @port: Firmware node of remote port is filled here if not %NULL
  * @endpoint: Firmware node of remote endpoint is filled here if not %NULL
  *
- * Function parses remote end of ACPI firmware remote endpoint and fills in
- * fields requested by the caller. Returns %0 in case of success and
- * negative errno otherwise.
+ * Returns the remote endpoint corresponding to @__fwnode. NULL on error.
  */
-int acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode,
-				   struct fwnode_handle **parent,
-				   struct fwnode_handle **port,
-				   struct fwnode_handle **endpoint)
+static struct fwnode_handle *
+acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode)
 {
 	struct fwnode_handle *fwnode;
 	unsigned int port_nr, endpoint_nr;
@@ -1128,47 +1120,27 @@ int acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode,
 	ret = acpi_node_get_property_reference(__fwnode, "remote-endpoint", 0,
 					       &args);
 	if (ret)
-		return ret;
+		return NULL;
 
 	/* Ensure this is a device node. */
 	if (!is_acpi_device_node(args.fwnode))
-		return -ENODEV;
+		return NULL;
 
 	/*
 	 * Always require two arguments with the reference: port and
 	 * endpoint indices.
 	 */
 	if (args.nargs != 2)
-		return -EPROTO;
+		return NULL;
 
 	fwnode = args.fwnode;
 	port_nr = args.args[0];
 	endpoint_nr = args.args[1];
 
-	if (parent)
-		*parent = fwnode;
-
-	if (!port && !endpoint)
-		return 0;
-
 	fwnode = acpi_graph_get_child_prop_value(fwnode, "port", port_nr);
-	if (!fwnode)
-		return -EPROTO;
-
-	if (port)
-		*port = fwnode;
-
-	if (!endpoint)
-		return 0;
-
-	fwnode = acpi_graph_get_child_prop_value(fwnode, "endpoint",
-						 endpoint_nr);
-	if (!fwnode)
-		return -EPROTO;
 
-	*endpoint = fwnode;
-
-	return 0;
+	return acpi_graph_get_child_prop_value(fwnode, "endpoint",
+					       endpoint_nr);
 }
 
 static bool acpi_fwnode_device_is_available(const struct fwnode_handle *fwnode)
@@ -1232,29 +1204,6 @@ acpi_fwnode_get_reference_args(const struct fwnode_handle *fwnode,
 						  args_count, args);
 }
 
-static struct fwnode_handle *
-acpi_fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
-				    struct fwnode_handle *prev)
-{
-	struct fwnode_handle *endpoint;
-
-	endpoint = acpi_graph_get_next_endpoint(fwnode, prev);
-	if (IS_ERR(endpoint))
-		return NULL;
-
-	return endpoint;
-}
-
-static struct fwnode_handle *
-acpi_fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode)
-{
-	struct fwnode_handle *endpoint = NULL;
-
-	acpi_graph_get_remote_endpoint(fwnode, NULL, NULL, &endpoint);
-
-	return endpoint;
-}
-
 static struct fwnode_handle *
 acpi_fwnode_get_parent(struct fwnode_handle *fwnode)
 {
@@ -1295,9 +1244,9 @@ acpi_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
 		.get_named_child_node = acpi_fwnode_get_named_child_node, \
 		.get_reference_args = acpi_fwnode_get_reference_args,	\
 		.graph_get_next_endpoint =				\
-			acpi_fwnode_graph_get_next_endpoint,		\
+			acpi_graph_get_next_endpoint,			\
 		.graph_get_remote_endpoint =				\
-			acpi_fwnode_graph_get_remote_endpoint,		\
+			acpi_graph_get_remote_endpoint,			\
 		.graph_get_port_parent = acpi_fwnode_get_parent,	\
 		.graph_parse_endpoint = acpi_fwnode_graph_parse_endpoint, \
 	};								\
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 11cf39719fd8..de8d3d3fa651 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1089,14 +1089,6 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 					    struct fwnode_handle *child);
 struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode);
 
-struct fwnode_handle *
-acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
-			     struct fwnode_handle *prev);
-int acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode,
-				   struct fwnode_handle **remote,
-				   struct fwnode_handle **port,
-				   struct fwnode_handle **endpoint);
-
 struct acpi_probe_entry;
 typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *,
 						 struct acpi_probe_entry *);
-- 
cgit v1.2.3


From 0e3fd810c4f41dbd63fb7caddc11684959176727 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 10 Jul 2018 16:46:41 +0200
Subject: Documentation: document ktime_get_*() APIs

As Dave Chinner points out, we don't have a proper documentation for the
ktime_get() family of interfaces, making it rather unclear which of the
over 30 (!) interfaces one should actually use in a driver or elsewhere
in the kernel.

I wrote up an explanation from how I personally see the interfaces,
documenting what each of the functions do and hopefully making it a bit
clearer which should be used where.

This is the first time I tried writing .rst format documentation, so
in addition to any mistakes in the content, I probably also introduce
nonstandard formatting ;-)

I first tried to add an extra section to
Documentation/timers/timekeeping.txt, but this is currently not included
in the generated API, and it seems useful to have the API docs as part
of what gets generated in
https://www.kernel.org/doc/html/latest/core-api/index.html#core-utilities
instead, so I started a new file there.

I also considered adding the documentation inline in the
include/linux/timekeeping.h header, but couldn't figure out how to do
that in a way that would result both in helpful inline comments as
well as readable html output, so I settled for the latter, with
a small note pointing to it from the header.

Cc: Dave Chinner <david@fromorbit.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Stephen Boyd <sboyd@kernel.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/index.rst       |   1 +
 Documentation/core-api/timekeeping.rst | 185 +++++++++++++++++++++++++++++++++
 include/linux/timekeeping.h            |  15 +++
 3 files changed, 201 insertions(+)
 create mode 100644 Documentation/core-api/timekeeping.rst

(limited to 'include/linux')

diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index f5a66b72f984..989c97cc232a 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -28,6 +28,7 @@ Core utilities
    printk-formats
    circular-buffers
    gfp_mask-from-fs-io
+   timekeeping
 
 Interfaces for kernel debugging
 ===============================
diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
new file mode 100644
index 000000000000..93cbeb9daec0
--- /dev/null
+++ b/Documentation/core-api/timekeeping.rst
@@ -0,0 +1,185 @@
+ktime accessors
+===============
+
+Device drivers can read the current time using ktime_get() and the many
+related functions declared in linux/timekeeping.h. As a rule of thumb,
+using an accessor with a shorter name is preferred over one with a longer
+name if both are equally fit for a particular use case.
+
+Basic ktime_t based interfaces
+------------------------------
+
+The recommended simplest form returns an opaque ktime_t, with variants
+that return time for different clock references:
+
+
+.. c:function:: ktime_t ktime_get( void )
+
+	CLOCK_MONOTONIC
+
+	Useful for reliable timestamps and measuring short time intervals
+	accurately. Starts at system boot time but stops during suspend.
+
+.. c:function:: ktime_t ktime_get_boottime( void )
+
+	CLOCK_BOOTTIME
+
+	Like ktime_get(), but does not stop when suspended. This can be
+	used e.g. for key expiration times that need to be synchronized
+	with other machines across a suspend operation.
+
+.. c:function:: ktime_t ktime_get_real( void )
+
+	CLOCK_REALTIME
+
+	Returns the time in relative to the UNIX epoch starting in 1970
+	using the Coordinated Universal Time (UTC), same as gettimeofday()
+	user space. This is used for all timestamps that need to
+	persist across a reboot, like inode times, but should be avoided
+	for internal uses, since it can jump backwards due to a leap
+	second update, NTP adjustment settimeofday() operation from user
+	space.
+
+.. c:function:: ktime_t ktime_get_clocktai( void )
+
+	 CLOCK_TAI
+
+	Like ktime_get_real(), but uses the International Atomic Time (TAI)
+	reference instead of UTC to avoid jumping on leap second updates.
+	This is rarely useful in the kernel.
+
+.. c:function:: ktime_t ktime_get_raw( void )
+
+	CLOCK_MONOTONIC_RAW
+
+	Like ktime_get(), but runs at the same rate as the hardware
+	clocksource without (NTP) adjustments for clock drift. This is
+	also rarely needed in the kernel.
+
+nanosecond, timespec64, and second output
+-----------------------------------------
+
+For all of the above, there are variants that return the time in a
+different format depending on what is required by the user:
+
+.. c:function:: u64 ktime_get_ns( void )
+		u64 ktime_get_boottime_ns( void )
+		u64 ktime_get_real_ns( void )
+		u64 ktime_get_tai_ns( void )
+		u64 ktime_get_raw_ns( void )
+
+	Same as the plain ktime_get functions, but returning a u64 number
+	of nanoseconds in the respective time reference, which may be
+	more convenient for some callers.
+
+.. c:function:: void ktime_get_ts64( struct timespec64 * )
+		void ktime_get_boottime_ts64( struct timespec64 * )
+		void ktime_get_real_ts64( struct timespec64 * )
+		void ktime_get_clocktai_ts64( struct timespec64 * )
+		void ktime_get_raw_ts64( struct timespec64 * )
+
+	Same above, but returns the time in a 'struct timespec64', split
+	into seconds and nanoseconds. This can avoid an extra division
+	when printing the time, or when passing it into an external
+	interface that expects a 'timespec' or 'timeval' structure.
+
+.. c:function:: time64_t ktime_get_seconds( void )
+		time64_t ktime_get_boottime_seconds( void )
+		time64_t ktime_get_real_seconds( void )
+		time64_t ktime_get_clocktai_seconds( void )
+		time64_t ktime_get_raw_seconds( void )
+
+	Return a coarse-grained version of the time as a scalar
+	time64_t. This avoids accessing the clock hardware and rounds
+	down the seconds to the full seconds of the last timer tick
+	using the respective reference.
+
+Coarse and fast_ns access
+-------------------------
+
+Some additional variants exist for more specialized cases:
+
+.. c:function:: ktime_t ktime_get_coarse_boottime( void )
+		ktime_t ktime_get_coarse_real( void )
+		ktime_t ktime_get_coarse_clocktai( void )
+		ktime_t ktime_get_coarse_raw( void )
+
+.. c:function:: void ktime_get_coarse_ts64( struct timespec64 * )
+		void ktime_get_coarse_boottime_ts64( struct timespec64 * )
+		void ktime_get_coarse_real_ts64( struct timespec64 * )
+		void ktime_get_coarse_clocktai_ts64( struct timespec64 * )
+		void ktime_get_coarse_raw_ts64( struct timespec64 * )
+
+	These are quicker than the non-coarse versions, but less accurate,
+	corresponding to CLOCK_MONONOTNIC_COARSE and CLOCK_REALTIME_COARSE
+	in user space, along with the equivalent boottime/tai/raw
+	timebase not available in user space.
+
+	The time returned here corresponds to the last timer tick, which
+	may be as much as 10ms in the past (for CONFIG_HZ=100), same as
+	reading the 'jiffies' variable.  These are only useful when called
+	in a fast path and one still expects better than second accuracy,
+	but can't easily use 'jiffies', e.g. for inode timestamps.
+	Skipping the hardware clock access saves around 100 CPU cycles
+	on most modern machines with a reliable cycle counter, but
+	up to several microseconds on older hardware with an external
+	clocksource.
+
+.. c:function:: u64 ktime_get_mono_fast_ns( void )
+		u64 ktime_get_raw_fast_ns( void )
+		u64 ktime_get_boot_fast_ns( void )
+		u64 ktime_get_real_fast_ns( void )
+
+	These variants are safe to call from any context, including from
+	a non-maskable interrupt (NMI) during a timekeeper update, and
+	while we are entering suspend with the clocksource powered down.
+	This is useful in some tracing or debugging code as well as
+	machine check reporting, but most drivers should never call them,
+	since the time is allowed to jump under certain conditions.
+
+Deprecated time interfaces
+--------------------------
+
+Older kernels used some other interfaces that are now being phased out
+but may appear in third-party drivers being ported here. In particular,
+all interfaces returning a 'struct timeval' or 'struct timespec' have
+been replaced because the tv_sec member overflows in year 2038 on 32-bit
+architectures. These are the recommended replacements:
+
+.. c:function:: void ktime_get_ts( struct timespec * )
+
+	Use ktime_get() or ktime_get_ts64() instead.
+
+.. c:function:: struct timeval do_gettimeofday( void )
+		struct timespec getnstimeofday( void )
+		struct timespec64 getnstimeofday64( void )
+		void ktime_get_real_ts( struct timespec * )
+
+	ktime_get_real_ts64() is a direct replacement, but consider using
+	monotonic time (ktime_get_ts64()) and/or a ktime_t based interface
+	(ktime_get()/ktime_get_real()).
+
+.. c:function:: struct timespec current_kernel_time( void )
+		struct timespec64 current_kernel_time64( void )
+		struct timespec get_monotonic_coarse( void )
+		struct timespec64 get_monotonic_coarse64( void )
+
+	These are replaced by ktime_get_coarse_real_ts64() and
+	ktime_get_coarse_ts64(). However, A lot of code that wants
+	coarse-grained times can use the simple 'jiffies' instead, while
+	some drivers may actually want the higher resolution accessors
+	these days.
+
+.. c:function:: struct timespec getrawmonotonic( void )
+		struct timespec64 getrawmonotonic64( void )
+		struct timespec timekeeping_clocktai( void )
+		struct timespec64 timekeeping_clocktai64( void )
+		struct timespec get_monotonic_boottime( void )
+		struct timespec64 get_monotonic_boottime64( void )
+
+	These are replaced by ktime_get_raw()/ktime_get_raw_ts64(),
+	ktime_get_clocktai()/ktime_get_clocktai_ts64() as well
+	as ktime_get_boottime()/ktime_get_boottime_ts64().
+	However, if the particular choice of clock source is not
+	important for the user, consider converting to
+	ktime_get()/ktime_get_ts64() instead for consistency.
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 86bc2026efce..947b1b8d2d01 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -20,6 +20,21 @@ extern int do_settimeofday64(const struct timespec64 *ts);
 extern int do_sys_settimeofday64(const struct timespec64 *tv,
 				 const struct timezone *tz);
 
+/*
+ * ktime_get() family: read the current time in a multitude of ways,
+ *
+ * The default time reference is CLOCK_MONOTONIC, starting at
+ * boot time but not counting the time spent in suspend.
+ * For other references, use the functions with "real", "clocktai",
+ * "boottime" and "raw" suffixes.
+ *
+ * To get the time in a different format, use the ones wit
+ * "ns", "ts64" and "seconds" suffix.
+ *
+ * See Documentation/core-api/timekeeping.rst for more details.
+ */
+
+
 /*
  * timespec64 based interfaces
  */
-- 
cgit v1.2.3


From c2a7d2a115525d3501d38e23d24875a79a07e15e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:50:16 -0700
Subject: filesystem-dax: Introduce dax_lock_mapping_entry()

In preparation for implementing support for memory poison (media error)
handling via dax mappings, implement a lock_page() equivalent. Poison
error handling requires rmap and needs guarantees that the page->mapping
association is maintained / valid (inode not freed) for the duration of
the lookup.

In the device-dax case it is sufficient to simply hold a dev_pagemap
reference. In the filesystem-dax case we need to use the entry lock.

Export the entry lock via dax_lock_mapping_entry() that uses
rcu_read_lock() to protect against the inode being freed, and
revalidates the page->mapping association under xa_lock().

Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 fs/dax.c            | 109 +++++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/dax.h |  13 +++++++
 2 files changed, 116 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dax.c b/fs/dax.c
index 4de11ed463ce..57ec272038da 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-					pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp, bool (*wait_fn)(void))
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
+		bool revalidate;
+
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		xa_unlock_irq(&mapping->i_pages);
-		schedule();
+		revalidate = wait_fn();
 		finish_wait(wq, &ewait.wait);
 		xa_lock_irq(&mapping->i_pages);
+		if (revalidate)
+			return ERR_PTR(-EAGAIN);
 	}
 }
 
-static void dax_unlock_mapping_entry(struct address_space *mapping,
-				     pgoff_t index)
+static bool entry_wait(void)
+{
+	schedule();
+	/*
+	 * Never return an ERR_PTR() from
+	 * __get_unlocked_mapping_entry(), just keep looping.
+	 */
+	return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp)
+{
+	return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
 	void *entry, **slot;
 
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 static void put_locked_mapping_entry(struct address_space *mapping,
 		pgoff_t index)
 {
-	dax_unlock_mapping_entry(mapping, index);
+	unlock_mapping_entry(mapping, index);
 }
 
 /*
@@ -374,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
+static bool entry_wait_revalidate(void)
+{
+	rcu_read_unlock();
+	schedule();
+	rcu_read_lock();
+
+	/*
+	 * Tell __get_unlocked_mapping_entry() to take a break, we need
+	 * to revalidate page->mapping after dropping locks
+	 */
+	return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+	pgoff_t index;
+	struct inode *inode;
+	bool did_lock = false;
+	void *entry = NULL, **slot;
+	struct address_space *mapping;
+
+	rcu_read_lock();
+	for (;;) {
+		mapping = READ_ONCE(page->mapping);
+
+		if (!dax_mapping(mapping))
+			break;
+
+		/*
+		 * In the device-dax case there's no need to lock, a
+		 * struct dev_pagemap pin is sufficient to keep the
+		 * inode alive, and we assume we have dev_pagemap pin
+		 * otherwise we would not have a valid pfn_to_page()
+		 * translation.
+		 */
+		inode = mapping->host;
+		if (S_ISCHR(inode->i_mode)) {
+			did_lock = true;
+			break;
+		}
+
+		xa_lock_irq(&mapping->i_pages);
+		if (mapping != page->mapping) {
+			xa_unlock_irq(&mapping->i_pages);
+			continue;
+		}
+		index = page->index;
+
+		entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+				entry_wait_revalidate);
+		if (!entry) {
+			xa_unlock_irq(&mapping->i_pages);
+			break;
+		} else if (IS_ERR(entry)) {
+			WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+			continue;
+		}
+		lock_slot(mapping, slot);
+		did_lock = true;
+		xa_unlock_irq(&mapping->i_pages);
+		break;
+	}
+	rcu_read_unlock();
+
+	return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+
+	if (S_ISCHR(inode->i_mode))
+		return;
+
+	unlock_mapping_entry(mapping, page->index);
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
diff --git a/include/linux/dax.h b/include/linux/dax.h
index deb0f663252f..450b28db9533 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
+bool dax_lock_mapping_entry(struct page *page);
+void dax_unlock_mapping_entry(struct page *page);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline bool dax_lock_mapping_entry(struct page *page)
+{
+	if (IS_DAX(page->mapping->host))
+		return true;
+	return false;
+}
+
+static inline void dax_unlock_mapping_entry(struct page *page)
+{
+}
 #endif
 
 int dax_read_lock(void);
-- 
cgit v1.2.3


From 6100e34b2526e1dc3dbcc47fea2677974d6aaea5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:50:21 -0700
Subject: mm, memory_failure: Teach memory_failure() about dev_pagemap pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

mce: Uncorrected hardware memory error in user-access at af34214200
    {1}[Hardware Error]: It has been corrected by h/w and requires no further action
    mce: [Hardware Error]: Machine check events logged
    {1}[Hardware Error]: event severity: corrected
    Memory failure: 0xaf34214: reserved kernel page still referenced by 1 users
    [..]
    Memory failure: 0xaf34214: recovery action for reserved kernel page: Failed
    mce: Memory error not recovered

In contrast to typical memory, dev_pagemap pages may be dax mapped. With
dax there is no possibility to map in another page dynamically since dax
establishes 1:1 physical address to file offset associations. Also
dev_pagemap pages associated with NVDIMM / persistent memory devices can
internal remap/repair addresses with poison. While memory_failure()
assumes that it can discard typical poisoned pages and keep them
unmapped indefinitely, dev_pagemap pages may be returned to service
after the error is cleared.

Teach memory_failure() to detect and handle MEMORY_DEVICE_HOST
dev_pagemap pages that have poison consumed by userspace. Mark the
memory as UC instead of unmapping it completely to allow ongoing access
via the device driver (nd_pmem). Later, nd_pmem will grow support for
marking the page back to WB when the error is cleared.

Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/mm.h  |   1 +
 mm/memory-failure.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 124 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a0fbb9ffe380..374e5e9284f7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2725,6 +2725,7 @@ enum mf_action_page_type {
 	MF_MSG_TRUNCATED_LRU,
 	MF_MSG_BUDDY,
 	MF_MSG_BUDDY_2ND,
+	MF_MSG_DAX,
 	MF_MSG_UNKNOWN,
 };
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 8a81680d00dd..32a644d9c2ee 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -55,6 +55,7 @@
 #include <linux/hugetlb.h>
 #include <linux/memory_hotplug.h>
 #include <linux/mm_inline.h>
+#include <linux/memremap.h>
 #include <linux/kfifo.h>
 #include <linux/ratelimit.h>
 #include "internal.h"
@@ -263,6 +264,40 @@ void shake_page(struct page *p, int access)
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
+static unsigned long dev_pagemap_mapping_shift(struct page *page,
+		struct vm_area_struct *vma)
+{
+	unsigned long address = vma_address(page, vma);
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset(vma->vm_mm, address);
+	if (!pgd_present(*pgd))
+		return 0;
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		return 0;
+	pud = pud_offset(p4d, address);
+	if (!pud_present(*pud))
+		return 0;
+	if (pud_devmap(*pud))
+		return PUD_SHIFT;
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return 0;
+	if (pmd_devmap(*pmd))
+		return PMD_SHIFT;
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte))
+		return 0;
+	if (pte_devmap(*pte))
+		return PAGE_SHIFT;
+	return 0;
+}
+
 /*
  * Failure handling: if we can't find or can't kill a process there's
  * not much we can do.	We just print a message and ignore otherwise.
@@ -292,7 +327,10 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 	}
 	tk->addr = page_address_in_vma(p, vma);
 	tk->addr_valid = 1;
-	tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
+	if (is_zone_device_page(p))
+		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
+	else
+		tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
 
 	/*
 	 * In theory we don't have to kill when the page was
@@ -300,7 +338,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 	 * likely very rare kill anyways just out of paranoia, but use
 	 * a SIGKILL because the error is not contained anymore.
 	 */
-	if (tk->addr == -EFAULT) {
+	if (tk->addr == -EFAULT || tk->size_shift == 0) {
 		pr_info("Memory failure: Unable to find user space address %lx in %s\n",
 			page_to_pfn(p), tsk->comm);
 		tk->addr_valid = 0;
@@ -514,6 +552,7 @@ static const char * const action_page_types[] = {
 	[MF_MSG_TRUNCATED_LRU]		= "already truncated LRU page",
 	[MF_MSG_BUDDY]			= "free buddy page",
 	[MF_MSG_BUDDY_2ND]		= "free buddy page (2nd try)",
+	[MF_MSG_DAX]			= "dax page",
 	[MF_MSG_UNKNOWN]		= "unknown page",
 };
 
@@ -1111,6 +1150,83 @@ out:
 	return res;
 }
 
+static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
+		struct dev_pagemap *pgmap)
+{
+	struct page *page = pfn_to_page(pfn);
+	const bool unmap_success = true;
+	unsigned long size = 0;
+	struct to_kill *tk;
+	LIST_HEAD(tokill);
+	int rc = -EBUSY;
+	loff_t start;
+
+	/*
+	 * Prevent the inode from being freed while we are interrogating
+	 * the address_space, typically this would be handled by
+	 * lock_page(), but dax pages do not use the page lock. This
+	 * also prevents changes to the mapping of this pfn until
+	 * poison signaling is complete.
+	 */
+	if (!dax_lock_mapping_entry(page))
+		goto out;
+
+	if (hwpoison_filter(page)) {
+		rc = 0;
+		goto unlock;
+	}
+
+	switch (pgmap->type) {
+	case MEMORY_DEVICE_PRIVATE:
+	case MEMORY_DEVICE_PUBLIC:
+		/*
+		 * TODO: Handle HMM pages which may need coordination
+		 * with device-side memory.
+		 */
+		goto unlock;
+	default:
+		break;
+	}
+
+	/*
+	 * Use this flag as an indication that the dax page has been
+	 * remapped UC to prevent speculative consumption of poison.
+	 */
+	SetPageHWPoison(page);
+
+	/*
+	 * Unlike System-RAM there is no possibility to swap in a
+	 * different physical page at a given virtual address, so all
+	 * userspace consumption of ZONE_DEVICE memory necessitates
+	 * SIGBUS (i.e. MF_MUST_KILL)
+	 */
+	flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
+	collect_procs(page, &tokill, flags & MF_ACTION_REQUIRED);
+
+	list_for_each_entry(tk, &tokill, nd)
+		if (tk->size_shift)
+			size = max(size, 1UL << tk->size_shift);
+	if (size) {
+		/*
+		 * Unmap the largest mapping to avoid breaking up
+		 * device-dax mappings which are constant size. The
+		 * actual size of the mapping being torn down is
+		 * communicated in siginfo, see kill_proc()
+		 */
+		start = (page->index << PAGE_SHIFT) & ~(size - 1);
+		unmap_mapping_range(page->mapping, start, start + size, 0);
+	}
+	kill_procs(&tokill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
+	rc = 0;
+unlock:
+	dax_unlock_mapping_entry(page);
+out:
+	/* drop pgmap ref acquired in caller */
+	put_dev_pagemap(pgmap);
+	action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
+	return rc;
+}
+
 /**
  * memory_failure - Handle memory failure of a page.
  * @pfn: Page Number of the corrupted page
@@ -1133,6 +1249,7 @@ int memory_failure(unsigned long pfn, int flags)
 	struct page *p;
 	struct page *hpage;
 	struct page *orig_head;
+	struct dev_pagemap *pgmap;
 	int res;
 	unsigned long page_flags;
 
@@ -1145,6 +1262,10 @@ int memory_failure(unsigned long pfn, int flags)
 		return -ENXIO;
 	}
 
+	pgmap = get_dev_pagemap(pfn, NULL);
+	if (pgmap)
+		return memory_failure_dev_pagemap(pfn, flags, pgmap);
+
 	p = pfn_to_page(pfn);
 	if (PageHuge(p))
 		return memory_failure_hugetlb(pfn, flags);
-- 
cgit v1.2.3


From f53aaa31cce7b543e407da7e97690a700206f7b9 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Mon, 16 Jul 2018 15:22:01 -0700
Subject: net/mlx5: FW tracer, implement tracer logic

Implement FW tracer logic and registers access, initialization and
cleanup flows.

Initializing the tracer will be part of load one flow, as multiple
PFs will try to acquire ownership but only one will succeed and will
be the tracer owner.

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/diag/fw_tracer.c   | 196 +++++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/diag/fw_tracer.h   |  66 +++++++
 include/linux/mlx5/driver.h                        |   3 +
 3 files changed, 265 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
new file mode 100644
index 000000000000..3ecbf06b4d71
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "fw_tracer.h"
+
+static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
+{
+	u32 *string_db_base_address_out = tracer->str_db.base_address_out;
+	u32 *string_db_size_out = tracer->str_db.size_out;
+	struct mlx5_core_dev *dev = tracer->dev;
+	u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+	u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+	void *mtrc_cap_sp;
+	int err, i;
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MTRC_CAP, 0, 0);
+	if (err) {
+		mlx5_core_warn(dev, "FWTracer: Error reading tracer caps %d\n",
+			       err);
+		return err;
+	}
+
+	if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) {
+		mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n");
+		return -ENOTSUPP;
+	}
+
+	tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver);
+	tracer->str_db.first_string_trace =
+			MLX5_GET(mtrc_cap, out, first_string_trace);
+	tracer->str_db.num_string_trace =
+			MLX5_GET(mtrc_cap, out, num_string_trace);
+	tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db);
+	tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+	for (i = 0; i < tracer->str_db.num_string_db; i++) {
+		mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]);
+		string_db_base_address_out[i] = MLX5_GET(mtrc_string_db_param,
+							 mtrc_cap_sp,
+							 string_db_base_address);
+		string_db_size_out[i] = MLX5_GET(mtrc_string_db_param,
+						 mtrc_cap_sp, string_db_size);
+	}
+
+	return err;
+}
+
+static int mlx5_set_mtrc_caps_trace_owner(struct mlx5_fw_tracer *tracer,
+					  u32 *out, u32 out_size,
+					  u8 trace_owner)
+{
+	struct mlx5_core_dev *dev = tracer->dev;
+	u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+	MLX5_SET(mtrc_cap, in, trace_owner, trace_owner);
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), out, out_size,
+				    MLX5_REG_MTRC_CAP, 0, 1);
+}
+
+static int mlx5_fw_tracer_ownership_acquire(struct mlx5_fw_tracer *tracer)
+{
+	struct mlx5_core_dev *dev = tracer->dev;
+	u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+	int err;
+
+	err = mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+					     MLX5_FW_TRACER_ACQUIRE_OWNERSHIP);
+	if (err) {
+		mlx5_core_warn(dev, "FWTracer: Acquire tracer ownership failed %d\n",
+			       err);
+		return err;
+	}
+
+	tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+	if (!tracer->owner)
+		return -EBUSY;
+
+	return 0;
+}
+
+static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer)
+{
+	u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+	mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+				       MLX5_FW_TRACER_RELEASE_OWNERSHIP);
+	tracer->owner = false;
+}
+
+static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+{
+	struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
+						     ownership_change_work);
+	struct mlx5_core_dev *dev = tracer->dev;
+	int err;
+
+	if (tracer->owner) {
+		mlx5_fw_tracer_ownership_release(tracer);
+		return;
+	}
+
+	err = mlx5_fw_tracer_ownership_acquire(tracer);
+	if (err) {
+		mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
+		return;
+	}
+}
+
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_fw_tracer *tracer = NULL;
+	int err;
+
+	if (!MLX5_CAP_MCAM_REG(dev, tracer_registers)) {
+		mlx5_core_dbg(dev, "FWTracer: Tracer capability not present\n");
+		return NULL;
+	}
+
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	if (!tracer)
+		return ERR_PTR(-ENOMEM);
+
+	tracer->work_queue = create_singlethread_workqueue("mlx5_fw_tracer");
+	if (!tracer->work_queue) {
+		err = -ENOMEM;
+		goto free_tracer;
+	}
+
+	tracer->dev = dev;
+
+	INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change);
+
+	err = mlx5_query_mtrc_caps(tracer);
+	if (err) {
+		mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
+		goto destroy_workqueue;
+	}
+
+	mlx5_fw_tracer_ownership_change(&tracer->ownership_change_work);
+
+	return tracer;
+
+destroy_workqueue:
+	tracer->dev = NULL;
+	destroy_workqueue(tracer->work_queue);
+free_tracer:
+	kfree(tracer);
+	return ERR_PTR(err);
+}
+
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
+{
+	if (!tracer)
+		return;
+
+	cancel_work_sync(&tracer->ownership_change_work);
+
+	if (tracer->owner)
+		mlx5_fw_tracer_ownership_release(tracer);
+
+	flush_workqueue(tracer->work_queue);
+	destroy_workqueue(tracer->work_queue);
+	kfree(tracer);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
new file mode 100644
index 000000000000..721c41a5e827
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_TRACER_H__
+#define __LIB_TRACER_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#define STRINGS_DB_SECTIONS_NUM 8
+
+struct mlx5_fw_tracer {
+	struct mlx5_core_dev *dev;
+	bool owner;
+	u8   trc_ver;
+	struct workqueue_struct *work_queue;
+	struct work_struct ownership_change_work;
+
+	/* Strings DB */
+	struct {
+		u8 first_string_trace;
+		u8 num_string_trace;
+		u32 num_string_db;
+		u32 base_address_out[STRINGS_DB_SECTIONS_NUM];
+		u32 size_out[STRINGS_DB_SECTIONS_NUM];
+	} str_db;
+};
+
+enum mlx5_fw_tracer_ownership_state {
+	MLX5_FW_TRACER_RELEASE_OWNERSHIP,
+	MLX5_FW_TRACER_ACQUIRE_OWNERSHIP,
+};
+
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+
+#endif
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 957199c20a0f..86cb0ebf92fa 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -816,6 +816,8 @@ struct mlx5_clock {
 	struct mlx5_pps            pps_info;
 };
 
+struct mlx5_fw_tracer;
+
 struct mlx5_core_dev {
 	struct pci_dev	       *pdev;
 	/* sync pci state */
@@ -860,6 +862,7 @@ struct mlx5_core_dev {
 	struct mlx5_clock        clock;
 	struct mlx5_ib_clock_info  *clock_info;
 	struct page             *clock_info_page;
+	struct mlx5_fw_tracer   *tracer;
 };
 
 struct mlx5_db {
-- 
cgit v1.2.3


From c71ad41ccb0c29fce95149b74786574b354c9dda Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Wed, 7 Feb 2018 11:08:56 +0200
Subject: net/mlx5: FW tracer, events handling

The tracer has one event, event 0x26, with two subtypes:
- Subtype 0: Ownership change
- Subtype 1: Traces available

An ownership change occurs in the following cases:
1- Owner releases his ownership, in this case, an event will be
sent to inform others to reattempt acquire ownership.
2- Ownership was taken by a higher priority tool, in this case
the owner should understand that it lost ownership, and go through
tear down flow.

The second subtype indicates that there are traces in the trace buffer,
in this case, the driver polls the tracer buffer for new traces, parse
them and prepares the messages for printing.

The HW starts tracing from the first address in the tracer buffer.
Driver receives an event notifying that new trace block exists.
HW posts a timestamp event at the last 8B of every 256B block.
Comparing the timestamp to the last handled timestamp would indicate
that this is a new trace block. Once the new timestamp is detected,
the entire block is considered valid.

Block validation and parsing, should be done after copying the current
block to a different location, in order to avoid block overwritten
during processing.

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/diag/fw_tracer.c   | 268 ++++++++++++++++++++-
 .../ethernet/mellanox/mlx5/core/diag/fw_tracer.h   |  71 +++++-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  11 +
 include/linux/mlx5/device.h                        |   7 +
 4 files changed, 347 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index d6cc27b0ff34..bd887d1d3396 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -318,25 +318,244 @@ out:
 	return;
 }
 
-static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+static void mlx5_fw_tracer_arm(struct mlx5_core_dev *dev)
 {
-	struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
-						     ownership_change_work);
-	struct mlx5_core_dev *dev = tracer->dev;
+	u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+	u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
 	int err;
 
-	if (tracer->owner) {
-		mlx5_fw_tracer_ownership_release(tracer);
+	MLX5_SET(mtrc_ctrl, in, arm_event, 1);
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MTRC_CTRL, 0, 1);
+	if (err)
+		mlx5_core_warn(dev, "FWTracer: Failed to arm tracer event %d\n", err);
+}
+
+static void poll_trace(struct mlx5_fw_tracer *tracer,
+		       struct tracer_event *tracer_event, u64 *trace)
+{
+	u32 timestamp_low, timestamp_mid, timestamp_high, urts;
+
+	tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id);
+	tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost);
+
+	switch (tracer_event->event_id) {
+	case TRACER_EVENT_TYPE_TIMESTAMP:
+		tracer_event->type = TRACER_EVENT_TYPE_TIMESTAMP;
+		urts = MLX5_GET(tracer_timestamp_event, trace, urts);
+		if (tracer->trc_ver == 0)
+			tracer_event->timestamp_event.unreliable = !!(urts >> 2);
+		else
+			tracer_event->timestamp_event.unreliable = !!(urts & 1);
+
+		timestamp_low = MLX5_GET(tracer_timestamp_event,
+					 trace, timestamp7_0);
+		timestamp_mid = MLX5_GET(tracer_timestamp_event,
+					 trace, timestamp39_8);
+		timestamp_high = MLX5_GET(tracer_timestamp_event,
+					  trace, timestamp52_40);
+
+		tracer_event->timestamp_event.timestamp =
+				((u64)timestamp_high << 40) |
+				((u64)timestamp_mid << 8) |
+				(u64)timestamp_low;
+		break;
+	default:
+		if (tracer_event->event_id >= tracer->str_db.first_string_trace ||
+		    tracer_event->event_id <= tracer->str_db.first_string_trace +
+					      tracer->str_db.num_string_trace) {
+			tracer_event->type = TRACER_EVENT_TYPE_STRING;
+			tracer_event->string_event.timestamp =
+				MLX5_GET(tracer_string_event, trace, timestamp);
+			tracer_event->string_event.string_param =
+				MLX5_GET(tracer_string_event, trace, string_param);
+			tracer_event->string_event.tmsn =
+				MLX5_GET(tracer_string_event, trace, tmsn);
+			tracer_event->string_event.tdsn =
+				MLX5_GET(tracer_string_event, trace, tdsn);
+		} else {
+			tracer_event->type = TRACER_EVENT_TYPE_UNRECOGNIZED;
+		}
+		break;
+	}
+}
+
+static u64 get_block_timestamp(struct mlx5_fw_tracer *tracer, u64 *ts_event)
+{
+	struct tracer_event tracer_event;
+	u8 event_id;
+
+	event_id = MLX5_GET(tracer_event, ts_event, event_id);
+
+	if (event_id == TRACER_EVENT_TYPE_TIMESTAMP)
+		poll_trace(tracer, &tracer_event, ts_event);
+	else
+		tracer_event.timestamp_event.timestamp = 0;
+
+	return tracer_event.timestamp_event.timestamp;
+}
+
+static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
+{
+	struct mlx5_fw_tracer *tracer =
+			container_of(work, struct mlx5_fw_tracer, handle_traces_work);
+	u64 block_timestamp, last_block_timestamp, tmp_trace_block[TRACES_PER_BLOCK];
+	u32 block_count, start_offset, prev_start_offset, prev_consumer_index;
+	u32 trace_event_size = MLX5_ST_SZ_BYTES(tracer_event);
+	struct tracer_event tracer_event;
+	struct mlx5_core_dev *dev;
+	int i;
+
+	if (!tracer->owner)
 		return;
+
+	dev = tracer->dev;
+	block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
+	start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+	/* Copy the block to local buffer to avoid HW override while being processed*/
+	memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+	       TRACER_BLOCK_SIZE_BYTE);
+
+	block_timestamp =
+		get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+
+	while (block_timestamp > tracer->last_timestamp) {
+		/* Check block override if its not the first block */
+		if (!tracer->last_timestamp) {
+			u64 *ts_event;
+			/* To avoid block override be the HW in case of buffer
+			 * wraparound, the time stamp of the previous block
+			 * should be compared to the last timestamp handled
+			 * by the driver.
+			 */
+			prev_consumer_index =
+				(tracer->buff.consumer_index - 1) & (block_count - 1);
+			prev_start_offset = prev_consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+			ts_event = tracer->buff.log_buf + prev_start_offset +
+				   (TRACES_PER_BLOCK - 1) * trace_event_size;
+			last_block_timestamp = get_block_timestamp(tracer, ts_event);
+			/* If previous timestamp different from last stored
+			 * timestamp then there is a good chance that the
+			 * current buffer is overwritten and therefore should
+			 * not be parsed.
+			 */
+			if (tracer->last_timestamp != last_block_timestamp) {
+				mlx5_core_warn(dev, "FWTracer: Events were lost\n");
+				tracer->last_timestamp = block_timestamp;
+				tracer->buff.consumer_index =
+					(tracer->buff.consumer_index + 1) & (block_count - 1);
+				break;
+			}
+		}
+
+		/* Parse events */
+		for (i = 0; i < TRACES_PER_BLOCK ; i++)
+			poll_trace(tracer, &tracer_event, &tmp_trace_block[i]);
+
+		tracer->buff.consumer_index =
+			(tracer->buff.consumer_index + 1) & (block_count - 1);
+
+		tracer->last_timestamp = block_timestamp;
+		start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+		memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+		       TRACER_BLOCK_SIZE_BYTE);
+		block_timestamp = get_block_timestamp(tracer,
+						      &tmp_trace_block[TRACES_PER_BLOCK - 1]);
 	}
 
+	mlx5_fw_tracer_arm(dev);
+}
+
+static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
+{
+	struct mlx5_core_dev *dev = tracer->dev;
+	u32 out[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+	u32 in[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+	int err;
+
+	MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY);
+	MLX5_SET(mtrc_conf, in, log_trace_buffer_size,
+		 ilog2(TRACER_BUFFER_PAGE_NUM));
+	MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key);
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MTRC_CONF, 0, 1);
+	if (err)
+		mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err);
+
+	return err;
+}
+
+static int mlx5_fw_tracer_set_mtrc_ctrl(struct mlx5_fw_tracer *tracer, u8 status, u8 arm)
+{
+	struct mlx5_core_dev *dev = tracer->dev;
+	u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+	u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+	int err;
+
+	MLX5_SET(mtrc_ctrl, in, modify_field_select, TRACE_STATUS);
+	MLX5_SET(mtrc_ctrl, in, trace_status, status);
+	MLX5_SET(mtrc_ctrl, in, arm_event, arm);
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MTRC_CTRL, 0, 1);
+
+	if (!err && status)
+		tracer->last_timestamp = 0;
+
+	return err;
+}
+
+static int mlx5_fw_tracer_start(struct mlx5_fw_tracer *tracer)
+{
+	struct mlx5_core_dev *dev = tracer->dev;
+	int err;
+
 	err = mlx5_fw_tracer_ownership_acquire(tracer);
 	if (err) {
 		mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
+		/* Don't fail since ownership can be acquired on a later FW event */
+		return 0;
+	}
+
+	err = mlx5_fw_tracer_set_mtrc_conf(tracer);
+	if (err) {
+		mlx5_core_warn(dev, "FWTracer: Failed to set tracer configuration %d\n", err);
+		goto release_ownership;
+	}
+
+	/* enable tracer & trace events */
+	err = mlx5_fw_tracer_set_mtrc_ctrl(tracer, 1, 1);
+	if (err) {
+		mlx5_core_warn(dev, "FWTracer: Failed to enable tracer %d\n", err);
+		goto release_ownership;
+	}
+
+	return 0;
+
+release_ownership:
+	mlx5_fw_tracer_ownership_release(tracer);
+	return err;
+}
+
+static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+{
+	struct mlx5_fw_tracer *tracer =
+		container_of(work, struct mlx5_fw_tracer, ownership_change_work);
+
+	if (tracer->owner) {
+		tracer->owner = false;
+		tracer->buff.consumer_index = 0;
 		return;
 	}
+
+	mlx5_fw_tracer_start(tracer);
 }
 
+/* Create software resources (Buffers, etc ..) */
 struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fw_tracer *tracer = NULL;
@@ -361,6 +580,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
 
 	INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change);
 	INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db);
+	INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces);
+
 
 	err = mlx5_query_mtrc_caps(tracer);
 	if (err) {
@@ -392,6 +613,9 @@ free_tracer:
 	return ERR_PTR(err);
 }
 
+/* Create HW resources + start tracer
+ * must be called before Async EQ is created
+ */
 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
 {
 	struct mlx5_core_dev *dev;
@@ -417,22 +641,25 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
 		goto err_dealloc_pd;
 	}
 
-	err = mlx5_fw_tracer_ownership_acquire(tracer);
-	if (err) /* Don't fail since ownership can be acquired on a later FW event */
-		mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
+	mlx5_fw_tracer_start(tracer);
 
 	return 0;
+
 err_dealloc_pd:
 	mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
 	return err;
 }
 
+/* Stop tracer + Cleanup HW resources
+ * must be called after Async EQ is destroyed
+ */
 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
 {
 	if (IS_ERR_OR_NULL(tracer))
 		return;
 
 	cancel_work_sync(&tracer->ownership_change_work);
+	cancel_work_sync(&tracer->handle_traces_work);
 
 	if (tracer->owner)
 		mlx5_fw_tracer_ownership_release(tracer);
@@ -441,6 +668,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
 	mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn);
 }
 
+/* Free software resources (Buffers, etc ..) */
 void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
 {
 	if (IS_ERR_OR_NULL(tracer))
@@ -454,4 +682,26 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
 	kfree(tracer);
 }
 
+void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+	struct mlx5_fw_tracer *tracer = dev->tracer;
+
+	if (!tracer)
+		return;
+
+	switch (eqe->sub_type) {
+	case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
+		if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state))
+			queue_work(tracer->work_queue, &tracer->ownership_change_work);
+		break;
+	case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
+		if (likely(tracer->str_db.loaded))
+			queue_work(tracer->work_queue, &tracer->handle_traces_work);
+		break;
+	default:
+		mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
+			      eqe->sub_type);
+	}
+}
+
 EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 66cb7e7ada28..3915e91486b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -43,6 +43,9 @@
 #define TRACER_BUFFER_CHUNK 4096
 #define TRACE_BUFFER_SIZE_BYTE (TRACER_BUFFER_PAGE_NUM * TRACER_BUFFER_CHUNK)
 
+#define TRACER_BLOCK_SIZE_BYTE 256
+#define TRACES_PER_BLOCK 32
+
 struct mlx5_fw_tracer {
 	struct mlx5_core_dev *dev;
 	bool owner;
@@ -69,8 +72,11 @@ struct mlx5_fw_tracer {
 		dma_addr_t dma;
 		u32 size;
 		struct mlx5_core_mkey mkey;
-
+		u32 consumer_index;
 	} buff;
+
+	u64 last_timestamp;
+	struct work_struct handle_traces_work;
 };
 
 enum mlx5_fw_tracer_ownership_state {
@@ -78,7 +84,70 @@ enum mlx5_fw_tracer_ownership_state {
 	MLX5_FW_TRACER_ACQUIRE_OWNERSHIP,
 };
 
+enum tracer_ctrl_fields_select {
+	TRACE_STATUS = 1 << 0,
+};
+
+enum tracer_event_type {
+	TRACER_EVENT_TYPE_STRING,
+	TRACER_EVENT_TYPE_TIMESTAMP = 0xFF,
+	TRACER_EVENT_TYPE_UNRECOGNIZED,
+};
+
+enum tracing_mode {
+	TRACE_TO_MEMORY = 1 << 0,
+};
+
+struct tracer_timestamp_event {
+	u64        timestamp;
+	u8         unreliable;
+};
+
+struct tracer_string_event {
+	u32        timestamp;
+	u32        tmsn;
+	u32        tdsn;
+	u32        string_param;
+};
+
+struct tracer_event {
+	bool      lost_event;
+	u32       type;
+	u8        event_id;
+	union {
+		struct tracer_string_event string_event;
+		struct tracer_timestamp_event timestamp_event;
+	};
+};
+
+struct mlx5_ifc_tracer_event_bits {
+	u8         lost[0x1];
+	u8         timestamp[0x7];
+	u8         event_id[0x8];
+	u8         event_data[0x30];
+};
+
+struct mlx5_ifc_tracer_string_event_bits {
+	u8         lost[0x1];
+	u8         timestamp[0x7];
+	u8         event_id[0x8];
+	u8         tmsn[0xd];
+	u8         tdsn[0x3];
+	u8         string_param[0x20];
+};
+
+struct mlx5_ifc_tracer_timestamp_event_bits {
+	u8         timestamp7_0[0x8];
+	u8         event_id[0x8];
+	u8         urts[0x3];
+	u8         timestamp52_40[0xd];
+	u8         timestamp39_8[0x20];
+};
+
 struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) { return; }
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 406c23862f5f..7669b4380779 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -40,6 +40,7 @@
 #include "mlx5_core.h"
 #include "fpga/core.h"
 #include "eswitch.h"
+#include "diag/fw_tracer.h"
 
 enum {
 	MLX5_EQE_SIZE		= sizeof(struct mlx5_eqe),
@@ -168,6 +169,8 @@ static const char *eqe_type_str(u8 type)
 		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
 	case MLX5_EVENT_TYPE_GENERAL_EVENT:
 		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+	case MLX5_EVENT_TYPE_DEVICE_TRACER:
+		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
 	default:
 		return "Unrecognized event";
 	}
@@ -576,6 +579,11 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 		case MLX5_EVENT_TYPE_GENERAL_EVENT:
 			general_event_handler(dev, eqe);
 			break;
+
+		case MLX5_EVENT_TYPE_DEVICE_TRACER:
+			mlx5_fw_tracer_event(dev, eqe);
+			break;
+
 		default:
 			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
 				       eqe->type, eq->eqn);
@@ -853,6 +861,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, temp_warn_event))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
 
+	if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0566c6a94805..d489494b0a84 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -332,6 +332,13 @@ enum mlx5_event {
 
 	MLX5_EVENT_TYPE_FPGA_ERROR         = 0x20,
 	MLX5_EVENT_TYPE_FPGA_QP_ERROR      = 0x21,
+
+	MLX5_EVENT_TYPE_DEVICE_TRACER      = 0x26,
+};
+
+enum {
+	MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE = 0x0,
+	MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE = 0x1,
 };
 
 enum {
-- 
cgit v1.2.3


From 51bbf9bee34ff5d4006d266f24a54dc9c1669eb5 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 19 Jul 2018 17:27:43 -0500
Subject: PCI: hotplug: Demidlayer registration with the core

When a hotplug driver calls pci_hp_register(), all steps necessary for
registration are carried out in one go, including creation of a kobject
and addition to sysfs.  That's a problem for pciehp once it's converted
to enable/disable the slot exclusively from the IRQ thread:  The thread
needs to be spawned after creation of the kobject (because it uses the
kobject's name), but before addition to sysfs (because it will handle
enable/disable requests submitted via sysfs).

pci_hp_deregister() does offer a ->release callback that's invoked
after deletion from sysfs and before destruction of the kobject.  But
because pci_hp_register() doesn't offer a counterpart, hotplug drivers'
->probe and ->remove code becomes asymmetric, which is error prone
as recently discovered use-after-free bugs in pciehp's ->remove hook
have shown.

In a sense, this appears to be a case of the midlayer antipattern:

   "The core thesis of the "midlayer mistake" is that midlayers are
    bad and should not exist.  That common functionality which it is
    so tempting to put in a midlayer should instead be provided as
    library routines which can [be] used, augmented, or ignored by
    each bottom level driver independently.  Thus every subsystem
    that supports multiple implementations (or drivers) should
    provide a very thin top layer which calls directly into the
    bottom layer drivers, and a rich library of support code that
    eases the implementation of those drivers.  This library is
    available to, but not forced upon, those drivers."
        --  Neil Brown (2009), https://lwn.net/Articles/336262/

The presence of midlayer traits in the PCI hotplug core might be ascribed
to its age:  When it was introduced in February 2002, the blessings of a
library approach might not have been well known:
https://git.kernel.org/tglx/history/c/a8a2069f432c

For comparison, the driver core does offer split functions for creating
a kobject (device_initialize()) and addition to sysfs (device_add()) as
an alternative to carrying out everything at once (device_register()).
This was introduced in October 2002:
https://git.kernel.org/tglx/history/c/8b290eb19962

The odd ->release callback in the PCI hotplug core was added in 2003:
https://git.kernel.org/tglx/history/c/69f8d663b595

Clearly, a library approach would not force every hotplug driver to
implement a ->release callback, but rather allow the driver to remove
the sysfs files, release its data structures and finally destroy the
kobject.  Alternatively, a driver may choose to remove everything with
pci_hp_deregister(), then release its data structures.

To this end, offer drivers pci_hp_initialize() and pci_hp_add() as a
split-up version of pci_hp_register().  Likewise, offer pci_hp_del()
and pci_hp_destroy() as a split-up version of pci_hp_deregister().

Eliminate the ->release callback and move its code into each driver's
teardown routine.

Declare pci_hp_deregister() void, in keeping with the usual kernel
pattern that enablement can fail, but disablement cannot.  It only
returned an error if the caller passed in a NULL pointer or a slot which
has never or is no longer registered or is sharing its name with another
slot.  Those would be bugs, so WARN about them.  Few hotplug drivers
actually checked the return value and those that did only printed a
useless error message to dmesg.  Remove that.

For most drivers the conversion was straightforward since it doesn't
matter whether the code in the ->release callback is executed before or
after destruction of the kobject.  But in the case of ibmphp, it was
unclear to me whether setting slot_cur->ctrl and slot_cur->bus_on to
NULL needs to happen before the kobject is destroyed, so I erred on
the side of caution and ensured that the order stays the same.  Another
nontrivial case is pnv_php, I've found the list and kref logic difficult
to understand, however my impression was that it is safe to delete the
list element and drop the references until after the kobject is
destroyed.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>  # drivers/platform/x86
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Len Brown <lenb@kernel.org>
Cc: Scott Murray <scott@spiteful.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Gavin Shan <gwshan@linux.vnet.ibm.com>
Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Corentin Chary <corentin.chary@gmail.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Andy Shevchenko <andy@infradead.org>
---
 drivers/pci/hotplug/acpiphp_core.c      |  22 +----
 drivers/pci/hotplug/cpci_hotplug_core.c |  14 +---
 drivers/pci/hotplug/cpqphp_core.c       |  16 +---
 drivers/pci/hotplug/ibmphp_core.c       |  15 +++-
 drivers/pci/hotplug/ibmphp_ebda.c       |  20 -----
 drivers/pci/hotplug/pci_hotplug_core.c  | 139 +++++++++++++++++++++++---------
 drivers/pci/hotplug/pciehp_core.c       |  19 ++---
 drivers/pci/hotplug/pnv_php.c           |   5 +-
 drivers/pci/hotplug/rpaphp_core.c       |   2 +-
 drivers/pci/hotplug/rpaphp_slot.c       |  13 +--
 drivers/pci/hotplug/s390_pci_hpc.c      |  13 +--
 drivers/pci/hotplug/sgi_hotplug.c       |   9 ++-
 drivers/pci/hotplug/shpchp_core.c       |  20 +----
 drivers/platform/x86/asus-wmi.c         |  12 +--
 drivers/platform/x86/eeepc-laptop.c     |  12 +--
 include/linux/pci_hotplug.h             |  15 +++-
 16 files changed, 168 insertions(+), 178 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index 12b5655fd390..ad32ffbc4b91 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -254,20 +254,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
-/**
- * release_slot - free up the memory used by a slot
- * @hotplug_slot: slot to free
- */
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot = hotplug_slot->private;
-
-	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
-
-	kfree(slot->hotplug_slot);
-	kfree(slot);
-}
-
 /* callback routine to initialize 'struct slot' for each slot */
 int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
 				  unsigned int sun)
@@ -287,7 +273,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
 	slot->hotplug_slot->info = &slot->info;
 
 	slot->hotplug_slot->private = slot;
-	slot->hotplug_slot->release = &release_slot;
 	slot->hotplug_slot->ops = &acpi_hotplug_slot_ops;
 
 	slot->acpi_slot = acpiphp_slot;
@@ -324,13 +309,12 @@ error:
 void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
 {
 	struct slot *slot = acpiphp_slot->slot;
-	int retval = 0;
 
 	pr_info("Slot [%s] unregistered\n", slot_name(slot));
 
-	retval = pci_hp_deregister(slot->hotplug_slot);
-	if (retval)
-		pr_err("pci_hp_deregister failed with error %d\n", retval);
+	pci_hp_deregister(slot->hotplug_slot);
+	kfree(slot->hotplug_slot);
+	kfree(slot);
 }
 
 
diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c
index 07b533adc9df..52a339baf06c 100644
--- a/drivers/pci/hotplug/cpci_hotplug_core.c
+++ b/drivers/pci/hotplug/cpci_hotplug_core.c
@@ -195,10 +195,8 @@ get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
-static void release_slot(struct hotplug_slot *hotplug_slot)
+static void release_slot(struct slot *slot)
 {
-	struct slot *slot = hotplug_slot->private;
-
 	kfree(slot->hotplug_slot->info);
 	kfree(slot->hotplug_slot);
 	pci_dev_put(slot->dev);
@@ -253,7 +251,6 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
 		snprintf(name, SLOT_NAME_SIZE, "%02x:%02x", bus->number, i);
 
 		hotplug_slot->private = slot;
-		hotplug_slot->release = &release_slot;
 		hotplug_slot->ops = &cpci_hotplug_slot_ops;
 
 		/*
@@ -308,12 +305,8 @@ cpci_hp_unregister_bus(struct pci_bus *bus)
 			slots--;
 
 			dbg("deregistering slot %s", slot_name(slot));
-			status = pci_hp_deregister(slot->hotplug_slot);
-			if (status) {
-				err("pci_hp_deregister failed with error %d",
-				    status);
-				break;
-			}
+			pci_hp_deregister(slot->hotplug_slot);
+			release_slot(slot);
 		}
 	}
 	up_write(&list_rwsem);
@@ -623,6 +616,7 @@ cleanup_slots(void)
 	list_for_each_entry_safe(slot, tmp, &slot_list, slot_list) {
 		list_del(&slot->slot_list);
 		pci_hp_deregister(slot->hotplug_slot);
+		release_slot(slot);
 	}
 cleanup_null:
 	up_write(&list_rwsem);
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index 1797e36ec586..5a06636e910a 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -266,17 +266,6 @@ static void __iomem *get_SMBIOS_entry(void __iomem *smbios_start,
 	return previous;
 }
 
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot = hotplug_slot->private;
-
-	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
-
-	kfree(slot->hotplug_slot->info);
-	kfree(slot->hotplug_slot);
-	kfree(slot);
-}
-
 static int ctrl_slot_cleanup(struct controller *ctrl)
 {
 	struct slot *old_slot, *next_slot;
@@ -285,9 +274,11 @@ static int ctrl_slot_cleanup(struct controller *ctrl)
 	ctrl->slot = NULL;
 
 	while (old_slot) {
-		/* memory will be freed by the release_slot callback */
 		next_slot = old_slot->next;
 		pci_hp_deregister(old_slot->hotplug_slot);
+		kfree(old_slot->hotplug_slot->info);
+		kfree(old_slot->hotplug_slot);
+		kfree(old_slot);
 		old_slot = next_slot;
 	}
 
@@ -678,7 +669,6 @@ static int ctrl_slot_setup(struct controller *ctrl,
 			((read_slot_enable(ctrl) << 2) >> ctrl_slot) & 0x04;
 
 		/* register this slot with the hotplug pci core */
-		hotplug_slot->release = &release_slot;
 		hotplug_slot->private = slot;
 		snprintf(name, SLOT_NAME_SIZE, "%u", slot->number);
 		hotplug_slot->ops = &cpqphp_hotplug_slot_ops;
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 1869b0411ce0..4ea57e9019f1 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -673,7 +673,20 @@ static void free_slots(void)
 
 	list_for_each_entry_safe(slot_cur, next, &ibmphp_slot_head,
 				 ibm_slot_list) {
-		pci_hp_deregister(slot_cur->hotplug_slot);
+		pci_hp_del(slot_cur->hotplug_slot);
+		slot_cur->ctrl = NULL;
+		slot_cur->bus_on = NULL;
+
+		/*
+		 * We don't want to actually remove the resources,
+		 * since ibmphp_free_resources() will do just that.
+		 */
+		ibmphp_unconfigure_card(&slot_cur, -1);
+
+		pci_hp_destroy(slot_cur->hotplug_slot);
+		kfree(slot_cur->hotplug_slot->info);
+		kfree(slot_cur->hotplug_slot);
+		kfree(slot_cur);
 	}
 	debug("%s -- exit\n", __func__);
 }
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 64549aa24c0f..6f8e90e3ec08 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -699,25 +699,6 @@ static int fillslotinfo(struct hotplug_slot *hotplug_slot)
 	return rc;
 }
 
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot;
-
-	if (!hotplug_slot || !hotplug_slot->private)
-		return;
-
-	slot = hotplug_slot->private;
-	kfree(slot->hotplug_slot->info);
-	kfree(slot->hotplug_slot);
-	slot->ctrl = NULL;
-	slot->bus_on = NULL;
-
-	/* we don't want to actually remove the resources, since free_resources will do just that */
-	ibmphp_unconfigure_card(&slot, -1);
-
-	kfree(slot);
-}
-
 static struct pci_driver ibmphp_driver;
 
 /*
@@ -941,7 +922,6 @@ static int __init ebda_rsrc_controller(void)
 			tmp_slot->hotplug_slot = hp_slot_ptr;
 
 			hp_slot_ptr->private = tmp_slot;
-			hp_slot_ptr->release = release_slot;
 
 			rc = fillslotinfo(hp_slot_ptr);
 			if (rc)
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index fd93783a87b0..90fde5f106d8 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -396,8 +396,9 @@ static struct hotplug_slot *get_slot_from_name(const char *name)
  * @owner: caller module owner
  * @mod_name: caller module name
  *
- * Registers a hotplug slot with the pci hotplug subsystem, which will allow
- * userspace interaction to the slot.
+ * Prepares a hotplug slot for in-kernel use and immediately publishes it to
+ * user space in one go.  Drivers may alternatively carry out the two steps
+ * separately by invoking pci_hp_initialize() and pci_hp_add().
  *
  * Returns 0 if successful, anything else for an error.
  */
@@ -406,54 +407,91 @@ int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus,
 		      struct module *owner, const char *mod_name)
 {
 	int result;
+
+	result = __pci_hp_initialize(slot, bus, devnr, name, owner, mod_name);
+	if (result)
+		return result;
+
+	result = pci_hp_add(slot);
+	if (result)
+		pci_hp_destroy(slot);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(__pci_hp_register);
+
+/**
+ * __pci_hp_initialize - prepare hotplug slot for in-kernel use
+ * @slot: pointer to the &struct hotplug_slot to initialize
+ * @bus: bus this slot is on
+ * @devnr: slot number
+ * @name: name registered with kobject core
+ * @owner: caller module owner
+ * @mod_name: caller module name
+ *
+ * Allocate and fill in a PCI slot for use by a hotplug driver.  Once this has
+ * been called, the driver may invoke hotplug_slot_name() to get the slot's
+ * unique name.  The driver must be prepared to handle a ->reset_slot callback
+ * from this point on.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+int __pci_hp_initialize(struct hotplug_slot *slot, struct pci_bus *bus,
+			int devnr, const char *name, struct module *owner,
+			const char *mod_name)
+{
 	struct pci_slot *pci_slot;
 
 	if (slot == NULL)
 		return -ENODEV;
 	if ((slot->info == NULL) || (slot->ops == NULL))
 		return -EINVAL;
-	if (slot->release == NULL) {
-		dbg("Why are you trying to register a hotplug slot without a proper release function?\n");
-		return -EINVAL;
-	}
 
 	slot->ops->owner = owner;
 	slot->ops->mod_name = mod_name;
 
-	mutex_lock(&pci_hp_mutex);
 	/*
 	 * No problems if we call this interface from both ACPI_PCI_SLOT
 	 * driver and call it here again. If we've already created the
 	 * pci_slot, the interface will simply bump the refcount.
 	 */
 	pci_slot = pci_create_slot(bus, devnr, name, slot);
-	if (IS_ERR(pci_slot)) {
-		result = PTR_ERR(pci_slot);
-		goto out;
-	}
+	if (IS_ERR(pci_slot))
+		return PTR_ERR(pci_slot);
 
 	slot->pci_slot = pci_slot;
 	pci_slot->hotplug = slot;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__pci_hp_initialize);
 
-	list_add(&slot->slot_list, &pci_hotplug_slot_list);
+/**
+ * pci_hp_add - publish hotplug slot to user space
+ * @slot: pointer to the &struct hotplug_slot to publish
+ *
+ * Make a hotplug slot's sysfs interface available and inform user space of its
+ * addition by sending a uevent.  The hotplug driver must be prepared to handle
+ * all &struct hotplug_slot_ops callbacks from this point on.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+int pci_hp_add(struct hotplug_slot *slot)
+{
+	struct pci_slot *pci_slot = slot->pci_slot;
+	int result;
 
 	result = fs_add_slot(pci_slot);
 	if (result)
-		goto err_list_del;
+		return result;
 
 	kobject_uevent(&pci_slot->kobj, KOBJ_ADD);
-	dbg("Added slot %s to the list\n", name);
-	goto out;
-
-err_list_del:
-	list_del(&slot->slot_list);
-	pci_slot->hotplug = NULL;
-	pci_destroy_slot(pci_slot);
-out:
+	mutex_lock(&pci_hp_mutex);
+	list_add(&slot->slot_list, &pci_hotplug_slot_list);
 	mutex_unlock(&pci_hp_mutex);
-	return result;
+	dbg("Added slot %s to the list\n", hotplug_slot_name(slot));
+	return 0;
 }
-EXPORT_SYMBOL_GPL(__pci_hp_register);
+EXPORT_SYMBOL_GPL(pci_hp_add);
 
 /**
  * pci_hp_deregister - deregister a hotplug_slot with the PCI hotplug subsystem
@@ -464,35 +502,62 @@ EXPORT_SYMBOL_GPL(__pci_hp_register);
  *
  * Returns 0 if successful, anything else for an error.
  */
-int pci_hp_deregister(struct hotplug_slot *slot)
+void pci_hp_deregister(struct hotplug_slot *slot)
+{
+	pci_hp_del(slot);
+	pci_hp_destroy(slot);
+}
+EXPORT_SYMBOL_GPL(pci_hp_deregister);
+
+/**
+ * pci_hp_del - unpublish hotplug slot from user space
+ * @slot: pointer to the &struct hotplug_slot to unpublish
+ *
+ * Remove a hotplug slot's sysfs interface.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+void pci_hp_del(struct hotplug_slot *slot)
 {
 	struct hotplug_slot *temp;
-	struct pci_slot *pci_slot;
 
-	if (!slot)
-		return -ENODEV;
+	if (WARN_ON(!slot))
+		return;
 
 	mutex_lock(&pci_hp_mutex);
 	temp = get_slot_from_name(hotplug_slot_name(slot));
-	if (temp != slot) {
+	if (WARN_ON(temp != slot)) {
 		mutex_unlock(&pci_hp_mutex);
-		return -ENODEV;
+		return;
 	}
 
 	list_del(&slot->slot_list);
-
-	pci_slot = slot->pci_slot;
-	fs_remove_slot(pci_slot);
+	mutex_unlock(&pci_hp_mutex);
 	dbg("Removed slot %s from the list\n", hotplug_slot_name(slot));
+	fs_remove_slot(slot->pci_slot);
+}
+EXPORT_SYMBOL_GPL(pci_hp_del);
+
+/**
+ * pci_hp_destroy - remove hotplug slot from in-kernel use
+ * @slot: pointer to the &struct hotplug_slot to destroy
+ *
+ * Destroy a PCI slot used by a hotplug driver.  Once this has been called,
+ * the driver may no longer invoke hotplug_slot_name() to get the slot's
+ * unique name.  The driver no longer needs to handle a ->reset_slot callback
+ * from this point on.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+void pci_hp_destroy(struct hotplug_slot *slot)
+{
+	struct pci_slot *pci_slot = slot->pci_slot;
 
-	slot->release(slot);
+	slot->pci_slot = NULL;
 	pci_slot->hotplug = NULL;
 	pci_destroy_slot(pci_slot);
-	mutex_unlock(&pci_hp_mutex);
-
-	return 0;
 }
-EXPORT_SYMBOL_GPL(pci_hp_deregister);
+EXPORT_SYMBOL_GPL(pci_hp_destroy);
 
 /**
  * pci_hp_change_slot_info - changes the slot's information structure in the core
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index b360645377c2..37d8f81e548f 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -56,17 +56,6 @@ static int get_latch_status(struct hotplug_slot *slot, u8 *value);
 static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
 static int reset_slot(struct hotplug_slot *slot, int probe);
 
-/**
- * release_slot - free up the memory used by a slot
- * @hotplug_slot: slot to free
- */
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	kfree(hotplug_slot->ops);
-	kfree(hotplug_slot->info);
-	kfree(hotplug_slot);
-}
-
 static int init_slot(struct controller *ctrl)
 {
 	struct slot *slot = ctrl->slot;
@@ -107,7 +96,6 @@ static int init_slot(struct controller *ctrl)
 	/* register this slot with the hotplug pci core */
 	hotplug->info = info;
 	hotplug->private = slot;
-	hotplug->release = &release_slot;
 	hotplug->ops = ops;
 	slot->hotplug_slot = hotplug;
 	snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
@@ -127,7 +115,12 @@ out:
 
 static void cleanup_slot(struct controller *ctrl)
 {
-	pci_hp_deregister(ctrl->slot->hotplug_slot);
+	struct hotplug_slot *hotplug_slot = ctrl->slot->hotplug_slot;
+
+	pci_hp_deregister(hotplug_slot);
+	kfree(hotplug_slot->ops);
+	kfree(hotplug_slot->info);
+	kfree(hotplug_slot);
 }
 
 /*
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 6c2e8d7307c6..3276a5e4c430 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -538,9 +538,8 @@ static struct hotplug_slot_ops php_slot_ops = {
 	.disable_slot		= pnv_php_disable_slot,
 };
 
-static void pnv_php_release(struct hotplug_slot *slot)
+static void pnv_php_release(struct pnv_php_slot *php_slot)
 {
-	struct pnv_php_slot *php_slot = slot->private;
 	unsigned long flags;
 
 	/* Remove from global or child list */
@@ -596,7 +595,6 @@ static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn)
 	php_slot->power_state_check     = false;
 	php_slot->slot.ops              = &php_slot_ops;
 	php_slot->slot.info             = &php_slot->slot_info;
-	php_slot->slot.release          = pnv_php_release;
 	php_slot->slot.private          = php_slot;
 
 	INIT_LIST_HEAD(&php_slot->children);
@@ -924,6 +922,7 @@ static void pnv_php_unregister_one(struct device_node *dn)
 
 	php_slot->state = PNV_PHP_STATE_OFFLINE;
 	pci_hp_deregister(&php_slot->slot);
+	pnv_php_release(php_slot);
 	pnv_php_put_slot(php_slot);
 }
 
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index fb5e0845429d..857c358b727b 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -404,13 +404,13 @@ static void __exit cleanup_slots(void)
 	/*
 	 * Unregister all of our slots with the pci_hotplug subsystem,
 	 * and free up all memory that we had allocated.
-	 * memory will be freed in release_slot callback.
 	 */
 
 	list_for_each_entry_safe(slot, next, &rpaphp_slot_head,
 				 rpaphp_slot_list) {
 		list_del(&slot->rpaphp_slot_list);
 		pci_hp_deregister(slot->hotplug_slot);
+		dealloc_slot_struct(slot);
 	}
 	return;
 }
diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c
index 3840a2075e6a..b916c8e4372d 100644
--- a/drivers/pci/hotplug/rpaphp_slot.c
+++ b/drivers/pci/hotplug/rpaphp_slot.c
@@ -19,12 +19,6 @@
 #include "rpaphp.h"
 
 /* free up the memory used by a slot */
-static void rpaphp_release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot = (struct slot *) hotplug_slot->private;
-	dealloc_slot_struct(slot);
-}
-
 void dealloc_slot_struct(struct slot *slot)
 {
 	kfree(slot->hotplug_slot->info);
@@ -56,7 +50,6 @@ struct slot *alloc_slot_struct(struct device_node *dn,
 	slot->power_domain = power_domain;
 	slot->hotplug_slot->private = slot;
 	slot->hotplug_slot->ops = &rpaphp_hotplug_slot_ops;
-	slot->hotplug_slot->release = &rpaphp_release_slot;
 
 	return (slot);
 
@@ -90,10 +83,8 @@ int rpaphp_deregister_slot(struct slot *slot)
 		__func__, slot->name);
 
 	list_del(&slot->rpaphp_slot_list);
-
-	retval = pci_hp_deregister(php_slot);
-	if (retval)
-		err("Problem unregistering a slot %s\n", slot->name);
+	pci_hp_deregister(php_slot);
+	dealloc_slot_struct(slot);
 
 	dbg("%s - Exit: rc[%d]\n", __func__, retval);
 	return retval;
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
index ffdc2977395d..93b5341d282c 100644
--- a/drivers/pci/hotplug/s390_pci_hpc.c
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -130,15 +130,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot = hotplug_slot->private;
-
-	kfree(slot->hotplug_slot->info);
-	kfree(slot->hotplug_slot);
-	kfree(slot);
-}
-
 static struct hotplug_slot_ops s390_hotplug_slot_ops = {
 	.enable_slot =		enable_slot,
 	.disable_slot =		disable_slot,
@@ -175,7 +166,6 @@ int zpci_init_slot(struct zpci_dev *zdev)
 	hotplug_slot->info = info;
 
 	hotplug_slot->ops = &s390_hotplug_slot_ops;
-	hotplug_slot->release = &release_slot;
 
 	get_power_status(hotplug_slot, &info->power_status);
 	get_adapter_status(hotplug_slot, &info->adapter_status);
@@ -209,5 +199,8 @@ void zpci_exit_slot(struct zpci_dev *zdev)
 			continue;
 		list_del(&slot->slot_list);
 		pci_hp_deregister(slot->hotplug_slot);
+		kfree(slot->hotplug_slot->info);
+		kfree(slot->hotplug_slot);
+		kfree(slot);
 	}
 }
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index 78b6bdbb3a39..babd23409f61 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -628,7 +628,6 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
 			goto alloc_err;
 		}
 		bss_hotplug_slot->ops = &sn_hotplug_slot_ops;
-		bss_hotplug_slot->release = &sn_release_slot;
 
 		rc = pci_hp_register(bss_hotplug_slot, pci_bus, device, name);
 		if (rc)
@@ -656,8 +655,10 @@ alloc_err:
 		sn_release_slot(bss_hotplug_slot);
 
 	/* destroy anything else on the list */
-	while ((bss_hotplug_slot = sn_hp_destroy()))
+	while ((bss_hotplug_slot = sn_hp_destroy())) {
 		pci_hp_deregister(bss_hotplug_slot);
+		sn_release_slot(bss_hotplug_slot);
+	}
 
 	return rc;
 }
@@ -703,8 +704,10 @@ static void __exit sn_pci_hotplug_exit(void)
 {
 	struct hotplug_slot *bss_hotplug_slot;
 
-	while ((bss_hotplug_slot = sn_hp_destroy()))
+	while ((bss_hotplug_slot = sn_hp_destroy())) {
 		pci_hp_deregister(bss_hotplug_slot);
+		sn_release_slot(bss_hotplug_slot);
+	}
 
 	if (!list_empty(&sn_hp_list))
 		printk(KERN_ERR "%s: internal list is not empty\n", __FILE__);
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index 8e3c6ce12f31..97cee23f3d51 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -61,22 +61,6 @@ static struct hotplug_slot_ops shpchp_hotplug_slot_ops = {
 	.get_adapter_status =	get_adapter_status,
 };
 
-/**
- * release_slot - free up the memory used by a slot
- * @hotplug_slot: slot to free
- */
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot = hotplug_slot->private;
-
-	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
-		 __func__, slot_name(slot));
-
-	kfree(slot->hotplug_slot->info);
-	kfree(slot->hotplug_slot);
-	kfree(slot);
-}
-
 static int init_slots(struct controller *ctrl)
 {
 	struct slot *slot;
@@ -125,7 +109,6 @@ static int init_slots(struct controller *ctrl)
 
 		/* register this slot with the hotplug pci core */
 		hotplug_slot->private = slot;
-		hotplug_slot->release = &release_slot;
 		snprintf(name, SLOT_NAME_SIZE, "%d", slot->number);
 		hotplug_slot->ops = &shpchp_hotplug_slot_ops;
 
@@ -171,6 +154,9 @@ void cleanup_slots(struct controller *ctrl)
 		cancel_delayed_work(&slot->work);
 		destroy_workqueue(slot->wq);
 		pci_hp_deregister(slot->hotplug_slot);
+		kfree(slot->hotplug_slot->info);
+		kfree(slot->hotplug_slot);
+		kfree(slot);
 	}
 }
 
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 3d523ca64694..d67f32a29bb4 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -858,12 +858,6 @@ static int asus_get_adapter_status(struct hotplug_slot *hotplug_slot,
 	return 0;
 }
 
-static void asus_cleanup_pci_hotplug(struct hotplug_slot *hotplug_slot)
-{
-	kfree(hotplug_slot->info);
-	kfree(hotplug_slot);
-}
-
 static struct hotplug_slot_ops asus_hotplug_slot_ops = {
 	.owner = THIS_MODULE,
 	.get_adapter_status = asus_get_adapter_status,
@@ -905,7 +899,6 @@ static int asus_setup_pci_hotplug(struct asus_wmi *asus)
 		goto error_info;
 
 	asus->hotplug_slot->private = asus;
-	asus->hotplug_slot->release = &asus_cleanup_pci_hotplug;
 	asus->hotplug_slot->ops = &asus_hotplug_slot_ops;
 	asus_get_adapter_status(asus->hotplug_slot,
 				&asus->hotplug_slot->info->adapter_status);
@@ -1051,8 +1044,11 @@ static void asus_wmi_rfkill_exit(struct asus_wmi *asus)
 	 * asus_unregister_rfkill_notifier()
 	 */
 	asus_rfkill_hotplug(asus);
-	if (asus->hotplug_slot)
+	if (asus->hotplug_slot) {
 		pci_hp_deregister(asus->hotplug_slot);
+		kfree(asus->hotplug_slot->info);
+		kfree(asus->hotplug_slot);
+	}
 	if (asus->hotplug_workqueue)
 		destroy_workqueue(asus->hotplug_workqueue);
 
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 4c38904a8a32..a4bbf6ecd1f0 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -726,12 +726,6 @@ static int eeepc_get_adapter_status(struct hotplug_slot *hotplug_slot,
 	return 0;
 }
 
-static void eeepc_cleanup_pci_hotplug(struct hotplug_slot *hotplug_slot)
-{
-	kfree(hotplug_slot->info);
-	kfree(hotplug_slot);
-}
-
 static struct hotplug_slot_ops eeepc_hotplug_slot_ops = {
 	.owner = THIS_MODULE,
 	.get_adapter_status = eeepc_get_adapter_status,
@@ -758,7 +752,6 @@ static int eeepc_setup_pci_hotplug(struct eeepc_laptop *eeepc)
 		goto error_info;
 
 	eeepc->hotplug_slot->private = eeepc;
-	eeepc->hotplug_slot->release = &eeepc_cleanup_pci_hotplug;
 	eeepc->hotplug_slot->ops = &eeepc_hotplug_slot_ops;
 	eeepc_get_adapter_status(eeepc->hotplug_slot,
 				 &eeepc->hotplug_slot->info->adapter_status);
@@ -837,8 +830,11 @@ static void eeepc_rfkill_exit(struct eeepc_laptop *eeepc)
 		eeepc->wlan_rfkill = NULL;
 	}
 
-	if (eeepc->hotplug_slot)
+	if (eeepc->hotplug_slot) {
 		pci_hp_deregister(eeepc->hotplug_slot);
+		kfree(eeepc->hotplug_slot->info);
+		kfree(eeepc->hotplug_slot);
+	}
 
 	if (eeepc->bluetooth_rfkill) {
 		rfkill_unregister(eeepc->bluetooth_rfkill);
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index cf5e22103f68..a6d6650a0490 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -80,15 +80,12 @@ struct hotplug_slot_info {
  * @ops: pointer to the &struct hotplug_slot_ops to be used for this slot
  * @info: pointer to the &struct hotplug_slot_info for the initial values for
  * this slot.
- * @release: called during pci_hp_deregister to free memory allocated in a
- * hotplug_slot structure.
  * @private: used by the hotplug pci controller driver to store whatever it
  * needs.
  */
 struct hotplug_slot {
 	struct hotplug_slot_ops		*ops;
 	struct hotplug_slot_info	*info;
-	void (*release) (struct hotplug_slot *slot);
 	void				*private;
 
 	/* Variables below this are for use only by the hotplug pci core. */
@@ -104,13 +101,23 @@ static inline const char *hotplug_slot_name(const struct hotplug_slot *slot)
 int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *pbus, int nr,
 		      const char *name, struct module *owner,
 		      const char *mod_name);
-int pci_hp_deregister(struct hotplug_slot *slot);
+int __pci_hp_initialize(struct hotplug_slot *slot, struct pci_bus *bus, int nr,
+			const char *name, struct module *owner,
+			const char *mod_name);
+int pci_hp_add(struct hotplug_slot *slot);
+
+void pci_hp_del(struct hotplug_slot *slot);
+void pci_hp_destroy(struct hotplug_slot *slot);
+void pci_hp_deregister(struct hotplug_slot *slot);
+
 int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot,
 					 struct hotplug_slot_info *info);
 
 /* use a define to avoid include chaining to get THIS_MODULE & friends */
 #define pci_hp_register(slot, pbus, devnr, name) \
 	__pci_hp_register(slot, pbus, devnr, name, THIS_MODULE, KBUILD_MODNAME)
+#define pci_hp_initialize(slot, bus, nr, name) \
+	__pci_hp_initialize(slot, bus, nr, name, THIS_MODULE, KBUILD_MODNAME)
 
 /* PCI Setting Record (Type 0) */
 struct hpp_type0 {
-- 
cgit v1.2.3


From 664000b6bb4352295dc774108a1fc87c4a1ad0e3 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Tue, 19 Jun 2018 15:23:36 +0300
Subject: net/mlx5: Add support for flow table destination number

Add support to set a destination from a flow table number.
This functionality will be used in downstream patches from this
series by the DEVX stuff.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../mellanox/mlx5/core/diag/fs_tracepoint.c        |  3 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   | 24 ++++++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  4 +++-
 include/linux/mlx5/fs.h                            |  1 +
 include/linux/mlx5/mlx5_ifc.h                      |  1 +
 5 files changed, 23 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index b3820a34e773..0f11fff32a9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -240,6 +240,9 @@ const char *parse_fs_dst(struct trace_seq *p,
 	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
 		trace_seq_printf(p, "ft=%p\n", dst->ft);
 		break;
+	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+		trace_seq_printf(p, "ft_num=%u\n", dst->ft_num);
+		break;
 	case MLX5_FLOW_DESTINATION_TYPE_TIR:
 		trace_seq_printf(p, "tir=%u\n", dst->tir_num);
 		break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 6a62b84e57f4..8e01f818021b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -368,18 +368,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 		int list_size = 0;
 
 		list_for_each_entry(dst, &fte->node.children, node.list) {
-			unsigned int id;
+			unsigned int id, type = dst->dest_attr.type;
 
-			if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+			if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
 				continue;
 
-			MLX5_SET(dest_format_struct, in_dests, destination_type,
-				 dst->dest_attr.type);
-			if (dst->dest_attr.type ==
-			    MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+			switch (type) {
+			case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+				id = dst->dest_attr.ft_num;
+				type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+				break;
+			case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
 				id = dst->dest_attr.ft->id;
-			} else if (dst->dest_attr.type ==
-				   MLX5_FLOW_DESTINATION_TYPE_VPORT) {
+				break;
+			case MLX5_FLOW_DESTINATION_TYPE_VPORT:
 				id = dst->dest_attr.vport.num;
 				MLX5_SET(dest_format_struct, in_dests,
 					 destination_eswitch_owner_vhca_id_valid,
@@ -387,9 +389,13 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 				MLX5_SET(dest_format_struct, in_dests,
 					 destination_eswitch_owner_vhca_id,
 					 dst->dest_attr.vport.vhca_id);
-			} else {
+				break;
+			default:
 				id = dst->dest_attr.tir_num;
 			}
+
+			MLX5_SET(dest_format_struct, in_dests, destination_type,
+				 type);
 			MLX5_SET(dest_format_struct, in_dests, destination_id, id);
 			in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
 			list_size++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index a8d7d00f92f6..0d8378243903 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1356,7 +1356,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
 		     d1->ft == d2->ft) ||
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
-		     d1->tir_num == d2->tir_num))
+		     d1->tir_num == d2->tir_num) ||
+		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
+		     d1->ft_num == d2->ft_num))
 			return true;
 	}
 
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index c40f2fc68655..af0592400499 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -89,6 +89,7 @@ struct mlx5_flow_destination {
 	enum mlx5_flow_destination_type	type;
 	union {
 		u32			tir_num;
+		u32			ft_num;
 		struct mlx5_flow_table	*ft;
 		struct mlx5_fc		*counter;
 		struct {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ae12120ef021..c14b81559505 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1180,6 +1180,7 @@ enum mlx5_flow_destination_type {
 
 	MLX5_FLOW_DESTINATION_TYPE_PORT         = 0x99,
 	MLX5_FLOW_DESTINATION_TYPE_COUNTER      = 0x100,
+	MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM = 0x101,
 };
 
 struct mlx5_ifc_dest_format_struct_bits {
-- 
cgit v1.2.3


From 3730cf4dd70b6a36e48d58a862120311411b77f5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 24 Jul 2018 12:47:56 +0200
Subject: netlink: do not store start function in netlink_cb

->start() is called once when dump is being initialized, there is no
need to store it in netlink_cb.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  | 1 -
 net/netlink/af_netlink.c | 5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index f3075d6c7e82..71f121b66ca8 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -170,7 +170,6 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 struct netlink_callback {
 	struct sk_buff		*skb;
 	const struct nlmsghdr	*nlh;
-	int			(*start)(struct netlink_callback *);
 	int			(*dump)(struct sk_buff * skb,
 					struct netlink_callback *cb);
 	int			(*done)(struct netlink_callback *cb);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 393573a99a5a..f6ac7693d2cc 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2300,7 +2300,6 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 
 	cb = &nlk->cb;
 	memset(cb, 0, sizeof(*cb));
-	cb->start = control->start;
 	cb->dump = control->dump;
 	cb->done = control->done;
 	cb->nlh = nlh;
@@ -2309,8 +2308,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	cb->min_dump_alloc = control->min_dump_alloc;
 	cb->skb = skb;
 
-	if (cb->start) {
-		ret = cb->start(cb);
+	if (control->start) {
+		ret = control->start(cb);
 		if (ret)
 			goto error_put;
 	}
-- 
cgit v1.2.3


From 10ac86884b4d7642a235f9da61367c3f8d2ab2ff Mon Sep 17 00:00:00 2001
From: Yisheng Xie <ysxie@foxmail.com>
Date: Tue, 24 Jul 2018 19:11:26 +0200
Subject: fbcon: introduce for_each_registered_fb() helper

Following pattern is often used:

 for (i = 0; i < FB_MAX; i++) {
        if (registered_fb[i]) {
                ...
        }
 }

Therefore, as Andy's suggestion, for_each_registered_fb() helper can
be introduced to make the code easier to read and write by reducing
indentation level. It also saves few lines of code in each occurrence.

This patch convert all part here at the same time.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Yisheng Xie <ysxie@foxmail.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: David Lechner <david@lechnology.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/core/fbcon.c | 31 +++++++++++--------------------
 drivers/video/fbdev/core/fbmem.c |  4 +---
 include/linux/fb.h               |  4 ++++
 3 files changed, 16 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 5fb156bdcf4e..e30d3a138c97 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2234,8 +2234,8 @@ static int fbcon_switch(struct vc_data *vc)
 	 *
 	 * info->currcon = vc->vc_num;
 	 */
-	for (i = 0; i < FB_MAX; i++) {
-		if (registered_fb[i] != NULL && registered_fb[i]->fbcon_par) {
+	for_each_registered_fb(i) {
+		if (registered_fb[i]->fbcon_par) {
 			struct fbcon_ops *o = registered_fb[i]->fbcon_par;
 
 			o->currcon = vc->vc_num;
@@ -3124,11 +3124,9 @@ static int fbcon_fb_unregistered(struct fb_info *info)
 	if (idx == info_idx) {
 		info_idx = -1;
 
-		for (i = 0; i < FB_MAX; i++) {
-			if (registered_fb[i] != NULL) {
-				info_idx = i;
-				break;
-			}
+		for_each_registered_fb(i) {
+			info_idx = i;
+			break;
 		}
 	}
 
@@ -3609,10 +3607,8 @@ static int fbcon_output_notifier(struct notifier_block *nb,
 	deferred_takeover = false;
 	logo_shown = FBCON_LOGO_DONTSHOW;
 
-	for (i = 0; i < FB_MAX; i++) {
-		if (registered_fb[i])
-			fbcon_fb_registered(registered_fb[i]);
-	}
+	for_each_registered_fb(i)
+		fbcon_fb_registered(registered_fb[i]);
 
 	return NOTIFY_OK;
 }
@@ -3638,11 +3634,9 @@ static void fbcon_start(void)
 
 		console_lock();
 
-		for (i = 0; i < FB_MAX; i++) {
-			if (registered_fb[i] != NULL) {
-				info_idx = i;
-				break;
-			}
+		for_each_registered_fb(i) {
+			info_idx = i;
+			break;
 		}
 
 		do_fbcon_takeover(0);
@@ -3669,15 +3663,12 @@ static void fbcon_exit(void)
 	kfree((void *)softback_buf);
 	softback_buf = 0UL;
 
-	for (i = 0; i < FB_MAX; i++) {
+	for_each_registered_fb(i) {
 		int pending = 0;
 
 		mapped = 0;
 		info = registered_fb[i];
 
-		if (info == NULL)
-			continue;
-
 		if (info->queue.func)
 			pending = cancel_work_sync(&info->queue);
 		DPRINTK("fbcon: %s pending work\n", (pending ? "canceled" :
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 609438d2465b..645c6ac932d6 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1593,10 +1593,8 @@ static int do_remove_conflicting_framebuffers(struct apertures_struct *a,
 	int i, ret;
 
 	/* check all firmware fbs and kick off if the base addr overlaps */
-	for (i = 0 ; i < FB_MAX; i++) {
+	for_each_registered_fb(i) {
 		struct apertures_struct *gen_aper;
-		if (!registered_fb[i])
-			continue;
 
 		if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE))
 			continue;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index aa74a228bb92..fd31e6f24b8d 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -650,6 +650,10 @@ extern struct fb_info *registered_fb[FB_MAX];
 extern int num_registered_fb;
 extern struct class *fb_class;
 
+#define for_each_registered_fb(i)		\
+	for (i = 0; i < FB_MAX; i++)		\
+		if (!registered_fb[i]) {} else
+
 extern int lock_fb_info(struct fb_info *info);
 
 static inline void unlock_fb_info(struct fb_info *info)
-- 
cgit v1.2.3


From 3d910ef71732d15f251ca9b679da634adca72f5a Mon Sep 17 00:00:00 2001
From: Yisheng Xie <ysxie@foxmail.com>
Date: Tue, 24 Jul 2018 19:11:26 +0200
Subject: fbdev: fix typo in comment

Change beeng to being and occured to occurred.

Signed-off-by: Yisheng Xie <ysxie@foxmail.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: David Lechner <david@lechnology.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 include/linux/fb.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index fd31e6f24b8d..3e7e75383d32 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -126,7 +126,7 @@ struct fb_cursor_user {
 
 /*	The resolution of the passed in fb_info about to change */ 
 #define FB_EVENT_MODE_CHANGE		0x01
-/*	The display on this fb_info is beeing suspended, no access to the
+/*	The display on this fb_info is being suspended, no access to the
  *	framebuffer is allowed any more after that call returns
  */
 #define FB_EVENT_SUSPEND		0x02
@@ -159,9 +159,9 @@ struct fb_cursor_user {
 #define FB_EVENT_FB_UNBIND              0x0E
 /*      CONSOLE-SPECIFIC: remap all consoles to new fb - for vga_switcheroo */
 #define FB_EVENT_REMAP_ALL_CONSOLE      0x0F
-/*      A hardware display blank early change occured */
+/*      A hardware display blank early change occurred */
 #define FB_EARLY_EVENT_BLANK		0x10
-/*      A hardware display blank revert early change occured */
+/*      A hardware display blank revert early change occurred */
 #define FB_R_EARLY_EVENT_BLANK		0x11
 
 struct fb_event {
-- 
cgit v1.2.3


From e5cda42c16d89720c29678f51d95a119490ef7d8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Mon, 23 Jul 2018 19:52:57 +0200
Subject: ARM: exynos: Define EINT_WAKEUP_MASK registers for S5Pv210 and
 Exynos5433

S5Pv210 and Exynos5433/Exynos7 have different address of
EINT_WAKEUP_MASK register.  Rename existing S5P_EINT_WAKEUP_MASK to
avoid confusion and add new ones.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Tomasz Figa <tomasz.figa@gmail.com>
Cc: Sylwester Nawrocki <snawrocki@kernel.org>
Acked-by: Tomasz Figa <tomasz.figa@gmail.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 arch/arm/mach-exynos/suspend.c              | 2 +-
 include/linux/soc/samsung/exynos-regs-pmu.h | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index d3db306a5a70..f3384e3a675d 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -272,7 +272,7 @@ static int exynos5420_cpu_suspend(unsigned long arg)
 static void exynos_pm_set_wakeup_mask(void)
 {
 	/* Set wake-up mask registers */
-	pmu_raw_writel(exynos_get_eint_wake_mask(), S5P_EINT_WAKEUP_MASK);
+	pmu_raw_writel(exynos_get_eint_wake_mask(), EXYNOS_EINT_WAKEUP_MASK);
 	pmu_raw_writel(exynos_irqwake_intmask & ~(1 << 31), S5P_WAKEUP_MASK);
 }
 
diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index 66dcb9ec273a..eb0d240df7a7 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -42,7 +42,7 @@
 #define EXYNOS_SWRESET				0x0400
 
 #define S5P_WAKEUP_STAT				0x0600
-#define S5P_EINT_WAKEUP_MASK			0x0604
+#define EXYNOS_EINT_WAKEUP_MASK			0x0604
 #define S5P_WAKEUP_MASK				0x0608
 #define S5P_WAKEUP_MASK2				0x0614
 
@@ -180,6 +180,9 @@
 #define S5P_CORE_WAKEUP_FROM_LOCAL_CFG		(0x3 << 8)
 #define S5P_CORE_AUTOWAKEUP_EN			(1 << 31)
 
+/* Only for S5Pv210 */
+#define S5PV210_EINT_WAKEUP_MASK	0xC004
+
 /* Only for EXYNOS4210 */
 #define S5P_CMU_CLKSTOP_LCD1_LOWPWR	0x1154
 #define S5P_CMU_RESET_LCD1_LOWPWR	0x1174
@@ -641,6 +644,7 @@
 					 | EXYNOS5420_KFC_USE_STANDBY_WFI3)
 
 /* For EXYNOS5433 */
+#define EXYNOS5433_EINT_WAKEUP_MASK				(0x060C)
 #define EXYNOS5433_USBHOST30_PHY_CONTROL			(0x0728)
 #define EXYNOS5433_PAD_RETENTION_AUD_OPTION			(0x3028)
 #define EXYNOS5433_PAD_RETENTION_MMC2_OPTION			(0x30C8)
-- 
cgit v1.2.3


From a8be2af0218cf037704dc2e733bf56d6560fa324 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Mon, 23 Jul 2018 19:52:58 +0200
Subject: pinctrl: samsung: Write external wakeup interrupt mask

The pinctrl driver defines an IRQ chip which handles external wakeup
interrupts, therefore from logical point of view, it is the owner of
external interrupt mask.  The register controlling the mask belongs to
Power Management Unit address space so it has to be accessed with PMU
syscon regmap handle.

This mask should be written to hardware during system suspend.  Till now
ARMv7 machine code was responsible for this which created a dependency
between pin controller driver and arch/arm/mach code.

Try to rework this dependency so the pinctrl driver will write external
wakeup interrupt mask during late suspend.

Impact on ARMv7 designs (S5Pv210 and Exynos)
============================================
This duplicates setting mask with existing machine code
arch/arm/mach-exynos/suspend.c and arch/arm/mach-s5pv210/pm.c but it is
not a problem - the wakeup mask register will be written twice.  The
machine code will be cleaned up later.

The difference between implementation here and ARMv7 machine code
(arch/arm/mach-*) is the time of writing the mask:
1. The machine code is writing the mask quite late during system suspend
   path, after offlining secondary CPUs and just before doing actual
   suspend.
2. The implementation in pinctrl driver uses late suspend ops, therefore it
   will write the mask much earlier.  Hopefully late enough, after all
   drivers will enable or disable their interrupt wakeups
   (enable_irq_wake() etc).

Impact on ARMv8 designs (Exynos5433 and Exynos7)
================================================
The Suspend to RAM was not supported and external wakeup interrupt mask
was not written to HW.  This change brings us one step closer to
supporting Suspend to RAM.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Tomasz Figa <tomasz.figa@gmail.com>
Cc: Sylwester Nawrocki <snawrocki@kernel.org>
Acked-by: Tomasz Figa <tomasz.figa@gmail.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 drivers/pinctrl/samsung/pinctrl-exynos.c    | 50 ++++++++++++++++++++++++++++-
 drivers/pinctrl/samsung/pinctrl-samsung.h   |  3 ++
 include/linux/soc/samsung/exynos-regs-pmu.h |  2 ++
 3 files changed, 54 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c
index 29d86d704b0c..40ef14956876 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.c
@@ -25,6 +25,7 @@
 #include <linux/regmap.h>
 #include <linux/err.h>
 #include <linux/soc/samsung/exynos-pmu.h>
+#include <linux/soc/samsung/exynos-regs-pmu.h>
 
 #include <dt-bindings/pinctrl/samsung.h>
 
@@ -37,6 +38,8 @@ struct exynos_irq_chip {
 	u32 eint_con;
 	u32 eint_mask;
 	u32 eint_pend;
+	u32 eint_wake_mask_value;
+	u32 eint_wake_mask_reg;
 };
 
 static inline struct exynos_irq_chip *to_exynos_irq_chip(struct irq_chip *chip)
@@ -215,6 +218,7 @@ static struct exynos_irq_chip exynos_gpio_irq_chip = {
 	.eint_con = EXYNOS_GPIO_ECON_OFFSET,
 	.eint_mask = EXYNOS_GPIO_EMASK_OFFSET,
 	.eint_pend = EXYNOS_GPIO_EPEND_OFFSET,
+	/* eint_wake_mask_value not used */
 };
 
 static int exynos_eint_irq_map(struct irq_domain *h, unsigned int virq,
@@ -330,6 +334,8 @@ u32 exynos_get_eint_wake_mask(void)
 
 static int exynos_wkup_irq_set_wake(struct irq_data *irqd, unsigned int on)
 {
+	struct irq_chip *chip = irq_data_get_irq_chip(irqd);
+	struct exynos_irq_chip *our_chip = to_exynos_irq_chip(chip);
 	struct samsung_pin_bank *bank = irq_data_get_irq_chip_data(irqd);
 	unsigned long bit = 1UL << (2 * bank->eint_offset + irqd->hwirq);
 
@@ -339,6 +345,7 @@ static int exynos_wkup_irq_set_wake(struct irq_data *irqd, unsigned int on)
 		exynos_eint_wake_mask |= bit;
 	else
 		exynos_eint_wake_mask &= ~bit;
+	our_chip->eint_wake_mask_value = exynos_eint_wake_mask;
 
 	return 0;
 }
@@ -360,6 +367,9 @@ static const struct exynos_irq_chip s5pv210_wkup_irq_chip __initconst = {
 	.eint_con = EXYNOS_WKUP_ECON_OFFSET,
 	.eint_mask = EXYNOS_WKUP_EMASK_OFFSET,
 	.eint_pend = EXYNOS_WKUP_EPEND_OFFSET,
+	.eint_wake_mask_value = EXYNOS_EINT_WAKEUP_MASK_DISABLED,
+	/* Only difference with exynos4210_wkup_irq_chip: */
+	.eint_wake_mask_reg = S5PV210_EINT_WAKEUP_MASK,
 };
 
 static const struct exynos_irq_chip exynos4210_wkup_irq_chip __initconst = {
@@ -376,6 +386,8 @@ static const struct exynos_irq_chip exynos4210_wkup_irq_chip __initconst = {
 	.eint_con = EXYNOS_WKUP_ECON_OFFSET,
 	.eint_mask = EXYNOS_WKUP_EMASK_OFFSET,
 	.eint_pend = EXYNOS_WKUP_EPEND_OFFSET,
+	.eint_wake_mask_value = EXYNOS_EINT_WAKEUP_MASK_DISABLED,
+	.eint_wake_mask_reg = EXYNOS_EINT_WAKEUP_MASK,
 };
 
 static const struct exynos_irq_chip exynos7_wkup_irq_chip __initconst = {
@@ -392,6 +404,8 @@ static const struct exynos_irq_chip exynos7_wkup_irq_chip __initconst = {
 	.eint_con = EXYNOS7_WKUP_ECON_OFFSET,
 	.eint_mask = EXYNOS7_WKUP_EMASK_OFFSET,
 	.eint_pend = EXYNOS7_WKUP_EPEND_OFFSET,
+	.eint_wake_mask_value = EXYNOS_EINT_WAKEUP_MASK_DISABLED,
+	.eint_wake_mask_reg = EXYNOS5433_EINT_WAKEUP_MASK,
 };
 
 /* list of external wakeup controllers supported */
@@ -560,6 +574,27 @@ int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d)
 	return 0;
 }
 
+static void
+exynos_pinctrl_set_eint_wakeup_mask(struct samsung_pinctrl_drv_data *drvdata,
+				    struct exynos_irq_chip *irq_chip)
+{
+	struct regmap *pmu_regs;
+
+	if (!drvdata->retention_ctrl || !drvdata->retention_ctrl->priv) {
+		dev_warn(drvdata->dev,
+			 "No retention data configured bank with external wakeup interrupt. Wake-up mask will not be set.\n");
+		return;
+	}
+
+	pmu_regs = drvdata->retention_ctrl->priv;
+	dev_info(drvdata->dev,
+		 "Setting external wakeup interrupt wakeup mask: 0x%x\n",
+		 irq_chip->eint_wake_mask_value);
+
+	regmap_write(pmu_regs, irq_chip->eint_wake_mask_reg,
+		     irq_chip->eint_wake_mask_value);
+}
+
 static void exynos_pinctrl_suspend_bank(
 				struct samsung_pinctrl_drv_data *drvdata,
 				struct samsung_pin_bank *bank)
@@ -582,11 +617,24 @@ static void exynos_pinctrl_suspend_bank(
 void exynos_pinctrl_suspend(struct samsung_pinctrl_drv_data *drvdata)
 {
 	struct samsung_pin_bank *bank = drvdata->pin_banks;
+	struct exynos_irq_chip *irq_chip = NULL;
 	int i;
 
-	for (i = 0; i < drvdata->nr_banks; ++i, ++bank)
+	for (i = 0; i < drvdata->nr_banks; ++i, ++bank) {
 		if (bank->eint_type == EINT_TYPE_GPIO)
 			exynos_pinctrl_suspend_bank(drvdata, bank);
+		else if (bank->eint_type == EINT_TYPE_WKUP) {
+			if (!irq_chip) {
+				irq_chip = bank->irq_chip;
+				exynos_pinctrl_set_eint_wakeup_mask(drvdata,
+								    irq_chip);
+			} else if (bank->irq_chip != irq_chip) {
+				dev_warn(drvdata->dev,
+					 "More than one external wakeup interrupt chip configured (bank: %s). This is not supported by hardware nor by driver.\n",
+					 bank->name);
+			}
+		}
+	}
 }
 
 static void exynos_pinctrl_resume_bank(
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.h b/drivers/pinctrl/samsung/pinctrl-samsung.h
index aac16cc8362a..e571bbd7139b 100644
--- a/drivers/pinctrl/samsung/pinctrl-samsung.h
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.h
@@ -227,6 +227,9 @@ struct samsung_retention_data {
  *	device suspend, see samsung_pinctrl_suspend()
  * @resume: platform specific resume callback, executed during pin controller
  *	device suspend, see samsung_pinctrl_resume()
+ *
+ * External wakeup interrupts must define at least eint_wkup_init,
+ * retention_data and suspend in order for proper suspend/resume to work.
  */
 struct samsung_pin_ctrl {
 	const struct samsung_pin_bank_data *pin_banks;
diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index eb0d240df7a7..5addaf5ccbce 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -42,6 +42,8 @@
 #define EXYNOS_SWRESET				0x0400
 
 #define S5P_WAKEUP_STAT				0x0600
+/* Value for EXYNOS_EINT_WAKEUP_MASK disabling all external wakeup interrupts */
+#define EXYNOS_EINT_WAKEUP_MASK_DISABLED	0xffffffff
 #define EXYNOS_EINT_WAKEUP_MASK			0x0604
 #define S5P_WAKEUP_MASK				0x0608
 #define S5P_WAKEUP_MASK2				0x0614
-- 
cgit v1.2.3


From 0fc09f920983f61be625658c62cc40ac25a7b3a5 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 23 Jul 2018 08:37:50 -0600
Subject: blk-mq: export setting request completion state

This is preparing for drivers that want to directly alter the state of
their requests. No functional change here.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         |  4 +---
 include/linux/blk-mq.h | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d394cdd8d8c6..5291a95ba362 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -558,10 +558,8 @@ static void __blk_mq_complete_request(struct request *rq)
 	bool shared = false;
 	int cpu;
 
-	if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
-			MQ_RQ_IN_FLIGHT)
+	if (!blk_mq_mark_complete(rq))
 		return;
-
 	if (rq->internal_tag != -1)
 		blk_mq_sched_completed_request(rq);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e3147eb74222..ca3f2c2edd85 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -287,6 +287,20 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
 void blk_mq_quiesce_queue_nowait(struct request_queue *q);
 
+/**
+ * blk_mq_mark_complete() - Set request state to complete
+ * @rq: request to set to complete state
+ *
+ * Returns true if request state was successfully set to complete. If
+ * successful, the caller is responsibile for seeing this request is ended, as
+ * blk_mq_complete_request will not work again.
+ */
+static inline bool blk_mq_mark_complete(struct request *rq)
+{
+	return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) ==
+			MQ_RQ_IN_FLIGHT;
+}
+
 /*
  * Driver command data is immediately after the request. So subtract request
  * size to get back to the original request, add request size to get the PDU.
-- 
cgit v1.2.3


From 071f52fbce6161706d070ceada5accb81630bf02 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 24 Jul 2018 09:52:32 +0200
Subject: block: remove bio_clone_kmalloc

Unused now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index ab221c517f4e..b861baa59454 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -443,12 +443,6 @@ static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
 }
 
-static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
-{
-	return bio_clone_bioset(bio, gfp_mask, NULL);
-
-}
-
 extern blk_qc_t submit_bio(struct bio *);
 
 extern void bio_endio(struct bio *);
-- 
cgit v1.2.3


From c55183c9aaa00d2bbb578169a480e31aff3d397c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 24 Jul 2018 09:52:34 +0200
Subject: block: unexport bio_clone_bioset

Now only used by the bounce code, so move it there and mark the function
static.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 77 -----------------------------------------------------
 block/bounce.c      | 69 ++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/bio.h |  1 -
 3 files changed, 68 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 07d3ffd95989..b832151cd0bf 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -646,83 +646,6 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
 }
 EXPORT_SYMBOL(bio_clone_fast);
 
-/**
- * 	bio_clone_bioset - clone a bio
- * 	@bio_src: bio to clone
- *	@gfp_mask: allocation priority
- *	@bs: bio_set to allocate from
- *
- *	Clone bio. Caller will own the returned bio, but not the actual data it
- *	points to. Reference count of returned bio will be one.
- */
-struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
-			     struct bio_set *bs)
-{
-	struct bvec_iter iter;
-	struct bio_vec bv;
-	struct bio *bio;
-
-	/*
-	 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
-	 * bio_src->bi_io_vec to bio->bi_io_vec.
-	 *
-	 * We can't do that anymore, because:
-	 *
-	 *  - The point of cloning the biovec is to produce a bio with a biovec
-	 *    the caller can modify: bi_idx and bi_bvec_done should be 0.
-	 *
-	 *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
-	 *    we tried to clone the whole thing bio_alloc_bioset() would fail.
-	 *    But the clone should succeed as long as the number of biovecs we
-	 *    actually need to allocate is fewer than BIO_MAX_PAGES.
-	 *
-	 *  - Lastly, bi_vcnt should not be looked at or relied upon by code
-	 *    that does not own the bio - reason being drivers don't use it for
-	 *    iterating over the biovec anymore, so expecting it to be kept up
-	 *    to date (i.e. for clones that share the parent biovec) is just
-	 *    asking for trouble and would force extra work on
-	 *    __bio_clone_fast() anyways.
-	 */
-
-	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
-	if (!bio)
-		return NULL;
-	bio->bi_disk		= bio_src->bi_disk;
-	bio->bi_opf		= bio_src->bi_opf;
-	bio->bi_write_hint	= bio_src->bi_write_hint;
-	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
-	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
-
-	switch (bio_op(bio)) {
-	case REQ_OP_DISCARD:
-	case REQ_OP_SECURE_ERASE:
-	case REQ_OP_WRITE_ZEROES:
-		break;
-	case REQ_OP_WRITE_SAME:
-		bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
-		break;
-	default:
-		bio_for_each_segment(bv, bio_src, iter)
-			bio->bi_io_vec[bio->bi_vcnt++] = bv;
-		break;
-	}
-
-	if (bio_integrity(bio_src)) {
-		int ret;
-
-		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
-		if (ret < 0) {
-			bio_put(bio);
-			return NULL;
-		}
-	}
-
-	bio_clone_blkcg_association(bio, bio_src);
-
-	return bio;
-}
-EXPORT_SYMBOL(bio_clone_bioset);
-
 /**
  *	bio_add_pc_page	-	attempt to add page to bio
  *	@q: the target queue
diff --git a/block/bounce.c b/block/bounce.c
index fd31347b7836..bc63b3a2d18c 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -195,6 +195,73 @@ static void bounce_end_io_read_isa(struct bio *bio)
 	__bounce_end_io_read(bio, &isa_page_pool);
 }
 
+static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
+		struct bio_set *bs)
+{
+	struct bvec_iter iter;
+	struct bio_vec bv;
+	struct bio *bio;
+
+	/*
+	 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
+	 * bio_src->bi_io_vec to bio->bi_io_vec.
+	 *
+	 * We can't do that anymore, because:
+	 *
+	 *  - The point of cloning the biovec is to produce a bio with a biovec
+	 *    the caller can modify: bi_idx and bi_bvec_done should be 0.
+	 *
+	 *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
+	 *    we tried to clone the whole thing bio_alloc_bioset() would fail.
+	 *    But the clone should succeed as long as the number of biovecs we
+	 *    actually need to allocate is fewer than BIO_MAX_PAGES.
+	 *
+	 *  - Lastly, bi_vcnt should not be looked at or relied upon by code
+	 *    that does not own the bio - reason being drivers don't use it for
+	 *    iterating over the biovec anymore, so expecting it to be kept up
+	 *    to date (i.e. for clones that share the parent biovec) is just
+	 *    asking for trouble and would force extra work on
+	 *    __bio_clone_fast() anyways.
+	 */
+
+	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
+	if (!bio)
+		return NULL;
+	bio->bi_disk		= bio_src->bi_disk;
+	bio->bi_opf		= bio_src->bi_opf;
+	bio->bi_write_hint	= bio_src->bi_write_hint;
+	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
+	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
+
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_ZEROES:
+		break;
+	case REQ_OP_WRITE_SAME:
+		bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
+		break;
+	default:
+		bio_for_each_segment(bv, bio_src, iter)
+			bio->bi_io_vec[bio->bi_vcnt++] = bv;
+		break;
+	}
+
+	if (bio_integrity(bio_src)) {
+		int ret;
+
+		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
+		if (ret < 0) {
+			bio_put(bio);
+			return NULL;
+		}
+	}
+
+	bio_clone_blkcg_association(bio, bio_src);
+
+	return bio;
+}
+
 static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 			       mempool_t *pool)
 {
@@ -222,7 +289,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 		generic_make_request(*bio_orig);
 		*bio_orig = bio;
 	}
-	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
+	bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
 			&bounce_bio_set);
 
 	bio_for_each_segment_all(to, bio, i) {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b861baa59454..51371740d2a8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -429,7 +429,6 @@ extern void bio_put(struct bio *);
 
 extern void __bio_clone_fast(struct bio *, struct bio *);
 extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
-extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
 
 extern struct bio_set fs_bio_set;
 
-- 
cgit v1.2.3


From 62cedf3e60af03e47849fe2bd6a03ec179422a8a Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Fri, 20 Jul 2018 10:39:13 +0200
Subject: locking/rtmutex: Allow specifying a subclass for nested locking

Needed for annotating rt_mutex locks.

Tested-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Deepa Dinamani <deepadinamani@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Chang <dpf@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Wolfram Sang <wsa@the-dreams.de>
Link: http://lkml.kernel.org/r/20180720083914.1950-2-peda@axentia.se
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rtmutex.h  |  7 +++++++
 kernel/locking/rtmutex.c | 29 +++++++++++++++++++++++++----
 2 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 1b92a28dd672..6fd615a0eea9 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -106,7 +106,14 @@ static inline int rt_mutex_is_locked(struct rt_mutex *lock)
 extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
 extern void rt_mutex_destroy(struct rt_mutex *lock);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass);
+#define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0)
+#else
 extern void rt_mutex_lock(struct rt_mutex *lock);
+#define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock)
+#endif
+
 extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
 extern int rt_mutex_timed_lock(struct rt_mutex *lock,
 			       struct hrtimer_sleeper *timeout);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 4f014be7a4b8..2823d4163a37 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1465,6 +1465,29 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
 		rt_mutex_postunlock(&wake_q);
 }
 
+static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass)
+{
+	might_sleep();
+
+	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/**
+ * rt_mutex_lock_nested - lock a rt_mutex
+ *
+ * @lock: the rt_mutex to be locked
+ * @subclass: the lockdep subclass
+ */
+void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
+{
+	__rt_mutex_lock(lock, subclass);
+}
+EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
+#endif
+
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /**
  * rt_mutex_lock - lock a rt_mutex
  *
@@ -1472,12 +1495,10 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
  */
 void __sched rt_mutex_lock(struct rt_mutex *lock)
 {
-	might_sleep();
-
-	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
+	__rt_mutex_lock(lock, 0);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock);
+#endif
 
 /**
  * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
-- 
cgit v1.2.3


From 6e30396767508101eacec8b93b068e8905e660dc Mon Sep 17 00:00:00 2001
From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Date: Wed, 20 Jun 2018 22:32:42 +0530
Subject: sched/numa: Remove redundant field

'numa_entry' is a struct list_head defined in task_struct, but never used.

No functional change.

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1529514181-9842-2-git-send-email-srikar@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43731fe51c97..e0f4f56c9310 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1017,7 +1017,6 @@ struct task_struct {
 	u64				last_sum_exec_runtime;
 	struct callback_head		numa_work;
 
-	struct list_head		numa_entry;
 	struct numa_group		*numa_group;
 
 	/*
-- 
cgit v1.2.3


From 6cbc304f2f360f25cc8607817239d6f4a2fd3dc5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 10 May 2018 15:48:41 +0200
Subject: perf/x86/intel: Fix unwind errors from PEBS entries (mk-II)

Vince reported the perf_fuzzer giving various unwinder warnings and
Josh reported:

> Deja vu.  Most of these are related to perf PEBS, similar to the
> following issue:
>
>   b8000586c90b ("perf/x86/intel: Cure bogus unwind from PEBS entries")
>
> This is basically the ORC version of that.  setup_pebs_sample_data() is
> assembling a franken-pt_regs which ORC isn't happy about.  RIP is
> inconsistent with some of the other registers (like RSP and RBP).

And where the previous unwinder only needed BP,SP ORC also requires
IP. But we cannot spoof IP because then the sample will get displaced,
entirely negating the point of PEBS.

So cure the whole thing differently by doing the unwind early; this
does however require a means to communicate we did the unwind early.
We (ab)use an unused sample_type bit for this, which we set on events
that fill out the data->callchain before the normal
perf_prepare_sample().

Debugged-by: Josh Poimboeuf <jpoimboe@redhat.com>
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Tested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/core.c    |  3 +++
 arch/x86/events/intel/ds.c      | 25 +++++++++++--------------
 include/linux/perf_event.h      |  1 +
 include/uapi/linux/perf_event.h |  2 ++
 kernel/events/core.c            |  6 ++++--
 5 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 707b2a96e516..86f0c15dcc2d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2997,6 +2997,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		}
 		if (x86_pmu.pebs_aliases)
 			x86_pmu.pebs_aliases(event);
+
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
 	}
 
 	if (needs_branch_stack(event)) {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8cf03f101938..8dbba77e0518 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1185,17 +1185,21 @@ static void setup_pebs_sample_data(struct perf_event *event,
 		data->data_src.val = val;
 	}
 
+	/*
+	 * We must however always use iregs for the unwinder to stay sane; the
+	 * record BP,SP,IP can point into thin air when the record is from a
+	 * previous PMI context or an (I)RET happend between the record and
+	 * PMI.
+	 */
+	if (sample_type & PERF_SAMPLE_CALLCHAIN)
+		data->callchain = perf_callchain(event, iregs);
+
 	/*
 	 * We use the interrupt regs as a base because the PEBS record does not
 	 * contain a full regs set, specifically it seems to lack segment
 	 * descriptors, which get used by things like user_mode().
 	 *
 	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
-	 *
-	 * We must however always use BP,SP from iregs for the unwinder to stay
-	 * sane; the record BP,SP can point into thin air when the record is
-	 * from a previous PMI context or an (I)RET happend between the record
-	 * and PMI.
 	 */
 	*regs = *iregs;
 
@@ -1214,15 +1218,8 @@ static void setup_pebs_sample_data(struct perf_event *event,
 		regs->si = pebs->si;
 		regs->di = pebs->di;
 
-		/*
-		 * Per the above; only set BP,SP if we don't need callchains.
-		 *
-		 * XXX: does this make sense?
-		 */
-		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
-			regs->bp = pebs->bp;
-			regs->sp = pebs->sp;
-		}
+		regs->bp = pebs->bp;
+		regs->sp = pebs->sp;
 
 #ifndef CONFIG_X86_32
 		regs->r8 = pebs->r8;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1fa12887ec02..87f6db437e4a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1130,6 +1130,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
 extern struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 		   u32 max_stack, bool crosstask, bool add_mark);
+extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
 extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b8e288a1f740..eeb787b1c53c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -143,6 +143,8 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 
 	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
+
+	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63,
 };
 
 /*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8f0434a9951a..cdb32cf8e33c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6343,7 +6343,7 @@ static u64 perf_virt_to_phys(u64 virt)
 
 static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
 
-static struct perf_callchain_entry *
+struct perf_callchain_entry *
 perf_callchain(struct perf_event *event, struct pt_regs *regs)
 {
 	bool kernel = !event->attr.exclude_callchain_kernel;
@@ -6382,7 +6382,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
 
-		data->callchain = perf_callchain(event, regs);
+		if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+			data->callchain = perf_callchain(event, regs);
+
 		size += data->callchain->nr;
 
 		header->size += size * sizeof(u64);
-- 
cgit v1.2.3


From fd2efaa4eb5317c3a86357a83a7d456a1b86a0ac Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 16 Jul 2018 12:30:11 +0100
Subject: locking/atomics: Rework ordering barriers

Currently architectures can override __atomic_op_*() to define the barriers
used before/after a relaxed atomic when used to build acquire/release/fence
variants.

This has the unfortunate property of requiring the architecture to define the
full wrapper for the atomics, rather than just the barriers they care about,
and gets in the way of generating atomics which can be easily read.

Instead, this patch has architectures define an optional set of barriers:

* __atomic_acquire_fence()
* __atomic_release_fence()
* __atomic_pre_full_fence()
* __atomic_post_full_fence()

... which <linux/atomic.h> uses to build the wrappers.

It would be nice if we could undef these, along with the __atomic_op_*()
wrappers, but that would break the cmpxchg() wrappers, which are written
in preprocessor. Undefs would have been nice, but alas.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: andy.shevchenko@gmail.com
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: catalin.marinas@arm.com
Cc: dvyukov@google.com
Cc: glider@google.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: peter@hurleysoftware.com
Link: http://lkml.kernel.org/r/20180716113017.3909-7-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h   |  8 ++++----
 arch/powerpc/include/asm/atomic.h | 17 +++++------------
 arch/riscv/include/asm/atomic.h   | 17 +++++------------
 include/linux/atomic.h            | 38 ++++++++++++++++++++++----------------
 4 files changed, 36 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 4a6a8f58c9c9..150a1c5d6a2c 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -18,11 +18,11 @@
  * To ensure dependency ordering is preserved for the _relaxed and
  * _release atomics, an smp_read_barrier_depends() is unconditionally
  * inserted into the _relaxed variants, which are used to build the
- * barriered versions. To avoid redundant back-to-back fences, we can
- * define the _acquire and _fence versions explicitly.
+ * barriered versions. Avoid redundant back-to-back fences in the
+ * _acquire and _fence versions.
  */
-#define __atomic_op_acquire(op, args...)	op##_relaxed(args)
-#define __atomic_op_fence			__atomic_op_release
+#define __atomic_acquire_fence()
+#define __atomic_post_full_fence()
 
 #define ATOMIC_INIT(i)		{ (i) }
 #define ATOMIC64_INIT(i)	{ (i) }
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index a0156cb43d1f..963abf8bf1c0 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -18,18 +18,11 @@
  * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
  * on the platform without lwsync.
  */
-#define __atomic_op_acquire(op, args...)				\
-({									\
-	typeof(op##_relaxed(args)) __ret  = op##_relaxed(args);		\
-	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory");	\
-	__ret;								\
-})
-
-#define __atomic_op_release(op, args...)				\
-({									\
-	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory");	\
-	op##_relaxed(args);						\
-})
+#define __atomic_acquire_fence()					\
+	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
+
+#define __atomic_release_fence()					\
+	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
 
 static __inline__ int atomic_read(const atomic_t *v)
 {
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 512b89485790..c452359c9cb8 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -25,18 +25,11 @@
 
 #define ATOMIC_INIT(i)	{ (i) }
 
-#define __atomic_op_acquire(op, args...)				\
-({									\
-	typeof(op##_relaxed(args)) __ret  = op##_relaxed(args);		\
-	__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory");	\
-	__ret;								\
-})
-
-#define __atomic_op_release(op, args...)				\
-({									\
-	__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");	\
-	op##_relaxed(args);						\
-})
+#define __atomic_acquire_fence()					\
+	__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory")
+
+#define __atomic_release_fence()					\
+	__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");
 
 static __always_inline int atomic_read(const atomic_t *v)
 {
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 8e04f1f69bd9..1e8e88bdaf09 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -38,40 +38,46 @@
  * barriers on top of the relaxed variant. In the case where the relaxed
  * variant is already fully ordered, no additional barriers are needed.
  *
- * Besides, if an arch has a special barrier for acquire/release, it could
- * implement its own __atomic_op_* and use the same framework for building
- * variants
- *
- * If an architecture overrides __atomic_op_acquire() it will probably want
- * to define smp_mb__after_spinlock().
+ * If an architecture overrides __atomic_acquire_fence() it will probably
+ * want to define smp_mb__after_spinlock().
  */
-#ifndef __atomic_op_acquire
+#ifndef __atomic_acquire_fence
+#define __atomic_acquire_fence		smp_mb__after_atomic
+#endif
+
+#ifndef __atomic_release_fence
+#define __atomic_release_fence		smp_mb__before_atomic
+#endif
+
+#ifndef __atomic_pre_full_fence
+#define __atomic_pre_full_fence		smp_mb__before_atomic
+#endif
+
+#ifndef __atomic_post_full_fence
+#define __atomic_post_full_fence	smp_mb__after_atomic
+#endif
+
 #define __atomic_op_acquire(op, args...)				\
 ({									\
 	typeof(op##_relaxed(args)) __ret  = op##_relaxed(args);		\
-	smp_mb__after_atomic();						\
+	__atomic_acquire_fence();					\
 	__ret;								\
 })
-#endif
 
-#ifndef __atomic_op_release
 #define __atomic_op_release(op, args...)				\
 ({									\
-	smp_mb__before_atomic();					\
+	__atomic_release_fence();					\
 	op##_relaxed(args);						\
 })
-#endif
 
-#ifndef __atomic_op_fence
 #define __atomic_op_fence(op, args...)					\
 ({									\
 	typeof(op##_relaxed(args)) __ret;				\
-	smp_mb__before_atomic();					\
+	__atomic_pre_full_fence();					\
 	__ret = op##_relaxed(args);					\
-	smp_mb__after_atomic();						\
+	__atomic_post_full_fence();					\
 	__ret;								\
 })
-#endif
 
 /* atomic_add_return_relaxed */
 #ifndef atomic_add_return_relaxed
-- 
cgit v1.2.3


From d27fb99f62af7b79c542d161aa5155ed57271ddc Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 23 Jul 2018 22:42:48 +0100
Subject: dma-mapping: relax warning for per-device areas

The reasons why dma_free_attrs() should not be called from IRQ context
are not necessarily obvious and somewhat buried in the development
history, so let's start by documenting the warning itself to help anyone
who does happen to hit it and wonder what the deal is.

However, this check turns out to be slightly over-restrictive for the
way that per-device memory has been spliced into the general API, since
for that case we know that dma_declare_coherent_memory() has created an
appropriate CPU mapping for the entire area and nothing dynamic should
be happening. Given that the usage model for per-device memory is often
more akin to streaming DMA than 'real' coherent DMA (e.g. allocating and
freeing space to copy short-lived packets in and out), it is also
somewhat more reasonable for those operations to happen in IRQ handlers
for such devices.

Therefore, let's move the irqs_disabled() check down past the per-device
area hook, so that that gets a chance to resolve the request before we
reach definite "you're doing it wrong" territory.

Reported-by: Fredrik Noring <noring@nocrew.org>
Tested-by: Fredrik Noring <noring@nocrew.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-mapping.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f9cc309507d9..1db6a6b46d0d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -538,10 +538,17 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!ops);
-	WARN_ON(irqs_disabled());
 
 	if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
 		return;
+	/*
+	 * On non-coherent platforms which implement DMA-coherent buffers via
+	 * non-cacheable remaps, ops->free() may call vunmap(). Thus getting
+	 * this far in IRQ context is a) at risk of a BUG_ON() or trying to
+	 * sleep on some machines, and b) an indication that the driver is
+	 * probably misusing the coherent API anyway.
+	 */
+	WARN_ON(irqs_disabled());
 
 	if (!ops->free || !cpu_addr)
 		return;
-- 
cgit v1.2.3


From 73c8d8945505acdcbae137c2e00a1232e0be709f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 14 Jul 2018 01:28:15 +0900
Subject: ring_buffer: tracing: Inherit the tracing setting to next ring buffer

Maintain the tracing on/off setting of the ring_buffer when switching
to the trace buffer snapshot.

Taking a snapshot is done by swapping the backup ring buffer
(max_tr_buffer). But since the tracing on/off setting is defined
by the ring buffer, when swapping it, the tracing on/off setting
can also be changed. This causes a strange result like below:

  /sys/kernel/debug/tracing # cat tracing_on
  1
  /sys/kernel/debug/tracing # echo 0 > tracing_on
  /sys/kernel/debug/tracing # cat tracing_on
  0
  /sys/kernel/debug/tracing # echo 1 > snapshot
  /sys/kernel/debug/tracing # cat tracing_on
  1
  /sys/kernel/debug/tracing # echo 1 > snapshot
  /sys/kernel/debug/tracing # cat tracing_on
  0

We don't touch tracing_on, but snapshot changes tracing_on
setting each time. This is an anomaly, because user doesn't know
that each "ring_buffer" stores its own tracing-enable state and
the snapshot is done by swapping ring buffers.

Link: http://lkml.kernel.org/r/153149929558.11274.11730609978254724394.stgit@devbox

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
Cc: Hiraku Toyooka <hiraku.toyooka@cybertrust.co.jp>
Cc: stable@vger.kernel.org
Fixes: debdd57f5145 ("tracing: Make a snapshot feature available from userspace")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
[ Updated commit log and comment in the code ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  1 +
 kernel/trace/ring_buffer.c  | 16 ++++++++++++++++
 kernel/trace/trace.c        |  6 ++++++
 3 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index b72ebdff0b77..003d09ab308d 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -165,6 +165,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer);
 void ring_buffer_record_off(struct ring_buffer *buffer);
 void ring_buffer_record_on(struct ring_buffer *buffer);
 int ring_buffer_record_is_on(struct ring_buffer *buffer);
+int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6a46af21765c..0b0b688ea166 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3226,6 +3226,22 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer)
 	return !atomic_read(&buffer->record_disabled);
 }
 
+/**
+ * ring_buffer_record_is_set_on - return true if the ring buffer is set writable
+ * @buffer: The ring buffer to see if write is set enabled
+ *
+ * Returns true if the ring buffer is set writable by ring_buffer_record_on().
+ * Note that this does NOT mean it is in a writable state.
+ *
+ * It may return true when the ring buffer has been disabled by
+ * ring_buffer_record_disable(), as that is a temporary disabling of
+ * the ring buffer.
+ */
+int ring_buffer_record_is_set_on(struct ring_buffer *buffer)
+{
+	return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
+}
+
 /**
  * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
  * @buffer: The ring buffer to stop writes to.
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 87cf25171fb8..823687997b01 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1373,6 +1373,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 	arch_spin_lock(&tr->max_lock);
 
+	/* Inherit the recordable setting from trace_buffer */
+	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
+		ring_buffer_record_on(tr->max_buffer.buffer);
+	else
+		ring_buffer_record_off(tr->max_buffer.buffer);
+
 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
 
 	__update_max_tr(tr, tsk, cpu);
-- 
cgit v1.2.3


From 3c507b8af638c67d4a80d70091d2057ecb01e8a6 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 23 Jul 2018 21:40:07 +0200
Subject: net: phy: add helper phy_polling_mode

Add a helper for checking whether polling is used to detect PHY status
changes.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c |  8 ++++----
 include/linux/phy.h   | 10 ++++++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 04780db4c963..1ee25877c4d1 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -525,7 +525,7 @@ static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
 	 * negotiation may already be done and aneg interrupt may not be
 	 * generated.
 	 */
-	if (phydev->irq != PHY_POLL && phydev->state == PHY_AN) {
+	if (!phy_polling_mode(phydev) && phydev->state == PHY_AN) {
 		err = phy_aneg_done(phydev);
 		if (err > 0) {
 			trigger = true;
@@ -983,7 +983,7 @@ void phy_state_machine(struct work_struct *work)
 			needs_aneg = true;
 		break;
 	case PHY_NOLINK:
-		if (phydev->irq != PHY_POLL)
+		if (!phy_polling_mode(phydev))
 			break;
 
 		err = phy_read_status(phydev);
@@ -1024,7 +1024,7 @@ void phy_state_machine(struct work_struct *work)
 		/* Only register a CHANGE if we are polling and link changed
 		 * since latest checking.
 		 */
-		if (phydev->irq == PHY_POLL) {
+		if (phy_polling_mode(phydev)) {
 			old_link = phydev->link;
 			err = phy_read_status(phydev);
 			if (err)
@@ -1123,7 +1123,7 @@ void phy_state_machine(struct work_struct *work)
 	 * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
 	 * between states from phy_mac_interrupt()
 	 */
-	if (phydev->irq == PHY_POLL)
+	if (phy_polling_mode(phydev))
 		queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
 				   PHY_STATE_TIME * HZ);
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 075c2f770d3e..cd6f637cbbfb 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -824,6 +824,16 @@ static inline bool phy_interrupt_is_valid(struct phy_device *phydev)
 	return phydev->irq != PHY_POLL && phydev->irq != PHY_IGNORE_INTERRUPT;
 }
 
+/**
+ * phy_polling_mode - Convenience function for testing whether polling is
+ * used to detect PHY status changes
+ * @phydev: the phy_device struct
+ */
+static inline bool phy_polling_mode(struct phy_device *phydev)
+{
+	return phydev->irq == PHY_POLL;
+}
+
 /**
  * phy_is_internal - Convenience function for testing if a PHY is internal
  * @phydev: the phy_device struct
-- 
cgit v1.2.3


From 6f4ceee9305dc3fe74099159b460f4b56b506f1d Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Tue, 24 Jul 2018 14:26:04 -0400
Subject: cpu/hotplug: Add a cpus_read_trylock() function

There are use cases where it can be useful to have a cpus_read_trylock()
function to work around circular lock dependency problem involving
the cpu_hotplug_lock.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpu.h | 2 ++
 kernel/cpu.c        | 6 ++++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a97a63eef59f..e850bfea3e84 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -103,6 +103,7 @@ extern void cpus_write_lock(void);
 extern void cpus_write_unlock(void);
 extern void cpus_read_lock(void);
 extern void cpus_read_unlock(void);
+extern int  cpus_read_trylock(void);
 extern void lockdep_assert_cpus_held(void);
 extern void cpu_hotplug_disable(void);
 extern void cpu_hotplug_enable(void);
@@ -115,6 +116,7 @@ static inline void cpus_write_lock(void) { }
 static inline void cpus_write_unlock(void) { }
 static inline void cpus_read_lock(void) { }
 static inline void cpus_read_unlock(void) { }
+static inline int  cpus_read_trylock(void) { return true; }
 static inline void lockdep_assert_cpus_held(void) { }
 static inline void cpu_hotplug_disable(void) { }
 static inline void cpu_hotplug_enable(void) { }
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0db8938fbb23..307486baa477 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -290,6 +290,12 @@ void cpus_read_lock(void)
 }
 EXPORT_SYMBOL_GPL(cpus_read_lock);
 
+int cpus_read_trylock(void)
+{
+	return percpu_down_read_trylock(&cpu_hotplug_lock);
+}
+EXPORT_SYMBOL_GPL(cpus_read_trylock);
+
 void cpus_read_unlock(void)
 {
 	percpu_up_read(&cpu_hotplug_lock);
-- 
cgit v1.2.3


From 1560d0848a1a84db6c1d9b17c14273c0dae41828 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 25 Jul 2018 14:34:29 +0200
Subject: rtc: remove rtc_irq_register/rtc_irq_unregister

The rtc_irq_* interface is not used from outside the RTC subsytem since
2016.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/interface.c | 33 ---------------------------------
 include/linux/rtc.h     |  4 ----
 2 files changed, 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 6d4012dd6922..2b1b9a0b9f8a 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -719,39 +719,6 @@ void rtc_class_close(struct rtc_device *rtc)
 }
 EXPORT_SYMBOL_GPL(rtc_class_close);
 
-int rtc_irq_register(struct rtc_device *rtc, struct rtc_task *task)
-{
-	int retval = -EBUSY;
-
-	if (task == NULL || task->func == NULL)
-		return -EINVAL;
-
-	/* Cannot register while the char dev is in use */
-	if (test_and_set_bit_lock(RTC_DEV_BUSY, &rtc->flags))
-		return -EBUSY;
-
-	spin_lock_irq(&rtc->irq_task_lock);
-	if (rtc->irq_task == NULL) {
-		rtc->irq_task = task;
-		retval = 0;
-	}
-	spin_unlock_irq(&rtc->irq_task_lock);
-
-	clear_bit_unlock(RTC_DEV_BUSY, &rtc->flags);
-
-	return retval;
-}
-EXPORT_SYMBOL_GPL(rtc_irq_register);
-
-void rtc_irq_unregister(struct rtc_device *rtc, struct rtc_task *task)
-{
-	spin_lock_irq(&rtc->irq_task_lock);
-	if (rtc->irq_task == task)
-		rtc->irq_task = NULL;
-	spin_unlock_irq(&rtc->irq_task_lock);
-}
-EXPORT_SYMBOL_GPL(rtc_irq_unregister);
-
 static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
 {
 	/*
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 6268208760e9..f868d6b619ab 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -204,10 +204,6 @@ extern void rtc_update_irq(struct rtc_device *rtc,
 extern struct rtc_device *rtc_class_open(const char *name);
 extern void rtc_class_close(struct rtc_device *rtc);
 
-extern int rtc_irq_register(struct rtc_device *rtc,
-				struct rtc_task *task);
-extern void rtc_irq_unregister(struct rtc_device *rtc,
-				struct rtc_task *task);
 extern int rtc_irq_set_state(struct rtc_device *rtc,
 				struct rtc_task *task, int enabled);
 extern int rtc_irq_set_freq(struct rtc_device *rtc,
-- 
cgit v1.2.3


From acecb3ad8b21a519ce4ad728106d45d4e978bb56 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 25 Jul 2018 14:58:10 +0200
Subject: rtc: remove irq_task and irq_task_lock

There is no way to set a periodic task anymore, remove task pointer and
lock.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/class.c     |  1 -
 drivers/rtc/interface.c | 50 ++++++++++---------------------------------------
 include/linux/rtc.h     |  2 --
 3 files changed, 10 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 7fa32c922617..0fca4d74c76b 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -172,7 +172,6 @@ static struct rtc_device *rtc_allocate_device(void)
 
 	mutex_init(&rtc->ops_lock);
 	spin_lock_init(&rtc->irq_lock);
-	spin_lock_init(&rtc->irq_task_lock);
 	init_waitqueue_head(&rtc->irq_queue);
 
 	/* Init timerqueue */
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 2b1b9a0b9f8a..ae0d67610c7b 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -605,12 +605,6 @@ void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode)
 	rtc->irq_data = (rtc->irq_data + (num << 8)) | (RTC_IRQF|mode);
 	spin_unlock_irqrestore(&rtc->irq_lock, flags);
 
-	/* call the task func */
-	spin_lock_irqsave(&rtc->irq_task_lock, flags);
-	if (rtc->irq_task)
-		rtc->irq_task->func(rtc->irq_task->private_data);
-	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
-
 	wake_up_interruptible(&rtc->irq_queue);
 	kill_fasync(&rtc->async_queue, SIGIO, POLL_IN);
 }
@@ -750,28 +744,16 @@ static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
  * Context: any
  *
  * Note that rtc_irq_set_freq() should previously have been used to
- * specify the desired frequency of periodic IRQ task->func() callbacks.
+ * specify the desired frequency of periodic IRQ.
  */
 int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled)
 {
 	int err = 0;
-	unsigned long flags;
 
-retry:
-	spin_lock_irqsave(&rtc->irq_task_lock, flags);
-	if (rtc->irq_task != NULL && task == NULL)
-		err = -EBUSY;
-	else if (rtc->irq_task != task)
-		err = -EACCES;
-	else {
-		if (rtc_update_hrtimer(rtc, enabled) < 0) {
-			spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
-			cpu_relax();
-			goto retry;
-		}
-		rtc->pie_enabled = enabled;
-	}
-	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+	while (rtc_update_hrtimer(rtc, enabled) < 0)
+		cpu_relax();
+
+	rtc->pie_enabled = enabled;
 
 	trace_rtc_irq_set_state(enabled, err);
 	return err;
@@ -782,7 +764,7 @@ EXPORT_SYMBOL_GPL(rtc_irq_set_state);
  * rtc_irq_set_freq - set 2^N Hz periodic IRQ frequency for IRQ
  * @rtc: the rtc device
  * @task: currently registered with rtc_irq_register()
- * @freq: positive frequency with which task->func() will be called
+ * @freq: positive frequency
  * Context: any
  *
  * Note that rtc_irq_set_state() is used to enable or disable the
@@ -791,25 +773,13 @@ EXPORT_SYMBOL_GPL(rtc_irq_set_state);
 int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
 {
 	int err = 0;
-	unsigned long flags;
 
 	if (freq <= 0 || freq > RTC_MAX_FREQ)
 		return -EINVAL;
-retry:
-	spin_lock_irqsave(&rtc->irq_task_lock, flags);
-	if (rtc->irq_task != NULL && task == NULL)
-		err = -EBUSY;
-	else if (rtc->irq_task != task)
-		err = -EACCES;
-	else {
-		rtc->irq_freq = freq;
-		if (rtc->pie_enabled && rtc_update_hrtimer(rtc, 1) < 0) {
-			spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
-			cpu_relax();
-			goto retry;
-		}
-	}
-	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+
+	rtc->irq_freq = freq;
+	while (rtc->pie_enabled && rtc_update_hrtimer(rtc, 1) < 0)
+		cpu_relax();
 
 	trace_rtc_irq_set_freq(freq, err);
 	return err;
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index f868d6b619ab..8cc23fdfd4ee 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -121,8 +121,6 @@ struct rtc_device {
 	wait_queue_head_t irq_queue;
 	struct fasync_struct *async_queue;
 
-	struct rtc_task *irq_task;
-	spinlock_t irq_task_lock;
 	int irq_freq;
 	int max_user_freq;
 
-- 
cgit v1.2.3


From 8719d3c9188b38db462a77ecd8c7a8e25e7e8c4c Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 25 Jul 2018 15:07:09 +0200
Subject: rtc: simplify rtc_irq_set_state/rtc_irq_set_freq

The PIE doesn't handle tasks anymore, remove the pointer from the
interface.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/interface.c | 4 ++--
 drivers/rtc/rtc-dev.c   | 8 ++++----
 include/linux/rtc.h     | 6 ++----
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index ae0d67610c7b..76eb3a2957cc 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -746,7 +746,7 @@ static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
  * Note that rtc_irq_set_freq() should previously have been used to
  * specify the desired frequency of periodic IRQ.
  */
-int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled)
+int rtc_irq_set_state(struct rtc_device *rtc, int enabled)
 {
 	int err = 0;
 
@@ -770,7 +770,7 @@ EXPORT_SYMBOL_GPL(rtc_irq_set_state);
  * Note that rtc_irq_set_state() is used to enable or disable the
  * periodic IRQs.
  */
-int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
+int rtc_irq_set_freq(struct rtc_device *rtc, int freq)
 {
 	int err = 0;
 
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index efa221e8bc22..43d962a9c210 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -341,11 +341,11 @@ static long rtc_dev_ioctl(struct file *file,
 		return rtc_set_time(rtc, &tm);
 
 	case RTC_PIE_ON:
-		err = rtc_irq_set_state(rtc, NULL, 1);
+		err = rtc_irq_set_state(rtc, 1);
 		break;
 
 	case RTC_PIE_OFF:
-		err = rtc_irq_set_state(rtc, NULL, 0);
+		err = rtc_irq_set_state(rtc, 0);
 		break;
 
 	case RTC_AIE_ON:
@@ -365,7 +365,7 @@ static long rtc_dev_ioctl(struct file *file,
 		return rtc_update_irq_enable(rtc, 0);
 
 	case RTC_IRQP_SET:
-		err = rtc_irq_set_freq(rtc, NULL, arg);
+		err = rtc_irq_set_freq(rtc, arg);
 		break;
 
 	case RTC_IRQP_READ:
@@ -427,7 +427,7 @@ static int rtc_dev_release(struct inode *inode, struct file *file)
 	/* Keep ioctl until all drivers are converted */
 	rtc_dev_ioctl(file, RTC_UIE_OFF, 0);
 	rtc_update_irq_enable(rtc, 0);
-	rtc_irq_set_state(rtc, NULL, 0);
+	rtc_irq_set_state(rtc, 0);
 
 	clear_bit_unlock(RTC_DEV_BUSY, &rtc->flags);
 	return 0;
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 8cc23fdfd4ee..bf4d375025d1 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -202,10 +202,8 @@ extern void rtc_update_irq(struct rtc_device *rtc,
 extern struct rtc_device *rtc_class_open(const char *name);
 extern void rtc_class_close(struct rtc_device *rtc);
 
-extern int rtc_irq_set_state(struct rtc_device *rtc,
-				struct rtc_task *task, int enabled);
-extern int rtc_irq_set_freq(struct rtc_device *rtc,
-				struct rtc_task *task, int freq);
+extern int rtc_irq_set_state(struct rtc_device *rtc, int enabled);
+extern int rtc_irq_set_freq(struct rtc_device *rtc, int freq);
 extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled);
 extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled);
 extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
-- 
cgit v1.2.3


From 1f45a4db3657c5ecafb7d3331536c987e6d18311 Mon Sep 17 00:00:00 2001
From: Paul McKenney <paulmck@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2018 11:21:43 -0700
Subject: srcu: Add notrace variants of srcu_read_{lock,unlock}

This is needed for a future tracepoint patch that uses srcu, and to make
sure it doesn't call into lockdep.

tracepoint code already calls notrace variants for rcu_read_lock_sched
so this patch does the same for srcu which will be used in a later
patch. Keeps it consistent with rcu-sched.

[Joel: Added commit message]
Link: http://lkml.kernel.org/r/20180628182149.226164-2-joel@joelfernandes.org

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Paul McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/srcu.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 91494d7e8e41..3e72a291c401 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -195,6 +195,16 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 	return retval;
 }
 
+/* Used by tracing, cannot be traced and cannot invoke lockdep. */
+static inline notrace int
+srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp)
+{
+	int retval;
+
+	retval = __srcu_read_lock(sp);
+	return retval;
+}
+
 /**
  * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
  * @sp: srcu_struct in which to unregister the old reader.
@@ -209,6 +219,13 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__srcu_read_unlock(sp, idx);
 }
 
+/* Used by tracing, cannot be traced and cannot call lockdep. */
+static inline notrace void
+srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp)
+{
+	__srcu_read_unlock(sp, idx);
+}
+
 /**
  * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
  *
-- 
cgit v1.2.3


From 0b764a6e4e19dc254caca636002eaa47b1d0b0af Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Thu, 28 Jun 2018 11:21:44 -0700
Subject: srcu: Add notrace variant of srcu_dereference

In the last patch in this series, we are making lockdep register hooks
onto the irq_{disable,enable} tracepoints. These tracepoints use the
_rcuidle tracepoint variant. In this series we switch the _rcuidle
tracepoint callers to use SRCU instead of sched-RCU. Inorder to
dereference the pointer to the probe functions, we could call
srcu_dereference, however this API will call back into lockdep to check
if the lock is held *before* the lockdep probe hooks have a chance to
run and annotate the IRQ enabled/disabled state.

For this reason we need a notrace variant of srcu_dereference since
otherwise we get lockdep splats. This patch adds the needed
srcu_dereference_notrace variant.

Link: http://lkml.kernel.org/r/20180628182149.226164-3-joel@joelfernandes.org

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/srcu.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 3e72a291c401..67135d4a8a30 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -169,6 +169,11 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp)
  */
 #define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0)
 
+/**
+ * srcu_dereference_notrace - no tracing and no lockdep calls from here
+ */
+#define srcu_dereference_notrace(p, sp) srcu_dereference_check((p), (sp), 1)
+
 /**
  * srcu_read_lock - register a new reader for an SRCU-protected structure.
  * @sp: srcu_struct in which to register the new reader.
-- 
cgit v1.2.3


From 72809cbf6748830ae4a59a45bcb2367a6c24d74d Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 26 Jul 2018 21:44:32 +0900
Subject: tracing: Remove orphaned function ftrace_nr_registered_ops()

Remove ftrace_nr_registered_ops() because it is no longer used.

ftrace_nr_registered_ops() has been introduced by commit ea701f11da44
("ftrace: Add selftest to test function trace recursion protection"), but
its caller has been removed by commit 05cbbf643b8e ("tracing: Fix selftest
function recursion accounting"). So it is not called anymore.

Link: http://lkml.kernel.org/r/153260907227.12474.5234899025934963683.stgit@devbox

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  6 ------
 kernel/trace/ftrace.c  | 24 ------------------------
 2 files changed, 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ebb77674be90..63af5eb0ff46 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -234,10 +234,6 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1,
  */
 #define register_ftrace_function(ops) ({ 0; })
 #define unregister_ftrace_function(ops) ({ 0; })
-static inline int ftrace_nr_registered_ops(void)
-{
-	return 0;
-}
 static inline void ftrace_kill(void) { }
 static inline void ftrace_free_init_mem(void) { }
 static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { }
@@ -328,8 +324,6 @@ struct seq_file;
 
 extern int ftrace_text_reserved(const void *start, const void *end);
 
-extern int ftrace_nr_registered_ops(void);
-
 struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr);
 
 bool is_ftrace_trampoline(unsigned long addr);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b8b3324ca1c8..0d380a98a880 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -157,30 +157,6 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops)
 #endif
 }
 
-/**
- * ftrace_nr_registered_ops - return number of ops registered
- *
- * Returns the number of ftrace_ops registered and tracing functions
- */
-int ftrace_nr_registered_ops(void)
-{
-	struct ftrace_ops *ops;
-	int cnt = 0;
-
-	mutex_lock(&ftrace_lock);
-
-	for (ops = rcu_dereference_protected(ftrace_ops_list,
-					     lockdep_is_held(&ftrace_lock));
-	     ops != &ftrace_list_end;
-	     ops = rcu_dereference_protected(ops->next,
-					     lockdep_is_held(&ftrace_lock)))
-		cnt++;
-
-	mutex_unlock(&ftrace_lock);
-
-	return cnt;
-}
-
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 			    struct ftrace_ops *op, struct pt_regs *regs)
 {
-- 
cgit v1.2.3


From af9b6d7570ca9afbbc6076ab7920d8f00f7e55c1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 29 Jun 2018 12:45:53 -0400
Subject: pNFS: Parse the results of layoutget on open even if permissions
 checks fail

Even if the results of the permissions checks failed, we should parse
the results of the layout on open call so that we can return the
layout if required.
Note that we also want to ignore the sequence counter for whether or not
a layout recall occurred. If the recall pertained to our OPEN, then the
callback will know, and will attempt to wait for us to finih processing
anyway.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/callback_proc.c    | 2 --
 fs/nfs/nfs4proc.c         | 5 +++--
 fs/nfs/pnfs.c             | 4 ----
 include/linux/nfs_fs_sb.h | 1 -
 include/linux/nfs_xdr.h   | 1 -
 5 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index efca3d6c89f2..43ba390bb653 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -331,8 +331,6 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
 static u32 do_callback_layoutrecall(struct nfs_client *clp,
 				    struct cb_layoutrecallargs *args)
 {
-	write_seqcount_begin(&clp->cl_callback_count);
-	write_seqcount_end(&clp->cl_callback_count);
 	if (args->cbl_recall_type == RETURN_FILE)
 		return initiate_file_draining(clp, args);
 	return initiate_bulk_draining(clp, args);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6dd146885da9..5a8190ec31a2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2843,6 +2843,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
 				nfs_save_change_attribute(d_inode(opendata->dir)));
 	}
 
+	/* Parse layoutget results before we check for access */
+	pnfs_parse_lgopen(state->inode, opendata->lgp, ctx);
+
 	ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
 	if (ret != 0)
 		goto out;
@@ -2851,8 +2854,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
 		nfs_inode_attach_open_context(ctx);
 		if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
 			nfs4_schedule_stateid_recovery(server, state);
-		else
-			pnfs_parse_lgopen(state->inode, opendata->lgp, ctx);
 	}
 
 out:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 030c39c107c2..7fdac8b504dd 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1018,7 +1018,6 @@ pnfs_alloc_init_layoutget_args(struct inode *ino,
 	nfs4_stateid_copy(&lgp->args.stateid, stateid);
 	lgp->gfp_flags = gfp_flags;
 	lgp->cred = get_rpccred(ctx->cred);
-	lgp->callback_count = raw_seqcount_begin(&server->nfs_client->cl_callback_count);
 	return lgp;
 }
 
@@ -2181,9 +2180,6 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
 	} else
 		lo = NFS_I(lgp->args.inode)->layout;
 
-	if (read_seqcount_retry(&srv->nfs_client->cl_callback_count,
-				lgp->callback_count))
-		return;
 	lseg = pnfs_layout_process(lgp);
 	if (!IS_ERR(lseg)) {
 		iomode = lgp->args.range.iomode;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 74ae3e1d19a0..2c18d618604e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -28,7 +28,6 @@ struct nfs41_impl_id;
 struct nfs_client {
 	refcount_t		cl_count;
 	atomic_t		cl_mds_count;
-	seqcount_t		cl_callback_count;
 	int			cl_cons_state;	/* current construction state (-ve: init error) */
 #define NFS_CS_READY		0		/* ready to be used */
 #define NFS_CS_INITING		1		/* busy initialising */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 712eed156d09..3b7325cfb291 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -271,7 +271,6 @@ struct nfs4_layoutget {
 	struct nfs4_layoutget_args args;
 	struct nfs4_layoutget_res res;
 	struct rpc_cred *cred;
-	unsigned callback_count;
 	gfp_t gfp_flags;
 };
 
-- 
cgit v1.2.3


From 359f642700f2ff05d9c94cd9216c97af7b8e9553 Mon Sep 17 00:00:00 2001
From: Greg Edwards <gedwards@ddn.com>
Date: Wed, 25 Jul 2018 10:22:58 -0400
Subject: block: move bio_integrity_{intervals,bytes} into blkdev.h

This allows bio_integrity_bytes() to be called from drivers instead of
open coding it.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Edwards <gedwards@ddn.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c  | 22 ----------------------
 include/linux/blkdev.h | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index add7c7c85335..67b5fb861a51 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -159,28 +159,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
-/**
- * bio_integrity_intervals - Return number of integrity intervals for a bio
- * @bi:		blk_integrity profile for device
- * @sectors:	Size of the bio in 512-byte sectors
- *
- * Description: The block layer calculates everything in 512 byte
- * sectors but integrity metadata is done in terms of the data integrity
- * interval size of the storage device.  Convert the block layer sectors
- * to the appropriate number of integrity intervals.
- */
-static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
-						   unsigned int sectors)
-{
-	return sectors >> (bi->interval_exp - 9);
-}
-
-static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
-					       unsigned int sectors)
-{
-	return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
-}
-
 /**
  * bio_integrity_process - Process integrity metadata for a bio
  * @bio:	bio to generate/verify integrity metadata for
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 331a6cb8805f..050d599f5ea9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1865,6 +1865,28 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 				bip_next->bip_vec[0].bv_offset);
 }
 
+/**
+ * bio_integrity_intervals - Return number of integrity intervals for a bio
+ * @bi:		blk_integrity profile for device
+ * @sectors:	Size of the bio in 512-byte sectors
+ *
+ * Description: The block layer calculates everything in 512 byte
+ * sectors but integrity metadata is done in terms of the data integrity
+ * interval size of the storage device.  Convert the block layer sectors
+ * to the appropriate number of integrity intervals.
+ */
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+						   unsigned int sectors)
+{
+	return sectors >> (bi->interval_exp - 9);
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+					       unsigned int sectors)
+{
+	return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
+}
+
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
 struct bio;
@@ -1938,6 +1960,18 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 	return false;
 }
 
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+						   unsigned int sectors)
+{
+	return 0;
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+					       unsigned int sectors)
+{
+	return 0;
+}
+
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 struct block_device_operations {
-- 
cgit v1.2.3


From 0ab5fe5374743d5a279b1ff6297ef2c54d06cd5f Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 20 Jun 2018 15:22:52 +1000
Subject: fsi: Add new central chardev support

The various FSI devices (sbefifo, occ, scom, more to come)
currently use misc devices.

This is problematic as the minor device space for misc is
limited and there can be a lot of them. Also it limits our
ability to move them to a dedicated /dev/fsi directory or
to be smart about device naming and numbering.

It also means we have IDAs on every single of these drivers

This creates a common fsi "device_type" for the optional
/dev/fsi grouping and a dev_t allocator for all FSI devices.

"Legacy" devices get to use a backward compatible numbering
scheme (as long as chip id <16 and there's only one copy
of a given unit type per chip).

A single major number and a single IDA are shared for all
FSI devices.

This doesn't convert the FSI device drivers to use the new
scheme yet, they will be converted individually.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/fsi/Kconfig    | 15 ++++++++
 drivers/fsi/fsi-core.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/fsi.h    | 12 ++++++-
 3 files changed, 119 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
index 8d82b1e60514..af3a20dd5aa4 100644
--- a/drivers/fsi/Kconfig
+++ b/drivers/fsi/Kconfig
@@ -12,6 +12,21 @@ menuconfig FSI
 
 if FSI
 
+config FSI_NEW_DEV_NODE
+	bool "Create '/dev/fsi' directory for char devices"
+	default n
+	---help---
+	This option causes char devices created for FSI devices to be
+	located under a common /dev/fsi/ directory. Set to N unless your
+	userspace has been updated to handle the new location.
+
+	Additionally, it also causes the char device names to be offset
+	by one so that chip 0 will have /dev/scom1 and chip1 /dev/scom2
+	to match old userspace expectations.
+
+	New userspace will use udev rules to generate predictable access
+	symlinks in /dev/fsi/by-path when this option is enabled.
+
 config FSI_MASTER_GPIO
 	tristate "GPIO-based FSI master"
 	depends on GPIOLIB
diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
index eab6c5c4990e..faa1760a5a40 100644
--- a/drivers/fsi/fsi-core.c
+++ b/drivers/fsi/fsi-core.c
@@ -92,6 +92,13 @@ struct fsi_slave {
 static const int slave_retries = 2;
 static int discard_errors;
 
+static dev_t fsi_base_dev;
+static DEFINE_IDA(fsi_minor_ida);
+#define FSI_CHAR_MAX_DEVICES	0x1000
+
+/* Legacy /dev numbering: 4 devices per chip, 16 chips */
+#define FSI_CHAR_LEGACY_TOP	64
+
 static int fsi_master_read(struct fsi_master *master, int link,
 		uint8_t slave_id, uint32_t addr, void *val, size_t size);
 static int fsi_master_write(struct fsi_master *master, int link,
@@ -627,6 +634,7 @@ static void fsi_slave_release(struct device *dev)
 {
 	struct fsi_slave *slave = to_fsi_slave(dev);
 
+	fsi_free_minor(slave->dev.devt);
 	of_node_put(dev->of_node);
 	kfree(slave);
 }
@@ -729,6 +737,75 @@ static ssize_t chip_id_show(struct device *dev,
 
 static DEVICE_ATTR_RO(chip_id);
 
+static char *fsi_cdev_devnode(struct device *dev, umode_t *mode,
+			      kuid_t *uid, kgid_t *gid)
+{
+#ifdef CONFIG_FSI_NEW_DEV_NODE
+	return kasprintf(GFP_KERNEL, "fsi/%s", dev_name(dev));
+#else
+	return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
+#endif
+}
+
+const struct device_type fsi_cdev_type = {
+	.name = "fsi-cdev",
+	.devnode = fsi_cdev_devnode,
+};
+EXPORT_SYMBOL_GPL(fsi_cdev_type);
+
+/* Backward compatible /dev/ numbering in "old style" mode */
+static int fsi_adjust_index(int index)
+{
+#ifdef CONFIG_FSI_NEW_DEV_NODE
+	return index;
+#else
+	return index + 1;
+#endif
+}
+
+static int __fsi_get_new_minor(struct fsi_slave *slave, enum fsi_dev_type type,
+			       dev_t *out_dev, int *out_index)
+{
+	int cid = slave->chip_id;
+	int id;
+
+	/* Check if we qualify for legacy numbering */
+	if (cid >= 0 && cid < 16 && type < 4) {
+		/* Try reserving the legacy number */
+		id = (cid << 4) | type;
+		id = ida_simple_get(&fsi_minor_ida, id, id + 1, GFP_KERNEL);
+		if (id >= 0) {
+			*out_index = fsi_adjust_index(cid);
+			*out_dev = fsi_base_dev + id;
+			return 0;
+		}
+		/* Other failure */
+		if (id != -ENOSPC)
+			return id;
+		/* Fallback to non-legacy allocation */
+	}
+	id = ida_simple_get(&fsi_minor_ida, FSI_CHAR_LEGACY_TOP,
+			    FSI_CHAR_MAX_DEVICES, GFP_KERNEL);
+	if (id < 0)
+		return id;
+	*out_index = fsi_adjust_index(id);
+	*out_dev = fsi_base_dev + id;
+	return 0;
+}
+
+int fsi_get_new_minor(struct fsi_device *fdev, enum fsi_dev_type type,
+		      dev_t *out_dev, int *out_index)
+{
+	return __fsi_get_new_minor(fdev->slave, type, out_dev, out_index);
+}
+EXPORT_SYMBOL_GPL(fsi_get_new_minor);
+
+void fsi_free_minor(dev_t dev)
+{
+	ida_simple_remove(&fsi_minor_ida, MINOR(dev));
+}
+EXPORT_SYMBOL_GPL(fsi_free_minor);
+
 static int fsi_slave_init(struct fsi_master *master, int link, uint8_t id)
 {
 	uint32_t chip_id;
@@ -953,7 +1030,7 @@ static int fsi_slave_remove_device(struct device *dev, void *arg)
 static int fsi_master_remove_slave(struct device *dev, void *arg)
 {
 	device_for_each_child(dev, NULL, fsi_slave_remove_device);
-	device_unregister(dev);
+	put_device(dev);
 	return 0;
 }
 
@@ -1091,13 +1168,27 @@ EXPORT_SYMBOL_GPL(fsi_bus_type);
 
 static int __init fsi_init(void)
 {
-	return bus_register(&fsi_bus_type);
+	int rc;
+
+	rc = alloc_chrdev_region(&fsi_base_dev, 0, FSI_CHAR_MAX_DEVICES, "fsi");
+	if (rc)
+		return rc;
+	rc = bus_register(&fsi_bus_type);
+	if (rc)
+		goto fail_bus;
+	return 0;
+
+ fail_bus:
+	unregister_chrdev_region(fsi_base_dev, FSI_CHAR_MAX_DEVICES);
+	return rc;
 }
 postcore_initcall(fsi_init);
 
 static void fsi_exit(void)
 {
 	bus_unregister(&fsi_bus_type);
+	unregister_chrdev_region(fsi_base_dev, FSI_CHAR_MAX_DEVICES);
+	ida_destroy(&fsi_minor_ida);
 }
 module_exit(fsi_exit);
 module_param(discard_errors, int, 0664);
diff --git a/include/linux/fsi.h b/include/linux/fsi.h
index 141fd38d061f..ec3be0d5b786 100644
--- a/include/linux/fsi.h
+++ b/include/linux/fsi.h
@@ -76,8 +76,18 @@ extern int fsi_slave_read(struct fsi_slave *slave, uint32_t addr,
 extern int fsi_slave_write(struct fsi_slave *slave, uint32_t addr,
 		const void *val, size_t size);
 
+extern struct bus_type fsi_bus_type;
+extern const struct device_type fsi_cdev_type;
 
+enum fsi_dev_type {
+	fsi_dev_cfam,
+	fsi_dev_sbefifo,
+	fsi_dev_scom,
+	fsi_dev_occ
+};
 
-extern struct bus_type fsi_bus_type;
+extern int fsi_get_new_minor(struct fsi_device *fdev, enum fsi_dev_type type,
+			     dev_t *out_dev, int *out_index);
+extern void fsi_free_minor(dev_t dev);
 
 #endif /* LINUX_FSI_H */
-- 
cgit v1.2.3


From 179909ecafc3bae1f34289e88bacd45e391f0554 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 24 Jul 2018 11:38:14 -0700
Subject: Input: stop telling users to snail-mail Vojtech

I do not think Vojtech wants snail mail these days (and he mentioned that
nobody has ever sent him snail mail), and the address is not even valid
anymore, so let's remove snail-mail instructions from the sources.

Acked-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/evbug.c                          | 4 ----
 drivers/input/gameport/emu10k1-gp.c            | 4 ----
 drivers/input/gameport/lightning.c             | 4 ----
 drivers/input/gameport/ns558.c                 | 4 ----
 drivers/input/joystick/a3d.c                   | 4 ----
 drivers/input/joystick/adi.c                   | 4 ----
 drivers/input/joystick/amijoy.c                | 4 ----
 drivers/input/joystick/analog.c                | 4 ----
 drivers/input/joystick/cobra.c                 | 4 ----
 drivers/input/joystick/db9.c                   | 4 ----
 drivers/input/joystick/gamecon.c               | 4 ----
 drivers/input/joystick/gf2k.c                  | 4 ----
 drivers/input/joystick/grip.c                  | 4 ----
 drivers/input/joystick/guillemot.c             | 4 ----
 drivers/input/joystick/iforce/iforce-ff.c      | 4 ----
 drivers/input/joystick/iforce/iforce-main.c    | 4 ----
 drivers/input/joystick/iforce/iforce-packets.c | 4 ----
 drivers/input/joystick/iforce/iforce-serio.c   | 4 ----
 drivers/input/joystick/iforce/iforce-usb.c     | 4 ----
 drivers/input/joystick/iforce/iforce.h         | 4 ----
 drivers/input/joystick/interact.c              | 4 ----
 drivers/input/joystick/joydump.c               | 4 ----
 drivers/input/joystick/magellan.c              | 4 ----
 drivers/input/joystick/sidewinder.c            | 4 ----
 drivers/input/joystick/spaceball.c             | 4 ----
 drivers/input/joystick/spaceorb.c              | 4 ----
 drivers/input/joystick/stinger.c               | 4 ----
 drivers/input/joystick/tmdc.c                  | 4 ----
 drivers/input/joystick/turbografx.c            | 4 ----
 drivers/input/joystick/warrior.c               | 4 ----
 drivers/input/keyboard/amikbd.c                | 4 ----
 drivers/input/keyboard/atakbd.c                | 4 ----
 drivers/input/keyboard/newtonkbd.c             | 4 ----
 drivers/input/keyboard/stowaway.c              | 4 ----
 drivers/input/keyboard/sunkbd.c                | 4 ----
 drivers/input/keyboard/xtkbd.c                 | 4 ----
 drivers/input/mouse/inport.c                   | 4 ----
 drivers/input/mouse/logibm.c                   | 4 ----
 drivers/input/mouse/pc110pad.c                 | 4 ----
 drivers/input/mouse/sermouse.c                 | 4 ----
 drivers/input/serio/ct82c710.c                 | 4 ----
 drivers/input/serio/q40kbd.c                   | 4 ----
 drivers/input/serio/rpckbd.c                   | 4 ----
 drivers/input/serio/serio.c                    | 4 ----
 drivers/input/touchscreen/gunze.c              | 4 ----
 include/linux/joystick.h                       | 4 ----
 include/uapi/linux/joystick.h                  | 4 ----
 47 files changed, 188 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/evbug.c b/drivers/input/evbug.c
index cd4e6679d61a..5419c1c1f621 100644
--- a/drivers/input/evbug.c
+++ b/drivers/input/evbug.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/input/gameport/emu10k1-gp.c b/drivers/input/gameport/emu10k1-gp.c
index 2909e9561cf3..afdc20ca0e24 100644
--- a/drivers/input/gameport/emu10k1-gp.c
+++ b/drivers/input/gameport/emu10k1-gp.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <asm/io.h>
diff --git a/drivers/input/gameport/lightning.c b/drivers/input/gameport/lightning.c
index 85d6ee09f11f..c6e74c7945cb 100644
--- a/drivers/input/gameport/lightning.c
+++ b/drivers/input/gameport/lightning.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <asm/io.h>
diff --git a/drivers/input/gameport/ns558.c b/drivers/input/gameport/ns558.c
index 7c217848613e..6437645858f9 100644
--- a/drivers/input/gameport/ns558.c
+++ b/drivers/input/gameport/ns558.c
@@ -21,10 +21,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <asm/io.h>
diff --git a/drivers/input/joystick/a3d.c b/drivers/input/joystick/a3d.c
index 55efdfc7eb62..98307039a534 100644
--- a/drivers/input/joystick/a3d.c
+++ b/drivers/input/joystick/a3d.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/adi.c b/drivers/input/joystick/adi.c
index 15a71acb6997..f466c0d34247 100644
--- a/drivers/input/joystick/adi.c
+++ b/drivers/input/joystick/adi.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/joystick/amijoy.c b/drivers/input/joystick/amijoy.c
index c65b5fa69f1e..2b82a838c511 100644
--- a/drivers/input/joystick/amijoy.c
+++ b/drivers/input/joystick/amijoy.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/types.h>
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index c79dbcb4d146..2b445c8d3fcd 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/joystick/cobra.c b/drivers/input/joystick/cobra.c
index ae3ee24a2368..14cb956beac4 100644
--- a/drivers/input/joystick/cobra.c
+++ b/drivers/input/joystick/cobra.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/db9.c b/drivers/input/joystick/db9.c
index de0dd4756c84..7721cc0bf569 100644
--- a/drivers/input/joystick/db9.c
+++ b/drivers/input/joystick/db9.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/gamecon.c b/drivers/input/joystick/gamecon.c
index 4e10ffdf8a36..d62e73dd9f7f 100644
--- a/drivers/input/joystick/gamecon.c
+++ b/drivers/input/joystick/gamecon.c
@@ -24,10 +24,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/input/joystick/gf2k.c b/drivers/input/joystick/gf2k.c
index 0f519db64748..50a60065ab14 100644
--- a/drivers/input/joystick/gf2k.c
+++ b/drivers/input/joystick/gf2k.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/joystick/grip.c b/drivers/input/joystick/grip.c
index eac9c5b8d73e..e10395ba62bc 100644
--- a/drivers/input/joystick/grip.c
+++ b/drivers/input/joystick/grip.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/guillemot.c b/drivers/input/joystick/guillemot.c
index a9ac2f9cfce0..43ff817d80ac 100644
--- a/drivers/input/joystick/guillemot.c
+++ b/drivers/input/joystick/guillemot.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/iforce/iforce-ff.c b/drivers/input/joystick/iforce/iforce-ff.c
index 0de9a0943a9e..2ed923874bbc 100644
--- a/drivers/input/joystick/iforce/iforce-ff.c
+++ b/drivers/input/joystick/iforce/iforce-ff.c
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c
index daeeb4c7e3b0..95bbd61d3fbe 100644
--- a/drivers/input/joystick/iforce/iforce-main.c
+++ b/drivers/input/joystick/iforce/iforce-main.c
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-packets.c b/drivers/input/joystick/iforce/iforce-packets.c
index fedaaea5c16d..1cea950276dd 100644
--- a/drivers/input/joystick/iforce/iforce-packets.c
+++ b/drivers/input/joystick/iforce/iforce-packets.c
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-serio.c b/drivers/input/joystick/iforce/iforce-serio.c
index 154e827b559b..f4ba4a751fe0 100644
--- a/drivers/input/joystick/iforce/iforce-serio.c
+++ b/drivers/input/joystick/iforce/iforce-serio.c
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c
index e8724f1a4a25..78073259c9a1 100644
--- a/drivers/input/joystick/iforce/iforce-usb.c
+++ b/drivers/input/joystick/iforce/iforce-usb.c
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h
index 96ae4f5bd0eb..aee43d14f699 100644
--- a/drivers/input/joystick/iforce/iforce.h
+++ b/drivers/input/joystick/iforce/iforce.h
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/interact.c b/drivers/input/joystick/interact.c
index 17c2c800743c..598788b3da62 100644
--- a/drivers/input/joystick/interact.c
+++ b/drivers/input/joystick/interact.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/joydump.c b/drivers/input/joystick/joydump.c
index d1c6e4846a4a..2ea05ade4d4e 100644
--- a/drivers/input/joystick/joydump.c
+++ b/drivers/input/joystick/joydump.c
@@ -21,10 +21,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c
index a9d0e3edca94..95a34ab34fc3 100644
--- a/drivers/input/joystick/magellan.c
+++ b/drivers/input/joystick/magellan.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *  Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/sidewinder.c b/drivers/input/joystick/sidewinder.c
index 5e602a6852b7..f46bf4d41972 100644
--- a/drivers/input/joystick/sidewinder.c
+++ b/drivers/input/joystick/sidewinder.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c
index bb3faeff8cac..ffb9c1f495b6 100644
--- a/drivers/input/joystick/spaceball.c
+++ b/drivers/input/joystick/spaceball.c
@@ -24,10 +24,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *  Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/spaceorb.c b/drivers/input/joystick/spaceorb.c
index 05da0ed514e2..20540ee71d7f 100644
--- a/drivers/input/joystick/spaceorb.c
+++ b/drivers/input/joystick/spaceorb.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *  Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/stinger.c b/drivers/input/joystick/stinger.c
index cb10e7b097ae..ba8579435d6c 100644
--- a/drivers/input/joystick/stinger.c
+++ b/drivers/input/joystick/stinger.c
@@ -21,10 +21,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *  Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/tmdc.c b/drivers/input/joystick/tmdc.c
index 7e17cde464f0..6f4a01cfe79f 100644
--- a/drivers/input/joystick/tmdc.c
+++ b/drivers/input/joystick/tmdc.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/joystick/turbografx.c b/drivers/input/joystick/turbografx.c
index e2685753e460..bf2f9925e416 100644
--- a/drivers/input/joystick/turbografx.c
+++ b/drivers/input/joystick/turbografx.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/warrior.c b/drivers/input/joystick/warrior.c
index ef5391ba4470..b60cab168e2a 100644
--- a/drivers/input/joystick/warrior.c
+++ b/drivers/input/joystick/warrior.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *  Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/keyboard/amikbd.c b/drivers/input/keyboard/amikbd.c
index e04a3b4e55d6..420e33c49e58 100644
--- a/drivers/input/keyboard/amikbd.c
+++ b/drivers/input/keyboard/amikbd.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c
index f1235831283d..6f62da2909ec 100644
--- a/drivers/input/keyboard/atakbd.c
+++ b/drivers/input/keyboard/atakbd.c
@@ -34,10 +34,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/keyboard/newtonkbd.c b/drivers/input/keyboard/newtonkbd.c
index fb9b8e23ab93..de26e2df0ad5 100644
--- a/drivers/input/keyboard/newtonkbd.c
+++ b/drivers/input/keyboard/newtonkbd.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <j.cormack@doc.ic.ac.uk>, or by paper mail:
- * Justin Cormack, 68 Dartmouth Park Road, London NW5 1SN, UK.
  */
 
 #include <linux/slab.h>
diff --git a/drivers/input/keyboard/stowaway.c b/drivers/input/keyboard/stowaway.c
index 8b6de9a692dc..15a5e74dbe91 100644
--- a/drivers/input/keyboard/stowaway.c
+++ b/drivers/input/keyboard/stowaway.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <marek.vasut@gmail.com>, or by paper mail:
- * Marek Vasut, Liskovecka 559, Frydek-Mistek, 738 01 Czech Republic
  */
 
 #include <linux/slab.h>
diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c
index c95707ea2656..ad5d7f94f95a 100644
--- a/drivers/input/keyboard/sunkbd.c
+++ b/drivers/input/keyboard/sunkbd.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/keyboard/xtkbd.c b/drivers/input/keyboard/xtkbd.c
index 8f64b9ded8d0..f7598114b962 100644
--- a/drivers/input/keyboard/xtkbd.c
+++ b/drivers/input/keyboard/xtkbd.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/slab.h>
diff --git a/drivers/input/mouse/inport.c b/drivers/input/mouse/inport.c
index 9ce71dfa0de1..b9e68606c44a 100644
--- a/drivers/input/mouse/inport.c
+++ b/drivers/input/mouse/inport.c
@@ -26,10 +26,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/mouse/logibm.c b/drivers/input/mouse/logibm.c
index 6f165e053f4d..2fd6c84cd5b7 100644
--- a/drivers/input/mouse/logibm.c
+++ b/drivers/input/mouse/logibm.c
@@ -27,10 +27,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/mouse/pc110pad.c b/drivers/input/mouse/pc110pad.c
index 7b02b652e267..b8965e6bc890 100644
--- a/drivers/input/mouse/pc110pad.c
+++ b/drivers/input/mouse/pc110pad.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/mouse/sermouse.c b/drivers/input/mouse/sermouse.c
index 8df526620ebf..d27816ac038f 100644
--- a/drivers/input/mouse/sermouse.c
+++ b/drivers/input/mouse/sermouse.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/serio/ct82c710.c b/drivers/input/serio/ct82c710.c
index 9c54c43c9749..2d1e2993b5a8 100644
--- a/drivers/input/serio/ct82c710.c
+++ b/drivers/input/serio/ct82c710.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/serio/q40kbd.c b/drivers/input/serio/q40kbd.c
index d0fccc8ec259..fbb6b33845fa 100644
--- a/drivers/input/serio/q40kbd.c
+++ b/drivers/input/serio/q40kbd.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/serio/rpckbd.c b/drivers/input/serio/rpckbd.c
index 8cf964736902..a308d7811427 100644
--- a/drivers/input/serio/rpckbd.c
+++ b/drivers/input/serio/rpckbd.c
@@ -21,10 +21,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index 24a90c8db5b3..2e1fb0649260 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/input/touchscreen/gunze.c b/drivers/input/touchscreen/gunze.c
index 481586909d28..054c2537b392 100644
--- a/drivers/input/touchscreen/gunze.c
+++ b/drivers/input/touchscreen/gunze.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/errno.h>
diff --git a/include/linux/joystick.h b/include/linux/joystick.h
index cbf2aa9e93b9..5153f5b9294c 100644
--- a/include/linux/joystick.h
+++ b/include/linux/joystick.h
@@ -17,10 +17,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- * 
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@suse.cz>, or by paper mail:
- * Vojtech Pavlik, Ucitelska 1576, Prague 8, 182 00 Czech Republic
  */
 #ifndef _LINUX_JOYSTICK_H
 #define _LINUX_JOYSTICK_H
diff --git a/include/uapi/linux/joystick.h b/include/uapi/linux/joystick.h
index 64aabb84a66d..192bf2cf182d 100644
--- a/include/uapi/linux/joystick.h
+++ b/include/uapi/linux/joystick.h
@@ -18,10 +18,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- * 
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@suse.cz>, or by paper mail:
- * Vojtech Pavlik, Ucitelska 1576, Prague 8, 182 00 Czech Republic
  */
 #ifndef _UAPI_LINUX_JOYSTICK_H
 #define _UAPI_LINUX_JOYSTICK_H
-- 
cgit v1.2.3


From b512719f771a82180211c9a315b8a7f628832b3d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 26 Jul 2018 16:37:08 -0700
Subject: delayacct: fix crash in delayacct_blkio_end() after delayacct init
 failure

While forking, if delayacct init fails due to memory shortage, it
continues expecting all delayacct users to check task->delays pointer
against NULL before dereferencing it, which all of them used to do.

Commit c96f5471ce7d ("delayacct: Account blkio completion on the correct
task"), while updating delayacct_blkio_end() to take the target task
instead of always using %current, made the function test NULL on
%current->delays and then continue to operated on @p->delays.  If
%current succeeded init while @p didn't, it leads to the following
crash.

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
 IP: __delayacct_blkio_end+0xc/0x40
 PGD 8000001fd07e1067 P4D 8000001fd07e1067 PUD 1fcffbb067 PMD 0
 Oops: 0000 [#1] SMP PTI
 CPU: 4 PID: 25774 Comm: QIOThread0 Not tainted 4.16.0-9_fbk1_rc2_1180_g6b593215b4d7 #9
 RIP: 0010:__delayacct_blkio_end+0xc/0x40
 Call Trace:
  try_to_wake_up+0x2c0/0x600
  autoremove_wake_function+0xe/0x30
  __wake_up_common+0x74/0x120
  wake_up_page_bit+0x9c/0xe0
  mpage_end_io+0x27/0x70
  blk_update_request+0x78/0x2c0
  scsi_end_request+0x2c/0x1e0
  scsi_io_completion+0x20b/0x5f0
  blk_mq_complete_request+0xa2/0x100
  ata_scsi_qc_complete+0x79/0x400
  ata_qc_complete_multiple+0x86/0xd0
  ahci_handle_port_interrupt+0xc9/0x5c0
  ahci_handle_port_intr+0x54/0xb0
  ahci_single_level_irq_intr+0x3b/0x60
  __handle_irq_event_percpu+0x43/0x190
  handle_irq_event_percpu+0x20/0x50
  handle_irq_event+0x2a/0x50
  handle_edge_irq+0x80/0x1c0
  handle_irq+0xaf/0x120
  do_IRQ+0x41/0xc0
  common_interrupt+0xf/0xf

Fix it by updating delayacct_blkio_end() check @p->delays instead.

Link: http://lkml.kernel.org/r/20180724175542.GP1934745@devbig577.frc2.facebook.com
Fixes: c96f5471ce7d ("delayacct: Account blkio completion on the correct task")
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Dave Jones <dsj@fb.com>
Debugged-by: Dave Jones <dsj@fb.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Josh Snyder <joshs@netflix.com>
Cc: <stable@vger.kernel.org>	[4.15+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delayacct.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index e6c0448ebcc7..31c865d1842e 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -124,7 +124,7 @@ static inline void delayacct_blkio_start(void)
 
 static inline void delayacct_blkio_end(struct task_struct *p)
 {
-	if (current->delays)
+	if (p->delays)
 		__delayacct_blkio_end(p);
 	delayacct_clear_flag(DELAYACCT_PF_BLKIO);
 }
-- 
cgit v1.2.3


From 027232da7c7c1c7f04383f93bd798e475dde5285 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 26 Jul 2018 16:37:25 -0700
Subject: mm: introduce vma_init()

Not all VMAs allocated with vm_area_alloc().  Some of them allocated on
stack or in data segment.

The new helper can be use to initialize VMA properly regardless where it
was allocated.

Link: http://lkml.kernel.org/r/20180724121139.62570-2-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 6 ++++++
 kernel/fork.c      | 6 ++----
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d3a3842316b8..31540f166987 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -452,6 +452,12 @@ struct vm_operations_struct {
 					  unsigned long addr);
 };
 
+static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
+{
+	vma->vm_mm = mm;
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
+}
+
 struct mmu_gather;
 struct inode;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index a191c05e757d..1b27babc4c78 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -312,10 +312,8 @@ struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 
-	if (vma) {
-		vma->vm_mm = mm;
-		INIT_LIST_HEAD(&vma->anon_vma_chain);
-	}
+	if (vma)
+		vma_init(vma, mm);
 	return vma;
 }
 
-- 
cgit v1.2.3


From bfd40eaff5abb9f62c8ef94ca13ed0d94a560f10 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 26 Jul 2018 16:37:35 -0700
Subject: mm: fix vma_is_anonymous() false-positives

vma_is_anonymous() relies on ->vm_ops being NULL to detect anonymous
VMA.  This is unreliable as ->mmap may not set ->vm_ops.

False-positive vma_is_anonymous() may lead to crashes:

	next ffff8801ce5e7040 prev ffff8801d20eca50 mm ffff88019c1e13c0
	prot 27 anon_vma ffff88019680cdd8 vm_ops 0000000000000000
	pgoff 0 file ffff8801b2ec2d00 private_data 0000000000000000
	flags: 0xff(read|write|exec|shared|mayread|maywrite|mayexec|mayshare)
	------------[ cut here ]------------
	kernel BUG at mm/memory.c:1422!
	invalid opcode: 0000 [#1] SMP KASAN
	CPU: 0 PID: 18486 Comm: syz-executor3 Not tainted 4.18.0-rc3+ #136
	Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google
	01/01/2011
	RIP: 0010:zap_pmd_range mm/memory.c:1421 [inline]
	RIP: 0010:zap_pud_range mm/memory.c:1466 [inline]
	RIP: 0010:zap_p4d_range mm/memory.c:1487 [inline]
	RIP: 0010:unmap_page_range+0x1c18/0x2220 mm/memory.c:1508
	Call Trace:
	 unmap_single_vma+0x1a0/0x310 mm/memory.c:1553
	 zap_page_range_single+0x3cc/0x580 mm/memory.c:1644
	 unmap_mapping_range_vma mm/memory.c:2792 [inline]
	 unmap_mapping_range_tree mm/memory.c:2813 [inline]
	 unmap_mapping_pages+0x3a7/0x5b0 mm/memory.c:2845
	 unmap_mapping_range+0x48/0x60 mm/memory.c:2880
	 truncate_pagecache+0x54/0x90 mm/truncate.c:800
	 truncate_setsize+0x70/0xb0 mm/truncate.c:826
	 simple_setattr+0xe9/0x110 fs/libfs.c:409
	 notify_change+0xf13/0x10f0 fs/attr.c:335
	 do_truncate+0x1ac/0x2b0 fs/open.c:63
	 do_sys_ftruncate+0x492/0x560 fs/open.c:205
	 __do_sys_ftruncate fs/open.c:215 [inline]
	 __se_sys_ftruncate fs/open.c:213 [inline]
	 __x64_sys_ftruncate+0x59/0x80 fs/open.c:213
	 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
	 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Reproducer:

	#include <stdio.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/types.h>
	#include <sys/stat.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>
	#include <fcntl.h>

	#define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
	#define KCOV_ENABLE			_IO('c', 100)
	#define KCOV_DISABLE			_IO('c', 101)
	#define COVER_SIZE			(1024<<10)

	#define KCOV_TRACE_PC  0
	#define KCOV_TRACE_CMP 1

	int main(int argc, char **argv)
	{
		int fd;
		unsigned long *cover;

		system("mount -t debugfs none /sys/kernel/debug");
		fd = open("/sys/kernel/debug/kcov", O_RDWR);
		ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE);
		cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
				PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		munmap(cover, COVER_SIZE * sizeof(unsigned long));
		cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
				PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
		memset(cover, 0, COVER_SIZE * sizeof(unsigned long));
		ftruncate(fd, 3UL << 20);
		return 0;
	}

This can be fixed by assigning anonymous VMAs own vm_ops and not relying
on it being NULL.

If ->mmap() failed to set ->vm_ops, mmap_region() will set it to
dummy_vm_ops.  This way we will have non-NULL ->vm_ops for all VMAs.

Link: http://lkml.kernel.org/r/20180724121139.62570-4-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: syzbot+3f84280d52be9b7083cc@syzkaller.appspotmail.com
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mem.c | 1 +
 fs/exec.c          | 1 +
 include/linux/mm.h | 8 ++++++++
 mm/mmap.c          | 3 +++
 mm/nommu.c         | 2 ++
 5 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index ffeb60d3434c..df66a9dd0aae 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -708,6 +708,7 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma)
 #endif
 	if (vma->vm_flags & VM_SHARED)
 		return shmem_zero_setup(vma);
+	vma_set_anonymous(vma);
 	return 0;
 }
 
diff --git a/fs/exec.c b/fs/exec.c
index 72e961a62adb..bdd0eacefdf5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -293,6 +293,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	bprm->vma = vma = vm_area_alloc(mm);
 	if (!vma)
 		return -ENOMEM;
+	vma_set_anonymous(vma);
 
 	if (down_write_killable(&mm->mmap_sem)) {
 		err = -EINTR;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 31540f166987..7ba6d356d18f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -454,10 +454,18 @@ struct vm_operations_struct {
 
 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
+	static const struct vm_operations_struct dummy_vm_ops = {};
+
 	vma->vm_mm = mm;
+	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
 }
 
+static inline void vma_set_anonymous(struct vm_area_struct *vma)
+{
+	vma->vm_ops = NULL;
+}
+
 struct mmu_gather;
 struct inode;
 
diff --git a/mm/mmap.c b/mm/mmap.c
index ff1944d8d458..17bbf4d3e24f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1778,6 +1778,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		error = shmem_zero_setup(vma);
 		if (error)
 			goto free_vma;
+	} else {
+		vma_set_anonymous(vma);
 	}
 
 	vma_link(mm, vma, prev, rb_link, rb_parent);
@@ -2983,6 +2985,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 		return -ENOMEM;
 	}
 
+	vma_set_anonymous(vma);
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_pgoff = pgoff;
diff --git a/mm/nommu.c b/mm/nommu.c
index 1d22fdbf7d7c..9fc9e43335b6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1145,6 +1145,8 @@ static int do_mmap_private(struct vm_area_struct *vma,
 		if (ret < len)
 			memset(base + ret, 0, len - ret);
 
+	} else {
+		vma_set_anonymous(vma);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From fa3fc2ad99b4f025446d1cff589a8d2dd7db92f2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Jul 2018 16:37:38 -0700
Subject: include/linux/eventfd.h: include linux/errno.h

The new gasket staging driver ran into a randconfig build failure when
CONFIG_EVENTFD is disabled:

  In file included from drivers/staging/gasket/gasket_interrupt.h:11,
                   from drivers/staging/gasket/gasket_interrupt.c:4:
  include/linux/eventfd.h: In function 'eventfd_ctx_fdget':
  include/linux/eventfd.h:51:9: error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration]

I can't see anything wrong with including eventfd.h before err.h, so the
easiest fix is to make it possible to do this by including the file
where it is needed.

Link: http://lkml.kernel.org/r/20180724110737.3985088-1-arnd@arndb.de
Fixes: 9a69f5087ccc ("drivers/staging: Gasket driver framework + Apex driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/eventfd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 7094718b653b..ffcc7724ca21 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -11,6 +11,7 @@
 
 #include <linux/fcntl.h>
 #include <linux/wait.h>
+#include <linux/err.h>
 
 /*
  * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
-- 
cgit v1.2.3


From 2b8de8a832add91a4e522536078b505283d3979f Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Fri, 6 Jul 2018 19:41:06 -0700
Subject: mfd: rave-sp: Add legacy EEPROM access command translation

This is needed to make rave-sp-eeprom driver work on "legacy"
firmware.

Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/rave-sp.c       | 2 ++
 include/linux/mfd/rave-sp.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/rave-sp.c b/drivers/mfd/rave-sp.c
index aa75d5841ca0..a999fa721b03 100644
--- a/drivers/mfd/rave-sp.c
+++ b/drivers/mfd/rave-sp.c
@@ -635,6 +635,8 @@ static int rave_sp_default_cmd_translate(enum rave_sp_command command)
 		return 0x1E;
 	case RAVE_SP_CMD_RESET_REASON:
 		return 0x1F;
+	case RAVE_SP_CMD_RMB_EEPROM:
+		return 0x20;
 	default:
 		return -EINVAL;
 	}
diff --git a/include/linux/mfd/rave-sp.h b/include/linux/mfd/rave-sp.h
index fe0ce7bc59cf..11eef77ef976 100644
--- a/include/linux/mfd/rave-sp.h
+++ b/include/linux/mfd/rave-sp.h
@@ -21,6 +21,7 @@ enum rave_sp_command {
 	RAVE_SP_CMD_STATUS			= 0xA0,
 	RAVE_SP_CMD_SW_WDT			= 0xA1,
 	RAVE_SP_CMD_PET_WDT			= 0xA2,
+	RAVE_SP_CMD_RMB_EEPROM			= 0xA4,
 	RAVE_SP_CMD_SET_BACKLIGHT		= 0xA6,
 	RAVE_SP_CMD_RESET			= 0xA7,
 	RAVE_SP_CMD_RESET_REASON		= 0xA8,
-- 
cgit v1.2.3


From 4d3e55bc7690b289eeae0c7e994db965cb2a993d Mon Sep 17 00:00:00 2001
From: Alberto Panizzo <alberto@amarulasolutions.com>
Date: Mon, 9 Jul 2018 19:46:46 +0200
Subject: mfd: wm8994: Allow to configure CS/ADDR Pulldown from dts

For designs where CS/ADDR pin is floating, it is useful to
allow dts to define whether to keep internal pull down or not.

Signed-off-by: Alberto Panizzo <alberto@amarulasolutions.com>
Signed-off-by: Anthony Brandon <anthony@amarulasolutions.com>
Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/wm8994-core.c        | 4 ++++
 include/linux/mfd/wm8994/pdata.h | 6 ++++++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index fa4b1b7f6db1..22bd6525e09c 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -304,6 +304,8 @@ static int wm8994_set_pdata_from_of(struct wm8994 *wm8994)
 
 	pdata->spkmode_pu = of_property_read_bool(np, "wlf,spkmode-pu");
 
+	pdata->csnaddr_pd = of_property_read_bool(np, "wlf,csnaddr-pd");
+
 	pdata->ldo[0].enable = of_get_named_gpio(np, "wlf,ldo1ena", 0);
 	if (pdata->ldo[0].enable < 0)
 		pdata->ldo[0].enable = 0;
@@ -562,6 +564,8 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 
 	if (pdata->spkmode_pu)
 		pulls |= WM8994_SPKMODE_PU;
+	if (pdata->csnaddr_pd)
+		pulls |= WM8994_CSNADDR_PD;
 
 	/* Disable unneeded pulls */
 	wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index 90c60524a496..b19c370fe81a 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -222,6 +222,12 @@ struct wm8994_pdata {
 	 */
 	bool spkmode_pu;
 
+	/*
+	 * CS/ADDR must be pulled internally by the device on this
+	 * system.
+	 */
+	bool csnaddr_pd;
+
 	/**
 	 * Maximum number of channels clocks will be generated for,
 	 * useful for systems where and I2S bus with multiple data
-- 
cgit v1.2.3


From c8fda5bfa9972dc1175b898dfdaa3a375dca6022 Mon Sep 17 00:00:00 2001
From: Marcel Ziswiler <marcel.ziswiler@toradex.com>
Date: Tue, 3 Jul 2018 17:04:11 +0200
Subject: mfd: as3722: Disable auto-power-on when AC OK

On ams AS3722, power on when AC OK is enabled by default.
Making this option as disable by default and enable only
when platform need this explicitly.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Tested-by: Bibek Basu <bbasu@nvidia.com>
Signed-off-by: Marcel Ziswiler <marcel.ziswiler@toradex.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/as3722.txt |  2 ++
 drivers/mfd/as3722.c                             | 12 ++++++++++++
 include/linux/mfd/as3722.h                       |  3 +++
 3 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mfd/as3722.txt b/Documentation/devicetree/bindings/mfd/as3722.txt
index 5297b2210704..2a665741d7fe 100644
--- a/Documentation/devicetree/bindings/mfd/as3722.txt
+++ b/Documentation/devicetree/bindings/mfd/as3722.txt
@@ -20,6 +20,8 @@ Optional properties:
 - ams,enable-internal-i2c-pullup: Boolean property, to enable internal pullup on
 	i2c scl/sda pins. Missing this will disable internal pullup on i2c
 	scl/sda lines.
+- ams,enable-ac-ok-power-on: Boolean property, to enable exit out of power off
+	mode with AC_OK pin (pin enabled in power off mode).
 
 Optional submodule and their properties:
 =======================================
diff --git a/drivers/mfd/as3722.c b/drivers/mfd/as3722.c
index f87342c211bc..4d069ed21ff6 100644
--- a/drivers/mfd/as3722.c
+++ b/drivers/mfd/as3722.c
@@ -349,6 +349,8 @@ static int as3722_i2c_of_probe(struct i2c_client *i2c,
 					"ams,enable-internal-int-pullup");
 	as3722->en_intern_i2c_pullup = of_property_read_bool(np,
 					"ams,enable-internal-i2c-pullup");
+	as3722->en_ac_ok_pwr_on = of_property_read_bool(np,
+					"ams,enable-ac-ok-power-on");
 	as3722->irq_flags = irqd_get_trigger_type(irq_data);
 	dev_dbg(&i2c->dev, "IRQ flags are 0x%08lx\n", as3722->irq_flags);
 	return 0;
@@ -360,6 +362,7 @@ static int as3722_i2c_probe(struct i2c_client *i2c,
 	struct as3722 *as3722;
 	unsigned long irq_flags;
 	int ret;
+	u8 val = 0;
 
 	as3722 = devm_kzalloc(&i2c->dev, sizeof(struct as3722), GFP_KERNEL);
 	if (!as3722)
@@ -398,6 +401,15 @@ static int as3722_i2c_probe(struct i2c_client *i2c,
 	if (ret < 0)
 		return ret;
 
+	if (as3722->en_ac_ok_pwr_on)
+		val = AS3722_CTRL_SEQU1_AC_OK_PWR_ON;
+	ret = as3722_update_bits(as3722, AS3722_CTRL_SEQU1_REG,
+			AS3722_CTRL_SEQU1_AC_OK_PWR_ON, val);
+	if (ret < 0) {
+		dev_err(as3722->dev, "CTRLsequ1 update failed: %d\n", ret);
+		return ret;
+	}
+
 	ret = devm_mfd_add_devices(&i2c->dev, -1, as3722_devs,
 				   ARRAY_SIZE(as3722_devs), NULL, 0,
 				   regmap_irq_get_domain(as3722->irq_data));
diff --git a/include/linux/mfd/as3722.h b/include/linux/mfd/as3722.h
index 51e6f9414575..b404a5af9bba 100644
--- a/include/linux/mfd/as3722.h
+++ b/include/linux/mfd/as3722.h
@@ -296,6 +296,8 @@
 #define AS3722_ADC1_CONV_NOTREADY			BIT(7)
 #define AS3722_ADC1_SOURCE_SELECT_MASK			0x1F
 
+#define AS3722_CTRL_SEQU1_AC_OK_PWR_ON			BIT(0)
+
 /* GPIO modes */
 #define AS3722_GPIO_MODE_MASK				0x07
 #define AS3722_GPIO_MODE_INPUT				0x00
@@ -391,6 +393,7 @@ struct as3722 {
 	unsigned long irq_flags;
 	bool en_intern_int_pullup;
 	bool en_intern_i2c_pullup;
+	bool en_ac_ok_pwr_on;
 	struct regmap_irq_chip_data *irq_data;
 };
 
-- 
cgit v1.2.3


From 038709071328ff68a936c6b8c33a24a805eea3c5 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu5@cn.bosch.com>
Date: Wed, 13 Jun 2018 16:37:17 +0200
Subject: can: dev: enable multi-queue for SocketCAN devices

The existing SocketCAN implementation provides alloc_candev() to
allocate a CAN device using a single Tx and Rx queue. This can lead to
priority inversion in case the single Tx queue is already full with low
priority messages and a high priority message needs to be sent while the
bus is fully loaded with medium priority messages.

This problem can be solved by using the existing multi-queue support of
the network subsytem. The commit makes it possible to use multi-queue in
the CAN subsystem in the same way it is used in the Ethernet subsystem
by adding an alloc_candev_mqs() call and accompanying macros. With this
support a CAN device can use multi-queue qdisc (e.g. mqprio) to avoid
the aforementioned priority inversion.

The exisiting functionality of alloc_candev() is the same as before.

CAN devices need to have prioritized multiple hardware queues or are
able to abort waiting for arbitration to make sensible use of
multi-queues.

Signed-off-by: Zhu Yi <yi.zhu5@cn.bosch.com>
Signed-off-by: Mark Jonas <mark.jonas@de.bosch.com>
Reviewed-by: Heiko Schocher <hs@denx.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   | 8 +++++---
 include/linux/can/dev.h | 7 ++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 5bb6e8896601..49163570a63a 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -701,7 +701,8 @@ EXPORT_SYMBOL_GPL(alloc_can_err_skb);
 /*
  * Allocate and setup space for the CAN network device
  */
-struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
+struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
+				    unsigned int txqs, unsigned int rxqs)
 {
 	struct net_device *dev;
 	struct can_priv *priv;
@@ -713,7 +714,8 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
 	else
 		size = sizeof_priv;
 
-	dev = alloc_netdev(size, "can%d", NET_NAME_UNKNOWN, can_setup);
+	dev = alloc_netdev_mqs(size, "can%d", NET_NAME_UNKNOWN, can_setup,
+			       txqs, rxqs);
 	if (!dev)
 		return NULL;
 
@@ -732,7 +734,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
 
 	return dev;
 }
-EXPORT_SYMBOL_GPL(alloc_candev);
+EXPORT_SYMBOL_GPL(alloc_candev_mqs);
 
 /*
  * Free space of the CAN network device
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 055aaf5ed9af..a83e1f632eb7 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -143,7 +143,12 @@ u8 can_dlc2len(u8 can_dlc);
 /* map the sanitized data length to an appropriate data length code */
 u8 can_len2dlc(u8 len);
 
-struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max);
+struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
+				    unsigned int txqs, unsigned int rxqs);
+#define alloc_candev(sizeof_priv, echo_skb_max) \
+	alloc_candev_mqs(sizeof_priv, echo_skb_max, 1, 1)
+#define alloc_candev_mq(sizeof_priv, echo_skb_max, count) \
+	alloc_candev_mqs(sizeof_priv, echo_skb_max, count, count)
 void free_candev(struct net_device *dev);
 
 /* a candev safe wrapper around netdev_priv */
-- 
cgit v1.2.3


From 1fb2e3f276ddafee81073d884f599cd2574c31e2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Tue, 17 Jul 2018 18:05:36 +0200
Subject: lib/crc: Move polynomial definition to separate header

Allow other drivers and parts of kernel to use the same define for
CRC32 polynomial, instead of duplicating it in many places.  This code
does not bring any functional changes, except moving existing code.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crc32poly.h | 20 ++++++++++++++++++++
 lib/crc32.c               |  1 +
 lib/crc32defs.h           | 14 --------------
 lib/gen_crc32table.c      |  1 +
 4 files changed, 22 insertions(+), 14 deletions(-)
 create mode 100644 include/linux/crc32poly.h

(limited to 'include/linux')

diff --git a/include/linux/crc32poly.h b/include/linux/crc32poly.h
new file mode 100644
index 000000000000..7ad5aa92d3c7
--- /dev/null
+++ b/include/linux/crc32poly.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CRC32_POLY_H
+#define _LINUX_CRC32_POLY_H
+
+/*
+ * There are multiple 16-bit CRC polynomials in common use, but this is
+ * *the* standard CRC-32 polynomial, first popularized by Ethernet.
+ * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0
+ */
+#define CRCPOLY_LE 0xedb88320
+#define CRCPOLY_BE 0x04c11db7
+
+/*
+ * This is the CRC32c polynomial, as outlined by Castagnoli.
+ * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+
+ * x^8+x^6+x^0
+ */
+#define CRC32C_POLY_LE 0x82F63B78
+
+#endif /* _LINUX_CRC32_POLY_H */
diff --git a/lib/crc32.c b/lib/crc32.c
index 2ef20fe84b69..341c54cb4edf 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -27,6 +27,7 @@
 /* see: Documentation/crc32.txt for a description of algorithms */
 
 #include <linux/crc32.h>
+#include <linux/crc32poly.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/sched.h>
diff --git a/lib/crc32defs.h b/lib/crc32defs.h
index cb275a28a750..0c8fb5923e7e 100644
--- a/lib/crc32defs.h
+++ b/lib/crc32defs.h
@@ -1,18 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- * There are multiple 16-bit CRC polynomials in common use, but this is
- * *the* standard CRC-32 polynomial, first popularized by Ethernet.
- * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0
- */
-#define CRCPOLY_LE 0xedb88320
-#define CRCPOLY_BE 0x04c11db7
-
-/*
- * This is the CRC32c polynomial, as outlined by Castagnoli.
- * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+
- * x^8+x^6+x^0
- */
-#define CRC32C_POLY_LE 0x82F63B78
 
 /* Try to choose an implementation variant via Kconfig */
 #ifdef CONFIG_CRC32_SLICEBY8
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index 8f26660ea10a..34c3bc826f45 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdio.h>
+#include "../include/linux/crc32poly.h"
 #include "../include/generated/autoconf.h"
 #include "crc32defs.h"
 #include <inttypes.h>
-- 
cgit v1.2.3


From e37f2f93afe594682702439ca34eb8130598cdf2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Tue, 17 Jul 2018 18:05:37 +0200
Subject: lib/crc: Use consistent naming for CRC-32 polynomials

Header was defining CRCPOLY_LE/BE and CRC32C_POLY_LE but in fact all of
them are CRC-32 polynomials so use consistent naming.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crc32poly.h |  4 ++--
 lib/crc32.c               | 10 +++++-----
 lib/gen_crc32table.c      |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crc32poly.h b/include/linux/crc32poly.h
index 7ad5aa92d3c7..62c4b7790a28 100644
--- a/include/linux/crc32poly.h
+++ b/include/linux/crc32poly.h
@@ -7,8 +7,8 @@
  * *the* standard CRC-32 polynomial, first popularized by Ethernet.
  * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0
  */
-#define CRCPOLY_LE 0xedb88320
-#define CRCPOLY_BE 0x04c11db7
+#define CRC32_POLY_LE 0xedb88320
+#define CRC32_POLY_BE 0x04c11db7
 
 /*
  * This is the CRC32c polynomial, as outlined by Castagnoli.
diff --git a/lib/crc32.c b/lib/crc32.c
index 341c54cb4edf..a6c9afafc8c8 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -185,7 +185,7 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
 #if CRC_LE_BITS == 1
 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 {
-	return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE);
+	return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE);
 }
 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 {
@@ -195,7 +195,7 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 {
 	return crc32_le_generic(crc, p, len,
-			(const u32 (*)[256])crc32table_le, CRCPOLY_LE);
+			(const u32 (*)[256])crc32table_le, CRC32_POLY_LE);
 }
 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 {
@@ -269,7 +269,7 @@ static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
 
 u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len)
 {
-	return crc32_generic_shift(crc, len, CRCPOLY_LE);
+	return crc32_generic_shift(crc, len, CRC32_POLY_LE);
 }
 
 u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len)
@@ -331,13 +331,13 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
 #if CRC_LE_BITS == 1
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
-	return crc32_be_generic(crc, p, len, NULL, CRCPOLY_BE);
+	return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
 }
 #else
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
 	return crc32_be_generic(crc, p, len,
-			(const u32 (*)[256])crc32table_be, CRCPOLY_BE);
+			(const u32 (*)[256])crc32table_be, CRC32_POLY_BE);
 }
 #endif
 EXPORT_SYMBOL(crc32_be);
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index 34c3bc826f45..f755b997b967 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -58,7 +58,7 @@ static void crc32init_le_generic(const uint32_t polynomial,
 
 static void crc32init_le(void)
 {
-	crc32init_le_generic(CRCPOLY_LE, crc32table_le);
+	crc32init_le_generic(CRC32_POLY_LE, crc32table_le);
 }
 
 static void crc32cinit_le(void)
@@ -77,7 +77,7 @@ static void crc32init_be(void)
 	crc32table_be[0][0] = 0;
 
 	for (i = 1; i < BE_TABLE_SIZE; i <<= 1) {
-		crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0);
+		crc = (crc << 1) ^ ((crc & 0x80000000) ? CRC32_POLY_BE : 0);
 		for (j = 0; j < i; j++)
 			crc32table_be[0][i + j] = crc ^ crc32table_be[0][j];
 	}
-- 
cgit v1.2.3


From f07d141fe9430cdf9f8a65a87c4136bd83b8ab2e Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 23 Jul 2018 23:16:07 +0100
Subject: dma-mapping: Generalise dma_32bit_limit flag

Whilst the notion of an upstream DMA restriction is most commonly seen
in PCI host bridges saddled with a 32-bit native interface, a more
general version of the same issue can exist on complex SoCs where a bus
or point-to-point interconnect link from a device's DMA master interface
to another component along the path to memory (often an IOMMU) may carry
fewer address bits than the interfaces at both ends nominally support.
In order to properly deal with this, the first step is to expand the
dma_32bit_limit flag into an arbitrary mask.

To minimise the impact on existing code, we'll make sure to only
consider this new mask valid if set. That makes sense anyway, since a
mask of zero would represent DMA not being wired up at all, and that
would be better handled by not providing valid ops in the first place.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/kernel/pci-dma.c | 2 +-
 include/linux/device.h    | 6 +++---
 kernel/dma/direct.c       | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index ab5d9dd668d2..80f9fe8d27d0 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -175,7 +175,7 @@ rootfs_initcall(pci_iommu_init);
 
 static int via_no_dac_cb(struct pci_dev *pdev, void *data)
 {
-	pdev->dev.dma_32bit_limit = true;
+	pdev->dev.bus_dma_mask = DMA_BIT_MASK(32);
 	return 0;
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..6d3b000be57e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -886,6 +886,8 @@ struct dev_links_info {
  * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all
  * 		hardware supports 64-bit addresses for consistent allocations
  * 		such descriptors.
+ * @bus_dma_mask: Mask of an upstream bridge or bus which imposes a smaller DMA
+ *		limit than the device itself supports.
  * @dma_pfn_offset: offset of DMA memory range relatively of RAM
  * @dma_parms:	A low level driver may set these to teach IOMMU code about
  * 		segment limitations.
@@ -912,8 +914,6 @@ struct dev_links_info {
  * @offline:	Set after successful invocation of bus type's .offline().
  * @of_node_reused: Set if the device-tree node is shared with an ancestor
  *              device.
- * @dma_32bit_limit: bridge limited to 32bit DMA even if the device itself
- *		indicates support for a higher limit in the dma_mask field.
  *
  * At the lowest level, every device in a Linux system is represented by an
  * instance of struct device. The device structure contains the information
@@ -967,6 +967,7 @@ struct device {
 					     not all hardware supports
 					     64 bit addresses for consistent
 					     allocations such descriptors. */
+	u64		bus_dma_mask;	/* upstream dma_mask constraint */
 	unsigned long	dma_pfn_offset;
 
 	struct device_dma_parameters *dma_parms;
@@ -1002,7 +1003,6 @@ struct device {
 	bool			offline_disabled:1;
 	bool			offline:1;
 	bool			of_node_reused:1;
-	bool			dma_32bit_limit:1;
 };
 
 static inline struct device *kobj_to_dev(struct kobject *kobj)
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 8be8106270c2..c2860c5a9e96 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -180,10 +180,10 @@ int dma_direct_supported(struct device *dev, u64 mask)
 		return 0;
 #endif
 	/*
-	 * Various PCI/PCIe bridges have broken support for > 32bit DMA even
-	 * if the device itself might support it.
+	 * Upstream PCI/PCIe bridges or SoC interconnects may not carry
+	 * as many DMA address bits as the device itself supports.
 	 */
-	if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
+	if (dev->bus_dma_mask && mask > dev->bus_dma_mask)
 		return 0;
 	return 1;
 }
-- 
cgit v1.2.3


From 9b89bc3857a6c0dfda18ddae2a42c114ecc32753 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 12 May 2018 18:18:12 +0200
Subject: nvme.h: add support for the log specific field

NVMe 1.3 added a new log specific field to the get log page CQ
defintion, add it to our get_log_page SQ structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 include/linux/nvme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 80dfedcf0bf7..39f05b0b8c7f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -885,7 +885,7 @@ struct nvme_get_log_page_command {
 	__u64			rsvd2[2];
 	union nvme_data_ptr	dptr;
 	__u8			lid;
-	__u8			rsvd10;
+	__u8			lsp; /* upper 4 bits reserved */
 	__le16			numdl;
 	__le16			numdu;
 	__u16			rsvd11;
-- 
cgit v1.2.3


From 1a37621658fe06b10cf8bac02c32304d2a1c888c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 13 May 2018 18:53:57 +0200
Subject: nvme.h: add ANA definitions

Add various defintions from NVMe 1.3 TP 4004.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 include/linux/nvme.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 39f05b0b8c7f..64c9175723de 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -242,7 +242,12 @@ struct nvme_id_ctrl {
 	__le32			sanicap;
 	__le32			hmminds;
 	__le16			hmmaxd;
-	__u8			rsvd338[174];
+	__u8			rsvd338[4];
+	__u8			anatt;
+	__u8			anacap;
+	__le32			anagrpmax;
+	__le32			nanagrpid;
+	__u8			rsvd352[160];
 	__u8			sqes;
 	__u8			cqes;
 	__le16			maxcmd;
@@ -258,7 +263,8 @@ struct nvme_id_ctrl {
 	__le16			acwu;
 	__u8			rsvd534[2];
 	__le32			sgls;
-	__u8			rsvd540[228];
+	__le32			mnan;
+	__u8			rsvd544[224];
 	char			subnqn[256];
 	__u8			rsvd1024[768];
 	__le32			ioccsz;
@@ -312,7 +318,9 @@ struct nvme_id_ns {
 	__le16			nabspf;
 	__le16			noiob;
 	__u8			nvmcap[16];
-	__u8			rsvd64[40];
+	__u8			rsvd64[28];
+	__le32			anagrpid;
+	__u8			rsvd96[8];
 	__u8			nguid[16];
 	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
@@ -425,6 +433,32 @@ struct nvme_effects_log {
 	__u8   resv[2048];
 };
 
+enum nvme_ana_state {
+	NVME_ANA_OPTIMIZED		= 0x01,
+	NVME_ANA_NONOPTIMIZED		= 0x02,
+	NVME_ANA_INACCESSIBLE		= 0x03,
+	NVME_ANA_PERSISTENT_LOSS	= 0x04,
+	NVME_ANA_CHANGE			= 0x0f,
+};
+
+struct nvme_ana_group_desc {
+	__le32	grpid;
+	__le32	nnsids;
+	__le64	chgcnt;
+	__u8	state;
+	__u8	rsvd17[7];
+	__le32	nsids[];
+};
+
+/* flag for the log specific field of the ANA log */
+#define NVME_ANA_LOG_RGO	(1 << 0)
+
+struct nvme_ana_rsp_hdr {
+	__le64	chgcnt;
+	__le16	ngrps;
+	__le16	rsvd10[3];
+};
+
 enum {
 	NVME_SMART_CRIT_SPARE		= 1 << 0,
 	NVME_SMART_CRIT_TEMPERATURE	= 1 << 1,
@@ -444,11 +478,13 @@ enum {
 enum {
 	NVME_AER_NOTICE_NS_CHANGED	= 0x00,
 	NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
+	NVME_AER_NOTICE_ANA		= 0x03,
 };
 
 enum {
 	NVME_AEN_CFG_NS_ATTR		= 1 << 8,
 	NVME_AEN_CFG_FW_ACT		= 1 << 9,
+	NVME_AEN_CFG_ANA_CHANGE		= 1 << 11,
 };
 
 struct nvme_lba_range_type {
@@ -763,6 +799,7 @@ enum {
 	NVME_LOG_FW_SLOT	= 0x03,
 	NVME_LOG_CHANGED_NS	= 0x04,
 	NVME_LOG_CMD_EFFECTS	= 0x05,
+	NVME_LOG_ANA		= 0x0c,
 	NVME_LOG_DISC		= 0x70,
 	NVME_LOG_RESERVATION	= 0x80,
 	NVME_FWACT_REPL		= (0 << 3),
@@ -1185,6 +1222,13 @@ enum {
 	NVME_SC_ACCESS_DENIED		= 0x286,
 	NVME_SC_UNWRITTEN_BLOCK		= 0x287,
 
+	/*
+	 * Path-related Errors:
+	 */
+	NVME_SC_ANA_PERSISTENT_LOSS	= 0x301,
+	NVME_SC_ANA_INACCESSIBLE	= 0x302,
+	NVME_SC_ANA_TRANSITION		= 0x303,
+
 	NVME_SC_DNR			= 0x4000,
 };
 
-- 
cgit v1.2.3


From 22a65aa8b1a84ca429c0fc8415dee5681ab36eb3 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Mon, 25 Dec 2017 18:40:52 +0200
Subject: net/mlx5e: Vxlan, check maximum number of UDP ports

The NIC has a limited number of offloaded VXLAN UDP ports (usually 4).
Instead of letting the firmware fail when trying to add more ports than
it can handle, let the driver check it on its own.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h    |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 14 ++++++++++++++
 include/linux/mlx5/mlx5_ifc.h                   |  4 +++-
 3 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c41cfc2a4b70..c4d4db8722f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -657,6 +657,7 @@ enum {
 struct mlx5e_vxlan_db {
 	spinlock_t			lock; /* protect vxlan table */
 	struct radix_tree_root		tree;
+	int				num_ports;
 };
 
 struct mlx5e_l2_rule {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 2f699998d13e..e3af2efe18ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -52,6 +52,11 @@ void mlx5e_vxlan_init(struct mlx5e_priv *priv)
 		mlx5e_vxlan_add_port(priv, 4789);
 }
 
+static inline u8 mlx5e_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
+{
+	return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
+}
+
 static int mlx5e_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
 {
 	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
@@ -98,6 +103,13 @@ static void mlx5e_vxlan_add_port(struct mlx5e_priv *priv, u16 port)
 		return;
 	}
 
+	if (vxlan_db->num_ports >= mlx5e_vxlan_max_udp_ports(priv->mdev)) {
+		netdev_info(priv->netdev,
+			    "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n",
+			    port, mlx5e_vxlan_max_udp_ports(priv->mdev));
+		return;
+	}
+
 	if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
 		return;
 
@@ -114,6 +126,7 @@ static void mlx5e_vxlan_add_port(struct mlx5e_priv *priv, u16 port)
 	if (err)
 		goto err_free;
 
+	vxlan_db->num_ports++;
 	return;
 
 err_free:
@@ -163,6 +176,7 @@ out_unlock:
 	if (remove) {
 		mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
 		kfree(vxlan);
+		vxlan_db->num_ports--;
 	}
 	mutex_unlock(&priv->state_lock);
 	kfree(vxlan_work);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 22f54bedfaae..60c2308fe062 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -668,7 +668,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         swp[0x1];
 	u8         swp_csum[0x1];
 	u8         swp_lso[0x1];
-	u8         reserved_at_23[0x1b];
+	u8         reserved_at_23[0xd];
+	u8         max_vxlan_udp_ports[0x8];
+	u8         reserved_at_38[0x6];
 	u8         max_geneve_opt_len[0x1];
 	u8         tunnel_stateless_geneve_rx[0x1];
 
-- 
cgit v1.2.3


From 358aa5ce288aa1085f0f3ef9f315119563fa6541 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Wed, 9 May 2018 13:28:00 -0700
Subject: net/mlx5e: Vxlan, move vxlan logic to core driver

Move vxlan logic and objects to mlx5 core dirver.
Since it going to be used from different mlx5 interfaces.
e.g. mlx5e PF NIC netdev and mlx5e E-Switch representors.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   2 -
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  21 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |   6 +-
 .../net/ethernet/mellanox/mlx5/core/lib/vxlan.c    | 230 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/lib/vxlan.h    |  64 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   5 +
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c    | 230 ---------------------
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.h    |  64 ------
 include/linux/mlx5/driver.h                        |   2 +
 10 files changed, 315 insertions(+), 313 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
 delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
 delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vxlan.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index ae2bdcb1647c..f20fda1ced4f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -14,8 +14,8 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
 		fpga/ipsec.o fpga/tls.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
-		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o vxlan.o \
-		en_arfs.o en_fs_ethtool.o en_selftest.o en/port.o
+		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
+		en_arfs.o en_fs_ethtool.o en_selftest.o en/port.o lib/vxlan.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1bd4536b9061..c7ed3d20fd54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -52,7 +52,6 @@
 #include "wq.h"
 #include "mlx5_core.h"
 #include "en_stats.h"
-#include "vxlan.h"
 
 struct page_pool;
 
@@ -812,7 +811,6 @@ struct mlx5e_priv {
 	u32                        tx_rates[MLX5E_MAX_NUM_SQS];
 
 	struct mlx5e_flow_steering fs;
-	struct mlx5_vxlan          *vxlan;
 
 	struct workqueue_struct    *wq;
 	struct work_struct         update_carrier_work;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ef4b2f0c427c..fde35021a257 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -45,7 +45,7 @@
 #include "en_accel/tls.h"
 #include "accel/ipsec.h"
 #include "accel/tls.h"
-#include "vxlan.h"
+#include "lib/vxlan.h"
 #include "en/port.h"
 #include "en/xdp.h"
 
@@ -2974,7 +2974,7 @@ int mlx5e_open(struct net_device *netdev)
 		mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
 	mutex_unlock(&priv->state_lock);
 
-	if (mlx5_vxlan_allowed(priv->vxlan))
+	if (mlx5_vxlan_allowed(priv->mdev->vxlan))
 		udp_tunnel_get_rx_info(netdev);
 
 	return err;
@@ -3983,7 +3983,7 @@ static void mlx5e_vxlan_add_work(struct work_struct *work)
 	u16 port = vxlan_work->port;
 
 	mutex_lock(&priv->state_lock);
-	mlx5_vxlan_add_port(priv->vxlan, port);
+	mlx5_vxlan_add_port(priv->mdev->vxlan, port);
 	mutex_unlock(&priv->state_lock);
 
 	kfree(vxlan_work);
@@ -3997,7 +3997,7 @@ static void mlx5e_vxlan_del_work(struct work_struct *work)
 	u16 port = vxlan_work->port;
 
 	mutex_lock(&priv->state_lock);
-	mlx5_vxlan_del_port(priv->vxlan, port);
+	mlx5_vxlan_del_port(priv->mdev->vxlan, port);
 	mutex_unlock(&priv->state_lock);
 	kfree(vxlan_work);
 }
@@ -4028,7 +4028,7 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev,
 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
 		return;
 
-	if (!mlx5_vxlan_allowed(priv->vxlan))
+	if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
 		return;
 
 	mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
@@ -4042,7 +4042,7 @@ static void mlx5e_del_vxlan_port(struct net_device *netdev,
 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
 		return;
 
-	if (!mlx5_vxlan_allowed(priv->vxlan))
+	if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
 		return;
 
 	mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);
@@ -4076,7 +4076,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 		port = be16_to_cpu(udph->dest);
 
 		/* Verify if UDP port is being offloaded by HW */
-		if (mlx5_vxlan_lookup_port(priv->vxlan, port))
+		if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
 			return features;
 	}
 
@@ -4648,7 +4648,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
 	netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
 
-	if (mlx5_vxlan_allowed(priv->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
+	if (mlx5_vxlan_allowed(mdev->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
 		netdev->hw_enc_features |= NETIF_F_IP_CSUM;
 		netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
 		netdev->hw_enc_features |= NETIF_F_TSO;
@@ -4656,7 +4656,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 		netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
 	}
 
-	if (mlx5_vxlan_allowed(priv->vxlan)) {
+	if (mlx5_vxlan_allowed(mdev->vxlan)) {
 		netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
 					   NETIF_F_GSO_UDP_TUNNEL_CSUM;
 		netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
@@ -4758,8 +4758,6 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	int err;
 
-	priv->vxlan = mlx5_vxlan_create(mdev);
-
 	mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
 	err = mlx5e_ipsec_init(priv);
 	if (err)
@@ -4773,7 +4771,6 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
-	mlx5_vxlan_destroy(priv->vxlan);
 	mlx5e_tls_cleanup(priv);
 	mlx5e_ipsec_cleanup(priv);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 1b4931b62094..288a57f76e84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -50,7 +50,7 @@
 #include "en_rep.h"
 #include "en_tc.h"
 #include "eswitch.h"
-#include "vxlan.h"
+#include "lib/vxlan.h"
 #include "fs_core.h"
 #include "en/port.h"
 
@@ -1133,7 +1133,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
 			goto vxlan_match_offload_err;
 
-		if (mlx5_vxlan_lookup_port(up_priv->vxlan, be16_to_cpu(key->dst)) &&
+		if (mlx5_vxlan_lookup_port(up_priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
 		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
 			parse_vxlan_attr(spec, f);
 		else {
@@ -2557,7 +2557,7 @@ vxlan_encap_offload_err:
 		return -EOPNOTSUPP;
 	}
 
-	if (mlx5_vxlan_lookup_port(up_priv->vxlan, be16_to_cpu(key->tp_dst)) &&
+	if (mlx5_vxlan_lookup_port(up_priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
 	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
 		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
 	} else {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
new file mode 100644
index 000000000000..9a8fd762167b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "vxlan.h"
+
+struct mlx5_vxlan {
+	struct mlx5_core_dev		*mdev;
+	spinlock_t			lock; /* protect vxlan table */
+	/* max_num_ports is usuallly 4, 16 buckets is more than enough */
+	DECLARE_HASHTABLE(htable, 4);
+	int				num_ports;
+	struct mutex                    sync_lock; /* sync add/del port HW operations */
+};
+
+struct mlx5_vxlan_port {
+	struct hlist_node hlist;
+	atomic_t refcount;
+	u16 udp_port;
+};
+
+static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
+{
+	return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
+}
+
+static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
+
+	MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
+		 MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
+	MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
+
+	MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
+		 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+	MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static struct mlx5_vxlan_port*
+mlx5_vxlan_lookup_port_locked(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+
+	hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) {
+		if (vxlanp->udp_port == port)
+			return vxlanp;
+	}
+
+	return NULL;
+}
+
+struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+
+	if (!mlx5_vxlan_allowed(vxlan))
+		return NULL;
+
+	spin_lock_bh(&vxlan->lock);
+	vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
+	spin_unlock_bh(&vxlan->lock);
+
+	return vxlanp;
+}
+
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+	int ret = -ENOSPC;
+
+	vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
+	if (vxlanp) {
+		atomic_inc(&vxlanp->refcount);
+		return 0;
+	}
+
+	mutex_lock(&vxlan->sync_lock);
+	if (vxlan->num_ports >= mlx5_vxlan_max_udp_ports(vxlan->mdev)) {
+		mlx5_core_info(vxlan->mdev,
+			       "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n",
+			       port, mlx5_vxlan_max_udp_ports(vxlan->mdev));
+		ret = -ENOSPC;
+		goto unlock;
+	}
+
+	ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
+	if (ret)
+		goto unlock;
+
+	vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL);
+	if (!vxlanp) {
+		ret = -ENOMEM;
+		goto err_delete_port;
+	}
+
+	vxlanp->udp_port = port;
+	atomic_set(&vxlanp->refcount, 1);
+
+	spin_lock_bh(&vxlan->lock);
+	hash_add(vxlan->htable, &vxlanp->hlist, port);
+	spin_unlock_bh(&vxlan->lock);
+
+	vxlan->num_ports++;
+	mutex_unlock(&vxlan->sync_lock);
+	return 0;
+
+err_delete_port:
+	mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+
+unlock:
+	mutex_unlock(&vxlan->sync_lock);
+	return ret;
+}
+
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+	bool remove = false;
+	int ret = 0;
+
+	mutex_lock(&vxlan->sync_lock);
+
+	spin_lock_bh(&vxlan->lock);
+	vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
+	if (!vxlanp) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	if (atomic_dec_and_test(&vxlanp->refcount)) {
+		hash_del(&vxlanp->hlist);
+		remove = true;
+	}
+
+out_unlock:
+	spin_unlock_bh(&vxlan->lock);
+
+	if (remove) {
+		mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+		kfree(vxlanp);
+		vxlan->num_ports--;
+	}
+
+	mutex_unlock(&vxlan->sync_lock);
+
+	return ret;
+}
+
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_vxlan *vxlan;
+
+	if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev))
+		return ERR_PTR(-ENOTSUPP);
+
+	vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL);
+	if (!vxlan)
+		return ERR_PTR(-ENOMEM);
+
+	vxlan->mdev = mdev;
+	mutex_init(&vxlan->sync_lock);
+	spin_lock_init(&vxlan->lock);
+	hash_init(vxlan->htable);
+
+	/* Hardware adds 4789 by default */
+	mlx5_vxlan_add_port(vxlan, 4789);
+
+	return vxlan;
+}
+
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
+{
+	struct mlx5_vxlan_port *vxlanp;
+	struct hlist_node *tmp;
+	int bkt;
+
+	if (!mlx5_vxlan_allowed(vxlan))
+		return;
+
+	/* Lockless since we are the only hash table consumers*/
+	hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
+		hash_del(&vxlanp->hlist);
+		mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port);
+		kfree(vxlanp);
+	}
+
+	kfree(vxlan);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
new file mode 100644
index 000000000000..fd874a30c4d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_VXLAN_H__
+#define __MLX5_VXLAN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_vxlan;
+struct mlx5_vxlan_port;
+
+#ifdef CONFIG_MLX5_CORE_EN
+
+static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan)
+{
+	/* not allowed reason is encoded in vxlan pointer as error,
+	 * on mlx5_vxlan_create
+	 */
+	return !IS_ERR_OR_NULL(vxlan);
+}
+
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev);
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
+struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+
+#else
+
+static inline struct mlx5_vxlan*
+mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-ENOTSUPP); }
+static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
+
+#endif
+
+#endif /* __MLX5_VXLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6ddbb70e95de..03b9c6733eed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -62,6 +62,7 @@
 #include "accel/ipsec.h"
 #include "accel/tls.h"
 #include "lib/clock.h"
+#include "lib/vxlan.h"
 #include "diag/fw_tracer.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
@@ -961,6 +962,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 
 	mlx5_init_clock(dev);
 
+	dev->vxlan = mlx5_vxlan_create(dev);
+
 	err = mlx5_init_rl_table(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to init rate limiting\n");
@@ -1004,6 +1007,7 @@ err_mpfs_cleanup:
 err_rl_cleanup:
 	mlx5_cleanup_rl_table(dev);
 err_tables_cleanup:
+	mlx5_vxlan_destroy(dev->vxlan);
 	mlx5_cleanup_mkey_table(dev);
 	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
@@ -1024,6 +1028,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
 	mlx5_mpfs_cleanup(dev);
 	mlx5_cleanup_rl_table(dev);
+	mlx5_vxlan_destroy(dev->vxlan);
 	mlx5_cleanup_clock(dev);
 	mlx5_cleanup_reserved_gids(dev);
 	mlx5_cleanup_mkey_table(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
deleted file mode 100644
index 9a8fd762167b..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include "mlx5_core.h"
-#include "vxlan.h"
-
-struct mlx5_vxlan {
-	struct mlx5_core_dev		*mdev;
-	spinlock_t			lock; /* protect vxlan table */
-	/* max_num_ports is usuallly 4, 16 buckets is more than enough */
-	DECLARE_HASHTABLE(htable, 4);
-	int				num_ports;
-	struct mutex                    sync_lock; /* sync add/del port HW operations */
-};
-
-struct mlx5_vxlan_port {
-	struct hlist_node hlist;
-	atomic_t refcount;
-	u16 udp_port;
-};
-
-static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
-{
-	return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
-}
-
-static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
-{
-	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
-
-	MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
-		 MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
-	MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
-{
-	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
-
-	MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
-		 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
-	MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-}
-
-static struct mlx5_vxlan_port*
-mlx5_vxlan_lookup_port_locked(struct mlx5_vxlan *vxlan, u16 port)
-{
-	struct mlx5_vxlan_port *vxlanp;
-
-	hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) {
-		if (vxlanp->udp_port == port)
-			return vxlanp;
-	}
-
-	return NULL;
-}
-
-struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
-{
-	struct mlx5_vxlan_port *vxlanp;
-
-	if (!mlx5_vxlan_allowed(vxlan))
-		return NULL;
-
-	spin_lock_bh(&vxlan->lock);
-	vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
-	spin_unlock_bh(&vxlan->lock);
-
-	return vxlanp;
-}
-
-int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
-{
-	struct mlx5_vxlan_port *vxlanp;
-	int ret = -ENOSPC;
-
-	vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
-	if (vxlanp) {
-		atomic_inc(&vxlanp->refcount);
-		return 0;
-	}
-
-	mutex_lock(&vxlan->sync_lock);
-	if (vxlan->num_ports >= mlx5_vxlan_max_udp_ports(vxlan->mdev)) {
-		mlx5_core_info(vxlan->mdev,
-			       "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n",
-			       port, mlx5_vxlan_max_udp_ports(vxlan->mdev));
-		ret = -ENOSPC;
-		goto unlock;
-	}
-
-	ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
-	if (ret)
-		goto unlock;
-
-	vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL);
-	if (!vxlanp) {
-		ret = -ENOMEM;
-		goto err_delete_port;
-	}
-
-	vxlanp->udp_port = port;
-	atomic_set(&vxlanp->refcount, 1);
-
-	spin_lock_bh(&vxlan->lock);
-	hash_add(vxlan->htable, &vxlanp->hlist, port);
-	spin_unlock_bh(&vxlan->lock);
-
-	vxlan->num_ports++;
-	mutex_unlock(&vxlan->sync_lock);
-	return 0;
-
-err_delete_port:
-	mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
-
-unlock:
-	mutex_unlock(&vxlan->sync_lock);
-	return ret;
-}
-
-int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
-{
-	struct mlx5_vxlan_port *vxlanp;
-	bool remove = false;
-	int ret = 0;
-
-	mutex_lock(&vxlan->sync_lock);
-
-	spin_lock_bh(&vxlan->lock);
-	vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
-	if (!vxlanp) {
-		ret = -ENOENT;
-		goto out_unlock;
-	}
-
-	if (atomic_dec_and_test(&vxlanp->refcount)) {
-		hash_del(&vxlanp->hlist);
-		remove = true;
-	}
-
-out_unlock:
-	spin_unlock_bh(&vxlan->lock);
-
-	if (remove) {
-		mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
-		kfree(vxlanp);
-		vxlan->num_ports--;
-	}
-
-	mutex_unlock(&vxlan->sync_lock);
-
-	return ret;
-}
-
-struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
-{
-	struct mlx5_vxlan *vxlan;
-
-	if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev))
-		return ERR_PTR(-ENOTSUPP);
-
-	vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL);
-	if (!vxlan)
-		return ERR_PTR(-ENOMEM);
-
-	vxlan->mdev = mdev;
-	mutex_init(&vxlan->sync_lock);
-	spin_lock_init(&vxlan->lock);
-	hash_init(vxlan->htable);
-
-	/* Hardware adds 4789 by default */
-	mlx5_vxlan_add_port(vxlan, 4789);
-
-	return vxlan;
-}
-
-void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
-{
-	struct mlx5_vxlan_port *vxlanp;
-	struct hlist_node *tmp;
-	int bkt;
-
-	if (!mlx5_vxlan_allowed(vxlan))
-		return;
-
-	/* Lockless since we are the only hash table consumers*/
-	hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
-		hash_del(&vxlanp->hlist);
-		mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port);
-		kfree(vxlanp);
-	}
-
-	kfree(vxlan);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
deleted file mode 100644
index fd874a30c4d0..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __MLX5_VXLAN_H__
-#define __MLX5_VXLAN_H__
-
-#include <linux/mlx5/driver.h>
-
-struct mlx5_vxlan;
-struct mlx5_vxlan_port;
-
-#ifdef CONFIG_MLX5_CORE_EN
-
-static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan)
-{
-	/* not allowed reason is encoded in vxlan pointer as error,
-	 * on mlx5_vxlan_create
-	 */
-	return !IS_ERR_OR_NULL(vxlan);
-}
-
-struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev);
-void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
-int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
-int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
-struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
-
-#else
-
-static inline struct mlx5_vxlan*
-mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-ENOTSUPP); }
-static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
-
-#endif
-
-#endif /* __MLX5_VXLAN_H__ */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index fd0aaa5568fe..54f385cc8811 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -818,6 +818,7 @@ struct mlx5_clock {
 };
 
 struct mlx5_fw_tracer;
+struct mlx5_vxlan;
 
 struct mlx5_core_dev {
 	struct pci_dev	       *pdev;
@@ -850,6 +851,7 @@ struct mlx5_core_dev {
 	atomic_t		num_qps;
 	u32			issi;
 	struct mlx5e_resources  mlx5e_res;
+	struct mlx5_vxlan       *vxlan;
 	struct {
 		struct mlx5_rsvd_gids	reserved_gids;
 		u32			roce_en;
-- 
cgit v1.2.3


From 627448e85c766587f6fdde1ea3886d6615081c77 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Thu, 28 Jun 2018 18:13:33 +0300
Subject: tpm: separate cmd_ready/go_idle from runtime_pm

Fix tpm ptt initialization error:
tpm tpm0: A TPM error (378) occurred get tpm pcr allocation.

We cannot use go_idle cmd_ready commands via runtime_pm handles
as with the introduction of localities this is no longer an optional
feature, while runtime pm can be not enabled.
Though cmd_ready/go_idle provides a power saving, it's also a part of
TPM2 protocol and should be called explicitly.
This patch exposes cmd_read/go_idle via tpm class ops and removes
runtime pm support as it is not used by any driver.

When calling from nested context always use both flags:
TPM_TRANSMIT_UNLOCKED and TPM_TRANSMIT_RAW. Both are needed to resolve
tpm spaces and locality request recursive calls to tpm_transmit().
TPM_TRANSMIT_RAW should never be used standalone as it will fail
on double locking. While TPM_TRANSMIT_UNLOCKED standalone should be
called from non-recursive locked contexts.

New wrappers are added tpm_cmd_ready() and tpm_go_idle() to
streamline tpm_try_transmit code.

tpm_crb no longer needs own power saving functions and can drop using
tpm_pm_suspend/resume.

This patch cannot be really separated from the locality fix.
Fixes: 888d867df441 (tpm: cmd_ready command can be issued only after granting locality)

Cc: stable@vger.kernel.org
Fixes: 888d867df441 (tpm: cmd_ready command can be issued only after granting locality)
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-interface.c |  51 ++++++++++++++++----
 drivers/char/tpm/tpm.h           |  12 ++++-
 drivers/char/tpm/tpm2-space.c    |  16 ++++---
 drivers/char/tpm/tpm_crb.c       | 101 +++++++++++----------------------------
 include/linux/tpm.h              |   2 +
 5 files changed, 90 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index e32f6e85dc6d..31b86e027f9d 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -29,7 +29,6 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
-#include <linux/pm_runtime.h>
 #include <linux/tpm_eventlog.h>
 
 #include "tpm.h"
@@ -369,10 +368,13 @@ err_len:
 	return -EINVAL;
 }
 
-static int tpm_request_locality(struct tpm_chip *chip)
+static int tpm_request_locality(struct tpm_chip *chip, unsigned int flags)
 {
 	int rc;
 
+	if (flags & TPM_TRANSMIT_RAW)
+		return 0;
+
 	if (!chip->ops->request_locality)
 		return 0;
 
@@ -385,10 +387,13 @@ static int tpm_request_locality(struct tpm_chip *chip)
 	return 0;
 }
 
-static void tpm_relinquish_locality(struct tpm_chip *chip)
+static void tpm_relinquish_locality(struct tpm_chip *chip, unsigned int flags)
 {
 	int rc;
 
+	if (flags & TPM_TRANSMIT_RAW)
+		return;
+
 	if (!chip->ops->relinquish_locality)
 		return;
 
@@ -399,6 +404,28 @@ static void tpm_relinquish_locality(struct tpm_chip *chip)
 	chip->locality = -1;
 }
 
+static int tpm_cmd_ready(struct tpm_chip *chip, unsigned int flags)
+{
+	if (flags & TPM_TRANSMIT_RAW)
+		return 0;
+
+	if (!chip->ops->cmd_ready)
+		return 0;
+
+	return chip->ops->cmd_ready(chip);
+}
+
+static int tpm_go_idle(struct tpm_chip *chip, unsigned int flags)
+{
+	if (flags & TPM_TRANSMIT_RAW)
+		return 0;
+
+	if (!chip->ops->go_idle)
+		return 0;
+
+	return chip->ops->go_idle(chip);
+}
+
 static ssize_t tpm_try_transmit(struct tpm_chip *chip,
 				struct tpm_space *space,
 				u8 *buf, size_t bufsiz,
@@ -449,14 +476,15 @@ static ssize_t tpm_try_transmit(struct tpm_chip *chip,
 	/* Store the decision as chip->locality will be changed. */
 	need_locality = chip->locality == -1;
 
-	if (!(flags & TPM_TRANSMIT_RAW) && need_locality) {
-		rc = tpm_request_locality(chip);
+	if (need_locality) {
+		rc = tpm_request_locality(chip, flags);
 		if (rc < 0)
 			goto out_no_locality;
 	}
 
-	if (chip->dev.parent)
-		pm_runtime_get_sync(chip->dev.parent);
+	rc = tpm_cmd_ready(chip, flags);
+	if (rc)
+		goto out;
 
 	rc = tpm2_prepare_space(chip, space, ordinal, buf);
 	if (rc)
@@ -516,13 +544,16 @@ out_recv:
 	}
 
 	rc = tpm2_commit_space(chip, space, ordinal, buf, &len);
+	if (rc)
+		dev_err(&chip->dev, "tpm2_commit_space: error %d\n", rc);
 
 out:
-	if (chip->dev.parent)
-		pm_runtime_put_sync(chip->dev.parent);
+	rc = tpm_go_idle(chip, flags);
+	if (rc)
+		goto out;
 
 	if (need_locality)
-		tpm_relinquish_locality(chip);
+		tpm_relinquish_locality(chip, flags);
 
 out_no_locality:
 	if (chip->ops->clk_enable != NULL)
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 9824cccb2c76..17833ef63f6c 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -512,9 +512,17 @@ extern const struct file_operations tpm_fops;
 extern const struct file_operations tpmrm_fops;
 extern struct idr dev_nums_idr;
 
+/**
+ * enum tpm_transmit_flags
+ *
+ * @TPM_TRANSMIT_UNLOCKED: used to lock sequence of tpm_transmit calls.
+ * @TPM_TRANSMIT_RAW: prevent recursive calls into setup steps
+ *                    (go idle, locality,..). Always use with UNLOCKED
+ *                    as it will fail on double locking.
+ */
 enum tpm_transmit_flags {
-	TPM_TRANSMIT_UNLOCKED	= BIT(0),
-	TPM_TRANSMIT_RAW	= BIT(1),
+	TPM_TRANSMIT_UNLOCKED = BIT(0),
+	TPM_TRANSMIT_RAW      = BIT(1),
 };
 
 ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space,
diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c
index 6122d3276f72..11c85ed8c113 100644
--- a/drivers/char/tpm/tpm2-space.c
+++ b/drivers/char/tpm/tpm2-space.c
@@ -39,7 +39,8 @@ static void tpm2_flush_sessions(struct tpm_chip *chip, struct tpm_space *space)
 	for (i = 0; i < ARRAY_SIZE(space->session_tbl); i++) {
 		if (space->session_tbl[i])
 			tpm2_flush_context_cmd(chip, space->session_tbl[i],
-					       TPM_TRANSMIT_UNLOCKED);
+					       TPM_TRANSMIT_UNLOCKED |
+					       TPM_TRANSMIT_RAW);
 	}
 }
 
@@ -84,7 +85,7 @@ static int tpm2_load_context(struct tpm_chip *chip, u8 *buf,
 	tpm_buf_append(&tbuf, &buf[*offset], body_size);
 
 	rc = tpm_transmit_cmd(chip, NULL, tbuf.data, PAGE_SIZE, 4,
-			      TPM_TRANSMIT_UNLOCKED, NULL);
+			      TPM_TRANSMIT_UNLOCKED | TPM_TRANSMIT_RAW, NULL);
 	if (rc < 0) {
 		dev_warn(&chip->dev, "%s: failed with a system error %d\n",
 			 __func__, rc);
@@ -133,7 +134,7 @@ static int tpm2_save_context(struct tpm_chip *chip, u32 handle, u8 *buf,
 	tpm_buf_append_u32(&tbuf, handle);
 
 	rc = tpm_transmit_cmd(chip, NULL, tbuf.data, PAGE_SIZE, 0,
-			      TPM_TRANSMIT_UNLOCKED, NULL);
+			      TPM_TRANSMIT_UNLOCKED | TPM_TRANSMIT_RAW, NULL);
 	if (rc < 0) {
 		dev_warn(&chip->dev, "%s: failed with a system error %d\n",
 			 __func__, rc);
@@ -170,7 +171,8 @@ static void tpm2_flush_space(struct tpm_chip *chip)
 	for (i = 0; i < ARRAY_SIZE(space->context_tbl); i++)
 		if (space->context_tbl[i] && ~space->context_tbl[i])
 			tpm2_flush_context_cmd(chip, space->context_tbl[i],
-					       TPM_TRANSMIT_UNLOCKED);
+					       TPM_TRANSMIT_UNLOCKED |
+					       TPM_TRANSMIT_RAW);
 
 	tpm2_flush_sessions(chip, space);
 }
@@ -377,7 +379,8 @@ static int tpm2_map_response_header(struct tpm_chip *chip, u32 cc, u8 *rsp,
 
 	return 0;
 out_no_slots:
-	tpm2_flush_context_cmd(chip, phandle, TPM_TRANSMIT_UNLOCKED);
+	tpm2_flush_context_cmd(chip, phandle,
+			       TPM_TRANSMIT_UNLOCKED | TPM_TRANSMIT_RAW);
 	dev_warn(&chip->dev, "%s: out of slots for 0x%08X\n", __func__,
 		 phandle);
 	return -ENOMEM;
@@ -465,7 +468,8 @@ static int tpm2_save_space(struct tpm_chip *chip)
 			return rc;
 
 		tpm2_flush_context_cmd(chip, space->context_tbl[i],
-				       TPM_TRANSMIT_UNLOCKED);
+				       TPM_TRANSMIT_UNLOCKED |
+				       TPM_TRANSMIT_RAW);
 		space->context_tbl[i] = ~0;
 	}
 
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index 34fbc6cb097b..36952ef98f90 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -132,7 +132,7 @@ static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
 }
 
 /**
- * crb_go_idle - request tpm crb device to go the idle state
+ * __crb_go_idle - request tpm crb device to go the idle state
  *
  * @dev:  crb device
  * @priv: crb private data
@@ -147,7 +147,7 @@ static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
  *
  * Return: 0 always
  */
-static int crb_go_idle(struct device *dev, struct crb_priv *priv)
+static int __crb_go_idle(struct device *dev, struct crb_priv *priv)
 {
 	if ((priv->sm == ACPI_TPM2_START_METHOD) ||
 	    (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD) ||
@@ -163,11 +163,20 @@ static int crb_go_idle(struct device *dev, struct crb_priv *priv)
 		dev_warn(dev, "goIdle timed out\n");
 		return -ETIME;
 	}
+
 	return 0;
 }
 
+static int crb_go_idle(struct tpm_chip *chip)
+{
+	struct device *dev = &chip->dev;
+	struct crb_priv *priv = dev_get_drvdata(dev);
+
+	return __crb_go_idle(dev, priv);
+}
+
 /**
- * crb_cmd_ready - request tpm crb device to enter ready state
+ * __crb_cmd_ready - request tpm crb device to enter ready state
  *
  * @dev:  crb device
  * @priv: crb private data
@@ -181,7 +190,7 @@ static int crb_go_idle(struct device *dev, struct crb_priv *priv)
  *
  * Return: 0 on success -ETIME on timeout;
  */
-static int crb_cmd_ready(struct device *dev, struct crb_priv *priv)
+static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv)
 {
 	if ((priv->sm == ACPI_TPM2_START_METHOD) ||
 	    (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD) ||
@@ -200,6 +209,14 @@ static int crb_cmd_ready(struct device *dev, struct crb_priv *priv)
 	return 0;
 }
 
+static int crb_cmd_ready(struct tpm_chip *chip)
+{
+	struct device *dev = &chip->dev;
+	struct crb_priv *priv = dev_get_drvdata(dev);
+
+	return __crb_cmd_ready(dev, priv);
+}
+
 static int __crb_request_locality(struct device *dev,
 				  struct crb_priv *priv, int loc)
 {
@@ -401,6 +418,8 @@ static const struct tpm_class_ops tpm_crb = {
 	.send = crb_send,
 	.cancel = crb_cancel,
 	.req_canceled = crb_req_canceled,
+	.go_idle  = crb_go_idle,
+	.cmd_ready = crb_cmd_ready,
 	.request_locality = crb_request_locality,
 	.relinquish_locality = crb_relinquish_locality,
 	.req_complete_mask = CRB_DRV_STS_COMPLETE,
@@ -520,7 +539,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 	 * PTT HW bug w/a: wake up the device to access
 	 * possibly not retained registers.
 	 */
-	ret = crb_cmd_ready(dev, priv);
+	ret = __crb_cmd_ready(dev, priv);
 	if (ret)
 		goto out_relinquish_locality;
 
@@ -565,7 +584,7 @@ out:
 	if (!ret)
 		priv->cmd_size = cmd_size;
 
-	crb_go_idle(dev, priv);
+	__crb_go_idle(dev, priv);
 
 out_relinquish_locality:
 
@@ -628,32 +647,7 @@ static int crb_acpi_add(struct acpi_device *device)
 	chip->acpi_dev_handle = device->handle;
 	chip->flags = TPM_CHIP_FLAG_TPM2;
 
-	rc = __crb_request_locality(dev, priv, 0);
-	if (rc)
-		return rc;
-
-	rc  = crb_cmd_ready(dev, priv);
-	if (rc)
-		goto out;
-
-	pm_runtime_get_noresume(dev);
-	pm_runtime_set_active(dev);
-	pm_runtime_enable(dev);
-
-	rc = tpm_chip_register(chip);
-	if (rc) {
-		crb_go_idle(dev, priv);
-		pm_runtime_put_noidle(dev);
-		pm_runtime_disable(dev);
-		goto out;
-	}
-
-	pm_runtime_put_sync(dev);
-
-out:
-	__crb_relinquish_locality(dev, priv, 0);
-
-	return rc;
+	return tpm_chip_register(chip);
 }
 
 static int crb_acpi_remove(struct acpi_device *device)
@@ -663,52 +657,11 @@ static int crb_acpi_remove(struct acpi_device *device)
 
 	tpm_chip_unregister(chip);
 
-	pm_runtime_disable(dev);
-
 	return 0;
 }
 
-static int __maybe_unused crb_pm_runtime_suspend(struct device *dev)
-{
-	struct tpm_chip *chip = dev_get_drvdata(dev);
-	struct crb_priv *priv = dev_get_drvdata(&chip->dev);
-
-	return crb_go_idle(dev, priv);
-}
-
-static int __maybe_unused crb_pm_runtime_resume(struct device *dev)
-{
-	struct tpm_chip *chip = dev_get_drvdata(dev);
-	struct crb_priv *priv = dev_get_drvdata(&chip->dev);
-
-	return crb_cmd_ready(dev, priv);
-}
-
-static int __maybe_unused crb_pm_suspend(struct device *dev)
-{
-	int ret;
-
-	ret = tpm_pm_suspend(dev);
-	if (ret)
-		return ret;
-
-	return crb_pm_runtime_suspend(dev);
-}
-
-static int __maybe_unused crb_pm_resume(struct device *dev)
-{
-	int ret;
-
-	ret = crb_pm_runtime_resume(dev);
-	if (ret)
-		return ret;
-
-	return tpm_pm_resume(dev);
-}
-
 static const struct dev_pm_ops crb_pm = {
-	SET_SYSTEM_SLEEP_PM_OPS(crb_pm_suspend, crb_pm_resume)
-	SET_RUNTIME_PM_OPS(crb_pm_runtime_suspend, crb_pm_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(tpm_pm_suspend, tpm_pm_resume)
 };
 
 static const struct acpi_device_id crb_device_ids[] = {
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 06639fb6ab85..8eb5e5ebe136 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -43,6 +43,8 @@ struct tpm_class_ops {
 	u8 (*status) (struct tpm_chip *chip);
 	bool (*update_timeouts)(struct tpm_chip *chip,
 				unsigned long *timeout_cap);
+	int (*go_idle)(struct tpm_chip *chip);
+	int (*cmd_ready)(struct tpm_chip *chip);
 	int (*request_locality)(struct tpm_chip *chip, int loc);
 	int (*relinquish_locality)(struct tpm_chip *chip, int loc);
 	void (*clk_enable)(struct tpm_chip *chip, bool value);
-- 
cgit v1.2.3


From aaae81536351f831fe6719e5b4e6afbadc5e5f85 Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.vnet.ibm.com>
Date: Tue, 26 Jun 2018 15:09:30 -0400
Subject: tpm: Implement tpm_default_chip() to find a TPM chip

Implement tpm_default_chip() to find the first TPM chip and return it to
the caller while increasing the reference count on its device. This
function can be used by other subsystems, such as IMA, to find the system's
default TPM chip and use it for all subsequent TPM operations.

Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-chip.c | 27 +++++++++++++++++++++++++++
 include/linux/tpm.h         |  5 +++++
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 242b716aed5e..f551061262c9 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -80,6 +80,33 @@ void tpm_put_ops(struct tpm_chip *chip)
 }
 EXPORT_SYMBOL_GPL(tpm_put_ops);
 
+/**
+ * tpm_default_chip() - find a TPM chip and get a reference to it
+ */
+struct tpm_chip *tpm_default_chip(void)
+{
+	struct tpm_chip *chip, *res = NULL;
+	int chip_num = 0;
+	int chip_prev;
+
+	mutex_lock(&idr_lock);
+
+	do {
+		chip_prev = chip_num;
+		chip = idr_get_next(&dev_nums_idr, &chip_num);
+		if (chip) {
+			get_device(&chip->dev);
+			res = chip;
+			break;
+		}
+	} while (chip_prev != chip_num);
+
+	mutex_unlock(&idr_lock);
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(tpm_default_chip);
+
 /**
  * tpm_find_get_ops() - find and reserve a TPM chip
  * @chip:	a &struct tpm_chip instance, %NULL for the default chip
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 8eb5e5ebe136..4609b94142d4 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -63,6 +63,7 @@ extern int tpm_seal_trusted(struct tpm_chip *chip,
 extern int tpm_unseal_trusted(struct tpm_chip *chip,
 			      struct trusted_key_payload *payload,
 			      struct trusted_key_options *options);
+extern struct tpm_chip *tpm_default_chip(void);
 #else
 static inline int tpm_is_tpm2(struct tpm_chip *chip)
 {
@@ -98,5 +99,9 @@ static inline int tpm_unseal_trusted(struct tpm_chip *chip,
 {
 	return -ENODEV;
 }
+static inline struct tpm_chip *tpm_default_chip(void)
+{
+	return NULL;
+}
 #endif
 #endif
-- 
cgit v1.2.3


From e15d54d5009688ccb2a5312f3b70d631615329c9 Mon Sep 17 00:00:00 2001
From: Yunlei He <heyunlei@huawei.com>
Date: Wed, 27 Jun 2018 14:46:21 +0800
Subject: f2fs: Allocate and stat mem used by free nid bitmap more accurately

This patch used f2fs_bitmap_size macro to calculate mem used by
free nid bitmap, and stat used mem including aligned part.

Signed-off-by: Yunlei He <heyunlei@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c         | 3 ++-
 fs/f2fs/node.c          | 2 +-
 include/linux/f2fs_fs.h | 5 -----
 3 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 2d65e77ae5cf..214a968962a1 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -215,7 +215,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	si->base_mem += sizeof(struct f2fs_nm_info);
 	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
 	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
-	si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
+	si->base_mem += NM_I(sbi)->nat_blocks *
+				f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
 	si->base_mem += NM_I(sbi)->nat_blocks / 8;
 	si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
 
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 8e58990b9120..142b34130749 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2797,7 +2797,7 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
 
 	for (i = 0; i < nm_i->nat_blocks; i++) {
 		nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
-				NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL);
+			f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
 		if (!nm_i->free_nid_bitmap)
 			return -ENOMEM;
 	}
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index aa5db8b5521a..f70f8ac9c4f4 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -304,11 +304,6 @@ struct f2fs_node {
  * For NAT entries
  */
 #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
-#define NAT_ENTRY_BITMAP_SIZE	((NAT_ENTRY_PER_BLOCK + 7) / 8)
-#define NAT_ENTRY_BITMAP_SIZE_ALIGNED				\
-	((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) /		\
-	BITS_PER_LONG * BITS_PER_LONG)
-
 
 struct f2fs_nat_entry {
 	__u8 version;		/* latest version of cached nat entry */
-- 
cgit v1.2.3


From 5cbf777cfdf6e5a7b7149006e4881a255da78fdd Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 27 Jul 2018 16:37:28 +0800
Subject: route: add support for directed broadcast forwarding

This patch implements the feature described in rfc1812#section-5.3.5.2
and rfc2644. It allows the router to forward directed broadcast when
sysctl bc_forwarding is enabled.

Note that this feature could be done by iptables -j TEE, but it would
cause some problems:
  - target TEE's gateway param has to be set with a specific address,
    and it's not flexible especially when the route wants forward all
    directed broadcasts.
  - this duplicates the directed broadcasts so this may cause side
    effects to applications.

Besides, to keep consistent with other os router like BSD, it's also
necessary to implement it in the route rx path.

Note that route cache needs to be flushed when bc_forwarding is
changed.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h   |  1 +
 include/uapi/linux/ip.h      |  1 +
 include/uapi/linux/netconf.h |  1 +
 net/ipv4/devinet.c           | 11 +++++++++++
 net/ipv4/route.c             |  6 +++++-
 5 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 27650f1bff3d..c759d1cbcedd 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -93,6 +93,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 
 #define IN_DEV_FORWARD(in_dev)		IN_DEV_CONF_GET((in_dev), FORWARDING)
 #define IN_DEV_MFORWARD(in_dev)		IN_DEV_ANDCONF((in_dev), MC_FORWARDING)
+#define IN_DEV_BFORWARD(in_dev)		IN_DEV_ANDCONF((in_dev), BC_FORWARDING)
 #define IN_DEV_RPFILTER(in_dev)		IN_DEV_MAXCONF((in_dev), RP_FILTER)
 #define IN_DEV_SRC_VMARK(in_dev)    	IN_DEV_ORCONF((in_dev), SRC_VMARK)
 #define IN_DEV_SOURCE_ROUTE(in_dev)	IN_DEV_ANDCONF((in_dev), \
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index b24a742beae5..e42d13b55cf3 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -168,6 +168,7 @@ enum
 	IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN,
 	IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
 	IPV4_DEVCONF_DROP_GRATUITOUS_ARP,
+	IPV4_DEVCONF_BC_FORWARDING,
 	__IPV4_DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h
index c84fcdfca862..fac4edd55379 100644
--- a/include/uapi/linux/netconf.h
+++ b/include/uapi/linux/netconf.h
@@ -18,6 +18,7 @@ enum {
 	NETCONFA_PROXY_NEIGH,
 	NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
 	NETCONFA_INPUT,
+	NETCONFA_BC_FORWARDING,
 	__NETCONFA_MAX
 };
 #define NETCONFA_MAX	(__NETCONFA_MAX - 1)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d7585ab1a77a..ea4bd8a52422 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1827,6 +1827,8 @@ static int inet_netconf_msgsize_devconf(int type)
 		size += nla_total_size(4);
 	if (all || type == NETCONFA_MC_FORWARDING)
 		size += nla_total_size(4);
+	if (all || type == NETCONFA_BC_FORWARDING)
+		size += nla_total_size(4);
 	if (all || type == NETCONFA_PROXY_NEIGH)
 		size += nla_total_size(4);
 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
@@ -1873,6 +1875,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
 		goto nla_put_failure;
+	if ((all || type == NETCONFA_BC_FORWARDING) &&
+	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
+			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
+		goto nla_put_failure;
 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
@@ -2143,6 +2149,10 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
 			if ((new_value == 0) && (old_value != 0))
 				rt_cache_flush(net);
 
+		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
+		    new_value != old_value)
+			rt_cache_flush(net);
+
 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
 		    new_value != old_value) {
 			ifindex = devinet_conf_ifindex(net, cnf);
@@ -2259,6 +2269,7 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
 					     devinet_sysctl_forward),
 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
+		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
 
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1df6e97106d7..b678466da451 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1996,8 +1996,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto no_route;
 	}
 
-	if (res->type == RTN_BROADCAST)
+	if (res->type == RTN_BROADCAST) {
+		if (IN_DEV_BFORWARD(in_dev))
+			goto make_route;
 		goto brd_input;
+	}
 
 	if (res->type == RTN_LOCAL) {
 		err = fib_validate_source(skb, saddr, daddr, tos,
@@ -2014,6 +2017,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (res->type != RTN_UNICAST)
 		goto martian_destination;
 
+make_route:
 	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
 out:	return err;
 
-- 
cgit v1.2.3


From 7a4c53bee3324ac00bf964aa2f82d15d279e86e4 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 27 Jul 2018 13:43:23 -0700
Subject: net: report invalid mtu value via netlink extack

If an invalid MTU value is set through rtnetlink return extra error
information instead of putting message in kernel log. For other cases
where there is no visible API, keep the error report in the log.

Example:
	# ip li set dev enp12s0 mtu 10000
	Error: mtu greater than device maximum.

	# ifconfig enp12s0 mtu 10000
	SIOCSIFMTU: Invalid argument
	# dmesg | tail -1
	[ 2047.795467] enp12s0: mtu greater than device maximum

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 23 +++++++++++++++++------
 net/core/rtnetlink.c      |  2 +-
 3 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c1295c7a452e..9c917467a2c7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3546,6 +3546,8 @@ int dev_set_alias(struct net_device *, const char *, size_t);
 int dev_get_alias(const struct net_device *, char *, size_t);
 int dev_change_net_namespace(struct net_device *, struct net *, const char *);
 int __dev_set_mtu(struct net_device *, int);
+int dev_set_mtu_ext(struct net_device *dev, int mtu,
+		    struct netlink_ext_ack *extack);
 int dev_set_mtu(struct net_device *, int);
 int dev_change_tx_queue_len(struct net_device *, unsigned long);
 void dev_set_group(struct net_device *, int);
diff --git a/net/core/dev.c b/net/core/dev.c
index 87c42c8249ae..89031b5fef9f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7523,13 +7523,15 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu)
 EXPORT_SYMBOL(__dev_set_mtu);
 
 /**
- *	dev_set_mtu - Change maximum transfer unit
+ *	dev_set_mtu_ext - Change maximum transfer unit
  *	@dev: device
  *	@new_mtu: new transfer unit
+ *	@extack: netlink extended ack
  *
  *	Change the maximum transfer size of the network device.
  */
-int dev_set_mtu(struct net_device *dev, int new_mtu)
+int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
+		    struct netlink_ext_ack *extack)
 {
 	int err, orig_mtu;
 
@@ -7538,14 +7540,12 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 
 	/* MTU must be positive, and in range */
 	if (new_mtu < 0 || new_mtu < dev->min_mtu) {
-		net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n",
-				    dev->name, new_mtu, dev->min_mtu);
+		NL_SET_ERR_MSG(extack, "mtu less than device minimum");
 		return -EINVAL;
 	}
 
 	if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
-		net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n",
-				    dev->name, new_mtu, dev->max_mtu);
+		NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
 		return -EINVAL;
 	}
 
@@ -7573,6 +7573,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	}
 	return err;
 }
+
+int dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+	struct netlink_ext_ack extack;
+	int err;
+
+	err = dev_set_mtu_ext(dev, new_mtu, &extack);
+	if (err)
+		net_err_ratelimited("%s: %s\n", dev->name, extack._msg);
+	return err;
+}
 EXPORT_SYMBOL(dev_set_mtu);
 
 /**
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 510d4f765a13..24431e578310 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2382,7 +2382,7 @@ static int do_setlink(const struct sk_buff *skb,
 	}
 
 	if (tb[IFLA_MTU]) {
-		err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+		err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack);
 		if (err < 0)
 			goto errout;
 		status |= DO_SETLINK_MODIFIED;
-- 
cgit v1.2.3


From 0969a204bfdaf7470d2666736b90a8595ae671e9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 27 Jul 2018 17:47:01 +0300
Subject: gpiolib: Use GPIOD_OUT_{LOW,HIGH} macros in open drain ones

There should not be anything more than stated by the name of newly
introduced constants, i.e.
	GPIOD_OUT_LOW_OPEN_DRAIN == GPIOD_OUT_LOW + open drain
and nothing more.

Make it better to read and slightly more robust by using GPIOD_OUT_LOW
and GPIOD_OUT_HIGH constants with open drain flag.

No functional change intended.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index e8aaf34dd65d..21ddbe440030 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -41,11 +41,8 @@ enum gpiod_flags {
 	GPIOD_OUT_LOW	= GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT,
 	GPIOD_OUT_HIGH	= GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT |
 			  GPIOD_FLAGS_BIT_DIR_VAL,
-	GPIOD_OUT_LOW_OPEN_DRAIN = GPIOD_FLAGS_BIT_DIR_SET |
-			  GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_OPEN_DRAIN,
-	GPIOD_OUT_HIGH_OPEN_DRAIN = GPIOD_FLAGS_BIT_DIR_SET |
-			  GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_DIR_VAL |
-			  GPIOD_FLAGS_BIT_OPEN_DRAIN,
+	GPIOD_OUT_LOW_OPEN_DRAIN = GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_OPEN_DRAIN,
+	GPIOD_OUT_HIGH_OPEN_DRAIN = GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_OPEN_DRAIN,
 };
 
 #ifdef CONFIG_GPIOLIB
-- 
cgit v1.2.3


From f39b948dbeaf9da0dfd17e68704f38fe4237788f Mon Sep 17 00:00:00 2001
From: Huang Shijie <sjhuang@iluvatar.ai>
Date: Thu, 26 Jul 2018 14:45:53 +0800
Subject: dmaengine: add a new helper dmaenginem_async_device_register

This patch adds the dmaenginem_async_device_register for DMA code.
Use the Devres to call the release for the DMA engine driver.

Signed-off-by: Huang Shijie <sjhuang@iluvatar.ai>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/driver-model/devres.txt |  1 +
 drivers/dma/dmaengine.c               | 35 +++++++++++++++++++++++++++++++++++
 include/linux/dmaengine.h             |  1 +
 3 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 7c1bb3d0c222..43681ca0837f 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -240,6 +240,7 @@ CLOCK
   devm_of_clk_add_hw_provider()
 
 DMA
+  dmaenginem_async_device_register()
   dmam_alloc_coherent()
   dmam_alloc_attrs()
   dmam_declare_coherent_memory()
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 504420f213ff..272bed6c8ba7 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1141,6 +1141,41 @@ void dma_async_device_unregister(struct dma_device *device)
 }
 EXPORT_SYMBOL(dma_async_device_unregister);
 
+static void dmam_device_release(struct device *dev, void *res)
+{
+	struct dma_device *device;
+
+	device = *(struct dma_device **)res;
+	dma_async_device_unregister(device);
+}
+
+/**
+ * dmaenginem_async_device_register - registers DMA devices found
+ * @device: &dma_device
+ *
+ * The operation is managed and will be undone on driver detach.
+ */
+int dmaenginem_async_device_register(struct dma_device *device)
+{
+	void *p;
+	int ret;
+
+	p = devres_alloc(dmam_device_release, sizeof(void *), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	ret = dma_async_device_register(device);
+	if (!ret) {
+		*(struct dma_device **)p = device;
+		devres_add(device->dev, p);
+	} else {
+		devres_free(p);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(dmaenginem_async_device_register);
+
 struct dmaengine_unmap_pool {
 	struct kmem_cache *cache;
 	const char *name;
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c8c3a7a93802..d49ec5c31944 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -1406,6 +1406,7 @@ static inline int dmaengine_desc_free(struct dma_async_tx_descriptor *desc)
 /* --- DMA device --- */
 
 int dma_async_device_register(struct dma_device *device);
+int dmaenginem_async_device_register(struct dma_device *device);
 void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 struct dma_chan *dma_get_slave_channel(struct dma_chan *chan);
-- 
cgit v1.2.3


From 51c23b47e6b8590ea7a6a6776ffb21810ece73bf Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 13 Jul 2018 14:54:45 +0200
Subject: netfilter: nf_osf: add nf_osf_find()

This new function returns the OS genre as a string. Plan is to use to
from the new nft_osf extension.

Note that this doesn't yet support ttl options, but it could be easily
extended to do so.

Tested-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_osf.h |  9 +++++++++
 net/netfilter/nf_osf.c           | 30 ++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h
index 0e114c492fb8..aee460fcbd31 100644
--- a/include/linux/netfilter/nf_osf.h
+++ b/include/linux/netfilter/nf_osf.h
@@ -1,3 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NFOSF_H
+#define _NFOSF_H
+
 #include <uapi/linux/netfilter/nf_osf.h>
 
 /* Initial window size option state machine: multiple of mss, mtu or
@@ -31,3 +35,8 @@ bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
 		  int hooknum, struct net_device *in, struct net_device *out,
 		  const struct nf_osf_info *info, struct net *net,
 		  const struct list_head *nf_osf_fingers);
+
+const char *nf_osf_find(const struct sk_buff *skb,
+                        const struct list_head *nf_osf_fingers);
+
+#endif /* _NFOSF_H */
diff --git a/net/netfilter/nf_osf.c b/net/netfilter/nf_osf.c
index b44d62d5d9a9..f4c75e982902 100644
--- a/net/netfilter/nf_osf.c
+++ b/net/netfilter/nf_osf.c
@@ -249,4 +249,34 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family,
 }
 EXPORT_SYMBOL_GPL(nf_osf_match);
 
+const char *nf_osf_find(const struct sk_buff *skb,
+			const struct list_head *nf_osf_fingers)
+{
+	const struct iphdr *ip = ip_hdr(skb);
+	const struct nf_osf_user_finger *f;
+	unsigned char opts[MAX_IPOPTLEN];
+	const struct nf_osf_finger *kf;
+	struct nf_osf_hdr_ctx ctx;
+	const struct tcphdr *tcp;
+	const char *genre = NULL;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+	if (!tcp)
+		return false;
+
+	list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
+		f = &kf->finger;
+		if (!nf_osf_match_one(skb, f, -1, &ctx))
+			continue;
+
+		genre = f->genre;
+		break;
+	}
+
+	return genre;
+}
+EXPORT_SYMBOL_GPL(nf_osf_find);
+
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From f609e7a0f3956e3b03af735b83600a4eef2a04ec Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Thu, 7 Jun 2018 15:55:25 -0400
Subject: media: sii9234: remove unused header

The include/linux/platform_data/media/sii9234.h header file is unused,
let's remove it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/linux/platform_data/media/sii9234.h | 24 ------------------------
 1 file changed, 24 deletions(-)
 delete mode 100644 include/linux/platform_data/media/sii9234.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/media/sii9234.h b/include/linux/platform_data/media/sii9234.h
deleted file mode 100644
index 6a4a809fe9a3..000000000000
--- a/include/linux/platform_data/media/sii9234.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Driver header for SII9234 MHL converter chip.
- *
- * Copyright (c) 2011 Samsung Electronics, Co. Ltd
- * Contact: Tomasz Stanislawski <t.stanislaws@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef SII9234_H
-#define SII9234_H
-
-/**
- * @gpio_n_reset: GPIO driving nRESET pin
- */
-
-struct sii9234_platform_data {
-	int gpio_n_reset;
-};
-
-#endif /* SII9234_H */
-- 
cgit v1.2.3


From 489cae632fc04927e8ef36ac8d8847193a41df3b Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 24 Jul 2018 10:33:19 -0600
Subject: include: Move ascii85 functions from i915 to linux/ascii85.h

The i915 DRM driver very cleverly used ascii85 encoding for their
GPU state file. Move the encode functions to a general header file to
support other drivers that might be interested in the same
functionality.

v4: Make the return value const char * as suggested by Chris Wilson
v3: Fix error_puts -> err_puts pointed out by the 01.org bot
v2: Update API to be cleaner for the caller as suggested by Chris Wilson

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 34 ++++---------------------------
 include/linux/ascii85.h               | 38 +++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 30 deletions(-)
 create mode 100644 include/linux/ascii85.h

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8c81cf3aa182..f7f2aa71d8d9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -31,6 +31,7 @@
 #include <linux/stop_machine.h>
 #include <linux/zlib.h>
 #include <drm/drm_print.h>
+#include <linux/ascii85.h>
 
 #include "i915_gpu_error.h"
 #include "i915_drv.h"
@@ -517,35 +518,12 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
-static int
-ascii85_encode_len(int len)
-{
-	return DIV_ROUND_UP(len, 4);
-}
-
-static bool
-ascii85_encode(u32 in, char *out)
-{
-	int i;
-
-	if (in == 0)
-		return false;
-
-	out[5] = '\0';
-	for (i = 5; i--; ) {
-		out[i] = '!' + in % 85;
-		in /= 85;
-	}
-
-	return true;
-}
-
 static void print_error_obj(struct drm_i915_error_state_buf *m,
 			    struct intel_engine_cs *engine,
 			    const char *name,
 			    struct drm_i915_error_object *obj)
 {
-	char out[6];
+	char out[ASCII85_BUFSZ];
 	int page;
 
 	if (!obj)
@@ -567,12 +545,8 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
 			len -= obj->unused;
 		len = ascii85_encode_len(len);
 
-		for (i = 0; i < len; i++) {
-			if (ascii85_encode(obj->pages[page][i], out))
-				err_puts(m, out);
-			else
-				err_puts(m, "z");
-		}
+		for (i = 0; i < len; i++)
+			err_puts(m, ascii85_encode(obj->pages[page][i], out));
 	}
 	err_puts(m, "\n");
 }
diff --git a/include/linux/ascii85.h b/include/linux/ascii85.h
new file mode 100644
index 000000000000..4cc40201273e
--- /dev/null
+++ b/include/linux/ascii85.h
@@ -0,0 +1,38 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2008 Intel Corporation
+ * Copyright (c) 2018 The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _ASCII85_H_
+#define _ASCII85_H_
+
+#include <linux/kernel.h>
+
+#define ASCII85_BUFSZ 6
+
+static inline long
+ascii85_encode_len(long len)
+{
+	return DIV_ROUND_UP(len, 4);
+}
+
+static inline const char *
+ascii85_encode(u32 in, char *out)
+{
+	int i;
+
+	if (in == 0)
+		return "z";
+
+	out[5] = '\0';
+	for (i = 5; i--; ) {
+		out[i] = '!' + in % 85;
+		in /= 85;
+	}
+
+	return out;
+}
+
+#endif
-- 
cgit v1.2.3


From a5ec5a7bfd1f28d1905499641c9f589be36808c1 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 27 Jul 2018 13:47:02 -0700
Subject: ata: ahci: Support state with min power but Partial low power state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently when min_power policy is selected, the partial low power state
is not entered and link will try aggressively enter to only slumber state.
Add a new policy which still enable DEVSLP but also try to enter partial
low power state. This policy is presented as "min_power_with_partial".

For information the difference between partial and slumber
Partial – PHY logic is powered up, and in a reduced power state. The link
PM exit latency to active state maximum is 10 ns.
Slumber – PHY logic is powered up, and in a reduced power state. The link
PM exit latency to active state maximum is 10 ms.
Devslp – PHY logic is powered down. The link PM exit latency from this
state to active state maximum is 20 ms, unless otherwise specified by
DETO.

Suggested-and-reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libahci.c     | 5 ++++-
 drivers/ata/libata-core.c | 1 +
 drivers/ata/libata-scsi.c | 1 +
 include/linux/libata.h    | 3 ++-
 4 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 5038cee7d982..f79dce613e54 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -800,6 +800,8 @@ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 			cmd |= PORT_CMD_ALPE;
 			if (policy == ATA_LPM_MIN_POWER)
 				cmd |= PORT_CMD_ASP;
+			else if (policy == ATA_LPM_MIN_POWER_WITH_PARTIAL)
+				cmd &= ~PORT_CMD_ASP;
 
 			/* write out new cmd value */
 			writel(cmd, port_mmio + PORT_CMD);
@@ -810,7 +812,8 @@ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 	if ((hpriv->cap2 & HOST_CAP2_SDS) &&
 	    (hpriv->cap2 & HOST_CAP2_SADM) &&
 	    (link->device->flags & ATA_DFLAG_DEVSLP)) {
-		if (policy == ATA_LPM_MIN_POWER)
+		if (policy == ATA_LPM_MIN_POWER ||
+		    policy == ATA_LPM_MIN_POWER_WITH_PARTIAL)
 			ahci_set_aggressive_devslp(ap, true);
 		else
 			ahci_set_aggressive_devslp(ap, false);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 1e28d054851f..f1b1be17383b 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3967,6 +3967,7 @@ int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 		scontrol |= (0x6 << 8);
 		break;
 	case ATA_LPM_MED_POWER_WITH_DIPM:
+	case ATA_LPM_MIN_POWER_WITH_PARTIAL:
 	case ATA_LPM_MIN_POWER:
 		if (ata_link_nr_enabled(link) > 0)
 			/* no restrictions on LPM transitions */
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 980190c9eb48..63c25f1499c3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -110,6 +110,7 @@ static const char *ata_lpm_policy_names[] = {
 	[ATA_LPM_MAX_POWER]		= "max_performance",
 	[ATA_LPM_MED_POWER]		= "medium_power",
 	[ATA_LPM_MED_POWER_WITH_DIPM]	= "med_power_with_dipm",
+	[ATA_LPM_MIN_POWER_WITH_PARTIAL] = "min_power_with_partial",
 	[ATA_LPM_MIN_POWER]		= "min_power",
 };
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index aa8583655a18..80c2bd202367 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -522,7 +522,8 @@ enum ata_lpm_policy {
 	ATA_LPM_MAX_POWER,
 	ATA_LPM_MED_POWER,
 	ATA_LPM_MED_POWER_WITH_DIPM, /* Med power + DIPM as win IRST does */
-	ATA_LPM_MIN_POWER,
+	ATA_LPM_MIN_POWER_WITH_PARTIAL, /* Min Power + partial and slumber */
+	ATA_LPM_MIN_POWER, /* Min power + no partial (slumber only) */
 };
 
 enum ata_lpm_hints {
-- 
cgit v1.2.3


From c454edc21b12dd7d416de6c81555e87aaec9685c Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Mon, 30 Jul 2018 10:10:01 -0400
Subject: block: don't account for split bio's size in cgroup stats

We need to check in blkcg_bio_issue_check if the bio is flagged as
QUEUE_ENTERED, because if it is then we've already accounted for the
size of the IO in the cgroup stats.  We can still however account for
the extra IO since it'll be another request.

Reported-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 3bed5e02a873..f7b910768306 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -769,8 +769,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 
 	if (!throtl) {
 		blkg = blkg ?: q->root_blkg;
-		blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
-				bio->bi_iter.bi_size);
+		/*
+		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
+		 * is a split bio and we would have already accounted for the
+		 * size of the bio.
+		 */
+		if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
+			blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
+					bio->bi_iter.bi_size);
 		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
 	}
 
-- 
cgit v1.2.3


From ddd0bc756983dc4d19000a4fe021b4c7f9d59aab Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 30 Jul 2018 00:15:31 +0300
Subject: block: move ref_tag calculation func to the block layer

Currently this function is implemented in the scsi layer, but it's
actual place should be the block layer since T10-PI is a general
data integrity feature that is used in the nvme protocol as well.

Suggested-by: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/infiniband/ulp/iser/iser_memory.c |  2 +-
 drivers/nvme/host/core.c                  |  3 +--
 drivers/scsi/mpt3sas/mpt3sas_scsih.c      |  2 +-
 drivers/scsi/sd_dif.c                     |  4 ++--
 include/linux/t10-pi.h                    | 10 ++++++++++
 include/scsi/scsi_cmnd.h                  |  7 +------
 6 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index ca844a926e6a..130bf163f066 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -311,7 +311,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
 {
 	domain->sig_type = IB_SIG_TYPE_T10_DIF;
 	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
-	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
+	domain->sig.dif.ref_tag = t10_pi_ref_tag(sc->request);
 	/*
 	 * At the moment we hard code those, but in the future
 	 * we will take them from sc.
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e77e6418a21c..16c8b86fe95d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -611,8 +611,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 		case NVME_NS_DPS_PI_TYPE2:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD |
 					NVME_RW_PRINFO_PRCHK_REF;
-			cmnd->rw.reftag = cpu_to_le32(
-					nvme_block_nr(ns, blk_rq_pos(req)));
+			cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
 			break;
 		}
 	}
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index b8d131a455d0..dd738ae5c75b 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -4568,7 +4568,7 @@ _scsih_setup_eedp(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd,
 		    MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG |
 		    MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
 		mpi_request->CDB.EEDP32.PrimaryReferenceTag =
-		    cpu_to_be32(scsi_prot_ref_tag(scmd));
+		    cpu_to_be32(t10_pi_ref_tag(scmd->request));
 		break;
 
 	case SCSI_PROT_DIF_TYPE3:
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 9035380c0dda..d8de43d359ac 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -124,7 +124,7 @@ void sd_dif_prepare(struct scsi_cmnd *scmd)
 	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION)
 		return;
 
-	phys = scsi_prot_ref_tag(scmd);
+	phys = t10_pi_ref_tag(scmd->request);
 
 	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
@@ -176,7 +176,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 		return;
 
 	intervals = good_bytes / scsi_prot_interval(scmd);
-	phys = scsi_prot_ref_tag(scmd);
+	phys = t10_pi_ref_tag(scmd->request);
 
 	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c6aa8a3c42ed..c40511f4e63d 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -37,6 +37,16 @@ struct t10_pi_tuple {
 #define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
 #define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
 
+static inline u32 t10_pi_ref_tag(struct request *rq)
+{
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	return blk_rq_pos(rq) >>
+		(rq->q->integrity.interval_exp - 9) & 0xffffffff;
+#else
+	return -1U;
+#endif
+}
+
 extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index aaf1e971c6a3..cae229b5395c 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -4,6 +4,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
+#include <linux/t10-pi.h>
 #include <linux/list.h>
 #include <linux/types.h>
 #include <linux/timer.h>
@@ -313,12 +314,6 @@ static inline unsigned int scsi_prot_interval(struct scsi_cmnd *scmd)
 	return scmd->device->sector_size;
 }
 
-static inline u32 scsi_prot_ref_tag(struct scsi_cmnd *scmd)
-{
-	return blk_rq_pos(scmd->request) >>
-		(ilog2(scsi_prot_interval(scmd)) - 9) & 0xffffffff;
-}
-
 static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd)
 {
 	return cmd->prot_sdb ? cmd->prot_sdb->table.nents : 0;
-- 
cgit v1.2.3


From 10c41ddd61323b27b447bc8e18296ac6c06107ad Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 30 Jul 2018 00:15:32 +0300
Subject: block: move dif_prepare/dif_complete functions to block layer

Currently these functions are implemented in the scsi layer, but their
actual place should be the block layer since T10-PI is a general data
integrity feature that is used in the nvme protocol as well. Also, use
the tuple size from the integrity profile since it may vary between
integrity types.

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/t10-pi.c         | 110 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/sd.c      |   8 ++--
 drivers/scsi/sd.h      |   9 ----
 drivers/scsi/sd_dif.c  | 113 -------------------------------------------------
 include/linux/t10-pi.h |   3 ++
 5 files changed, 118 insertions(+), 125 deletions(-)

(limited to 'include/linux')

diff --git a/block/t10-pi.c b/block/t10-pi.c
index a98db384048f..62aed77d0bb9 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -184,3 +184,113 @@ const struct blk_integrity_profile t10_pi_type3_ip = {
 	.verify_fn		= t10_pi_type3_verify_ip,
 };
 EXPORT_SYMBOL(t10_pi_type3_ip);
+
+/**
+ * t10_pi_prepare - prepare PI prior submitting request to device
+ * @rq:              request with PI that should be prepared
+ * @protection_type: PI type (Type 1/Type 2/Type 3)
+ *
+ * For Type 1/Type 2, the virtual start sector is the one that was
+ * originally submitted by the block layer for the ref_tag usage. Due to
+ * partitioning, MD/DM cloning, etc. the actual physical start sector is
+ * likely to be different. Remap protection information to match the
+ * physical LBA.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void t10_pi_prepare(struct request *rq, u8 protection_type)
+{
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u32 ref_tag = t10_pi_ref_tag(rq);
+	struct bio *bio;
+
+	if (protection_type == T10_PI_TYPE3_PROTECTION)
+		return;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u32 virt = bip_get_seed(bip) & 0xffffffff;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		/* Already remapped? */
+		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
+			break;
+
+		bip_for_each_vec(iv, bip, iter) {
+			void *p, *pmap;
+			unsigned int j;
+
+			pmap = kmap_atomic(iv.bv_page);
+			p = pmap + iv.bv_offset;
+			for (j = 0; j < iv.bv_len; j += tuple_sz) {
+				struct t10_pi_tuple *pi = p;
+
+				if (be32_to_cpu(pi->ref_tag) == virt)
+					pi->ref_tag = cpu_to_be32(ref_tag);
+				virt++;
+				ref_tag++;
+				p += tuple_sz;
+			}
+
+			kunmap_atomic(pmap);
+		}
+
+		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
+	}
+}
+EXPORT_SYMBOL(t10_pi_prepare);
+
+/**
+ * t10_pi_complete - prepare PI prior returning request to the block layer
+ * @rq:              request with PI that should be prepared
+ * @protection_type: PI type (Type 1/Type 2/Type 3)
+ * @intervals:       total elements to prepare
+ *
+ * For Type 1/Type 2, the virtual start sector is the one that was
+ * originally submitted by the block layer for the ref_tag usage. Due to
+ * partitioning, MD/DM cloning, etc. the actual physical start sector is
+ * likely to be different. Since the physical start sector was submitted
+ * to the device, we should remap it back to virtual values expected by the
+ * block layer.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void t10_pi_complete(struct request *rq, u8 protection_type,
+		     unsigned int intervals)
+{
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u32 ref_tag = t10_pi_ref_tag(rq);
+	struct bio *bio;
+
+	if (protection_type == T10_PI_TYPE3_PROTECTION)
+		return;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u32 virt = bip_get_seed(bip) & 0xffffffff;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		bip_for_each_vec(iv, bip, iter) {
+			void *p, *pmap;
+			unsigned int j;
+
+			pmap = kmap_atomic(iv.bv_page);
+			p = pmap + iv.bv_offset;
+			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
+				struct t10_pi_tuple *pi = p;
+
+				if (be32_to_cpu(pi->ref_tag) == ref_tag)
+					pi->ref_tag = cpu_to_be32(virt);
+				virt++;
+				ref_tag++;
+				intervals--;
+				p += tuple_sz;
+			}
+
+			kunmap_atomic(pmap);
+		}
+	}
+}
+EXPORT_SYMBOL(t10_pi_complete);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9421d9877730..bbebdc3769b0 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1119,7 +1119,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 		SCpnt->cmnd[0] = WRITE_6;
 
 		if (blk_integrity_rq(rq))
-			sd_dif_prepare(SCpnt);
+			t10_pi_prepare(SCpnt->request, sdkp->protection_type);
 
 	} else if (rq_data_dir(rq) == READ) {
 		SCpnt->cmnd[0] = READ_6;
@@ -2047,8 +2047,10 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 					   "sd_done: completed %d of %d bytes\n",
 					   good_bytes, scsi_bufflen(SCpnt)));
 
-	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
-		sd_dif_complete(SCpnt, good_bytes);
+	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
+	    good_bytes)
+		t10_pi_complete(SCpnt->request, sdkp->protection_type,
+				good_bytes / scsi_prot_interval(SCpnt));
 
 	return good_bytes;
 }
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 392c7d078ae3..a7d4f50b67d4 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -254,21 +254,12 @@ static inline unsigned int sd_prot_flag_mask(unsigned int prot_op)
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 
 extern void sd_dif_config_host(struct scsi_disk *);
-extern void sd_dif_prepare(struct scsi_cmnd *scmd);
-extern void sd_dif_complete(struct scsi_cmnd *, unsigned int);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
 static inline void sd_dif_config_host(struct scsi_disk *disk)
 {
 }
-static inline int sd_dif_prepare(struct scsi_cmnd *scmd)
-{
-	return 0;
-}
-static inline void sd_dif_complete(struct scsi_cmnd *cmd, unsigned int a)
-{
-}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index d8de43d359ac..db72c82486e3 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -95,116 +95,3 @@ out:
 	blk_integrity_register(disk, &bi);
 }
 
-/*
- * The virtual start sector is the one that was originally submitted
- * by the block layer.	Due to partitioning, MD/DM cloning, etc. the
- * actual physical start sector is likely to be different.  Remap
- * protection information to match the physical LBA.
- *
- * From a protocol perspective there's a slight difference between
- * Type 1 and 2.  The latter uses 32-byte CDBs exclusively, and the
- * reference tag is seeded in the CDB.  This gives us the potential to
- * avoid virt->phys remapping during write.  However, at read time we
- * don't know whether the virt sector is the same as when we wrote it
- * (we could be reading from real disk as opposed to MD/DM device.  So
- * we always remap Type 2 making it identical to Type 1.
- *
- * Type 3 does not have a reference tag so no remapping is required.
- */
-void sd_dif_prepare(struct scsi_cmnd *scmd)
-{
-	const int tuple_sz = sizeof(struct t10_pi_tuple);
-	struct bio *bio;
-	struct scsi_disk *sdkp;
-	struct t10_pi_tuple *pi;
-	u32 phys, virt;
-
-	sdkp = scsi_disk(scmd->request->rq_disk);
-
-	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION)
-		return;
-
-	phys = t10_pi_ref_tag(scmd->request);
-
-	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		struct bio_vec iv;
-		struct bvec_iter iter;
-		unsigned int j;
-
-		/* Already remapped? */
-		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
-			break;
-
-		virt = bip_get_seed(bip) & 0xffffffff;
-
-		bip_for_each_vec(iv, bip, iter) {
-			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
-			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
-				if (be32_to_cpu(pi->ref_tag) == virt)
-					pi->ref_tag = cpu_to_be32(phys);
-
-				virt++;
-				phys++;
-			}
-
-			kunmap_atomic(pi);
-		}
-
-		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
-	}
-}
-
-/*
- * Remap physical sector values in the reference tag to the virtual
- * values expected by the block layer.
- */
-void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
-{
-	const int tuple_sz = sizeof(struct t10_pi_tuple);
-	struct scsi_disk *sdkp;
-	struct bio *bio;
-	struct t10_pi_tuple *pi;
-	unsigned int j, intervals;
-	u32 phys, virt;
-
-	sdkp = scsi_disk(scmd->request->rq_disk);
-
-	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION || good_bytes == 0)
-		return;
-
-	intervals = good_bytes / scsi_prot_interval(scmd);
-	phys = t10_pi_ref_tag(scmd->request);
-
-	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		struct bio_vec iv;
-		struct bvec_iter iter;
-
-		virt = bip_get_seed(bip) & 0xffffffff;
-
-		bip_for_each_vec(iv, bip, iter) {
-			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
-			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
-				if (intervals == 0) {
-					kunmap_atomic(pi);
-					return;
-				}
-
-				if (be32_to_cpu(pi->ref_tag) == phys)
-					pi->ref_tag = cpu_to_be32(virt);
-
-				virt++;
-				phys++;
-				intervals--;
-			}
-
-			kunmap_atomic(pi);
-		}
-	}
-}
-
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c40511f4e63d..5a427c289f58 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -51,5 +51,8 @@ extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
+extern void t10_pi_prepare(struct request *rq, u8 protection_type);
+extern void t10_pi_complete(struct request *rq, u8 protection_type,
+			    unsigned int intervals);
 
 #endif
-- 
cgit v1.2.3


From 82b98ca566ca2af170eb0ab50cef09dd7335fa55 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Thu, 5 Jul 2018 16:48:50 +0000
Subject: net/sunrpc: Make rpc_auth_create_args a const

This turns rpc_auth_create_args into a const as it gets passed through the
auth stack.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/auth.h    | 5 +++--
 net/sunrpc/auth.c              | 2 +-
 net/sunrpc/auth_gss/auth_gss.c | 9 +++++----
 net/sunrpc/auth_null.c         | 2 +-
 net/sunrpc/auth_unix.c         | 2 +-
 5 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index d9af474a857d..58a6765c1c5e 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -125,7 +125,8 @@ struct rpc_authops {
 	struct module		*owner;
 	rpc_authflavor_t	au_flavor;	/* flavor (RPC_AUTH_*) */
 	char *			au_name;
-	struct rpc_auth *	(*create)(struct rpc_auth_create_args *, struct rpc_clnt *);
+	struct rpc_auth *	(*create)(const struct rpc_auth_create_args *,
+					  struct rpc_clnt *);
 	void			(*destroy)(struct rpc_auth *);
 
 	int			(*hash_cred)(struct auth_cred *, unsigned int);
@@ -174,7 +175,7 @@ struct rpc_cred *	rpc_lookup_generic_cred(struct auth_cred *, int, gfp_t);
 struct rpc_cred *	rpc_lookup_machine_cred(const char *service_name);
 int			rpcauth_register(const struct rpc_authops *);
 int			rpcauth_unregister(const struct rpc_authops *);
-struct rpc_auth *	rpcauth_create(struct rpc_auth_create_args *,
+struct rpc_auth *	rpcauth_create(const struct rpc_auth_create_args *,
 				struct rpc_clnt *);
 void			rpcauth_release(struct rpc_auth *);
 rpc_authflavor_t	rpcauth_get_pseudoflavor(rpc_authflavor_t,
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index d2623b9f23d6..661e2277f468 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -253,7 +253,7 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
 EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
 
 struct rpc_auth *
-rpcauth_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+rpcauth_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	struct rpc_auth		*auth;
 	const struct rpc_authops *ops;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index be8f103d22fd..21a19a9f0e33 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -985,7 +985,7 @@ static void gss_pipe_free(struct gss_pipe *p)
  * parameters based on the input flavor (which must be a pseudoflavor)
  */
 static struct gss_auth *
-gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	rpc_authflavor_t flavor = args->pseudoflavor;
 	struct gss_auth *gss_auth;
@@ -1132,7 +1132,7 @@ gss_destroy(struct rpc_auth *auth)
  * (which is guaranteed to last as long as any of its descendants).
  */
 static struct gss_auth *
-gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args,
+gss_auth_find_or_add_hashed(const struct rpc_auth_create_args *args,
 		struct rpc_clnt *clnt,
 		struct gss_auth *new)
 {
@@ -1169,7 +1169,8 @@ out:
 }
 
 static struct gss_auth *
-gss_create_hashed(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+gss_create_hashed(const struct rpc_auth_create_args *args,
+		  struct rpc_clnt *clnt)
 {
 	struct gss_auth *gss_auth;
 	struct gss_auth *new;
@@ -1188,7 +1189,7 @@ out:
 }
 
 static struct rpc_auth *
-gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+gss_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	struct gss_auth *gss_auth;
 	struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch);
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 75d72e109a04..4b48228ee8c7 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -19,7 +19,7 @@ static struct rpc_auth null_auth;
 static struct rpc_cred null_cred;
 
 static struct rpc_auth *
-nul_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+nul_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	atomic_inc(&null_auth.au_count);
 	return &null_auth;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index dafd6b870ba3..185e56d4f9ae 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -30,7 +30,7 @@ static struct rpc_auth		unix_auth;
 static const struct rpc_credops	unix_credops;
 
 static struct rpc_auth *
-unx_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	dprintk("RPC:       creating UNIX authenticator for client %p\n",
 			clnt);
-- 
cgit v1.2.3


From e6753f23d961d601dbae50a2fc2a3975c9715b14 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Mon, 30 Jul 2018 15:24:22 -0700
Subject: tracepoint: Make rcuidle tracepoint callers use SRCU

In recent tests with IRQ on/off tracepoints, a large performance
overhead ~10% is noticed when running hackbench. This is root caused to
calls to rcu_irq_enter_irqson and rcu_irq_exit_irqson from the
tracepoint code. Following a long discussion on the list [1] about this,
we concluded that srcu is a better alternative for use during rcu idle.
Although it does involve extra barriers, its lighter than the sched-rcu
version which has to do additional RCU calls to notify RCU idle about
entry into RCU sections.

In this patch, we change the underlying implementation of the
trace_*_rcuidle API to use SRCU. This has shown to improve performance
alot for the high frequency irq enable/disable tracepoints.

Test: Tested idle and preempt/irq tracepoints.

Here are some performance numbers:

With a run of the following 30 times on a single core x86 Qemu instance
with 1GB memory:
hackbench -g 4 -f 2 -l 3000

Completion times in seconds. CONFIG_PROVE_LOCKING=y.

No patches (without this series)
Mean: 3.048
Median: 3.025
Std Dev: 0.064

With Lockdep using irq tracepoints with RCU implementation:
Mean: 3.451   (-11.66 %)
Median: 3.447 (-12.22%)
Std Dev: 0.049

With Lockdep using irq tracepoints with SRCU implementation (this series):
Mean: 3.020   (I would consider the improvement against the "without
	       this series" case as just noise).
Median: 3.013
Std Dev: 0.033

[1] https://patchwork.kernel.org/patch/10344297/

[remove rcu_read_lock_sched_notrace as its the equivalent of
preempt_disable_notrace and is unnecessary to call in tracepoint code]
Link: http://lkml.kernel.org/r/20180730222423.196630-3-joel@joelfernandes.org

Cleaned-up-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
[ Simplified WARN_ON_ONCE() ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 40 ++++++++++++++++++++++++++++++++--------
 kernel/tracepoint.c        | 16 +++++++++++++++-
 2 files changed, 47 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 19a690b559ca..d9a084c72541 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -15,6 +15,7 @@
  */
 
 #include <linux/smp.h>
+#include <linux/srcu.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/cpumask.h>
@@ -33,6 +34,8 @@ struct trace_eval_map {
 
 #define TRACEPOINT_DEFAULT_PRIO	10
 
+extern struct srcu_struct tracepoint_srcu;
+
 extern int
 tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
 extern int
@@ -75,10 +78,16 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
  * probe unregistration and the end of module exit to make sure there is no
  * caller executing a probe when it is freed.
  */
+#ifdef CONFIG_TRACEPOINTS
 static inline void tracepoint_synchronize_unregister(void)
 {
+	synchronize_srcu(&tracepoint_srcu);
 	synchronize_sched();
 }
+#else
+static inline void tracepoint_synchronize_unregister(void)
+{ }
+#endif
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
 extern int syscall_regfunc(void);
@@ -129,18 +138,31 @@ extern void syscall_unregfunc(void);
  * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
  * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
  */
-#define __DO_TRACE(tp, proto, args, cond, rcucheck)			\
+#define __DO_TRACE(tp, proto, args, cond, rcuidle)			\
 	do {								\
 		struct tracepoint_func *it_func_ptr;			\
 		void *it_func;						\
 		void *__data;						\
+		int __maybe_unused idx = 0;				\
 									\
 		if (!(cond))						\
 			return;						\
-		if (rcucheck)						\
-			rcu_irq_enter_irqson();				\
-		rcu_read_lock_sched_notrace();				\
-		it_func_ptr = rcu_dereference_sched((tp)->funcs);	\
+									\
+		/* srcu can't be used from NMI */			\
+		WARN_ON_ONCE(rcuidle && in_nmi());			\
+									\
+		/* keep srcu and sched-rcu usage consistent */		\
+		preempt_disable_notrace();				\
+									\
+		/*							\
+		 * For rcuidle callers, use srcu since sched-rcu	\
+		 * doesn't work from the idle path.			\
+		 */							\
+		if (rcuidle)						\
+			idx = srcu_read_lock_notrace(&tracepoint_srcu);	\
+									\
+		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
+									\
 		if (it_func_ptr) {					\
 			do {						\
 				it_func = (it_func_ptr)->func;		\
@@ -148,9 +170,11 @@ extern void syscall_unregfunc(void);
 				((void(*)(proto))(it_func))(args);	\
 			} while ((++it_func_ptr)->func);		\
 		}							\
-		rcu_read_unlock_sched_notrace();			\
-		if (rcucheck)						\
-			rcu_irq_exit_irqson();				\
+									\
+		if (rcuidle)						\
+			srcu_read_unlock_notrace(&tracepoint_srcu, idx);\
+									\
+		preempt_enable_notrace();				\
 	} while (0)
 
 #ifndef MODULE
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 6dc6356c3327..955148d91b74 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -31,6 +31,9 @@
 extern struct tracepoint * const __start___tracepoints_ptrs[];
 extern struct tracepoint * const __stop___tracepoints_ptrs[];
 
+DEFINE_SRCU(tracepoint_srcu);
+EXPORT_SYMBOL_GPL(tracepoint_srcu);
+
 /* Set to 1 to enable tracepoint debug output */
 static const int tracepoint_debug;
 
@@ -67,16 +70,27 @@ static inline void *allocate_probes(int count)
 	return p == NULL ? NULL : p->probes;
 }
 
-static void rcu_free_old_probes(struct rcu_head *head)
+static void srcu_free_old_probes(struct rcu_head *head)
 {
 	kfree(container_of(head, struct tp_probes, rcu));
 }
 
+static void rcu_free_old_probes(struct rcu_head *head)
+{
+	call_srcu(&tracepoint_srcu, head, srcu_free_old_probes);
+}
+
 static inline void release_probes(struct tracepoint_func *old)
 {
 	if (old) {
 		struct tp_probes *tp_probes = container_of(old,
 			struct tp_probes, probes[0]);
+		/*
+		 * Tracepoint probes are protected by both sched RCU and SRCU,
+		 * by calling the SRCU callback in the sched RCU callback we
+		 * cover both cases. So let us chain the SRCU and sched RCU
+		 * callbacks to wait for both grace periods.
+		 */
 		call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes);
 	}
 }
-- 
cgit v1.2.3


From 760c435e0f85ed19e48a90d746ce1de2cd02def7 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 19 Jul 2018 00:09:12 +0200
Subject: mtd: rawnand: make subop helpers return unsigned values

A report from Colin Ian King pointed a CoverityScan issue where error
values on these helpers where not checked in the drivers. These
helpers can error out only in case of a software bug in driver code,
not because of a runtime/hardware error. Hence, let's WARN_ON() in this
case and return 0 which is harmless anyway.

Fixes: 8878b126df76 ("mtd: nand: add ->exec_op() implementation")
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 44 ++++++++++++++++++++--------------------
 include/linux/mtd/rawnand.h      | 16 +++++++--------
 2 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index c4b74630f4c5..ef10beab99f5 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -2668,8 +2668,8 @@ static bool nand_subop_instr_is_valid(const struct nand_subop *subop,
 	return subop && instr_idx < subop->ninstrs;
 }
 
-static int nand_subop_get_start_off(const struct nand_subop *subop,
-				    unsigned int instr_idx)
+static unsigned int nand_subop_get_start_off(const struct nand_subop *subop,
+					     unsigned int instr_idx)
 {
 	if (instr_idx)
 		return 0;
@@ -2688,12 +2688,12 @@ static int nand_subop_get_start_off(const struct nand_subop *subop,
  *
  * Given an address instruction, returns the offset of the first cycle to issue.
  */
-int nand_subop_get_addr_start_off(const struct nand_subop *subop,
-				  unsigned int instr_idx)
+unsigned int nand_subop_get_addr_start_off(const struct nand_subop *subop,
+					   unsigned int instr_idx)
 {
-	if (!nand_subop_instr_is_valid(subop, instr_idx) ||
-	    subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR)
-		return -EINVAL;
+	if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) ||
+		    subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR))
+		return 0;
 
 	return nand_subop_get_start_off(subop, instr_idx);
 }
@@ -2710,14 +2710,14 @@ EXPORT_SYMBOL_GPL(nand_subop_get_addr_start_off);
  *
  * Given an address instruction, returns the number of address cycle to issue.
  */
-int nand_subop_get_num_addr_cyc(const struct nand_subop *subop,
-				unsigned int instr_idx)
+unsigned int nand_subop_get_num_addr_cyc(const struct nand_subop *subop,
+					 unsigned int instr_idx)
 {
 	int start_off, end_off;
 
-	if (!nand_subop_instr_is_valid(subop, instr_idx) ||
-	    subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR)
-		return -EINVAL;
+	if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) ||
+		    subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR))
+		return 0;
 
 	start_off = nand_subop_get_addr_start_off(subop, instr_idx);
 
@@ -2742,12 +2742,12 @@ EXPORT_SYMBOL_GPL(nand_subop_get_num_addr_cyc);
  *
  * Given a data instruction, returns the offset to start from.
  */
-int nand_subop_get_data_start_off(const struct nand_subop *subop,
-				  unsigned int instr_idx)
+unsigned int nand_subop_get_data_start_off(const struct nand_subop *subop,
+					   unsigned int instr_idx)
 {
-	if (!nand_subop_instr_is_valid(subop, instr_idx) ||
-	    !nand_instr_is_data(&subop->instrs[instr_idx]))
-		return -EINVAL;
+	if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) ||
+		    !nand_instr_is_data(&subop->instrs[instr_idx])))
+		return 0;
 
 	return nand_subop_get_start_off(subop, instr_idx);
 }
@@ -2764,14 +2764,14 @@ EXPORT_SYMBOL_GPL(nand_subop_get_data_start_off);
  *
  * Returns the length of the chunk of data to send/receive.
  */
-int nand_subop_get_data_len(const struct nand_subop *subop,
-			    unsigned int instr_idx)
+unsigned int nand_subop_get_data_len(const struct nand_subop *subop,
+				     unsigned int instr_idx)
 {
 	int start_off = 0, end_off;
 
-	if (!nand_subop_instr_is_valid(subop, instr_idx) ||
-	    !nand_instr_is_data(&subop->instrs[instr_idx]))
-		return -EINVAL;
+	if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) ||
+		    !nand_instr_is_data(&subop->instrs[instr_idx])))
+		return 0;
 
 	start_off = nand_subop_get_data_start_off(subop, instr_idx);
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index f60fad29eae6..598d356de83f 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1007,14 +1007,14 @@ struct nand_subop {
 	unsigned int last_instr_end_off;
 };
 
-int nand_subop_get_addr_start_off(const struct nand_subop *subop,
-				  unsigned int op_id);
-int nand_subop_get_num_addr_cyc(const struct nand_subop *subop,
-				unsigned int op_id);
-int nand_subop_get_data_start_off(const struct nand_subop *subop,
-				  unsigned int op_id);
-int nand_subop_get_data_len(const struct nand_subop *subop,
-			    unsigned int op_id);
+unsigned int nand_subop_get_addr_start_off(const struct nand_subop *subop,
+					   unsigned int op_id);
+unsigned int nand_subop_get_num_addr_cyc(const struct nand_subop *subop,
+					 unsigned int op_id);
+unsigned int nand_subop_get_data_start_off(const struct nand_subop *subop,
+					   unsigned int op_id);
+unsigned int nand_subop_get_data_len(const struct nand_subop *subop,
+				     unsigned int op_id);
 
 /**
  * struct nand_op_parser_addr_constraints - Constraints for address instructions
-- 
cgit v1.2.3


From 7da45139d264f3b7ead04e00ebb29b189cf9826e Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 17 Jul 2018 09:08:02 +0200
Subject: mtd: rawnand: better name for the controller structure

In the raw NAND core, a NAND chip is described by a nand_chip structure,
while a NAND controller is described with a nand_hw_control structure
which is not very meaningful.

Rename this structure nand_controller.

As the structure gets renamed, it is logical to also rename the core
function initializing it from nand_hw_control_init() to
nand_controller_init().

Lastly, the 'hwcontrol' entry of the nand_chip structure is not
meaningful neither while it has the role of fallback when no controller
structure is provided by the driver (the controller driver is dumb and
can only control a single chip). Thus, it is renamed dummy_controller.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/atmel/nand-controller.c | 10 +++++-----
 drivers/mtd/nand/raw/brcmnand/brcmnand.c     |  4 ++--
 drivers/mtd/nand/raw/docg4.c                 |  4 ++--
 drivers/mtd/nand/raw/fsl_elbc_nand.c         |  4 ++--
 drivers/mtd/nand/raw/fsl_ifc_nand.c          |  4 ++--
 drivers/mtd/nand/raw/jz4780_nand.c           |  7 ++++---
 drivers/mtd/nand/raw/marvell_nand.c          |  6 +++---
 drivers/mtd/nand/raw/mtk_nand.c              |  2 +-
 drivers/mtd/nand/raw/nand_base.c             |  4 ++--
 drivers/mtd/nand/raw/ndfc.c                  |  4 ++--
 drivers/mtd/nand/raw/omap2.c                 |  2 +-
 drivers/mtd/nand/raw/oxnas_nand.c            |  4 ++--
 drivers/mtd/nand/raw/qcom_nandc.c            |  4 ++--
 drivers/mtd/nand/raw/s3c2410.c               |  4 ++--
 drivers/mtd/nand/raw/sunxi_nand.c            |  6 +++---
 drivers/mtd/nand/raw/tango_nand.c            |  4 ++--
 drivers/mtd/nand/raw/tegra_nand.c            |  6 +++---
 drivers/mtd/nand/raw/txx9ndfmc.c             |  4 ++--
 include/linux/mtd/rawnand.h                  | 14 ++++++++------
 19 files changed, 50 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 30dae4c9d439..855cc7729c43 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -216,7 +216,7 @@ struct atmel_nand_controller_caps {
 };
 
 struct atmel_nand_controller {
-	struct nand_hw_control base;
+	struct nand_controller base;
 	const struct atmel_nand_controller_caps *caps;
 	struct device *dev;
 	struct regmap *smc;
@@ -227,7 +227,7 @@ struct atmel_nand_controller {
 };
 
 static inline struct atmel_nand_controller *
-to_nand_controller(struct nand_hw_control *ctl)
+to_nand_controller(struct nand_controller *ctl)
 {
 	return container_of(ctl, struct atmel_nand_controller, base);
 }
@@ -239,7 +239,7 @@ struct atmel_smc_nand_controller {
 };
 
 static inline struct atmel_smc_nand_controller *
-to_smc_nand_controller(struct nand_hw_control *ctl)
+to_smc_nand_controller(struct nand_controller *ctl)
 {
 	return container_of(to_nand_controller(ctl),
 			    struct atmel_smc_nand_controller, base);
@@ -263,7 +263,7 @@ struct atmel_hsmc_nand_controller {
 };
 
 static inline struct atmel_hsmc_nand_controller *
-to_hsmc_nand_controller(struct nand_hw_control *ctl)
+to_hsmc_nand_controller(struct nand_controller *ctl)
 {
 	return container_of(to_nand_controller(ctl),
 			    struct atmel_hsmc_nand_controller, base);
@@ -1966,7 +1966,7 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
 	struct device_node *np = dev->of_node;
 	int ret;
 
-	nand_hw_control_init(&nc->base);
+	nand_controller_init(&nc->base);
 	INIT_LIST_HEAD(&nc->chips);
 	nc->dev = dev;
 	nc->caps = caps;
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 1306aaa7a8bf..2e5efa0f9ea2 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -114,7 +114,7 @@ enum {
 
 struct brcmnand_controller {
 	struct device		*dev;
-	struct nand_hw_control	controller;
+	struct nand_controller	controller;
 	void __iomem		*nand_base;
 	void __iomem		*nand_fc; /* flash cache */
 	void __iomem		*flash_dma_base;
@@ -2433,7 +2433,7 @@ int brcmnand_probe(struct platform_device *pdev, struct brcmnand_soc *soc)
 
 	init_completion(&ctrl->done);
 	init_completion(&ctrl->dma_done);
-	nand_hw_control_init(&ctrl->controller);
+	nand_controller_init(&ctrl->controller);
 	INIT_LIST_HEAD(&ctrl->host_list);
 
 	/* NAND register range */
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index bb96cb33cd6b..4dccdfba6140 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -1257,8 +1257,8 @@ static void __init init_mtd_structs(struct mtd_info *mtd)
 	nand->ecc.strength = DOCG4_T;
 	nand->options = NAND_BUSWIDTH_16 | NAND_NO_SUBPAGE_WRITE;
 	nand->IO_ADDR_R = nand->IO_ADDR_W = doc->virtadr + DOC_IOSPACE_DATA;
-	nand->controller = &nand->hwcontrol;
-	nand_hw_control_init(nand->controller);
+	nand->controller = &nand->dummy_controller;
+	nand_controller_init(nand->controller);
 
 	/* methods */
 	nand->cmdfunc = docg4_command;
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 51f0b340bc0d..0aa54a949653 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -61,7 +61,7 @@ struct fsl_elbc_mtd {
 /* Freescale eLBC FCM controller information */
 
 struct fsl_elbc_fcm_ctrl {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct fsl_elbc_mtd *chips[MAX_BANKS];
 
 	u8 __iomem *addr;        /* Address of assigned FCM buffer        */
@@ -879,7 +879,7 @@ static int fsl_elbc_nand_probe(struct platform_device *pdev)
 		}
 		elbc_fcm_ctrl->counter++;
 
-		nand_hw_control_init(&elbc_fcm_ctrl->controller);
+		nand_controller_init(&elbc_fcm_ctrl->controller);
 		fsl_lbc_ctrl_dev->nand = elbc_fcm_ctrl;
 	} else {
 		elbc_fcm_ctrl = fsl_lbc_ctrl_dev->nand;
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 75d3c951f61a..96130d91e32c 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -51,7 +51,7 @@ struct fsl_ifc_mtd {
 
 /* overview of the fsl ifc controller */
 struct fsl_ifc_nand_ctrl {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct fsl_ifc_mtd *chips[FSL_IFC_BANK_COUNT];
 
 	void __iomem *addr;	/* Address of assigned IFC buffer	*/
@@ -1004,7 +1004,7 @@ static int fsl_ifc_nand_probe(struct platform_device *dev)
 		ifc_nand_ctrl->addr = NULL;
 		fsl_ifc_ctrl_dev->nand = ifc_nand_ctrl;
 
-		nand_hw_control_init(&ifc_nand_ctrl->controller);
+		nand_controller_init(&ifc_nand_ctrl->controller);
 	} else {
 		ifc_nand_ctrl = fsl_ifc_ctrl_dev->nand;
 	}
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index e69f6ae4c539..49841dad347c 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -44,7 +44,7 @@ struct jz4780_nand_cs {
 struct jz4780_nand_controller {
 	struct device *dev;
 	struct jz4780_bch *bch;
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	unsigned int num_banks;
 	struct list_head chips;
 	int selected;
@@ -65,7 +65,8 @@ static inline struct jz4780_nand_chip *to_jz4780_nand_chip(struct mtd_info *mtd)
 	return container_of(mtd_to_nand(mtd), struct jz4780_nand_chip, chip);
 }
 
-static inline struct jz4780_nand_controller *to_jz4780_nand_controller(struct nand_hw_control *ctrl)
+static inline struct jz4780_nand_controller
+*to_jz4780_nand_controller(struct nand_controller *ctrl)
 {
 	return container_of(ctrl, struct jz4780_nand_controller, controller);
 }
@@ -368,7 +369,7 @@ static int jz4780_nand_probe(struct platform_device *pdev)
 	nfc->dev = dev;
 	nfc->num_banks = num_banks;
 
-	nand_hw_control_init(&nfc->controller);
+	nand_controller_init(&nfc->controller);
 	INIT_LIST_HEAD(&nfc->chips);
 
 	ret = jz4780_nand_init_chips(nfc, pdev);
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 80a074cccb82..bd5f9a4b7b16 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -318,7 +318,7 @@ struct marvell_nfc_caps {
  * @dma_buf:		32-bit aligned buffer for DMA transfers (NFCv1 only)
  */
 struct marvell_nfc {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct device *dev;
 	void __iomem *regs;
 	struct clk *core_clk;
@@ -335,7 +335,7 @@ struct marvell_nfc {
 	u8 *dma_buf;
 };
 
-static inline struct marvell_nfc *to_marvell_nfc(struct nand_hw_control *ctrl)
+static inline struct marvell_nfc *to_marvell_nfc(struct nand_controller *ctrl)
 {
 	return container_of(ctrl, struct marvell_nfc, controller);
 }
@@ -2745,7 +2745,7 @@ static int marvell_nfc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	nfc->dev = dev;
-	nand_hw_control_init(&nfc->controller);
+	nand_controller_init(&nfc->controller);
 	INIT_LIST_HEAD(&nfc->chips);
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index e6b14b79c8a8..7bc6be3f6ec0 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -145,7 +145,7 @@ struct mtk_nfc_clk {
 };
 
 struct mtk_nfc {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct mtk_ecc_config ecc_cfg;
 	struct mtk_nfc_clk clk;
 	struct mtk_ecc *ecc;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index e545e03a214e..dcdf0f373100 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5000,8 +5000,8 @@ static void nand_set_defaults(struct nand_chip *chip)
 		chip->read_buf = busw ? nand_read_buf16 : nand_read_buf;
 
 	if (!chip->controller) {
-		chip->controller = &chip->hwcontrol;
-		nand_hw_control_init(chip->controller);
+		chip->controller = &chip->dummy_controller;
+		nand_controller_init(chip->controller);
 	}
 
 	if (!chip->buf_align)
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index d8a806894937..540fa1a0ea24 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -39,7 +39,7 @@ struct ndfc_controller {
 	void __iomem *ndfcbase;
 	struct nand_chip chip;
 	int chip_select;
-	struct nand_hw_control ndfc_control;
+	struct nand_controller ndfc_control;
 };
 
 static struct ndfc_controller ndfc_ctrl[NDFC_MAX_CS];
@@ -218,7 +218,7 @@ static int ndfc_probe(struct platform_device *ofdev)
 	ndfc = &ndfc_ctrl[cs];
 	ndfc->chip_select = cs;
 
-	nand_hw_control_init(&ndfc->ndfc_control);
+	nand_controller_init(&ndfc->ndfc_control);
 	ndfc->ofdev = ofdev;
 	dev_set_drvdata(&ofdev->dev, ndfc);
 
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index e50c64adc3c8..e943b2e5a5e2 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -145,7 +145,7 @@ static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc,
 static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10};
 
 /* Shared among all NAND instances to synchronize access to the ECC Engine */
-static struct nand_hw_control omap_gpmc_controller = {
+static struct nand_controller omap_gpmc_controller = {
 	.lock = __SPIN_LOCK_UNLOCKED(omap_gpmc_controller.lock),
 	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(omap_gpmc_controller.wq),
 };
diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c
index d649d5944826..01b00bb69c1e 100644
--- a/drivers/mtd/nand/raw/oxnas_nand.c
+++ b/drivers/mtd/nand/raw/oxnas_nand.c
@@ -32,7 +32,7 @@
 #define OXNAS_NAND_MAX_CHIPS	1
 
 struct oxnas_nand_ctrl {
-	struct nand_hw_control base;
+	struct nand_controller base;
 	void __iomem *io_base;
 	struct clk *clk;
 	struct nand_chip *chips[OXNAS_NAND_MAX_CHIPS];
@@ -96,7 +96,7 @@ static int oxnas_nand_probe(struct platform_device *pdev)
 	if (!oxnas)
 		return -ENOMEM;
 
-	nand_hw_control_init(&oxnas->base);
+	nand_controller_init(&oxnas->base);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	oxnas->io_base = devm_ioremap_resource(&pdev->dev, res);
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 645630953f38..aa6c3e026ef1 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -365,7 +365,7 @@ struct nandc_regs {
  *				from all connected NAND devices pagesize
  */
 struct qcom_nand_controller {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct list_head host_list;
 
 	struct device *dev;
@@ -2728,7 +2728,7 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc)
 	INIT_LIST_HEAD(&nandc->desc_list);
 	INIT_LIST_HEAD(&nandc->host_list);
 
-	nand_hw_control_init(&nandc->controller);
+	nand_controller_init(&nandc->controller);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 5a4a68790653..e8bf64832213 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -162,7 +162,7 @@ enum s3c_nand_clk_state {
  */
 struct s3c2410_nand_info {
 	/* mtd info */
-	struct nand_hw_control		controller;
+	struct nand_controller		controller;
 	struct s3c2410_nand_mtd		*mtds;
 	struct s3c2410_platform_nand	*platform;
 
@@ -1094,7 +1094,7 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, info);
 
-	nand_hw_control_init(&info->controller);
+	nand_controller_init(&info->controller);
 
 	/* get the clock source and enable it */
 
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 4b11cd4a79be..07f3ff9a28f2 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -234,7 +234,7 @@ static inline struct sunxi_nand_chip *to_sunxi_nand(struct nand_chip *nand)
  *			controller events
  */
 struct sunxi_nfc {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct device *dev;
 	void __iomem *regs;
 	struct clk *ahb_clk;
@@ -247,7 +247,7 @@ struct sunxi_nfc {
 	struct dma_chan *dmac;
 };
 
-static inline struct sunxi_nfc *to_sunxi_nfc(struct nand_hw_control *ctrl)
+static inline struct sunxi_nfc *to_sunxi_nfc(struct nand_controller *ctrl)
 {
 	return container_of(ctrl, struct sunxi_nfc, controller);
 }
@@ -2012,7 +2012,7 @@ static int sunxi_nfc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	nfc->dev = dev;
-	nand_hw_control_init(&nfc->controller);
+	nand_controller_init(&nfc->controller);
 	INIT_LIST_HEAD(&nfc->chips);
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index f2052fae21c7..dd7a26efdf4f 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -83,7 +83,7 @@
 #define MAX_CS		4
 
 struct tango_nfc {
-	struct nand_hw_control hw;
+	struct nand_controller hw;
 	void __iomem *reg_base;
 	void __iomem *mem_base;
 	void __iomem *pbus_base;
@@ -654,7 +654,7 @@ static int tango_nand_probe(struct platform_device *pdev)
 		return PTR_ERR(nfc->chan);
 
 	platform_set_drvdata(pdev, nfc);
-	nand_hw_control_init(&nfc->hw);
+	nand_controller_init(&nfc->hw);
 	nfc->freq_kHz = clk_get_rate(clk) / 1000;
 
 	for_each_child_of_node(pdev->dev.of_node, np) {
diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c
index 56c0aa1bc81f..31c0d9ca9d23 100644
--- a/drivers/mtd/nand/raw/tegra_nand.c
+++ b/drivers/mtd/nand/raw/tegra_nand.c
@@ -164,7 +164,7 @@
 				HWSTATUS_RBSY_VALUE(NAND_STATUS_READY))
 
 struct tegra_nand_controller {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct device *dev;
 	void __iomem *regs;
 	int irq;
@@ -187,7 +187,7 @@ struct tegra_nand_chip {
 };
 
 static inline struct tegra_nand_controller *
-			to_tegra_ctrl(struct nand_hw_control *hw_ctrl)
+			to_tegra_ctrl(struct nand_controller *hw_ctrl)
 {
 	return container_of(hw_ctrl, struct tegra_nand_controller, controller);
 }
@@ -1136,7 +1136,7 @@ static int tegra_nand_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	ctrl->dev = &pdev->dev;
-	nand_hw_control_init(&ctrl->controller);
+	nand_controller_init(&ctrl->controller);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ctrl->regs = devm_ioremap_resource(&pdev->dev, res);
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 5fe9da8b4a0a..8f5bbbac4612 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -73,7 +73,7 @@ struct txx9ndfmc_drvdata {
 	void __iomem *base;
 	unsigned char hold;	/* in gbusclock */
 	unsigned char spw;	/* in gbusclock */
-	struct nand_hw_control hw_control;
+	struct nand_controller hw_control;
 };
 
 static struct platform_device *mtd_to_platdev(struct mtd_info *mtd)
@@ -303,7 +303,7 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
 	dev_info(&dev->dev, "CLK:%ldMHz HOLD:%d SPW:%d\n",
 		 (gbusclk + 500000) / 1000000, hold, spw);
 
-	nand_hw_control_init(&drvdata->hw_control);
+	nand_controller_init(&drvdata->hw_control);
 
 	platform_set_drvdata(dev, drvdata);
 	txx9ndfmc_initialize(dev);
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 598d356de83f..93a2678e0f0d 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -510,20 +510,21 @@ struct nand_id {
 };
 
 /**
- * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices
+ * struct nand_controller - Structure used to describe a NAND controller
+ *
  * @lock:               protection lock
  * @active:		the mtd device which holds the controller currently
  * @wq:			wait queue to sleep on if a NAND operation is in
  *			progress used instead of the per chip wait queue
  *			when a hw controller is available.
  */
-struct nand_hw_control {
+struct nand_controller {
 	spinlock_t lock;
 	struct nand_chip *active;
 	wait_queue_head_t wq;
 };
 
-static inline void nand_hw_control_init(struct nand_hw_control *nfc)
+static inline void nand_controller_init(struct nand_controller *nfc)
 {
 	nfc->active = NULL;
 	spin_lock_init(&nfc->lock);
@@ -1197,7 +1198,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  *			setting the read-retry mode. Mostly needed for MLC NAND.
  * @ecc:		[BOARDSPECIFIC] ECC control structure
  * @buf_align:		minimum buffer alignment required by a platform
- * @hwcontrol:		platform-specific hardware control structure
+ * @dummy_controller:	dummy controller implementation for drivers that can
+ *			only control a single chip
  * @erase:		[REPLACEABLE] erase function
  * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transferring
  *			data from array to read regs (tR).
@@ -1333,11 +1335,11 @@ struct nand_chip {
 	flstate_t state;
 
 	uint8_t *oob_poi;
-	struct nand_hw_control *controller;
+	struct nand_controller *controller;
 
 	struct nand_ecc_ctrl ecc;
 	unsigned long buf_align;
-	struct nand_hw_control hwcontrol;
+	struct nand_controller dummy_controller;
 
 	uint8_t *bbt;
 	struct nand_bbt_descr *bbt_td;
-- 
cgit v1.2.3


From 05b54c7bac906c443fd0b23ab8954e0560b33e5c Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 19 Jul 2018 01:05:46 +0200
Subject: mtd: rawnand: add hooks that may be called during nand_scan()

In order to remove the limitation that forbids dynamic allocation in
nand_scan_ident(), we must create a path that will be the same for all
controller drivers. The idea is to use nand_scan() instead of the widely
used nand_scan_ident()/nand_scan_tail() couple. In order to achieve
this, controller drivers will need to adjust some parameters between
these two functions depending on the NAND chip wired on them.

This takes the form of two new hooks (->{attach,detach}_chip()) that are
placed in a new nand_controller_ops structure, which is then attached
to the nand_controller object at driver initialization time.
->attach_chip() is called between nand_scan_ident() and
nand_scan_tail(), and ->detach_chip() is called in the error path of
nand_scan() and in nand_cleanup().

Note that some NAND controller drivers don't have a dedicated
nand_controller object and instead rely on the default/dummy one
embedded in nand_chip. If you're in this case and still want to
initialize the controller ops, you'll have to manipulate
chip->dummy_controller directly.

Last but not least, it's worth mentioning that we plan to move some of
the controller related hooks placed in nand_chip into
nand_controller_ops to make the separation between NAND chip and NAND
controller methods clearer.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 32 ++++++++++++++++++++++++++++++--
 include/linux/mtd/rawnand.h      | 21 +++++++++++++++++++++
 2 files changed, 51 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index dcdf0f373100..dea41fa25be1 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -6718,6 +6718,20 @@ err_free_buf:
 }
 EXPORT_SYMBOL(nand_scan_tail);
 
+static int nand_attach(struct nand_chip *chip)
+{
+	if (chip->controller->ops && chip->controller->ops->attach_chip)
+		return chip->controller->ops->attach_chip(chip);
+
+	return 0;
+}
+
+static void nand_detach(struct nand_chip *chip)
+{
+	if (chip->controller->ops && chip->controller->ops->detach_chip)
+		chip->controller->ops->detach_chip(chip);
+}
+
 /**
  * nand_scan_with_ids - [NAND Interface] Scan for the NAND device
  * @mtd: MTD device structure
@@ -6731,11 +6745,21 @@ EXPORT_SYMBOL(nand_scan_tail);
 int nand_scan_with_ids(struct mtd_info *mtd, int maxchips,
 		       struct nand_flash_dev *ids)
 {
+	struct nand_chip *chip = mtd_to_nand(mtd);
 	int ret;
 
 	ret = nand_scan_ident(mtd, maxchips, ids);
-	if (!ret)
-		ret = nand_scan_tail(mtd);
+	if (ret)
+		return ret;
+
+	ret = nand_attach(chip);
+	if (ret)
+		return ret;
+
+	ret = nand_scan_tail(mtd);
+	if (ret)
+		nand_detach(chip);
+
 	return ret;
 }
 EXPORT_SYMBOL(nand_scan_with_ids);
@@ -6763,7 +6787,11 @@ void nand_cleanup(struct nand_chip *chip)
 
 	/* Free manufacturer priv data. */
 	nand_manufacturer_cleanup(chip);
+
+	/* Free controller specific allocations after chip identification */
+	nand_detach(chip);
 }
+
 EXPORT_SYMBOL_GPL(nand_cleanup);
 
 /**
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 93a2678e0f0d..fbd6b29cf22c 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -509,6 +509,25 @@ struct nand_id {
 	int len;
 };
 
+/**
+ * struct nand_controller_ops - Controller operations
+ *
+ * @attach_chip: this method is called after the NAND detection phase after
+ *		 flash ID and MTD fields such as erase size, page size and OOB
+ *		 size have been set up. ECC requirements are available if
+ *		 provided by the NAND chip or device tree. Typically used to
+ *		 choose the appropriate ECC configuration and allocate
+ *		 associated resources.
+ *		 This hook is optional.
+ * @detach_chip: free all resources allocated/claimed in
+ *		 nand_controller_ops->attach_chip().
+ *		 This hook is optional.
+ */
+struct nand_controller_ops {
+	int (*attach_chip)(struct nand_chip *chip);
+	void (*detach_chip)(struct nand_chip *chip);
+};
+
 /**
  * struct nand_controller - Structure used to describe a NAND controller
  *
@@ -517,11 +536,13 @@ struct nand_id {
  * @wq:			wait queue to sleep on if a NAND operation is in
  *			progress used instead of the per chip wait queue
  *			when a hw controller is available.
+ * @ops:		NAND controller operations.
  */
 struct nand_controller {
 	spinlock_t lock;
 	struct nand_chip *active;
 	wait_queue_head_t wq;
+	const struct nand_controller_ops *ops;
 };
 
 static inline void nand_controller_init(struct nand_controller *nfc)
-- 
cgit v1.2.3


From 98732da1a08ebb666983d469981a8b994e77d556 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 25 Jul 2018 15:31:50 +0200
Subject: mtd: rawnand: do not export nand_scan_[ident|tail]() anymore

Both nand_scan_ident() and nand_scan_tail() helpers used to be called
directly from controller drivers that needed to tweak some ECC-related
parameters before nand_scan_tail(). This separation prevented dynamic
allocations during the phase of NAND identification, which was
inconvenient.

All controller drivers have been moved to use nand_scan(), in
conjunction with the chip->ecc.[attach|detach]_chip() hooks that
actually do the required tweaking sequence between both ident/tail
calls, allowing programmers to use dynamic allocation as they need all
across the scanning sequence.

Declare nand_scan_[ident|tail]() statically now.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 16 +++++++++-------
 include/linux/mtd/rawnand.h      | 18 ++++++------------
 2 files changed, 15 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 42a7a934a17b..34ea44f90fd8 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5924,7 +5924,7 @@ static int nand_dt_init(struct nand_chip *chip)
 }
 
 /**
- * nand_scan_ident - [NAND Interface] Scan for the NAND device
+ * nand_scan_ident - Scan for the NAND device
  * @mtd: MTD device structure
  * @maxchips: number of chips to scan for
  * @table: alternative NAND ID table
@@ -5932,9 +5932,13 @@ static int nand_dt_init(struct nand_chip *chip)
  * This is the first phase of the normal nand_scan() function. It reads the
  * flash ID and sets up MTD fields accordingly.
  *
+ * This helper used to be called directly from controller drivers that needed
+ * to tweak some ECC-related parameters before nand_scan_tail(). This separation
+ * prevented dynamic allocations during this phase which was unconvenient and
+ * as been banned for the benefit of the ->init_ecc()/cleanup_ecc() hooks.
  */
-int nand_scan_ident(struct mtd_info *mtd, int maxchips,
-		    struct nand_flash_dev *table)
+static int nand_scan_ident(struct mtd_info *mtd, int maxchips,
+			   struct nand_flash_dev *table)
 {
 	int i, nand_maf_id, nand_dev_id;
 	struct nand_chip *chip = mtd_to_nand(mtd);
@@ -6008,7 +6012,6 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 
 	return 0;
 }
-EXPORT_SYMBOL(nand_scan_ident);
 
 static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
 {
@@ -6385,14 +6388,14 @@ static bool nand_ecc_strength_good(struct mtd_info *mtd)
 }
 
 /**
- * nand_scan_tail - [NAND Interface] Scan for the NAND device
+ * nand_scan_tail - Scan for the NAND device
  * @mtd: MTD device structure
  *
  * This is the second phase of the normal nand_scan() function. It fills out
  * all the uninitialized function pointers with the defaults and scans for a
  * bad block table if appropriate.
  */
-int nand_scan_tail(struct mtd_info *mtd)
+static int nand_scan_tail(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
@@ -6716,7 +6719,6 @@ err_free_buf:
 
 	return ret;
 }
-EXPORT_SYMBOL(nand_scan_tail);
 
 static int nand_attach(struct nand_chip *chip)
 {
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index fbd6b29cf22c..71571ed23a20 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -35,17 +35,6 @@ static inline int nand_scan(struct mtd_info *mtd, int max_chips)
 	return nand_scan_with_ids(mtd, max_chips, NULL);
 }
 
-/*
- * Separate phases of nand_scan(), allowing board driver to intervene
- * and override command or ECC setup according to flash type.
- */
-int nand_scan_ident(struct mtd_info *mtd, int max_chips,
-			   struct nand_flash_dev *table);
-int nand_scan_tail(struct mtd_info *mtd);
-
-/* Unregister the MTD device and free resources held by the NAND device */
-void nand_release(struct mtd_info *mtd);
-
 /* Internal helper for board drivers which need to override command function */
 void nand_wait_ready(struct mtd_info *mtd);
 
@@ -1745,8 +1734,13 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len,
 int nand_write_data_op(struct nand_chip *chip, const void *buf,
 		       unsigned int len, bool force_8bit);
 
-/* Free resources held by the NAND device */
+/*
+ * Free resources held by the NAND device, must be called on error after a
+ * sucessful nand_scan().
+ */
 void nand_cleanup(struct nand_chip *chip);
+/* Unregister the MTD device and calls nand_cleanup() */
+void nand_release(struct mtd_info *mtd);
 
 /* Default extended ID decoding function */
 void nand_decode_ext_id(struct nand_chip *chip);
-- 
cgit v1.2.3


From 2023f1fa216f30b1877d65be2057fbaf0bbd49b3 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 25 Jul 2018 15:31:51 +0200
Subject: mtd: rawnand: allocate model parameter dynamically

Thanks to the migration of all drivers to use nand_scan() and the
related nand_controller_ops, we can now allocate data during the
detection phase. Let's do it first for the NAND model parameter which
is allocated in nand_detect().

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 52 +++++++++++++++++++++++++++++++---------
 include/linux/mtd/rawnand.h      |  2 +-
 2 files changed, 42 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 34ea44f90fd8..00e80781124a 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5225,8 +5225,11 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 
 	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
 	sanitize_string(p->model, sizeof(p->model));
-	strncpy(chip->parameters.model, p->model,
-		sizeof(chip->parameters.model) - 1);
+	chip->parameters.model = kstrdup(p->model, GFP_KERNEL);
+	if (!chip->parameters.model) {
+		ret = -ENOMEM;
+		goto free_onfi_param_page;
+	}
 
 	mtd->writesize = le32_to_cpu(p->byte_per_page);
 
@@ -5356,8 +5359,11 @@ static int nand_flash_detect_jedec(struct nand_chip *chip)
 
 	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
 	sanitize_string(p->model, sizeof(p->model));
-	strncpy(chip->parameters.model, p->model,
-		sizeof(chip->parameters.model) - 1);
+	chip->parameters.model = kstrdup(p->model, GFP_KERNEL);
+	if (!chip->parameters.model) {
+		ret = -ENOMEM;
+		goto free_jedec_param_page;
+	}
 
 	mtd->writesize = le32_to_cpu(p->byte_per_page);
 
@@ -5546,8 +5552,9 @@ static bool find_full_id_nand(struct nand_chip *chip,
 		chip->onfi_timing_mode_default =
 					type->onfi_timing_mode_default;
 
-		strncpy(chip->parameters.model, type->name,
-			sizeof(chip->parameters.model) - 1);
+		chip->parameters.model = kstrdup(type->name, GFP_KERNEL);
+		if (!chip->parameters.model)
+			return false;
 
 		return true;
 	}
@@ -5706,8 +5713,9 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 	if (!type->name)
 		return -ENODEV;
 
-	strncpy(chip->parameters.model, type->name,
-		sizeof(chip->parameters.model) - 1);
+	chip->parameters.model = kstrdup(type->name, GFP_KERNEL);
+	if (!chip->parameters.model)
+		return -ENOMEM;
 
 	chip->chipsize = (uint64_t)type->chipsize << 20;
 
@@ -5737,7 +5745,9 @@ ident_done:
 			mtd->name);
 		pr_warn("bus width %d instead of %d bits\n", busw ? 16 : 8,
 			(chip->options & NAND_BUSWIDTH_16) ? 16 : 8);
-		return -EINVAL;
+		ret = -EINVAL;
+
+		goto free_detect_allocation;
 	}
 
 	nand_decode_bbm_options(chip);
@@ -5774,6 +5784,11 @@ ident_done:
 		(int)(chip->chipsize >> 20), nand_is_slc(chip) ? "SLC" : "MLC",
 		mtd->erasesize >> 10, mtd->writesize, mtd->oobsize);
 	return 0;
+
+free_detect_allocation:
+	kfree(chip->parameters.model);
+
+	return ret;
 }
 
 static const char * const nand_ecc_modes[] = {
@@ -6013,6 +6028,11 @@ static int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	return 0;
 }
 
+static void nand_scan_ident_cleanup(struct nand_chip *chip)
+{
+	kfree(chip->parameters.model);
+}
+
 static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
@@ -6760,11 +6780,18 @@ int nand_scan_with_ids(struct mtd_info *mtd, int maxchips,
 
 	ret = nand_attach(chip);
 	if (ret)
-		return ret;
+		goto cleanup_ident;
 
 	ret = nand_scan_tail(mtd);
 	if (ret)
-		nand_detach(chip);
+		goto detach_chip;
+
+	return 0;
+
+detach_chip:
+	nand_detach(chip);
+cleanup_ident:
+	nand_scan_ident_cleanup(chip);
 
 	return ret;
 }
@@ -6796,6 +6823,9 @@ void nand_cleanup(struct nand_chip *chip)
 
 	/* Free controller specific allocations after chip identification */
 	nand_detach(chip);
+
+	/* Free identification phase allocations */
+	nand_scan_ident_cleanup(chip);
 }
 
 EXPORT_SYMBOL_GPL(nand_cleanup);
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 71571ed23a20..099fa166569a 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -476,7 +476,7 @@ struct onfi_params {
  */
 struct nand_parameters {
 	/* Generic parameters */
-	char model[100];
+	const char *model;
 	bool supports_set_get_features;
 	DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER);
 	DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER);
-- 
cgit v1.2.3


From 56e6c104e4f151e19eb410004405ec52b4f8605a Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 31 Jul 2018 13:06:57 +0200
Subject: console: Replace #if 0 with atomic var 'ignore_console_lock_warning'

The macro WARN_CONSOLE_UNLOCKED prints a warning when a thread enters
the console's critical section without having acquired the console
lock. The console lock can be ignored when debugging the console using
printk, but this makes WARN_CONSOLE_UNLOCKED generate unnecessary
warnings.

The variable ignore_console_lock_warning temporarily disables
WARN_CONSOLE_UNLOCKED. Developers interested in debugging the console's
critical sections should increment it before entering the CS and
decrement it after leaving the CS. Setting ignore_console_lock_warning
is only for debugging. Regular operation should not manipulate it.

Acknoledgements: This patch is based on an earlier version by Steven
Rostedt. The use of atomic increment/decrement was suggested by Petr
Mladek.

Link: http://lkml.kernel.org/r/717e6337-e7a6-7a92-1c1b-8929a25696b5@suse.de
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
[b.zolnierkie: use <linux/atomic.h>]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 include/linux/console.h | 14 +++++++++-----
 kernel/printk/printk.c  |  3 +++
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/console.h b/include/linux/console.h
index f59f3dbca65c..ec9bdb3d7bab 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -14,6 +14,7 @@
 #ifndef _LINUX_CONSOLE_H_
 #define _LINUX_CONSOLE_H_ 1
 
+#include <linux/atomic.h>
 #include <linux/types.h>
 
 struct vc_data;
@@ -201,11 +202,14 @@ void vcs_make_sysfs(int index);
 void vcs_remove_sysfs(int index);
 
 /* Some debug stub to catch some of the obvious races in the VT code */
-#if 1
-#define WARN_CONSOLE_UNLOCKED()	WARN_ON(!is_console_locked() && !oops_in_progress)
-#else
-#define WARN_CONSOLE_UNLOCKED()
-#endif
+#define WARN_CONSOLE_UNLOCKED()						\
+	WARN_ON(!atomic_read(&ignore_console_lock_warning) &&		\
+		!is_console_locked() && !oops_in_progress)
+/*
+ * Increment ignore_console_lock_warning if you need to quiet
+ * WARN_CONSOLE_UNLOCKED() for debugging purposes.
+ */
+extern atomic_t ignore_console_lock_warning;
 
 /* VESA Blanking Levels */
 #define VESA_NO_BLANKING        0
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 3f041e7cbfc9..7d32a86758cd 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -66,6 +66,9 @@ int console_printk[4] = {
 	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
 };
 
+atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
+EXPORT_SYMBOL(ignore_console_lock_warning);
+
 /*
  * Low level drivers may need that to know if they can schedule in
  * their unblank() callback or not. So let's export it.
-- 
cgit v1.2.3


From 11785ef53228d23ec386f5fe4a34601536f0c891 Mon Sep 17 00:00:00 2001
From: Jorge Sanjuan <jorge.sanjuan@codethink.co.uk>
Date: Tue, 31 Jul 2018 13:28:42 +0100
Subject: ALSA: usb-audio: Initial Power Domain support

Thee USB Audio Class 3 (UAC3) introduces Power Domains as a new
feature to let a host turn individual parts of an audio function
to different power states via USB requests. This lets the device
get to know a bit amore about what the host is up to in order to
optimize power consumption efficiently.

The Power Domains are optional for UAC3 configuration but all
UAC3 devices shall include at least one BADD configuration where
the support for Power Domains is compulsory.

This patch adds a set of features/helpers to parse these power
domains and change their status.

Signed-off-by: Jorge Sanjuan <jorge.sanjuan@codethink.co.uk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio-v3.h |   4 ++
 sound/usb/Makefile           |   1 +
 sound/usb/power.c            | 104 +++++++++++++++++++++++++++++++++++++++++++
 sound/usb/power.h            |  19 ++++++++
 4 files changed, 128 insertions(+)
 create mode 100644 sound/usb/power.c

(limited to 'include/linux')

diff --git a/include/linux/usb/audio-v3.h b/include/linux/usb/audio-v3.h
index 334bfa6dfb47..6b708434b7f9 100644
--- a/include/linux/usb/audio-v3.h
+++ b/include/linux/usb/audio-v3.h
@@ -447,4 +447,8 @@ struct uac3_interrupt_data_msg {
 /* BADD sample rate is always fixed to 48kHz */
 #define UAC3_BADD_SAMPLING_RATE				48000
 
+/* BADD power domains recovery times in 50us increments */
+#define UAC3_BADD_PD_RECOVER_D1D0			0x0258	/* 30ms */
+#define UAC3_BADD_PD_RECOVER_D2D0			0x1770	/* 300ms */
+
 #endif /* __LINUX_USB_AUDIO_V3_H */
diff --git a/sound/usb/Makefile b/sound/usb/Makefile
index 05440e2df8d9..d330f74c90e6 100644
--- a/sound/usb/Makefile
+++ b/sound/usb/Makefile
@@ -13,6 +13,7 @@ snd-usb-audio-objs := 	card.o \
 			mixer_scarlett.o \
 			mixer_us16x08.o \
 			pcm.o \
+			power.o \
 			proc.o \
 			quirks.o \
 			stream.o
diff --git a/sound/usb/power.c b/sound/usb/power.c
new file mode 100644
index 000000000000..bd303a1ba1b7
--- /dev/null
+++ b/sound/usb/power.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *   UAC3 Power Domain state management functions
+ */
+
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include <linux/usb/audio.h>
+#include <linux/usb/audio-v2.h>
+#include <linux/usb/audio-v3.h>
+
+#include "usbaudio.h"
+#include "helper.h"
+#include "power.h"
+
+struct snd_usb_power_domain *
+snd_usb_find_power_domain(struct usb_host_interface *ctrl_iface,
+			  unsigned char id)
+{
+	struct snd_usb_power_domain *pd;
+	void *p;
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return NULL;
+
+	p = NULL;
+	while ((p = snd_usb_find_csint_desc(ctrl_iface->extra,
+					    ctrl_iface->extralen,
+					    p, UAC3_POWER_DOMAIN)) != NULL) {
+		struct uac3_power_domain_descriptor *pd_desc = p;
+		int i;
+
+		for (i = 0; i < pd_desc->bNrEntities; i++) {
+			if (pd_desc->baEntityID[i] == id) {
+				pd->pd_id = pd_desc->bPowerDomainID;
+				pd->pd_d1d0_rec =
+					le16_to_cpu(pd_desc->waRecoveryTime1);
+				pd->pd_d2d0_rec =
+					le16_to_cpu(pd_desc->waRecoveryTime2);
+				return pd;
+			}
+		}
+	}
+
+	kfree(pd);
+	return NULL;
+}
+
+int snd_usb_power_domain_set(struct snd_usb_audio *chip,
+			     struct snd_usb_power_domain *pd,
+			     unsigned char state)
+{
+	struct usb_device *dev = chip->dev;
+	unsigned char current_state;
+	int err, idx;
+
+	idx = snd_usb_ctrl_intf(chip) | (pd->pd_id << 8);
+
+	err = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0),
+			      UAC2_CS_CUR,
+			      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
+			      UAC3_AC_POWER_DOMAIN_CONTROL << 8, idx,
+			      &current_state, sizeof(current_state));
+	if (err < 0) {
+		dev_err(&dev->dev, "Can't get UAC3 power state for id %d\n",
+			pd->pd_id);
+		return err;
+	}
+
+	if (current_state == state) {
+		dev_dbg(&dev->dev, "UAC3 power domain id %d already in state %d\n",
+			pd->pd_id, state);
+		return 0;
+	}
+
+	err = snd_usb_ctl_msg(chip->dev, usb_sndctrlpipe(chip->dev, 0), UAC2_CS_CUR,
+			      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT,
+			      UAC3_AC_POWER_DOMAIN_CONTROL << 8, idx,
+			      &state, sizeof(state));
+	if (err < 0) {
+		dev_err(&dev->dev, "Can't set UAC3 power state to %d for id %d\n",
+			state, pd->pd_id);
+		return err;
+	}
+
+	if (state == UAC3_PD_STATE_D0) {
+		switch (current_state) {
+		case UAC3_PD_STATE_D2:
+			udelay(pd->pd_d2d0_rec * 50);
+			break;
+		case UAC3_PD_STATE_D1:
+			udelay(pd->pd_d1d0_rec * 50);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	dev_dbg(&dev->dev, "UAC3 power domain id %d change to state %d\n",
+		pd->pd_id, state);
+
+	return 0;
+}
diff --git a/sound/usb/power.h b/sound/usb/power.h
index b2e25f60c5a2..6004231a7c75 100644
--- a/sound/usb/power.h
+++ b/sound/usb/power.h
@@ -2,6 +2,25 @@
 #ifndef __USBAUDIO_POWER_H
 #define __USBAUDIO_POWER_H
 
+struct snd_usb_power_domain {
+	int pd_id;              /* UAC3 Power Domain ID */
+	int pd_d1d0_rec;        /* D1 to D0 recovery time */
+	int pd_d2d0_rec;        /* D2 to D0 recovery time */
+};
+
+enum {
+	UAC3_PD_STATE_D0,
+	UAC3_PD_STATE_D1,
+	UAC3_PD_STATE_D2,
+};
+
+int snd_usb_power_domain_set(struct snd_usb_audio *chip,
+			     struct snd_usb_power_domain *pd,
+			     unsigned char state);
+struct snd_usb_power_domain *
+snd_usb_find_power_domain(struct usb_host_interface *ctrl_iface,
+			  unsigned char id);
+
 #ifdef CONFIG_PM
 int snd_usb_autoresume(struct snd_usb_audio *chip);
 void snd_usb_autosuspend(struct snd_usb_audio *chip);
-- 
cgit v1.2.3


From 08fcf813281ebcf72c69487c1501ad91b7121cdb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 31 Jul 2018 09:10:26 -0600
Subject: t10-pi: provide empty t10_pi_complete() for !CONFIG_BLK_DEV_INTEGRITY

Fixes a link failure whtn BLK_DEV_INTEGRITY isn't defined.

Fixes: 10c41ddd6132 ("block: move dif_prepare/dif_complete functions to block layer")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/t10-pi.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 5a427c289f58..b9626aa7e90c 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -51,8 +51,19 @@ extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
 extern void t10_pi_prepare(struct request *rq, u8 protection_type);
 extern void t10_pi_complete(struct request *rq, u8 protection_type,
 			    unsigned int intervals);
+#else
+static inline void t10_pi_complete(struct request *rq, u8 protection_type,
+				   unsigned int intervals)
+{
+}
+static inline void t10_pi_prepare(struct request *rq, u8 protection_type)
+{
+}
+#endif
 
 #endif
-- 
cgit v1.2.3


From c3bc8fd637a9623f5c507bd18f9677effbddf584 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Mon, 30 Jul 2018 15:24:23 -0700
Subject: tracing: Centralize preemptirq tracepoints and unify their usage

This patch detaches the preemptirq tracepoints from the tracers and
keeps it separate.

Advantages:
* Lockdep and irqsoff event can now run in parallel since they no longer
have their own calls.

* This unifies the usecase of adding hooks to an irqsoff and irqson
event, and a preemptoff and preempton event.
  3 users of the events exist:
  - Lockdep
  - irqsoff and preemptoff tracers
  - irqs and preempt trace events

The unification cleans up several ifdefs and makes the code in preempt
tracer and irqsoff tracers simpler. It gets rid of all the horrific
ifdeferry around PROVE_LOCKING and makes configuration of the different
users of the tracepoints more easy and understandable. It also gets rid
of the time_* function calls from the lockdep hooks used to call into
the preemptirq tracer which is not needed anymore. The negative delta in
lines of code in this patch is quite large too.

In the patch we introduce a new CONFIG option PREEMPTIRQ_TRACEPOINTS
as a single point for registering probes onto the tracepoints. With
this,
the web of config options for preempt/irq toggle tracepoints and its
users becomes:

 PREEMPT_TRACER   PREEMPTIRQ_EVENTS  IRQSOFF_TRACER PROVE_LOCKING
       |                 |     \         |           |
       \    (selects)    /      \        \ (selects) /
      TRACE_PREEMPT_TOGGLE       ----> TRACE_IRQFLAGS
                      \                  /
                       \ (depends on)   /
                     PREEMPTIRQ_TRACEPOINTS

Other than the performance tests mentioned in the previous patch, I also
ran the locking API test suite. I verified that all tests cases are
passing.

I also injected issues by not registering lockdep probes onto the
tracepoints and I see failures to confirm that the probes are indeed
working.

This series + lockdep probes not registered (just to inject errors):
[    0.000000]      hard-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]      soft-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/12:FAILED|FAILED|  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/21:FAILED|FAILED|  ok  |
[    0.000000]          hard-safe-A + irqs-on/12:FAILED|FAILED|  ok  |
[    0.000000]          soft-safe-A + irqs-on/12:FAILED|FAILED|  ok  |
[    0.000000]          hard-safe-A + irqs-on/21:FAILED|FAILED|  ok  |
[    0.000000]          soft-safe-A + irqs-on/21:FAILED|FAILED|  ok  |
[    0.000000]     hard-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
[    0.000000]     soft-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |

With this series + lockdep probes registered, all locking tests pass:

[    0.000000]      hard-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]      soft-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/12:  ok  |  ok  |  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/21:  ok  |  ok  |  ok  |
[    0.000000]          hard-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
[    0.000000]          soft-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
[    0.000000]          hard-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
[    0.000000]          soft-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
[    0.000000]     hard-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
[    0.000000]     soft-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |

Link: http://lkml.kernel.org/r/20180730222423.196630-4-joel@joelfernandes.org

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h            |  11 +-
 include/linux/irqflags.h          |  11 +-
 include/linux/lockdep.h           |   8 +-
 include/linux/preempt.h           |   2 +-
 include/trace/events/preemptirq.h |  23 ++--
 init/main.c                       |   5 +-
 kernel/locking/lockdep.c          |  35 +++---
 kernel/sched/core.c               |   2 +-
 kernel/trace/Kconfig              |  22 +++-
 kernel/trace/Makefile             |   2 +-
 kernel/trace/trace_irqsoff.c      | 231 ++++++++++----------------------------
 kernel/trace/trace_preemptirq.c   |  72 ++++++++++++
 12 files changed, 195 insertions(+), 229 deletions(-)
 create mode 100644 kernel/trace/trace_preemptirq.c

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 63af5eb0ff46..a397907e8d72 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -701,16 +701,7 @@ static inline unsigned long get_lock_parent_ip(void)
 	return CALLER_ADDR2;
 }
 
-#ifdef CONFIG_IRQSOFF_TRACER
-  extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
-  extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
-#else
-  static inline void time_hardirqs_on(unsigned long a0, unsigned long a1) { }
-  static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { }
-#endif
-
-#if defined(CONFIG_PREEMPT_TRACER) || \
-	(defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS))
+#ifdef CONFIG_TRACE_PREEMPT_TOGGLE
   extern void trace_preempt_on(unsigned long a0, unsigned long a1);
   extern void trace_preempt_off(unsigned long a0, unsigned long a1);
 #else
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 9700f00bbc04..50edb9cbbd26 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -15,9 +15,16 @@
 #include <linux/typecheck.h>
 #include <asm/irqflags.h>
 
-#ifdef CONFIG_TRACE_IRQFLAGS
+/* Currently trace_softirqs_on/off is used only by lockdep */
+#ifdef CONFIG_PROVE_LOCKING
   extern void trace_softirqs_on(unsigned long ip);
   extern void trace_softirqs_off(unsigned long ip);
+#else
+# define trace_softirqs_on(ip)	do { } while (0)
+# define trace_softirqs_off(ip)	do { } while (0)
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
   extern void trace_hardirqs_on(void);
   extern void trace_hardirqs_off(void);
 # define trace_hardirq_context(p)	((p)->hardirq_context)
@@ -43,8 +50,6 @@ do {						\
 #else
 # define trace_hardirqs_on()		do { } while (0)
 # define trace_hardirqs_off()		do { } while (0)
-# define trace_softirqs_on(ip)		do { } while (0)
-# define trace_softirqs_off(ip)		do { } while (0)
 # define trace_hardirq_context(p)	0
 # define trace_softirq_context(p)	0
 # define trace_hardirqs_enabled(p)	0
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6fc77d4dbdcd..a8113357ceeb 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -266,7 +266,8 @@ struct held_lock {
 /*
  * Initialization, self-test and debugging-output methods:
  */
-extern void lockdep_info(void);
+extern void lockdep_init(void);
+extern void lockdep_init_early(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
 extern void lockdep_free_key_range(void *start, unsigned long size);
@@ -406,7 +407,8 @@ static inline void lockdep_on(void)
 # define lock_downgrade(l, i)			do { } while (0)
 # define lock_set_class(l, n, k, s, i)		do { } while (0)
 # define lock_set_subclass(l, s, i)		do { } while (0)
-# define lockdep_info()				do { } while (0)
+# define lockdep_init()				do { } while (0)
+# define lockdep_init_early()			do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)
 # define lockdep_set_class(lock, key)		do { (void)(key); } while (0)
@@ -532,7 +534,7 @@ do {								\
 
 #endif /* CONFIG_LOCKDEP */
 
-#ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_PROVE_LOCKING
 extern void print_irqtrace_events(struct task_struct *curr);
 #else
 static inline void print_irqtrace_events(struct task_struct *curr)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 5bd3f151da78..c01813c3fbe9 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -150,7 +150,7 @@
  */
 #define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET)
 
-#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
+#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
 extern void preempt_count_add(int val);
 extern void preempt_count_sub(int val);
 #define preempt_count_dec_and_test() \
diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h
index 9c4eb33c5a1d..9a0d4ceeb166 100644
--- a/include/trace/events/preemptirq.h
+++ b/include/trace/events/preemptirq.h
@@ -1,4 +1,4 @@
-#ifdef CONFIG_PREEMPTIRQ_EVENTS
+#ifdef CONFIG_PREEMPTIRQ_TRACEPOINTS
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM preemptirq
@@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(preemptirq_template,
 		  (void *)((unsigned long)(_stext) + __entry->parent_offs))
 );
 
-#ifndef CONFIG_PROVE_LOCKING
+#ifdef CONFIG_TRACE_IRQFLAGS
 DEFINE_EVENT(preemptirq_template, irq_disable,
 	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
 	     TP_ARGS(ip, parent_ip));
@@ -40,9 +40,14 @@ DEFINE_EVENT(preemptirq_template, irq_disable,
 DEFINE_EVENT(preemptirq_template, irq_enable,
 	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
 	     TP_ARGS(ip, parent_ip));
+#else
+#define trace_irq_enable(...)
+#define trace_irq_disable(...)
+#define trace_irq_enable_rcuidle(...)
+#define trace_irq_disable_rcuidle(...)
 #endif
 
-#ifdef CONFIG_DEBUG_PREEMPT
+#ifdef CONFIG_TRACE_PREEMPT_TOGGLE
 DEFINE_EVENT(preemptirq_template, preempt_disable,
 	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
 	     TP_ARGS(ip, parent_ip));
@@ -50,22 +55,22 @@ DEFINE_EVENT(preemptirq_template, preempt_disable,
 DEFINE_EVENT(preemptirq_template, preempt_enable,
 	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
 	     TP_ARGS(ip, parent_ip));
+#else
+#define trace_preempt_enable(...)
+#define trace_preempt_disable(...)
+#define trace_preempt_enable_rcuidle(...)
+#define trace_preempt_disable_rcuidle(...)
 #endif
 
 #endif /* _TRACE_PREEMPTIRQ_H */
 
 #include <trace/define_trace.h>
 
-#endif /* !CONFIG_PREEMPTIRQ_EVENTS */
-
-#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || defined(CONFIG_PROVE_LOCKING)
+#else /* !CONFIG_PREEMPTIRQ_TRACEPOINTS */
 #define trace_irq_enable(...)
 #define trace_irq_disable(...)
 #define trace_irq_enable_rcuidle(...)
 #define trace_irq_disable_rcuidle(...)
-#endif
-
-#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || !defined(CONFIG_DEBUG_PREEMPT)
 #define trace_preempt_enable(...)
 #define trace_preempt_disable(...)
 #define trace_preempt_enable_rcuidle(...)
diff --git a/init/main.c b/init/main.c
index 3b4ada11ed52..44fe43be84c1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -648,6 +648,9 @@ asmlinkage __visible void __init start_kernel(void)
 	profile_init();
 	call_function_init();
 	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
+
+	lockdep_init_early();
+
 	early_boot_irqs_disabled = false;
 	local_irq_enable();
 
@@ -663,7 +666,7 @@ asmlinkage __visible void __init start_kernel(void)
 		panic("Too many boot %s vars at `%s'", panic_later,
 		      panic_param);
 
-	lockdep_info();
+	lockdep_init();
 
 	/*
 	 * Need to run this when irqs are enabled, because it wants
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index fbbb79d5cfa0..03bfaeb9f4e6 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -55,6 +55,7 @@
 
 #include "lockdep_internals.h"
 
+#include <trace/events/preemptirq.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/lock.h>
 
@@ -2839,10 +2840,9 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
 	debug_atomic_inc(hardirqs_on_events);
 }
 
-__visible void trace_hardirqs_on_caller(unsigned long ip)
+static void lockdep_hardirqs_on(void *none, unsigned long ignore,
+				unsigned long ip)
 {
-	time_hardirqs_on(CALLER_ADDR0, ip);
-
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
 
@@ -2881,23 +2881,15 @@ __visible void trace_hardirqs_on_caller(unsigned long ip)
 	__trace_hardirqs_on_caller(ip);
 	current->lockdep_recursion = 0;
 }
-EXPORT_SYMBOL(trace_hardirqs_on_caller);
-
-void trace_hardirqs_on(void)
-{
-	trace_hardirqs_on_caller(CALLER_ADDR0);
-}
-EXPORT_SYMBOL(trace_hardirqs_on);
 
 /*
  * Hardirqs were disabled:
  */
-__visible void trace_hardirqs_off_caller(unsigned long ip)
+static void lockdep_hardirqs_off(void *none, unsigned long ignore,
+				 unsigned long ip)
 {
 	struct task_struct *curr = current;
 
-	time_hardirqs_off(CALLER_ADDR0, ip);
-
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
 
@@ -2919,13 +2911,6 @@ __visible void trace_hardirqs_off_caller(unsigned long ip)
 	} else
 		debug_atomic_inc(redundant_hardirqs_off);
 }
-EXPORT_SYMBOL(trace_hardirqs_off_caller);
-
-void trace_hardirqs_off(void)
-{
-	trace_hardirqs_off_caller(CALLER_ADDR0);
-}
-EXPORT_SYMBOL(trace_hardirqs_off);
 
 /*
  * Softirqs will be enabled:
@@ -4330,7 +4315,15 @@ out_restore:
 	raw_local_irq_restore(flags);
 }
 
-void __init lockdep_info(void)
+void __init lockdep_init_early(void)
+{
+#ifdef CONFIG_PROVE_LOCKING
+	register_trace_prio_irq_disable(lockdep_hardirqs_off, NULL, INT_MAX);
+	register_trace_prio_irq_enable(lockdep_hardirqs_on, NULL, INT_MIN);
+#endif
+}
+
+void __init lockdep_init(void)
 {
 	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe365c9a08e9..5de1a4343424 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3189,7 +3189,7 @@ static inline void sched_tick_stop(int cpu) { }
 #endif
 
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-				defined(CONFIG_PREEMPT_TRACER))
+				defined(CONFIG_TRACE_PREEMPT_TOGGLE))
 /*
  * If the value passed in is equal to the current preempt count
  * then we just disabled preemption. Start timing the latency.
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4d4eb15cc7fd..036cec1fcd24 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,6 +82,15 @@ config RING_BUFFER_ALLOW_SWAP
 	 Allow the use of ring_buffer_swap_cpu.
 	 Adds a very slight overhead to tracing when enabled.
 
+config PREEMPTIRQ_TRACEPOINTS
+	bool
+	depends on TRACE_PREEMPT_TOGGLE || TRACE_IRQFLAGS
+	select TRACING
+	default y
+	help
+	  Create preempt/irq toggle tracepoints if needed, so that other parts
+	  of the kernel can use them to generate or add hooks to them.
+
 # All tracer options should select GENERIC_TRACER. For those options that are
 # enabled by all tracers (context switch and event tracer) they select TRACING.
 # This allows those options to appear when no other tracer is selected. But the
@@ -155,18 +164,20 @@ config FUNCTION_GRAPH_TRACER
 	  the return value. This is done by setting the current return
 	  address on the current task structure into a stack of calls.
 
+config TRACE_PREEMPT_TOGGLE
+	bool
+	help
+	  Enables hooks which will be called when preemption is first disabled,
+	  and last enabled.
 
 config PREEMPTIRQ_EVENTS
 	bool "Enable trace events for preempt and irq disable/enable"
 	select TRACE_IRQFLAGS
-	depends on DEBUG_PREEMPT || !PROVE_LOCKING
-	depends on TRACING
+	select TRACE_PREEMPT_TOGGLE if PREEMPT
+	select GENERIC_TRACER
 	default n
 	help
 	  Enable tracing of disable and enable events for preemption and irqs.
-	  For tracing preempt disable/enable events, DEBUG_PREEMPT must be
-	  enabled. For tracing irq disable/enable events, PROVE_LOCKING must
-	  be disabled.
 
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
@@ -203,6 +214,7 @@ config PREEMPT_TRACER
 	select RING_BUFFER_ALLOW_SWAP
 	select TRACER_SNAPSHOT
 	select TRACER_SNAPSHOT_PER_CPU_SWAP
+	select TRACE_PREEMPT_TOGGLE
 	help
 	  This option measures the time spent in preemption-off critical
 	  sections, with microsecond accuracy.
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 81902a79e049..98d53b39a8ee 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_TRACING_MAP) += tracing_map.o
 obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
-obj-$(CONFIG_PREEMPTIRQ_EVENTS) += trace_irqsoff.o
+obj-$(CONFIG_PREEMPTIRQ_TRACEPOINTS) += trace_preemptirq.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index f8daa754cce2..770cd30cda40 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -16,7 +16,6 @@
 
 #include "trace.h"
 
-#define CREATE_TRACE_POINTS
 #include <trace/events/preemptirq.h>
 
 #if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER)
@@ -450,66 +449,6 @@ void stop_critical_timings(void)
 }
 EXPORT_SYMBOL_GPL(stop_critical_timings);
 
-#ifdef CONFIG_IRQSOFF_TRACER
-#ifdef CONFIG_PROVE_LOCKING
-void time_hardirqs_on(unsigned long a0, unsigned long a1)
-{
-	if (!preempt_trace() && irq_trace())
-		stop_critical_timing(a0, a1);
-}
-
-void time_hardirqs_off(unsigned long a0, unsigned long a1)
-{
-	if (!preempt_trace() && irq_trace())
-		start_critical_timing(a0, a1);
-}
-
-#else /* !CONFIG_PROVE_LOCKING */
-
-/*
- * We are only interested in hardirq on/off events:
- */
-static inline void tracer_hardirqs_on(void)
-{
-	if (!preempt_trace() && irq_trace())
-		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
-}
-
-static inline void tracer_hardirqs_off(void)
-{
-	if (!preempt_trace() && irq_trace())
-		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
-}
-
-static inline void tracer_hardirqs_on_caller(unsigned long caller_addr)
-{
-	if (!preempt_trace() && irq_trace())
-		stop_critical_timing(CALLER_ADDR0, caller_addr);
-}
-
-static inline void tracer_hardirqs_off_caller(unsigned long caller_addr)
-{
-	if (!preempt_trace() && irq_trace())
-		start_critical_timing(CALLER_ADDR0, caller_addr);
-}
-
-#endif /* CONFIG_PROVE_LOCKING */
-#endif /*  CONFIG_IRQSOFF_TRACER */
-
-#ifdef CONFIG_PREEMPT_TRACER
-static inline void tracer_preempt_on(unsigned long a0, unsigned long a1)
-{
-	if (preempt_trace() && !irq_trace())
-		stop_critical_timing(a0, a1);
-}
-
-static inline void tracer_preempt_off(unsigned long a0, unsigned long a1)
-{
-	if (preempt_trace() && !irq_trace())
-		start_critical_timing(a0, a1);
-}
-#endif /* CONFIG_PREEMPT_TRACER */
-
 #ifdef CONFIG_FUNCTION_TRACER
 static bool function_enabled;
 
@@ -659,15 +598,34 @@ static void irqsoff_tracer_stop(struct trace_array *tr)
 }
 
 #ifdef CONFIG_IRQSOFF_TRACER
+/*
+ * We are only interested in hardirq on/off events:
+ */
+static void tracer_hardirqs_on(void *none, unsigned long a0, unsigned long a1)
+{
+	if (!preempt_trace() && irq_trace())
+		stop_critical_timing(a0, a1);
+}
+
+static void tracer_hardirqs_off(void *none, unsigned long a0, unsigned long a1)
+{
+	if (!preempt_trace() && irq_trace())
+		start_critical_timing(a0, a1);
+}
+
 static int irqsoff_tracer_init(struct trace_array *tr)
 {
 	trace_type = TRACER_IRQS_OFF;
 
+	register_trace_irq_disable(tracer_hardirqs_off, NULL);
+	register_trace_irq_enable(tracer_hardirqs_on, NULL);
 	return __irqsoff_tracer_init(tr);
 }
 
 static void irqsoff_tracer_reset(struct trace_array *tr)
 {
+	unregister_trace_irq_disable(tracer_hardirqs_off, NULL);
+	unregister_trace_irq_enable(tracer_hardirqs_on, NULL);
 	__irqsoff_tracer_reset(tr);
 }
 
@@ -690,21 +648,34 @@ static struct tracer irqsoff_tracer __read_mostly =
 	.allow_instances = true,
 	.use_max_tr	= true,
 };
-# define register_irqsoff(trace) register_tracer(&trace)
-#else
-# define register_irqsoff(trace) do { } while (0)
-#endif
+#endif /*  CONFIG_IRQSOFF_TRACER */
 
 #ifdef CONFIG_PREEMPT_TRACER
+static void tracer_preempt_on(void *none, unsigned long a0, unsigned long a1)
+{
+	if (preempt_trace() && !irq_trace())
+		stop_critical_timing(a0, a1);
+}
+
+static void tracer_preempt_off(void *none, unsigned long a0, unsigned long a1)
+{
+	if (preempt_trace() && !irq_trace())
+		start_critical_timing(a0, a1);
+}
+
 static int preemptoff_tracer_init(struct trace_array *tr)
 {
 	trace_type = TRACER_PREEMPT_OFF;
 
+	register_trace_preempt_disable(tracer_preempt_off, NULL);
+	register_trace_preempt_enable(tracer_preempt_on, NULL);
 	return __irqsoff_tracer_init(tr);
 }
 
 static void preemptoff_tracer_reset(struct trace_array *tr)
 {
+	unregister_trace_preempt_disable(tracer_preempt_off, NULL);
+	unregister_trace_preempt_enable(tracer_preempt_on, NULL);
 	__irqsoff_tracer_reset(tr);
 }
 
@@ -727,23 +698,29 @@ static struct tracer preemptoff_tracer __read_mostly =
 	.allow_instances = true,
 	.use_max_tr	= true,
 };
-# define register_preemptoff(trace) register_tracer(&trace)
-#else
-# define register_preemptoff(trace) do { } while (0)
-#endif
+#endif /* CONFIG_PREEMPT_TRACER */
 
-#if defined(CONFIG_IRQSOFF_TRACER) && \
-	defined(CONFIG_PREEMPT_TRACER)
+#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
 
 static int preemptirqsoff_tracer_init(struct trace_array *tr)
 {
 	trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
 
+	register_trace_irq_disable(tracer_hardirqs_off, NULL);
+	register_trace_irq_enable(tracer_hardirqs_on, NULL);
+	register_trace_preempt_disable(tracer_preempt_off, NULL);
+	register_trace_preempt_enable(tracer_preempt_on, NULL);
+
 	return __irqsoff_tracer_init(tr);
 }
 
 static void preemptirqsoff_tracer_reset(struct trace_array *tr)
 {
+	unregister_trace_irq_disable(tracer_hardirqs_off, NULL);
+	unregister_trace_irq_enable(tracer_hardirqs_on, NULL);
+	unregister_trace_preempt_disable(tracer_preempt_off, NULL);
+	unregister_trace_preempt_enable(tracer_preempt_on, NULL);
+
 	__irqsoff_tracer_reset(tr);
 }
 
@@ -766,115 +743,21 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
 	.allow_instances = true,
 	.use_max_tr	= true,
 };
-
-# define register_preemptirqsoff(trace) register_tracer(&trace)
-#else
-# define register_preemptirqsoff(trace) do { } while (0)
 #endif
 
 __init static int init_irqsoff_tracer(void)
 {
-	register_irqsoff(irqsoff_tracer);
-	register_preemptoff(preemptoff_tracer);
-	register_preemptirqsoff(preemptirqsoff_tracer);
-
-	return 0;
-}
-core_initcall(init_irqsoff_tracer);
-#endif /* IRQSOFF_TRACER || PREEMPTOFF_TRACER */
-
-#ifndef CONFIG_IRQSOFF_TRACER
-static inline void tracer_hardirqs_on(void) { }
-static inline void tracer_hardirqs_off(void) { }
-static inline void tracer_hardirqs_on_caller(unsigned long caller_addr) { }
-static inline void tracer_hardirqs_off_caller(unsigned long caller_addr) { }
+#ifdef CONFIG_IRQSOFF_TRACER
+	register_tracer(&irqsoff_tracer);
 #endif
-
-#ifndef CONFIG_PREEMPT_TRACER
-static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { }
-static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { }
+#ifdef CONFIG_PREEMPT_TRACER
+	register_tracer(&preemptoff_tracer);
 #endif
-
-#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PROVE_LOCKING)
-/* Per-cpu variable to prevent redundant calls when IRQs already off */
-static DEFINE_PER_CPU(int, tracing_irq_cpu);
-
-void trace_hardirqs_on(void)
-{
-	if (!this_cpu_read(tracing_irq_cpu))
-		return;
-
-	trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
-	tracer_hardirqs_on();
-
-	this_cpu_write(tracing_irq_cpu, 0);
-}
-EXPORT_SYMBOL(trace_hardirqs_on);
-
-void trace_hardirqs_off(void)
-{
-	if (this_cpu_read(tracing_irq_cpu))
-		return;
-
-	this_cpu_write(tracing_irq_cpu, 1);
-
-	trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
-	tracer_hardirqs_off();
-}
-EXPORT_SYMBOL(trace_hardirqs_off);
-
-__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
-{
-	if (!this_cpu_read(tracing_irq_cpu))
-		return;
-
-	trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
-	tracer_hardirqs_on_caller(caller_addr);
-
-	this_cpu_write(tracing_irq_cpu, 0);
-}
-EXPORT_SYMBOL(trace_hardirqs_on_caller);
-
-__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
-{
-	if (this_cpu_read(tracing_irq_cpu))
-		return;
-
-	this_cpu_write(tracing_irq_cpu, 1);
-
-	trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
-	tracer_hardirqs_off_caller(caller_addr);
-}
-EXPORT_SYMBOL(trace_hardirqs_off_caller);
-
-/*
- * Stubs:
- */
-
-void trace_softirqs_on(unsigned long ip)
-{
-}
-
-void trace_softirqs_off(unsigned long ip)
-{
-}
-
-inline void print_irqtrace_events(struct task_struct *curr)
-{
-}
+#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
+	register_tracer(&preemptirqsoff_tracer);
 #endif
 
-#if defined(CONFIG_PREEMPT_TRACER) || \
-	(defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS))
-void trace_preempt_on(unsigned long a0, unsigned long a1)
-{
-	trace_preempt_enable_rcuidle(a0, a1);
-	tracer_preempt_on(a0, a1);
-}
-
-void trace_preempt_off(unsigned long a0, unsigned long a1)
-{
-	trace_preempt_disable_rcuidle(a0, a1);
-	tracer_preempt_off(a0, a1);
+	return 0;
 }
-#endif
+core_initcall(init_irqsoff_tracer);
+#endif /* IRQSOFF_TRACER || PREEMPTOFF_TRACER */
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
new file mode 100644
index 000000000000..e76b78bf258e
--- /dev/null
+++ b/kernel/trace/trace_preemptirq.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * preemptoff and irqoff tracepoints
+ *
+ * Copyright (C) Joel Fernandes (Google) <joel@joelfernandes.org>
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/preemptirq.h>
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+/* Per-cpu variable to prevent redundant calls when IRQs already off */
+static DEFINE_PER_CPU(int, tracing_irq_cpu);
+
+void trace_hardirqs_on(void)
+{
+	if (!this_cpu_read(tracing_irq_cpu))
+		return;
+
+	trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+	this_cpu_write(tracing_irq_cpu, 0);
+}
+EXPORT_SYMBOL(trace_hardirqs_on);
+
+void trace_hardirqs_off(void)
+{
+	if (this_cpu_read(tracing_irq_cpu))
+		return;
+
+	this_cpu_write(tracing_irq_cpu, 1);
+	trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL(trace_hardirqs_off);
+
+__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
+{
+	if (!this_cpu_read(tracing_irq_cpu))
+		return;
+
+	trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
+	this_cpu_write(tracing_irq_cpu, 0);
+}
+EXPORT_SYMBOL(trace_hardirqs_on_caller);
+
+__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
+{
+	if (this_cpu_read(tracing_irq_cpu))
+		return;
+
+	this_cpu_write(tracing_irq_cpu, 1);
+	trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_off_caller);
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+#ifdef CONFIG_TRACE_PREEMPT_TOGGLE
+
+void trace_preempt_on(unsigned long a0, unsigned long a1)
+{
+	trace_preempt_enable_rcuidle(a0, a1);
+}
+
+void trace_preempt_off(unsigned long a0, unsigned long a1)
+{
+	trace_preempt_disable_rcuidle(a0, a1);
+}
+#endif
-- 
cgit v1.2.3


From 44bda4b7d26e9fffed6d7152d98a2e9edaeb2a76 Mon Sep 17 00:00:00 2001
From: Hari Vyas <hari.vyas@broadcom.com>
Date: Tue, 3 Jul 2018 14:35:41 +0530
Subject: PCI: Fix is_added/is_busmaster race condition

When a PCI device is detected, pdev->is_added is set to 1 and proc and
sysfs entries are created.

When the device is removed, pdev->is_added is checked for one and then
device is detached with clearing of proc and sys entries and at end,
pdev->is_added is set to 0.

is_added and is_busmaster are bit fields in pci_dev structure sharing same
memory location.

A strange issue was observed with multiple removal and rescan of a PCIe
NVMe device using sysfs commands where is_added flag was observed as zero
instead of one while removing device and proc,sys entries are not cleared.
This causes issue in later device addition with warning message
"proc_dir_entry" already registered.

Debugging revealed a race condition between the PCI core setting the
is_added bit in pci_bus_add_device() and the NVMe driver reset work-queue
setting the is_busmaster bit in pci_set_master().  As these fields are not
handled atomically, that clears the is_added bit.

Move the is_added bit to a separate private flag variable and use atomic
functions to set and retrieve the device addition state.  This avoids the
race because is_added no longer shares a memory location with is_busmaster.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=200283
Signed-off-by: Hari Vyas <hari.vyas@broadcom.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/pci-common.c          |  4 +++-
 arch/powerpc/platforms/powernv/pci-ioda.c |  3 ++-
 arch/powerpc/platforms/pseries/setup.c    |  3 ++-
 drivers/pci/bus.c                         |  6 +++---
 drivers/pci/hotplug/acpiphp_glue.c        |  2 +-
 drivers/pci/pci.h                         | 11 +++++++++++
 drivers/pci/probe.c                       |  4 ++--
 drivers/pci/remove.c                      |  5 +++--
 include/linux/pci.h                       |  1 -
 9 files changed, 27 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index fe9733ffffaa..471aac313b89 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -42,6 +42,8 @@
 #include <asm/ppc-pci.h>
 #include <asm/eeh.h>
 
+#include "../../../drivers/pci/pci.h"
+
 /* hose_spinlock protects accesses to the the phb_bitmap. */
 static DEFINE_SPINLOCK(hose_spinlock);
 LIST_HEAD(hose_list);
@@ -1014,7 +1016,7 @@ void pcibios_setup_bus_devices(struct pci_bus *bus)
 		/* Cardbus can call us to add new devices to a bus, so ignore
 		 * those who are already fully discovered
 		 */
-		if (dev->is_added)
+		if (pci_dev_is_added(dev))
 			continue;
 
 		pcibios_setup_device(dev);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5bd0eb6681bc..70b2e1e0f23c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -46,6 +46,7 @@
 
 #include "powernv.h"
 #include "pci.h"
+#include "../../../../drivers/pci/pci.h"
 
 #define PNV_IODA1_M64_NUM	16	/* Number of M64 BARs	*/
 #define PNV_IODA1_M64_SEGS	8	/* Segments per M64 BAR	*/
@@ -3138,7 +3139,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 	struct pci_dn *pdn;
 	int mul, total_vfs;
 
-	if (!pdev->is_physfn || pdev->is_added)
+	if (!pdev->is_physfn || pci_dev_is_added(pdev))
 		return;
 
 	pdn = pci_get_pdn(pdev);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 139f0af6c3d9..8a4868a3964b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -71,6 +71,7 @@
 #include <asm/security_features.h>
 
 #include "pseries.h"
+#include "../../../../drivers/pci/pci.h"
 
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;
@@ -664,7 +665,7 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
 	const int *indexes;
 	struct device_node *dn = pci_device_to_OF_node(pdev);
 
-	if (!pdev->is_physfn || pdev->is_added)
+	if (!pdev->is_physfn || pci_dev_is_added(pdev))
 		return;
 	/*Firmware must support open sriov otherwise dont configure*/
 	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 35b7fc87eac5..5cb40b2518f9 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -330,7 +330,7 @@ void pci_bus_add_device(struct pci_dev *dev)
 		return;
 	}
 
-	dev->is_added = 1;
+	pci_dev_assign_added(dev, true);
 }
 EXPORT_SYMBOL_GPL(pci_bus_add_device);
 
@@ -347,14 +347,14 @@ void pci_bus_add_devices(const struct pci_bus *bus)
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Skip already-added devices */
-		if (dev->is_added)
+		if (pci_dev_is_added(dev))
 			continue;
 		pci_bus_add_device(dev);
 	}
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Skip if device attach failed */
-		if (!dev->is_added)
+		if (!pci_dev_is_added(dev))
 			continue;
 		child = dev->subordinate;
 		if (child)
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 3a17b290df5d..ef0b1b6ba86f 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -509,7 +509,7 @@ static void enable_slot(struct acpiphp_slot *slot)
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Assume that newly added devices are powered on already. */
-		if (!dev->is_added)
+		if (!pci_dev_is_added(dev))
 			dev->current_state = PCI_D0;
 	}
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 882f1f9596df..08817253c8a2 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -288,6 +288,7 @@ struct pci_sriov {
 
 /* pci_dev priv_flags */
 #define PCI_DEV_DISCONNECTED 0
+#define PCI_DEV_ADDED 1
 
 static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
 {
@@ -300,6 +301,16 @@ static inline bool pci_dev_is_disconnected(const struct pci_dev *dev)
 	return test_bit(PCI_DEV_DISCONNECTED, &dev->priv_flags);
 }
 
+static inline void pci_dev_assign_added(struct pci_dev *dev, bool added)
+{
+	assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added);
+}
+
+static inline bool pci_dev_is_added(const struct pci_dev *dev)
+{
+	return test_bit(PCI_DEV_ADDED, &dev->priv_flags);
+}
+
 #ifdef CONFIG_PCI_ATS
 void pci_restore_ats_state(struct pci_dev *dev);
 #else
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac876e32de4b..611adcd9c169 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2433,13 +2433,13 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
 	dev = pci_scan_single_device(bus, devfn);
 	if (!dev)
 		return 0;
-	if (!dev->is_added)
+	if (!pci_dev_is_added(dev))
 		nr++;
 
 	for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) {
 		dev = pci_scan_single_device(bus, devfn + fn);
 		if (dev) {
-			if (!dev->is_added)
+			if (!pci_dev_is_added(dev))
 				nr++;
 			dev->multifunction = 1;
 		}
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 6f072eae4f7a..5e3d0dced2b8 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -19,11 +19,12 @@ static void pci_stop_dev(struct pci_dev *dev)
 {
 	pci_pme_active(dev, false);
 
-	if (dev->is_added) {
+	if (pci_dev_is_added(dev)) {
 		device_release_driver(&dev->dev);
 		pci_proc_detach_device(dev);
 		pci_remove_sysfs_dev_files(dev);
-		dev->is_added = 0;
+
+		pci_dev_assign_added(dev, false);
 	}
 
 	if (dev->bus->self)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index abd5d5e17aee..c133ccfa002e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -368,7 +368,6 @@ struct pci_dev {
 	unsigned int	transparent:1;		/* Subtractive decode bridge */
 	unsigned int	multifunction:1;	/* Multi-function device */
 
-	unsigned int	is_added:1;
 	unsigned int	is_busmaster:1;		/* Is busmaster */
 	unsigned int	no_msi:1;		/* May not use MSI */
 	unsigned int	no_64bit_msi:1; 	/* May only use 32-bit MSIs */
-- 
cgit v1.2.3


From c29c2ebd2ae0066c026045a21aa33ccbfcd8bb3c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 30 Jul 2018 20:43:51 -0700
Subject: net: update real_num_rx_queues even when !CONFIG_SYSFS

We used to depend on real_num_rx_queues as a upper bound for sanity
checks.  For AF_XDP socket validation it's useful if the check behaves
the same regardless of CONFIG_SYSFS setting.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9c917467a2c7..3bf7e93c9e96 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3431,8 +3431,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
 #else
 static inline int netif_set_real_num_rx_queues(struct net_device *dev,
-						unsigned int rxq)
+						unsigned int rxqs)
 {
+	dev->real_num_rx_queues = rxqs;
 	return 0;
 }
 #endif
-- 
cgit v1.2.3


From 84c6b86875e01a08a0daa6fdd4a01b36bf0bf0b2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 30 Jul 2018 20:43:53 -0700
Subject: xsk: don't allow umem replace at stack level
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently drivers have to check if they already have a umem
installed for a given queue and return an error if so.  Make
better use of XDP_QUERY_XSK_UMEM and move this functionality
to the core.

We need to keep rtnl across the calls now.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Björn Töpel <bjorn.topel@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 ++++---
 net/xdp/xdp_umem.c        | 37 ++++++++++++++++++++++++++++---------
 2 files changed, 32 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3bf7e93c9e96..282e2e95ad5b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -872,10 +872,10 @@ struct netdev_bpf {
 		struct {
 			struct bpf_offloaded_map *offmap;
 		};
-		/* XDP_SETUP_XSK_UMEM */
+		/* XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM */
 		struct {
-			struct xdp_umem *umem;
-			u16 queue_id;
+			struct xdp_umem *umem; /* out for query*/
+			u16 queue_id; /* in for query */
 		} xsk;
 	};
 };
@@ -3568,6 +3568,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags);
 u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
 		    enum bpf_netdev_command cmd);
+int xdp_umem_query(struct net_device *dev, u16 queue_id);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index c199d66b5f3f..911ca6d3cb5a 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -11,6 +11,8 @@
 #include <linux/slab.h>
 #include <linux/bpf.h>
 #include <linux/mm.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
 
 #include "xdp_umem.h"
 #include "xsk_queue.h"
@@ -40,6 +42,21 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 	}
 }
 
+int xdp_umem_query(struct net_device *dev, u16 queue_id)
+{
+	struct netdev_bpf bpf;
+
+	ASSERT_RTNL();
+
+	memset(&bpf, 0, sizeof(bpf));
+	bpf.command = XDP_QUERY_XSK_UMEM;
+	bpf.xsk.queue_id = queue_id;
+
+	if (!dev->netdev_ops->ndo_bpf)
+		return 0;
+	return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
+}
+
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 			u32 queue_id, u16 flags)
 {
@@ -62,28 +79,30 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 	bpf.command = XDP_QUERY_XSK_UMEM;
 
 	rtnl_lock();
-	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
-	rtnl_unlock();
-
-	if (err)
-		return force_zc ? -ENOTSUPP : 0;
+	err = xdp_umem_query(dev, queue_id);
+	if (err) {
+		err = err < 0 ? -ENOTSUPP : -EBUSY;
+		goto err_rtnl_unlock;
+	}
 
 	bpf.command = XDP_SETUP_XSK_UMEM;
 	bpf.xsk.umem = umem;
 	bpf.xsk.queue_id = queue_id;
 
-	rtnl_lock();
 	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
-	rtnl_unlock();
-
 	if (err)
-		return force_zc ? err : 0; /* fail or fallback */
+		goto err_rtnl_unlock;
+	rtnl_unlock();
 
 	dev_hold(dev);
 	umem->dev = dev;
 	umem->queue_id = queue_id;
 	umem->zc = true;
 	return 0;
+
+err_rtnl_unlock:
+	rtnl_unlock();
+	return force_zc ? err : 0; /* fail or fallback */
 }
 
 static void xdp_umem_clear_dev(struct xdp_umem *umem)
-- 
cgit v1.2.3


From 016583d7030cec9b69e0d55269a5967f4ee871d2 Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Tue, 31 Jul 2018 10:10:51 -0400
Subject: sunrpc: Change rpc_print_iostats to rpc_clnt_show_stats and handle
 rpc_clnt clones

The existing rpc_print_iostats has a few shortcomings.  First, the naming
is not consistent with other functions in the kernel that display stats.
Second, it is really displaying stats for an rpc_clnt structure as it
displays both xprt stats and per-op stats.  Third, it does not handle
rpc_clnt clones, which is important for the one in-kernel tree caller
of this function, the NFS client's nfs_show_stats function.

Fix all of the above by renaming the rpc_print_iostats to
rpc_clnt_show_stats and looping through any rpc_clnt clones via
cl_parent.

Once this interface is fixed, this addresses a problem with NFSv4.
Before this patch, the /proc/self/mountstats always showed incorrect
counts for NFSv4 lease and session related opcodes such as SEQUENCE,
RENEW, SETCLIENTID, CREATE_SESSION, etc.  These counts were always 0
even though many ops would go over the wire.  The reason for this is
there are multiple rpc_clnt structures allocated for any given NFSv4
mount, and inside nfs_show_stats() we callled into rpc_print_iostats()
which only handled one of them, nfs_server->client.  Fix these counts
by calling sunrpc's new rpc_clnt_show_stats() function, which handles
cloned rpc_clnt structs and prints the stats together.

Note that one side-effect of the above is that multiple mounts from
the same NFS server will show identical counts in the above ops due
to the fact the one rpc_clnt (representing the NFSv4 client state)
is shared across mounts.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/super.c                 |  2 +-
 include/linux/sunrpc/metrics.h |  4 ++--
 net/sunrpc/stats.c             | 17 ++++++++++++-----
 3 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 5e470e233c83..bdf39fa1bfbc 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -884,7 +884,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
 #endif
 	seq_printf(m, "\n");
 
-	rpc_print_iostats(m, nfss->client);
+	rpc_clnt_show_stats(m, nfss->client);
 
 	return 0;
 }
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
index 9baed7b355b2..1b3751327575 100644
--- a/include/linux/sunrpc/metrics.h
+++ b/include/linux/sunrpc/metrics.h
@@ -82,7 +82,7 @@ void			rpc_count_iostats(const struct rpc_task *,
 					  struct rpc_iostats *);
 void			rpc_count_iostats_metrics(const struct rpc_task *,
 					  struct rpc_iostats *);
-void			rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
+void			rpc_clnt_show_stats(struct seq_file *, struct rpc_clnt *);
 void			rpc_free_iostats(struct rpc_iostats *);
 
 #else  /*  CONFIG_PROC_FS  */
@@ -95,7 +95,7 @@ static inline void rpc_count_iostats_metrics(const struct rpc_task *task,
 {
 }
 
-static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {}
+static inline void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) {}
 static inline void rpc_free_iostats(struct rpc_iostats *stats) {}
 
 #endif  /*  CONFIG_PROC_FS  */
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 32adddd7fb78..ffae7c2245b1 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -235,13 +235,12 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
 		   ktime_to_ms(stats->om_execute));
 }
 
-void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
+void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
 {
-	struct rpc_iostats *stats = clnt->cl_metrics;
 	struct rpc_xprt *xprt;
 	unsigned int op, maxproc = clnt->cl_maxproc;
 
-	if (!stats)
+	if (!clnt->cl_metrics)
 		return;
 
 	seq_printf(seq, "\tRPC iostats version: %s  ", RPC_IOSTATS_VERS);
@@ -256,10 +255,18 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
 
 	seq_printf(seq, "\tper-op statistics\n");
 	for (op = 0; op < maxproc; op++) {
-		_print_rpc_iostats(seq, &stats[op], op, clnt->cl_procinfo);
+		struct rpc_iostats stats = {};
+		struct rpc_clnt *next = clnt;
+		do {
+			_add_rpc_iostats(&stats, &next->cl_metrics[op]);
+			if (next == next->cl_parent)
+				break;
+			next = next->cl_parent;
+		} while (next);
+		_print_rpc_iostats(seq, &stats, op, clnt->cl_procinfo);
 	}
 }
-EXPORT_SYMBOL_GPL(rpc_print_iostats);
+EXPORT_SYMBOL_GPL(rpc_clnt_show_stats);
 
 /*
  * Register/unregister RPC proc files
-- 
cgit v1.2.3


From 0f90be132cbf1537d87a6a8b9e80867adac892f6 Mon Sep 17 00:00:00 2001
From: Bill Baker <Bill.Baker@Oracle.com>
Date: Tue, 19 Jun 2018 16:24:58 -0500
Subject: NFSv4 client live hangs after live data migration recovery

After a live data migration event at the NFS server, the client may send
I/O requests to the wrong server, causing a live hang due to repeated
recovery events.  On the wire, this will appear as an I/O request failing
with NFS4ERR_BADSESSION, followed by successful CREATE_SESSION, repeatedly.
NFS4ERR_BADSSESSION is returned because the session ID being used was
issued by the other server and is not valid at the old server.

The failure is caused by async worker threads having cached the transport
(xprt) in the rpc_task structure.  After the migration recovery completes,
the task is redispatched and the task resends the request to the wrong
server based on the old value still present in tk_xprt.

The solution is to recompute the tk_xprt field of the rpc_task structure
so that the request goes to the correct server.

Signed-off-by: Bill Baker <bill.baker@oracle.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Helen Chao <helen.chao@oracle.com>
Fixes: fb43d17210ba ("SUNRPC: Use the multipath iterator to assign a ...")
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c           |  9 ++++++++-
 include/linux/sunrpc/clnt.h |  1 +
 net/sunrpc/clnt.c           | 28 ++++++++++++++++++++--------
 3 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3b28c0ac10bc..bddba460643a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -581,8 +581,15 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
 		ret = -EIO;
 	return ret;
 out_retry:
-	if (ret == 0)
+	if (ret == 0) {
 		exception->retry = 1;
+		/*
+		 * For NFS4ERR_MOVED, the client transport will need to
+		 * be recomputed after migration recovery has completed.
+		 */
+		if (errorcode == -NFS4ERR_MOVED)
+			rpc_task_release_transport(task);
+	}
 	return ret;
 }
 
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 9b11b6a0978c..73d5c4a870fa 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -156,6 +156,7 @@ int		rpc_switch_client_transport(struct rpc_clnt *,
 
 void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
+void		rpc_task_release_transport(struct rpc_task *);
 void		rpc_task_release_client(struct rpc_task *);
 
 int		rpcb_create_local(struct net *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d839c33ae7d9..0d85425b1e07 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -965,10 +965,20 @@ out:
 }
 EXPORT_SYMBOL_GPL(rpc_bind_new_program);
 
+void rpc_task_release_transport(struct rpc_task *task)
+{
+	struct rpc_xprt *xprt = task->tk_xprt;
+
+	if (xprt) {
+		task->tk_xprt = NULL;
+		xprt_put(xprt);
+	}
+}
+EXPORT_SYMBOL_GPL(rpc_task_release_transport);
+
 void rpc_task_release_client(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt = task->tk_client;
-	struct rpc_xprt *xprt = task->tk_xprt;
 
 	if (clnt != NULL) {
 		/* Remove from client task list */
@@ -979,12 +989,14 @@ void rpc_task_release_client(struct rpc_task *task)
 
 		rpc_release_client(clnt);
 	}
+	rpc_task_release_transport(task);
+}
 
-	if (xprt != NULL) {
-		task->tk_xprt = NULL;
-
-		xprt_put(xprt);
-	}
+static
+void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
+{
+	if (!task->tk_xprt)
+		task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
 }
 
 static
@@ -992,8 +1004,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
 {
 
 	if (clnt != NULL) {
-		if (task->tk_xprt == NULL)
-			task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
+		rpc_task_set_transport(task, clnt);
 		task->tk_client = clnt;
 		atomic_inc(&clnt->cl_count);
 		if (clnt->cl_softrtry)
@@ -1512,6 +1523,7 @@ call_start(struct rpc_task *task)
 		clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
+	rpc_task_set_transport(task, clnt);
 }
 
 /*
-- 
cgit v1.2.3


From e6476c21447c4b17c47e476aade6facf050f31e8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:45:07 +0200
Subject: net: remove bogus RCU annotations on socket.wq

We never use RCU protection for it, just a lot of cargo-cult
rcu_deference_protects calls.

Note that we do keep the kfree_rcu call for it, as the references through
struct sock are RCU protected and thus might require a grace period before
freeing.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h |  2 +-
 include/net/sock.h  |  2 +-
 net/socket.c        | 10 ++++------
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 6554d3ba4396..e0930678c8bf 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -114,7 +114,7 @@ struct socket {
 
 	unsigned long		flags;
 
-	struct socket_wq __rcu	*wq;
+	struct socket_wq	*wq;
 
 	struct file		*file;
 	struct sock		*sk;
diff --git a/include/net/sock.h b/include/net/sock.h
index 2afea5d1bdfe..433f45fc2d68 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1788,7 +1788,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	WARN_ON(parent->sk);
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_wq = parent->wq;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	sk->sk_uid = SOCK_INODE(parent)->i_uid;
diff --git a/net/socket.c b/net/socket.c
index 5b7df6695f4f..475247e347ae 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -251,7 +251,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 	init_waitqueue_head(&wq->wait);
 	wq->fasync_list = NULL;
 	wq->flags = 0;
-	RCU_INIT_POINTER(ei->socket.wq, wq);
+	ei->socket.wq = wq;
 
 	ei->socket.state = SS_UNCONNECTED;
 	ei->socket.flags = 0;
@@ -265,11 +265,9 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 static void sock_destroy_inode(struct inode *inode)
 {
 	struct socket_alloc *ei;
-	struct socket_wq *wq;
 
 	ei = container_of(inode, struct socket_alloc, vfs_inode);
-	wq = rcu_dereference_protected(ei->socket.wq, 1);
-	kfree_rcu(wq, rcu);
+	kfree_rcu(ei->socket.wq, rcu);
 	kmem_cache_free(sock_inode_cachep, ei);
 }
 
@@ -603,7 +601,7 @@ static void __sock_release(struct socket *sock, struct inode *inode)
 		module_put(owner);
 	}
 
-	if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
+	if (sock->wq->fasync_list)
 		pr_err("%s: fasync list not empty!\n", __func__);
 
 	if (!sock->file) {
@@ -1181,7 +1179,7 @@ static int sock_fasync(int fd, struct file *filp, int on)
 		return -EINVAL;
 
 	lock_sock(sk);
-	wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
+	wq = sock->wq;
 	fasync_helper(fd, filp, on, &wq->fasync_list);
 
 	if (!wq->fasync_list)
-- 
cgit v1.2.3


From 7209049d40dc37791ce0f3738965296f30e26044 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Tue, 31 Jul 2018 17:27:02 -0400
Subject: dm kcopyd: return void from dm_kcopyd_copy()

dm_kcopyd_copy() only ever returns 0 so there is no need for callers to
account for possible failure.  Same goes for dm_kcopyd_zero().

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-target.c  | 16 ++++------------
 drivers/md/dm-kcopyd.c        | 16 +++++++---------
 drivers/md/dm-raid1.c         | 17 +++++------------
 drivers/md/dm-thin.c          | 23 +++--------------------
 drivers/md/dm-zoned-reclaim.c |  6 ++----
 include/linux/dm-kcopyd.h     | 12 ++++++------
 6 files changed, 27 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 44df244807e5..a53413371725 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -1188,9 +1188,8 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
 	queue_continuation(mg->cache->wq, &mg->k);
 }
 
-static int copy(struct dm_cache_migration *mg, bool promote)
+static void copy(struct dm_cache_migration *mg, bool promote)
 {
-	int r;
 	struct dm_io_region o_region, c_region;
 	struct cache *cache = mg->cache;
 
@@ -1203,11 +1202,9 @@ static int copy(struct dm_cache_migration *mg, bool promote)
 	c_region.count = cache->sectors_per_block;
 
 	if (promote)
-		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
+		dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
 	else
-		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
-
-	return r;
+		dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
 }
 
 static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
@@ -1449,12 +1446,7 @@ static void mg_full_copy(struct work_struct *ws)
 	}
 
 	init_continuation(&mg->k, mg_upgrade_lock);
-
-	if (copy(mg, is_policy_promote)) {
-		DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
-		mg->k.input = BLK_STS_IOERR;
-		mg_complete(mg, false);
-	}
+	copy(mg, is_policy_promote);
 }
 
 static void mg_copy(struct work_struct *ws)
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 3c7547a3c371..cc101f3ec42c 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -741,9 +741,9 @@ static void split_job(struct kcopyd_job *master_job)
 	}
 }
 
-int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
-		   unsigned int num_dests, struct dm_io_region *dests,
-		   unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
+void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
+		    unsigned int num_dests, struct dm_io_region *dests,
+		    unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
 {
 	struct kcopyd_job *job;
 	int i;
@@ -818,16 +818,14 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 		job->progress = 0;
 		split_job(job);
 	}
-
-	return 0;
 }
 EXPORT_SYMBOL(dm_kcopyd_copy);
 
-int dm_kcopyd_zero(struct dm_kcopyd_client *kc,
-		   unsigned num_dests, struct dm_io_region *dests,
-		   unsigned flags, dm_kcopyd_notify_fn fn, void *context)
+void dm_kcopyd_zero(struct dm_kcopyd_client *kc,
+		    unsigned num_dests, struct dm_io_region *dests,
+		    unsigned flags, dm_kcopyd_notify_fn fn, void *context)
 {
-	return dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context);
+	dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context);
 }
 EXPORT_SYMBOL(dm_kcopyd_zero);
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 5903e492bb34..79eab1071ec2 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -326,9 +326,8 @@ static void recovery_complete(int read_err, unsigned long write_err,
 	dm_rh_recovery_end(reg, !(read_err || write_err));
 }
 
-static int recover(struct mirror_set *ms, struct dm_region *reg)
+static void recover(struct mirror_set *ms, struct dm_region *reg)
 {
-	int r;
 	unsigned i;
 	struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest;
 	struct mirror *m;
@@ -367,10 +366,8 @@ static int recover(struct mirror_set *ms, struct dm_region *reg)
 	if (!errors_handled(ms))
 		set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);
 
-	r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
-			   flags, recovery_complete, reg);
-
-	return r;
+	dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
+		       flags, recovery_complete, reg);
 }
 
 static void reset_ms_flags(struct mirror_set *ms)
@@ -388,7 +385,6 @@ static void do_recovery(struct mirror_set *ms)
 {
 	struct dm_region *reg;
 	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
-	int r;
 
 	/*
 	 * Start quiescing some regions.
@@ -398,11 +394,8 @@ static void do_recovery(struct mirror_set *ms)
 	/*
 	 * Copy any already quiesced regions.
 	 */
-	while ((reg = dm_rh_recovery_start(ms->rh))) {
-		r = recover(ms, reg);
-		if (r)
-			dm_rh_recovery_end(reg, 0);
-	}
+	while ((reg = dm_rh_recovery_start(ms->rh)))
+		recover(ms, reg);
 
 	/*
 	 * Update the in sync flag.
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index c44477d7a9ea..5997d6808b57 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1220,18 +1220,13 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
 static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
 		    sector_t begin, sector_t end)
 {
-	int r;
 	struct dm_io_region to;
 
 	to.bdev = tc->pool_dev->bdev;
 	to.sector = begin;
 	to.count = end - begin;
 
-	r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
-	if (r < 0) {
-		DMERR_LIMIT("dm_kcopyd_zero() failed");
-		copy_complete(1, 1, m);
-	}
+	dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
 }
 
 static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio,
@@ -1257,7 +1252,6 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 			  struct dm_bio_prison_cell *cell, struct bio *bio,
 			  sector_t len)
 {
-	int r;
 	struct pool *pool = tc->pool;
 	struct dm_thin_new_mapping *m = get_next_mapping(pool);
 
@@ -1296,19 +1290,8 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 		to.sector = data_dest * pool->sectors_per_block;
 		to.count = len;
 
-		r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
-				   0, copy_complete, m);
-		if (r < 0) {
-			DMERR_LIMIT("dm_kcopyd_copy() failed");
-			copy_complete(1, 1, m);
-
-			/*
-			 * We allow the zero to be issued, to simplify the
-			 * error path.  Otherwise we'd need to start
-			 * worrying about decrementing the prepare_actions
-			 * counter.
-			 */
-		}
+		dm_kcopyd_copy(pool->copier, &from, 1, &to,
+			       0, copy_complete, m);
 
 		/*
 		 * Do we need to zero a tail region?
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index 44a119e12f1a..edf4b95eb075 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -161,10 +161,8 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
 
 		/* Copy the valid region */
 		set_bit(DMZ_RECLAIM_KCOPY, &zrc->flags);
-		ret = dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags,
-				     dmz_reclaim_kcopy_end, zrc);
-		if (ret)
-			return ret;
+		dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags,
+			       dmz_reclaim_kcopy_end, zrc);
 
 		/* Wait for copy to complete */
 		wait_on_bit_io(&zrc->flags, DMZ_RECLAIM_KCOPY,
diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h
index cfac8588ed56..e42de7750c88 100644
--- a/include/linux/dm-kcopyd.h
+++ b/include/linux/dm-kcopyd.h
@@ -62,9 +62,9 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc);
 typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err,
 				    void *context);
 
-int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
-		   unsigned num_dests, struct dm_io_region *dests,
-		   unsigned flags, dm_kcopyd_notify_fn fn, void *context);
+void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
+		    unsigned num_dests, struct dm_io_region *dests,
+		    unsigned flags, dm_kcopyd_notify_fn fn, void *context);
 
 /*
  * Prepare a callback and submit it via the kcopyd thread.
@@ -81,9 +81,9 @@ void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
 				 dm_kcopyd_notify_fn fn, void *context);
 void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err);
 
-int dm_kcopyd_zero(struct dm_kcopyd_client *kc,
-		   unsigned num_dests, struct dm_io_region *dests,
-		   unsigned flags, dm_kcopyd_notify_fn fn, void *context);
+void dm_kcopyd_zero(struct dm_kcopyd_client *kc,
+		    unsigned num_dests, struct dm_io_region *dests,
+		    unsigned flags, dm_kcopyd_notify_fn fn, void *context);
 
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_DM_KCOPYD_H */
-- 
cgit v1.2.3


From 546c596cf5491fda1516536e049c6a36836eb884 Mon Sep 17 00:00:00 2001
From: Shunyong Yang <shunyong.yang@hxt-semitech.com>
Date: Wed, 18 Jul 2018 09:40:35 +0800
Subject: PCI: Unify PCI and normal DMA direction definitions

Current DMA direction definitions in pci-dma-compat.h and dma-direction.h
are mirrored in value.  Unify them to enhance readability and avoid
possible inconsistency.

Signed-off-by: Shunyong Yang <shunyong.yang@hxt-semitech.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Joey Zheng <yu.zheng@hxt-semitech.com>
---
 include/linux/dma-direction.h  | 6 ++----
 include/linux/pci-dma-compat.h | 8 ++++----
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-direction.h b/include/linux/dma-direction.h
index 3649a031893a..9c96e30e6a0b 100644
--- a/include/linux/dma-direction.h
+++ b/include/linux/dma-direction.h
@@ -1,14 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_DMA_DIRECTION_H
 #define _LINUX_DMA_DIRECTION_H
-/*
- * These definitions mirror those in pci.h, so they can be used
- * interchangeably with their PCI_ counterparts.
- */
+
 enum dma_data_direction {
 	DMA_BIDIRECTIONAL = 0,
 	DMA_TO_DEVICE = 1,
 	DMA_FROM_DEVICE = 2,
 	DMA_NONE = 3,
 };
+
 #endif
diff --git a/include/linux/pci-dma-compat.h b/include/linux/pci-dma-compat.h
index 0dd1a3f7b309..c3f1b44ade29 100644
--- a/include/linux/pci-dma-compat.h
+++ b/include/linux/pci-dma-compat.h
@@ -8,10 +8,10 @@
 #include <linux/dma-mapping.h>
 
 /* This defines the direction arg to the DMA mapping routines. */
-#define PCI_DMA_BIDIRECTIONAL	0
-#define PCI_DMA_TODEVICE	1
-#define PCI_DMA_FROMDEVICE	2
-#define PCI_DMA_NONE		3
+#define PCI_DMA_BIDIRECTIONAL	DMA_BIDIRECTIONAL
+#define PCI_DMA_TODEVICE	DMA_TO_DEVICE
+#define PCI_DMA_FROMDEVICE	DMA_FROM_DEVICE
+#define PCI_DMA_NONE		DMA_NONE
 
 static inline void *
 pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
-- 
cgit v1.2.3


From bb276262e88dae52cc717bb636b7468f66bb234e Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Fri, 27 Jul 2018 11:33:13 -0700
Subject: mtd: spi-nor: only apply reset hacks to broken hardware

Commit 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when
exiting") is the latest from a long history of attempts to add reboot
handling to handle stateful addressing modes on SPI flash. Some prior
mostly-related discussions:

http://lists.infradead.org/pipermail/linux-mtd/2013-March/046343.html
[PATCH 1/3] mtd: m25p80: utilize dedicated 4-byte addressing commands

http://lists.infradead.org/pipermail/barebox/2014-September/020682.html
[RFC] MTD m25p80 3-byte addressing and boot problem

http://lists.infradead.org/pipermail/linux-mtd/2015-February/057683.html
[PATCH 2/2] m25p80: if supported put chip to deep power down if not used

Previously, attempts to add reboot-time software reset handling were
rejected, but the latest attempt was not.

Quick summary of the problem:
Some systems (e.g., boot ROM or bootloader) assume that they can read
initial boot code from their SPI flash using 3-byte addressing. If the
flash is left in 4-byte mode after reset, these systems won't boot. The
above patch provided a shutdown/remove hook to attempt to reset the
addressing mode before we reboot. Notably, this patch misses out on
huge classes of unexpected reboots (e.g., crashes, watchdog resets).

Unfortunately, it is essentially impossible to solve this problem 100%:
if your system doesn't know how to reset the SPI flash to power-on
defaults at initialization time, no amount of software can really rescue
you -- there will always be a chance of some unexpected reset that
leaves your flash in an addressing mode that your boot sequence didn't
expect.

While it is not directly harmful to perform hacks like the
aforementioned commit on all 4-byte addressing flash, a
properly-designed system should not need the hack -- and in fact,
providing this hack may mask the fact that a given system is indeed
broken. So this patch attempts to apply this unsound hack more narrowly,
providing a strong suggestion to developers and system designers that
this is truly a hack. With luck, system designers can catch their errors
early on in their development cycle, rather than applying this hack long
term. But apparently enough systems are out in the wild that we still
have to provide this hack.

Document a new device tree property to denote systems that do not have a
proper hardware (or software) reset mechanism, and apply the hack (with
a loud warning) only in this case.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 .../devicetree/bindings/mtd/jedec,spi-nor.txt          |  9 +++++++++
 drivers/mtd/spi-nor/spi-nor.c                          | 18 ++++++++++++++++--
 include/linux/mtd/spi-nor.h                            |  1 +
 3 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
index 956bb046e599..f03be904d3c2 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
@@ -69,6 +69,15 @@ Optional properties:
                    all chips and support for it can not be detected at runtime.
                    Refer to your chips' datasheet to check if this is supported
                    by your chip.
+- broken-flash-reset : Some flash devices utilize stateful addressing modes
+		   (e.g., for 32-bit addressing) which need to be managed
+		   carefully by a system. Because these sorts of flash don't
+		   have a standardized software reset command, and because some
+		   systems don't toggle the flash RESET# pin upon system reset
+		   (if the pin even exists at all), there are systems which
+		   cannot reboot properly if the flash is left in the "wrong"
+		   state. This boolean flag can be used on such systems, to
+		   denote the absence of a reliable reset mechanism.
 
 Example:
 
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index d9c368c44194..f028277fb1ce 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2757,8 +2757,18 @@ static int spi_nor_init(struct spi_nor *nor)
 
 	if ((nor->addr_width == 4) &&
 	    (JEDEC_MFR(nor->info) != SNOR_MFR_SPANSION) &&
-	    !(nor->info->flags & SPI_NOR_4B_OPCODES))
+	    !(nor->info->flags & SPI_NOR_4B_OPCODES)) {
+		/*
+		 * If the RESET# pin isn't hooked up properly, or the system
+		 * otherwise doesn't perform a reset command in the boot
+		 * sequence, it's impossible to 100% protect against unexpected
+		 * reboots (e.g., crashes). Warn the user (or hopefully, system
+		 * designer) that this is bad.
+		 */
+		WARN_ONCE(nor->flags & SNOR_F_BROKEN_RESET,
+			  "enabling reset hack; may not recover from unexpected reboots\n");
 		set_4byte(nor, nor->info, 1);
+	}
 
 	return 0;
 }
@@ -2781,7 +2791,8 @@ void spi_nor_restore(struct spi_nor *nor)
 	/* restore the addressing mode */
 	if ((nor->addr_width == 4) &&
 	    (JEDEC_MFR(nor->info) != SNOR_MFR_SPANSION) &&
-	    !(nor->info->flags & SPI_NOR_4B_OPCODES))
+	    !(nor->info->flags & SPI_NOR_4B_OPCODES) &&
+	    (nor->flags & SNOR_F_BROKEN_RESET))
 		set_4byte(nor, nor->info, 0);
 }
 EXPORT_SYMBOL_GPL(spi_nor_restore);
@@ -2911,6 +2922,9 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
 		params.hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
 	}
 
+	if (of_property_read_bool(np, "broken-flash-reset"))
+		nor->flags |= SNOR_F_BROKEN_RESET;
+
 	/* Some devices cannot do fast-read, no matter what DT tells us */
 	if (info->flags & SPI_NOR_NO_FR)
 		params.hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index e60da0d34cc1..c922e97f205a 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -235,6 +235,7 @@ enum spi_nor_option_flags {
 	SNOR_F_S3AN_ADDR_DEFAULT = BIT(3),
 	SNOR_F_READY_XSR_RDY	= BIT(4),
 	SNOR_F_USE_CLSR		= BIT(5),
+	SNOR_F_BROKEN_RESET	= BIT(6),
 };
 
 /**
-- 
cgit v1.2.3


From 3d3fe3c05d5a17ebdf55b936c51017c127c0ed44 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 25 Jul 2018 15:31:52 +0200
Subject: mtd: rawnand: allocate dynamically ONFI parameters during detection

Now that it is possible to do dynamic allocations during the
identification phase, convert the onfi_params structure (which is only
needed with ONFI compliant chips) into a pointer that will be allocated
only if needed.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c    | 54 +++++++++++++++++++++++--------------
 drivers/mtd/nand/raw/nand_micron.c  |  7 ++---
 drivers/mtd/nand/raw/nand_timings.c | 12 ++++-----
 include/linux/mtd/rawnand.h         |  6 ++---
 4 files changed, 47 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 00e80781124a..d527e448ce19 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5151,6 +5151,8 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_onfi_params *p;
+	struct onfi_params *onfi;
+	int onfi_version = 0;
 	char id[4];
 	int i, ret, val;
 
@@ -5206,21 +5208,19 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 	/* Check version */
 	val = le16_to_cpu(p->revision);
 	if (val & ONFI_VERSION_2_3)
-		chip->parameters.onfi.version = 23;
+		onfi_version = 23;
 	else if (val & ONFI_VERSION_2_2)
-		chip->parameters.onfi.version = 22;
+		onfi_version = 22;
 	else if (val & ONFI_VERSION_2_1)
-		chip->parameters.onfi.version = 21;
+		onfi_version = 21;
 	else if (val & ONFI_VERSION_2_0)
-		chip->parameters.onfi.version = 20;
+		onfi_version = 20;
 	else if (val & ONFI_VERSION_1_0)
-		chip->parameters.onfi.version = 10;
+		onfi_version = 10;
 
-	if (!chip->parameters.onfi.version) {
+	if (!onfi_version) {
 		pr_info("unsupported ONFI version: %d\n", val);
 		goto free_onfi_param_page;
-	} else {
-		ret = 1;
 	}
 
 	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
@@ -5257,7 +5257,7 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 	if (p->ecc_bits != 0xff) {
 		chip->ecc_strength_ds = p->ecc_bits;
 		chip->ecc_step_ds = 512;
-	} else if (chip->parameters.onfi.version >= 21 &&
+	} else if (onfi_version >= 21 &&
 		(le16_to_cpu(p->features) & ONFI_FEATURE_EXT_PARAM_PAGE)) {
 
 		/*
@@ -5284,19 +5284,33 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 		bitmap_set(chip->parameters.set_feature_list,
 			   ONFI_FEATURE_ADDR_TIMING_MODE, 1);
 	}
-	chip->parameters.onfi.tPROG = le16_to_cpu(p->t_prog);
-	chip->parameters.onfi.tBERS = le16_to_cpu(p->t_bers);
-	chip->parameters.onfi.tR = le16_to_cpu(p->t_r);
-	chip->parameters.onfi.tCCS = le16_to_cpu(p->t_ccs);
-	chip->parameters.onfi.async_timing_mode =
-		le16_to_cpu(p->async_timing_mode);
-	chip->parameters.onfi.vendor_revision =
-		le16_to_cpu(p->vendor_revision);
-	memcpy(chip->parameters.onfi.vendor, p->vendor,
-	       sizeof(p->vendor));
 
+	onfi = kzalloc(sizeof(*onfi), GFP_KERNEL);
+	if (!onfi) {
+		ret = -ENOMEM;
+		goto free_model;
+	}
+
+	onfi->version = onfi_version;
+	onfi->tPROG = le16_to_cpu(p->t_prog);
+	onfi->tBERS = le16_to_cpu(p->t_bers);
+	onfi->tR = le16_to_cpu(p->t_r);
+	onfi->tCCS = le16_to_cpu(p->t_ccs);
+	onfi->async_timing_mode = le16_to_cpu(p->async_timing_mode);
+	onfi->vendor_revision = le16_to_cpu(p->vendor_revision);
+	memcpy(onfi->vendor, p->vendor, sizeof(p->vendor));
+	chip->parameters.onfi = onfi;
+
+	/* Identification done, free the full ONFI parameter page and exit */
+	kfree(p);
+
+	return 1;
+
+free_model:
+	kfree(chip->parameters.model);
 free_onfi_param_page:
 	kfree(p);
+
 	return ret;
 }
 
@@ -5693,7 +5707,6 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 		}
 	}
 
-	chip->parameters.onfi.version = 0;
 	if (!type->name || !type->pagesize) {
 		/* Check if the chip is ONFI compliant */
 		ret = nand_flash_detect_onfi(chip);
@@ -6031,6 +6044,7 @@ static int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 static void nand_scan_ident_cleanup(struct nand_chip *chip)
 {
 	kfree(chip->parameters.model);
+	kfree(chip->parameters.onfi);
 }
 
 static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c
index 656947d91841..f5dc0a7a2456 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -88,9 +88,10 @@ static int micron_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
 static int micron_nand_onfi_init(struct nand_chip *chip)
 {
 	struct nand_parameters *p = &chip->parameters;
-	struct nand_onfi_vendor_micron *micron = (void *)p->onfi.vendor;
 
-	if (chip->parameters.onfi.version && p->onfi.vendor_revision) {
+	if (p->onfi) {
+		struct nand_onfi_vendor_micron *micron = (void *)p->onfi->vendor;
+
 		chip->read_retries = micron->read_retry_options;
 		chip->setup_read_retry = micron_nand_setup_read_retry;
 	}
@@ -382,7 +383,7 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip)
 	u8 id[5];
 	int ret;
 
-	if (!chip->parameters.onfi.version)
+	if (!chip->parameters.onfi)
 		return MICRON_ON_DIE_UNSUPPORTED;
 
 	if (chip->bits_per_cell != 1)
diff --git a/drivers/mtd/nand/raw/nand_timings.c b/drivers/mtd/nand/raw/nand_timings.c
index 9bb599106a31..ebc7b5f76f77 100644
--- a/drivers/mtd/nand/raw/nand_timings.c
+++ b/drivers/mtd/nand/raw/nand_timings.c
@@ -294,6 +294,7 @@ int onfi_fill_data_interface(struct nand_chip *chip,
 			     int timing_mode)
 {
 	struct nand_data_interface *iface = &chip->data_interface;
+	struct onfi_params *onfi = chip->parameters.onfi;
 
 	if (type != NAND_SDR_IFACE)
 		return -EINVAL;
@@ -308,17 +309,16 @@ int onfi_fill_data_interface(struct nand_chip *chip,
 	 * tPROG, tBERS, tR and tCCS.
 	 * These information are part of the ONFI parameter page.
 	 */
-	if (chip->parameters.onfi.version) {
-		struct nand_parameters *params = &chip->parameters;
+	if (onfi) {
 		struct nand_sdr_timings *timings = &iface->timings.sdr;
 
 		/* microseconds -> picoseconds */
-		timings->tPROG_max = 1000000ULL * params->onfi.tPROG;
-		timings->tBERS_max = 1000000ULL * params->onfi.tBERS;
-		timings->tR_max = 1000000ULL * params->onfi.tR;
+		timings->tPROG_max = 1000000ULL * onfi->tPROG;
+		timings->tBERS_max = 1000000ULL * onfi->tBERS;
+		timings->tR_max = 1000000ULL * onfi->tR;
 
 		/* nanoseconds -> picoseconds */
-		timings->tCCS_min = 1000UL * params->onfi.tCCS;
+		timings->tCCS_min = 1000UL * onfi->tCCS;
 	} else {
 		struct nand_sdr_timings *timings = &iface->timings.sdr;
 		/*
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 099fa166569a..efb2345359bb 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -482,7 +482,7 @@ struct nand_parameters {
 	DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER);
 
 	/* ONFI parameters */
-	struct onfi_params onfi;
+	struct onfi_params *onfi;
 };
 
 /* The maximum expected count of bytes in the NAND ID sequence */
@@ -1618,10 +1618,10 @@ struct platform_nand_data {
 /* return the supported asynchronous timing mode. */
 static inline int onfi_get_async_timing_mode(struct nand_chip *chip)
 {
-	if (!chip->parameters.onfi.version)
+	if (!chip->parameters.onfi)
 		return ONFI_TIMING_MODE_UNKNOWN;
 
-	return chip->parameters.onfi.async_timing_mode;
+	return chip->parameters.onfi->async_timing_mode;
 }
 
 int onfi_fill_data_interface(struct nand_chip *chip,
-- 
cgit v1.2.3


From 304d34360b099020a12af2abb7e1ac506f4ba16d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Sat, 28 Jul 2018 10:19:13 +0200
Subject: spi: add flags parameter to txrx_word function pointers

Add the capability to specify the flag parameter used in
bitbang_txrx_be_cpha{0,1} through the txrx_word function pointers of
spi_bitbang data structure. That feature will be used to add spi-3wire
support to the spi-gpio controller

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ath79.c         |  2 +-
 drivers/spi/spi-bitbang.c       | 34 +++++++++++++++++++++-------------
 drivers/spi/spi-butterfly.c     |  4 ++--
 drivers/spi/spi-gpio.c          | 32 ++++++++++++++++----------------
 drivers/spi/spi-lm70llp.c       |  5 +++--
 drivers/spi/spi-sh-sci.c        | 20 ++++++++++++--------
 drivers/spi/spi-xtensa-xtfpga.c |  2 +-
 include/linux/spi/spi_bitbang.h |  2 +-
 8 files changed, 57 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c
index 0719bd484891..3f6b657394de 100644
--- a/drivers/spi/spi-ath79.c
+++ b/drivers/spi/spi-ath79.c
@@ -176,7 +176,7 @@ static void ath79_spi_cleanup(struct spi_device *spi)
 }
 
 static u32 ath79_spi_txrx_mode0(struct spi_device *spi, unsigned int nsecs,
-			       u32 word, u8 bits)
+			       u32 word, u8 bits, unsigned flags)
 {
 	struct ath79_spi *sp = ath79_spidev_to_sp(spi);
 	u32 ioc = sp->ioc_base;
diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index 3aa9e6e3dac8..76f1b534bdd7 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -49,22 +49,26 @@
 struct spi_bitbang_cs {
 	unsigned	nsecs;	/* (clock cycle time)/2 */
 	u32		(*txrx_word)(struct spi_device *spi, unsigned nsecs,
-					u32 word, u8 bits);
+					u32 word, u8 bits, unsigned flags);
 	unsigned	(*txrx_bufs)(struct spi_device *,
 					u32 (*txrx_word)(
 						struct spi_device *spi,
 						unsigned nsecs,
-						u32 word, u8 bits),
-					unsigned, struct spi_transfer *);
+						u32 word, u8 bits,
+						unsigned flags),
+					unsigned, struct spi_transfer *,
+					unsigned);
 };
 
 static unsigned bitbang_txrx_8(
 	struct spi_device	*spi,
 	u32			(*txrx_word)(struct spi_device *spi,
 					unsigned nsecs,
-					u32 word, u8 bits),
+					u32 word, u8 bits,
+					unsigned flags),
 	unsigned		ns,
-	struct spi_transfer	*t
+	struct spi_transfer	*t,
+	unsigned flags
 ) {
 	unsigned		bits = t->bits_per_word;
 	unsigned		count = t->len;
@@ -76,7 +80,7 @@ static unsigned bitbang_txrx_8(
 
 		if (tx)
 			word = *tx++;
-		word = txrx_word(spi, ns, word, bits);
+		word = txrx_word(spi, ns, word, bits, flags);
 		if (rx)
 			*rx++ = word;
 		count -= 1;
@@ -88,9 +92,11 @@ static unsigned bitbang_txrx_16(
 	struct spi_device	*spi,
 	u32			(*txrx_word)(struct spi_device *spi,
 					unsigned nsecs,
-					u32 word, u8 bits),
+					u32 word, u8 bits,
+					unsigned flags),
 	unsigned		ns,
-	struct spi_transfer	*t
+	struct spi_transfer	*t,
+	unsigned flags
 ) {
 	unsigned		bits = t->bits_per_word;
 	unsigned		count = t->len;
@@ -102,7 +108,7 @@ static unsigned bitbang_txrx_16(
 
 		if (tx)
 			word = *tx++;
-		word = txrx_word(spi, ns, word, bits);
+		word = txrx_word(spi, ns, word, bits, flags);
 		if (rx)
 			*rx++ = word;
 		count -= 2;
@@ -114,9 +120,11 @@ static unsigned bitbang_txrx_32(
 	struct spi_device	*spi,
 	u32			(*txrx_word)(struct spi_device *spi,
 					unsigned nsecs,
-					u32 word, u8 bits),
+					u32 word, u8 bits,
+					unsigned flags),
 	unsigned		ns,
-	struct spi_transfer	*t
+	struct spi_transfer	*t,
+	unsigned flags
 ) {
 	unsigned		bits = t->bits_per_word;
 	unsigned		count = t->len;
@@ -128,7 +136,7 @@ static unsigned bitbang_txrx_32(
 
 		if (tx)
 			word = *tx++;
-		word = txrx_word(spi, ns, word, bits);
+		word = txrx_word(spi, ns, word, bits, flags);
 		if (rx)
 			*rx++ = word;
 		count -= 4;
@@ -236,7 +244,7 @@ static int spi_bitbang_bufs(struct spi_device *spi, struct spi_transfer *t)
 	struct spi_bitbang_cs	*cs = spi->controller_state;
 	unsigned		nsecs = cs->nsecs;
 
-	return cs->txrx_bufs(spi, cs->txrx_word, nsecs, t);
+	return cs->txrx_bufs(spi, cs->txrx_word, nsecs, t, 0);
 }
 
 /*----------------------------------------------------------------------*/
diff --git a/drivers/spi/spi-butterfly.c b/drivers/spi/spi-butterfly.c
index 22a31e4a1a11..1a3510215841 100644
--- a/drivers/spi/spi-butterfly.c
+++ b/drivers/spi/spi-butterfly.c
@@ -144,9 +144,9 @@ static void butterfly_chipselect(struct spi_device *spi, int value)
 
 static u32
 butterfly_txrx_word_mode0(struct spi_device *spi, unsigned nsecs, u32 word,
-			  u8 bits)
+			  u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 /*----------------------------------------------------------------------*/
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 6ae92d4dca19..be68298cbd9c 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -149,27 +149,27 @@ static inline int getmiso(const struct spi_device *spi)
  */
 
 static u32 spi_gpio_txrx_word_mode0(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_txrx_word_mode1(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha1(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_txrx_word_mode2(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 1, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
 }
 
 static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha1(spi, nsecs, 1, 0, word, bits);
+	return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
 }
 
 /*
@@ -183,30 +183,30 @@ static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
  */
 
 static u32 spi_gpio_spec_txrx_word_mode0(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	unsigned flags = spi->master->flags;
+	flags = spi->master->flags;
 	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_spec_txrx_word_mode1(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	unsigned flags = spi->master->flags;
+	flags = spi->master->flags;
 	return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_spec_txrx_word_mode2(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	unsigned flags = spi->master->flags;
+	flags = spi->master->flags;
 	return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
 }
 
 static u32 spi_gpio_spec_txrx_word_mode3(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	unsigned flags = spi->master->flags;
+	flags = spi->master->flags;
 	return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
 }
 
diff --git a/drivers/spi/spi-lm70llp.c b/drivers/spi/spi-lm70llp.c
index 61ee0f4269ae..4549efd792da 100644
--- a/drivers/spi/spi-lm70llp.c
+++ b/drivers/spi/spi-lm70llp.c
@@ -188,9 +188,10 @@ static void lm70_chipselect(struct spi_device *spi, int value)
 /*
  * Our actual bitbanger routine.
  */
-static u32 lm70_txrx(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits)
+static u32 lm70_txrx(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits,
+		     unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 static void spi_lm70llp_attach(struct parport *p)
diff --git a/drivers/spi/spi-sh-sci.c b/drivers/spi/spi-sh-sci.c
index a9beeeed812c..393701cfca3c 100644
--- a/drivers/spi/spi-sh-sci.c
+++ b/drivers/spi/spi-sh-sci.c
@@ -80,27 +80,31 @@ static inline u32 getmiso(struct spi_device *dev)
 #include "spi-bitbang-txrx.h"
 
 static u32 sh_sci_spi_txrx_mode0(struct spi_device *spi,
-				      unsigned nsecs, u32 word, u8 bits)
+				 unsigned nsecs, u32 word, u8 bits,
+				 unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 sh_sci_spi_txrx_mode1(struct spi_device *spi,
-				      unsigned nsecs, u32 word, u8 bits)
+				 unsigned nsecs, u32 word, u8 bits,
+				 unsigned flags)
 {
-	return bitbang_txrx_be_cpha1(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 sh_sci_spi_txrx_mode2(struct spi_device *spi,
-				      unsigned nsecs, u32 word, u8 bits)
+				 unsigned nsecs, u32 word, u8 bits,
+				 unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 1, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
 }
 
 static u32 sh_sci_spi_txrx_mode3(struct spi_device *spi,
-				      unsigned nsecs, u32 word, u8 bits)
+				 unsigned nsecs, u32 word, u8 bits,
+				 unsigned flags)
 {
-	return bitbang_txrx_be_cpha1(spi, nsecs, 1, 0, word, bits);
+	return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
 }
 
 static void sh_sci_spi_chipselect(struct spi_device *dev, int value)
diff --git a/drivers/spi/spi-xtensa-xtfpga.c b/drivers/spi/spi-xtensa-xtfpga.c
index be6155cba9de..8ce04f829a80 100644
--- a/drivers/spi/spi-xtensa-xtfpga.c
+++ b/drivers/spi/spi-xtensa-xtfpga.c
@@ -54,7 +54,7 @@ static inline void xtfpga_spi_wait_busy(struct xtfpga_spi *xspi)
 }
 
 static u32 xtfpga_spi_txrx_word(struct spi_device *spi, unsigned nsecs,
-				u32 v, u8 bits)
+				u32 v, u8 bits, unsigned flags)
 {
 	struct xtfpga_spi *xspi = spi_master_get_devdata(spi->master);
 
diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index c1e61641a7f1..5847f00ef9cf 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -30,7 +30,7 @@ struct spi_bitbang {
 	/* txrx_word[SPI_MODE_*]() just looks like a shift register */
 	u32	(*txrx_word[4])(struct spi_device *spi,
 			unsigned nsecs,
-			u32 word, u8 bits);
+			u32 word, u8 bits, unsigned flags);
 };
 
 /* you can call these default bitbang->master methods from your custom
-- 
cgit v1.2.3


From 4b859db2c60692560afbfef1b030d0ddef57b7ee Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Sat, 28 Jul 2018 10:19:14 +0200
Subject: spi: spi-gpio: add SPI_3WIRE support

Add SPI_3WIRE support to spi-gpio controller introducing
set_line_direction function pointer in spi_bitbang data structure.
Spi-gpio controller has been tested using hts221 temp/rh iio sensor
running in 3wire mode and lsm6dsm running in 4wire mode

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bitbang.c       | 16 ++++++++++++++++
 drivers/spi/spi-gpio.c          | 17 ++++++++++++++++-
 include/linux/spi/spi_bitbang.h |  1 +
 3 files changed, 33 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index 76f1b534bdd7..f29176000b8d 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -243,7 +243,23 @@ static int spi_bitbang_bufs(struct spi_device *spi, struct spi_transfer *t)
 {
 	struct spi_bitbang_cs	*cs = spi->controller_state;
 	unsigned		nsecs = cs->nsecs;
+	struct spi_bitbang	*bitbang;
+
+	bitbang = spi_master_get_devdata(spi->master);
+	if (bitbang->set_line_direction) {
+		int err;
 
+		err = bitbang->set_line_direction(spi, !!(t->tx_buf));
+		if (err < 0)
+			return err;
+	}
+
+	if (spi->mode & SPI_3WIRE) {
+		unsigned flags;
+
+		flags = t->tx_buf ? SPI_MASTER_NO_RX : SPI_MASTER_NO_TX;
+		return cs->txrx_bufs(spi, cs->txrx_word, nsecs, t, flags);
+	}
 	return cs->txrx_bufs(spi, cs->txrx_word, nsecs, t, 0);
 }
 
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index be68298cbd9c..0626e6e3ea0c 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -121,7 +121,10 @@ static inline int getmiso(const struct spi_device *spi)
 {
 	struct spi_gpio *spi_gpio = spi_to_spi_gpio(spi);
 
-	return !!gpiod_get_value_cansleep(spi_gpio->miso);
+	if (spi->mode & SPI_3WIRE)
+		return !!gpiod_get_value_cansleep(spi_gpio->mosi);
+	else
+		return !!gpiod_get_value_cansleep(spi_gpio->miso);
 }
 
 /*
@@ -250,6 +253,16 @@ static int spi_gpio_setup(struct spi_device *spi)
 	return status;
 }
 
+static int spi_gpio_set_direction(struct spi_device *spi, bool output)
+{
+	struct spi_gpio *spi_gpio = spi_to_spi_gpio(spi);
+
+	if (output)
+		return gpiod_direction_output(spi_gpio->mosi, 1);
+	else
+		return gpiod_direction_input(spi_gpio->mosi);
+}
+
 static void spi_gpio_cleanup(struct spi_device *spi)
 {
 	spi_bitbang_cleanup(spi);
@@ -395,6 +408,7 @@ static int spi_gpio_probe(struct platform_device *pdev)
 		return status;
 
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
+	master->mode_bits = SPI_3WIRE | SPI_CPHA | SPI_CPOL;
 	master->flags = master_flags;
 	master->bus_num = pdev->id;
 	/* The master needs to think there is a chipselect even if not connected */
@@ -407,6 +421,7 @@ static int spi_gpio_probe(struct platform_device *pdev)
 
 	spi_gpio->bitbang.master = master;
 	spi_gpio->bitbang.chipselect = spi_gpio_chipselect;
+	spi_gpio->bitbang.set_line_direction = spi_gpio_set_direction;
 
 	if ((master_flags & (SPI_MASTER_NO_TX | SPI_MASTER_NO_RX)) == 0) {
 		spi_gpio->bitbang.txrx_word[SPI_MODE_0] = spi_gpio_txrx_word_mode0;
diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index 5847f00ef9cf..b7e021b274dc 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -31,6 +31,7 @@ struct spi_bitbang {
 	u32	(*txrx_word[4])(struct spi_device *spi,
 			unsigned nsecs,
 			u32 word, u8 bits, unsigned flags);
+	int	(*set_line_direction)(struct spi_device *spi, bool output);
 };
 
 /* you can call these default bitbang->master methods from your custom
-- 
cgit v1.2.3


From ba113c3aa79a7f941ac162d05a3620bdc985c58d Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 31 Jul 2018 17:46:21 -0700
Subject: tcp: add data bytes sent stats

Introduce a new TCP stat to record the number of bytes sent
(RFC4898 tcpEStatsPerfHCDataOctetsOut) and expose it in both tcp_info
(TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS).

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 3 +++
 include/uapi/linux/tcp.h | 4 +++-
 net/ipv4/tcp.c           | 6 ++++++
 net/ipv4/tcp_output.c    | 1 +
 4 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 58a8d7d71354..d0798dcd2cab 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -181,6 +181,9 @@ struct tcp_sock {
 	u32	data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut
 				 * total number of data segments sent.
 				 */
+	u64	bytes_sent;	/* RFC4898 tcpEStatsPerfHCDataOctetsOut
+				 * total number of data bytes sent.
+				 */
 	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index e3f6ed8a7064..1c70ed287c3b 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -235,6 +235,8 @@ struct tcp_info {
 
 	__u32	tcpi_delivered;
 	__u32	tcpi_delivered_ce;
+
+	__u64	tcpi_bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -257,7 +259,7 @@ enum {
 	TCP_NLA_SND_SSTHRESH,	/* Slow start size threshold */
 	TCP_NLA_DELIVERED,	/* Data pkts delivered incl. out-of-order */
 	TCP_NLA_DELIVERED_CE,	/* Like above but only ones w/ CE marks */
-
+	TCP_NLA_BYTES_SENT,	/* Data bytes sent including retransmission */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 27bbe6a792b7..873cb9968ff5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2594,6 +2594,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	sk->sk_rx_dst = NULL;
 	tcp_saved_syn_free(tp);
 	tp->compressed_ack = 0;
+	tp->bytes_sent = 0;
 
 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);
@@ -3201,6 +3202,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 		info->tcpi_delivery_rate = rate64;
 	info->tcpi_delivered = tp->delivered;
 	info->tcpi_delivered_ce = tp->delivered_ce;
+	info->tcpi_bytes_sent = tp->bytes_sent;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3225,6 +3227,7 @@ static size_t tcp_opt_stats_get_size(void)
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_SSTHRESH */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
 		0;
 }
 
@@ -3272,6 +3275,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
 	nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
 
+	nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
+			  TCP_NLA_PAD);
+
 	return stats;
 }
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 490df62f26d4..861531fe0e97 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1136,6 +1136,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	if (skb->len != tcp_header_size) {
 		tcp_event_data_sent(tp, sk);
 		tp->data_segs_out += tcp_skb_pcount(skb);
+		tp->bytes_sent += skb->len - tcp_header_size;
 		tcp_internal_pacing(sk, skb);
 	}
 
-- 
cgit v1.2.3


From fb31c9b9f6c85b1bad569ecedbde78d9e37cd87b Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 31 Jul 2018 17:46:22 -0700
Subject: tcp: add data bytes retransmitted stats

Introduce a new TCP stat to record the number of bytes retransmitted
(RFC4898 tcpEStatsPerfOctetsRetrans) and expose it in both tcp_info
(TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS).

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 3 +++
 include/uapi/linux/tcp.h | 2 ++
 net/ipv4/tcp.c           | 5 +++++
 net/ipv4/tcp_output.c    | 1 +
 4 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index d0798dcd2cab..fb67f9a51b95 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -333,6 +333,9 @@ struct tcp_sock {
 				 * the first SYN. */
 	u32	undo_marker;	/* snd_una upon a new recovery episode. */
 	int	undo_retrans;	/* number of undoable retransmissions. */
+	u64	bytes_retrans;	/* RFC4898 tcpEStatsPerfOctetsRetrans
+				 * Total data bytes retransmitted
+				 */
 	u32	total_retrans;	/* Total retransmits for entire connection */
 
 	u32	urg_seq;	/* Seq of received urgent pointer */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 1c70ed287c3b..c31f5100b744 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -237,6 +237,7 @@ struct tcp_info {
 	__u32	tcpi_delivered_ce;
 
 	__u64	tcpi_bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
+	__u64	tcpi_bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -260,6 +261,7 @@ enum {
 	TCP_NLA_DELIVERED,	/* Data pkts delivered incl. out-of-order */
 	TCP_NLA_DELIVERED_CE,	/* Like above but only ones w/ CE marks */
 	TCP_NLA_BYTES_SENT,	/* Data bytes sent including retransmission */
+	TCP_NLA_BYTES_RETRANS,	/* Data bytes retransmitted */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 873cb9968ff5..5ed1be88e922 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2595,6 +2595,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_saved_syn_free(tp);
 	tp->compressed_ack = 0;
 	tp->bytes_sent = 0;
+	tp->bytes_retrans = 0;
 
 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);
@@ -3203,6 +3204,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_delivered = tp->delivered;
 	info->tcpi_delivered_ce = tp->delivered_ce;
 	info->tcpi_bytes_sent = tp->bytes_sent;
+	info->tcpi_bytes_retrans = tp->bytes_retrans;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3228,6 +3230,7 @@ static size_t tcp_opt_stats_get_size(void)
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
 		0;
 }
 
@@ -3277,6 +3280,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 
 	nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
 			  TCP_NLA_PAD);
+	nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
+			  TCP_NLA_PAD);
 
 	return stats;
 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 861531fe0e97..50cabf7656f3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2871,6 +2871,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
 	tp->total_retrans += segs;
+	tp->bytes_retrans += skb->len;
 
 	/* make sure skb->data is aligned on arches that require it
 	 * and check if ack-trimming & collapsing extended the headroom
-- 
cgit v1.2.3


From 7e10b6554ff2ce7f86d5d3eec3af5db8db482caa Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 31 Jul 2018 17:46:23 -0700
Subject: tcp: add dsack blocks received stats

Introduce a new TCP stat to record the number of DSACK blocks received
(RFC4989 tcpEStatsStackDSACKDups) and expose it in both tcp_info
(TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS).

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 3 +++
 include/uapi/linux/tcp.h | 2 ++
 net/ipv4/tcp.c           | 4 ++++
 net/ipv4/tcp_input.c     | 1 +
 4 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fb67f9a51b95..da6281c549a5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -188,6 +188,9 @@ struct tcp_sock {
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
 				 */
+	u32	dsack_dups;	/* RFC4898 tcpEStatsStackDSACKDups
+				 * total number of DSACK blocks received
+				 */
  	u32	snd_una;	/* First byte we want an ack for	*/
  	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index c31f5100b744..0e1c0aec0153 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -238,6 +238,7 @@ struct tcp_info {
 
 	__u64	tcpi_bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
 	__u64	tcpi_bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans */
+	__u32	tcpi_dsack_dups;     /* RFC4898 tcpEStatsStackDSACKDups */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -262,6 +263,7 @@ enum {
 	TCP_NLA_DELIVERED_CE,	/* Like above but only ones w/ CE marks */
 	TCP_NLA_BYTES_SENT,	/* Data bytes sent including retransmission */
 	TCP_NLA_BYTES_RETRANS,	/* Data bytes retransmitted */
+	TCP_NLA_DSACK_DUPS,	/* DSACK blocks received */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5ed1be88e922..d6232b598cae 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2596,6 +2596,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->compressed_ack = 0;
 	tp->bytes_sent = 0;
 	tp->bytes_retrans = 0;
+	tp->dsack_dups = 0;
 
 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);
@@ -3205,6 +3206,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_delivered_ce = tp->delivered_ce;
 	info->tcpi_bytes_sent = tp->bytes_sent;
 	info->tcpi_bytes_retrans = tp->bytes_retrans;
+	info->tcpi_dsack_dups = tp->dsack_dups;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3231,6 +3233,7 @@ static size_t tcp_opt_stats_get_size(void)
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
 		0;
 }
 
@@ -3282,6 +3285,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 			  TCP_NLA_PAD);
 	nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
 			  TCP_NLA_PAD);
+	nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
 
 	return stats;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d51fa358b2b1..fbc85ff7d71d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -874,6 +874,7 @@ static void tcp_dsack_seen(struct tcp_sock *tp)
 {
 	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
 	tp->rack.dsack_seen = 1;
+	tp->dsack_dups++;
 }
 
 /* It's reordering when higher sequence was delivered (i.e. sacked) before
-- 
cgit v1.2.3


From 7ec65372ca534217b53fd208500cf7aac223a383 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 31 Jul 2018 17:46:24 -0700
Subject: tcp: add stat of data packet reordering events

Introduce a new TCP stats to record the number of reordering events seen
and expose it in both tcp_info (TCP_INFO) and opt_stats
(SOF_TIMESTAMPING_OPT_STATS).
Application can use this stats to track the frequency of the reordering
events in addition to the existing reordering stats which tracks the
magnitude of the latest reordering event.

Note: this new stats tracks reordering events triggered by ACKs, which
could often be fewer than the actual number of packets being delivered
out-of-order.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 4 ++--
 include/uapi/linux/tcp.h | 2 ++
 net/ipv4/tcp.c           | 4 ++++
 net/ipv4/tcp_input.c     | 3 ++-
 net/ipv4/tcp_recovery.c  | 2 +-
 5 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index da6281c549a5..263e37271afd 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -220,8 +220,7 @@ struct tcp_sock {
 #define TCP_RACK_RECOVERY_THRESH 16
 		u8 reo_wnd_persist:5, /* No. of recovery since last adj */
 		   dsack_seen:1, /* Whether DSACK seen after last adj */
-		   advanced:1,	 /* mstamp advanced since last lost marking */
-		   reord:1;	 /* reordering detected */
+		   advanced:1;	 /* mstamp advanced since last lost marking */
 	} rack;
 	u16	advmss;		/* Advertised MSS			*/
 	u8	compressed_ack;
@@ -267,6 +266,7 @@ struct tcp_sock {
 	u8	ecn_flags;	/* ECN status bits.			*/
 	u8	keepalive_probes; /* num of allowed keep alive probes	*/
 	u32	reordering;	/* Packet reordering metric.		*/
+	u32	reord_seen;	/* number of data packet reordering events */
 	u32	snd_up;		/* Urgent pointer		*/
 
 /*
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 0e1c0aec0153..e02d31986ff9 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -239,6 +239,7 @@ struct tcp_info {
 	__u64	tcpi_bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
 	__u64	tcpi_bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans */
 	__u32	tcpi_dsack_dups;     /* RFC4898 tcpEStatsStackDSACKDups */
+	__u32	tcpi_reord_seen;     /* reordering events seen */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -264,6 +265,7 @@ enum {
 	TCP_NLA_BYTES_SENT,	/* Data bytes sent including retransmission */
 	TCP_NLA_BYTES_RETRANS,	/* Data bytes retransmitted */
 	TCP_NLA_DSACK_DUPS,	/* DSACK blocks received */
+	TCP_NLA_REORD_SEEN,	/* reordering events seen */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d6232b598cae..31fa1c080f28 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2597,6 +2597,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->bytes_sent = 0;
 	tp->bytes_retrans = 0;
 	tp->dsack_dups = 0;
+	tp->reord_seen = 0;
 
 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);
@@ -3207,6 +3208,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_bytes_sent = tp->bytes_sent;
 	info->tcpi_bytes_retrans = tp->bytes_retrans;
 	info->tcpi_dsack_dups = tp->dsack_dups;
+	info->tcpi_reord_seen = tp->reord_seen;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3234,6 +3236,7 @@ static size_t tcp_opt_stats_get_size(void)
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
 		0;
 }
 
@@ -3286,6 +3289,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
 			  TCP_NLA_PAD);
 	nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
+	nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
 
 	return stats;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fbc85ff7d71d..3d6156f07a8d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -906,8 +906,8 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
 				       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
 	}
 
-	tp->rack.reord = 1;
 	/* This exciting event is worth to be remembered. 8) */
+	tp->reord_seen++;
 	NET_INC_STATS(sock_net(sk),
 		      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
 }
@@ -1871,6 +1871,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 
 	tp->reordering = min_t(u32, tp->packets_out + addend,
 			       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+	tp->reord_seen++;
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
 }
 
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 71593e4400ab..c81aadff769b 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -25,7 +25,7 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!tp->rack.reord) {
+	if (!tp->reord_seen) {
 		/* If reordering has not been observed, be aggressive during
 		 * the recovery or starting the recovery by DUPACK threshold.
 		 */
-- 
cgit v1.2.3


From 8b11ec1b5ffb54f71cb5a5e5c8c4d36e5d113085 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 1 Aug 2018 13:43:38 -0700
Subject: mm: do not initialize TLB stack vma's with vma_init()

Commit 2c4541e24c55 ("mm: use vma_init() to initialize VMAs on stack and
data segments") tried to initialize various left-over ad-hoc vma's
"properly", but actually made things worse for the temporary vma's used
for TLB flushing.

vma_init() doesn't actually initialize all of the vma, just a few
fields, so doing something like

   -       struct vm_area_struct vma = { .vm_mm = tlb->mm, };
   +       struct vm_area_struct vma;
   +
   +       vma_init(&vma, tlb->mm);

was actually very bad: instead of having a nicely initialized vma with
every field but "vm_mm" zeroed, you'd have an entirely uninitialized vma
with only a couple of fields initialized.  And they weren't even fields
that the code in question mostly cared about.

The flush_tlb_range() function takes a "struct vma" rather than a
"struct mm_struct", because a few architectures actually care about what
kind of range it is - being able to only do an ITLB flush if it's a
range that doesn't have data accesses enabled, for example.  And all the
normal users already have the vma for doing the range invalidation.

But a few people want to call flush_tlb_range() with a range they just
made up, so they also end up using a made-up vma.  x86 just has a
special "flush_tlb_mm_range()" function for this, but other
architectures (arm and ia64) do the "use fake vma" thing instead, and
thus got caught up in the vma_init() changes.

At the same time, the TLB flushing code really doesn't care about most
other fields in the vma, so vma_init() is just unnecessary and
pointless.

This fixes things by having an explicit "this is just an initializer for
the TLB flush" initializer macro, which is used by the arm/arm64/ia64
people who mis-use this interface with just a dummy vma.

Fixes: 2c4541e24c55 ("mm: use vma_init() to initialize VMAs on stack and data segments")
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mach-rpc/ecard.c    |  5 +----
 arch/arm64/include/asm/tlb.h |  4 +---
 arch/arm64/mm/hugetlbpage.c  | 10 ++++------
 arch/ia64/include/asm/tlb.h  |  7 +++----
 include/linux/mm.h           |  3 +++
 5 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c
index 8db62cc54a6a..04b2f22c2739 100644
--- a/arch/arm/mach-rpc/ecard.c
+++ b/arch/arm/mach-rpc/ecard.c
@@ -212,7 +212,7 @@ static DEFINE_MUTEX(ecard_mutex);
  */
 static void ecard_init_pgtables(struct mm_struct *mm)
 {
-	struct vm_area_struct vma;
+	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, VM_EXEC);
 
 	/* We want to set up the page tables for the following mapping:
 	 *  Virtual	Physical
@@ -237,9 +237,6 @@ static void ecard_init_pgtables(struct mm_struct *mm)
 
 	memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE));
 
-	vma_init(&vma, mm);
-	vma.vm_flags = VM_EXEC;
-
 	flush_tlb_range(&vma, IO_START, IO_START + IO_SIZE);
 	flush_tlb_range(&vma, EASI_START, EASI_START + EASI_SIZE);
 }
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index d87f2d646caa..0ad1cf233470 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -37,9 +37,7 @@ static inline void __tlb_remove_table(void *_table)
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
-	struct vm_area_struct vma;
-
-	vma_init(&vma, tlb->mm);
+	struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
 
 	/*
 	 * The ASID allocator will either invalidate the ASID or mark
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 1854e49aa18a..192b3ba07075 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -108,13 +108,10 @@ static pte_t get_clear_flush(struct mm_struct *mm,
 			     unsigned long pgsize,
 			     unsigned long ncontig)
 {
-	struct vm_area_struct vma;
 	pte_t orig_pte = huge_ptep_get(ptep);
 	bool valid = pte_valid(orig_pte);
 	unsigned long i, saddr = addr;
 
-	vma_init(&vma, mm);
-
 	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
 		pte_t pte = ptep_get_and_clear(mm, addr, ptep);
 
@@ -127,8 +124,10 @@ static pte_t get_clear_flush(struct mm_struct *mm,
 			orig_pte = pte_mkdirty(orig_pte);
 	}
 
-	if (valid)
+	if (valid) {
+		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
 		flush_tlb_range(&vma, saddr, addr);
+	}
 	return orig_pte;
 }
 
@@ -147,10 +146,9 @@ static void clear_flush(struct mm_struct *mm,
 			     unsigned long pgsize,
 			     unsigned long ncontig)
 {
-	struct vm_area_struct vma;
+	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
 	unsigned long i, saddr = addr;
 
-	vma_init(&vma, mm);
 	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
 		pte_clear(mm, addr, ptep);
 
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index db89e7306081..516355a774bf 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -115,12 +115,11 @@ ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned
 		flush_tlb_all();
 	} else {
 		/*
-		 * XXX fix me: flush_tlb_range() should take an mm pointer instead of a
-		 * vma pointer.
+		 * flush_tlb_range() takes a vma instead of a mm pointer because
+		 * some architectures want the vm_flags for ITLB/DTLB flush.
 		 */
-		struct vm_area_struct vma;
+		struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
 
-		vma_init(&vma, tlb->mm);
 		/* flush the address range from the tlb: */
 		flush_tlb_range(&vma, start, end);
 		/* now flush the virt. page-table area mapping the address range: */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7ba6d356d18f..68a5121694ef 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -466,6 +466,9 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
 	vma->vm_ops = NULL;
 }
 
+/* flush_tlb_range() takes a vma, not a mm, and can care about flags */
+#define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) }
+
 struct mmu_gather;
 struct inode;
 
-- 
cgit v1.2.3


From c42b65e363ce97a828f81b59033c3558f8fa7f70 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 1 Aug 2018 15:42:56 -0700
Subject: bitmap: Add bitmap_alloc(), bitmap_zalloc() and bitmap_free()

A lot of code become ugly because of open coding allocations for bitmaps.

Introduce three helpers to allow users be more clear of intention
and keep their code neat.

Note, due to multiple circular dependencies we may not provide
the helpers as inliners. For now we keep them exported and, perhaps,
at some point in the future we will sort out header inclusion and
inheritance.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/bitmap.h |  8 ++++++++
 lib/bitmap.c           | 20 ++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 1ee46f492267..acf5e8df3504 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -104,6 +104,14 @@
  * contain all bit positions from 0 to 'bits' - 1.
  */
 
+/*
+ * Allocation and deallocation of bitmap.
+ * Provided in lib/bitmap.c to avoid circular dependency.
+ */
+extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags);
+extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags);
+extern void bitmap_free(const unsigned long *bitmap);
+
 /*
  * lib/bitmap.c provides these functions:
  */
diff --git a/lib/bitmap.c b/lib/bitmap.c
index a42eff7e8c48..5b476d8414be 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -13,6 +13,7 @@
 #include <linux/bitops.h>
 #include <linux/bug.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
 
@@ -1128,6 +1129,25 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n
 EXPORT_SYMBOL(bitmap_copy_le);
 #endif
 
+unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags)
+{
+	return kmalloc_array(BITS_TO_LONGS(nbits), sizeof(unsigned long),
+			     flags);
+}
+EXPORT_SYMBOL(bitmap_alloc);
+
+unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags)
+{
+	return bitmap_alloc(nbits, flags | __GFP_ZERO);
+}
+EXPORT_SYMBOL(bitmap_zalloc);
+
+void bitmap_free(const unsigned long *bitmap)
+{
+	kfree(bitmap);
+}
+EXPORT_SYMBOL(bitmap_free);
+
 #if BITS_PER_LONG == 64
 /**
  * bitmap_from_arr32 - copy the contents of u32 array of bits to bitmap
-- 
cgit v1.2.3


From 3ebea280d7e9b610fa3d31c9cfd556b1705eeedf Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 1 Aug 2018 21:08:30 -0400
Subject: ring-buffer: Make ring_buffer_record_is_on() return bool

The value of ring_buffer_record_is_on() is either true or false, so have its
return value be bool.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 2 +-
 kernel/trace/ring_buffer.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 003d09ab308d..5176124fc4ba 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -164,7 +164,7 @@ void ring_buffer_record_disable(struct ring_buffer *buffer);
 void ring_buffer_record_enable(struct ring_buffer *buffer);
 void ring_buffer_record_off(struct ring_buffer *buffer);
 void ring_buffer_record_on(struct ring_buffer *buffer);
-int ring_buffer_record_is_on(struct ring_buffer *buffer);
+bool ring_buffer_record_is_on(struct ring_buffer *buffer);
 int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0b0b688ea166..ffb43cbc5c13 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3221,7 +3221,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_on);
  *
  * Returns true if the ring buffer is in a state that it accepts writes.
  */
-int ring_buffer_record_is_on(struct ring_buffer *buffer)
+bool ring_buffer_record_is_on(struct ring_buffer *buffer)
 {
 	return !atomic_read(&buffer->record_disabled);
 }
-- 
cgit v1.2.3


From d7224c0e128c7337c0b0f66ac20921fbbf4efc14 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 1 Aug 2018 21:09:50 -0400
Subject: ring-buffer: Make ring_buffer_record_is_set_on() return bool

The value of ring_buffer_record_is_set_on() is either true or false, so have
its return value be bool.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 2 +-
 kernel/trace/ring_buffer.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 5176124fc4ba..0940fda59872 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -165,7 +165,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer);
 void ring_buffer_record_off(struct ring_buffer *buffer);
 void ring_buffer_record_on(struct ring_buffer *buffer);
 bool ring_buffer_record_is_on(struct ring_buffer *buffer);
-int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
+bool ring_buffer_record_is_set_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ffb43cbc5c13..b386830b4d51 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3237,7 +3237,7 @@ bool ring_buffer_record_is_on(struct ring_buffer *buffer)
  * ring_buffer_record_disable(), as that is a temporary disabling of
  * the ring buffer.
  */
-int ring_buffer_record_is_set_on(struct ring_buffer *buffer)
+bool ring_buffer_record_is_set_on(struct ring_buffer *buffer)
 {
 	return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
 }
-- 
cgit v1.2.3


From c971e6a006175bd0f195c6346c4e8bc4089bec00 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 28 May 2018 18:27:19 -0400
Subject: kill d_instantiate_no_diralias()

The only user is fuse_create_new_entry(), and there it's used to
mitigate the same mkdir/open-by-handle race as in nfs_mkdir().
The same solution applies - unhash the mkdir argument, then
call d_splice_alias() and if that returns a reference to preexisting
alias, dput() and report success.  ->mkdir() argument left unhashed
negative with the preexisting alias moved in the right place is just
fine from the ->mkdir() callers point of view.

Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 27 ---------------------------
 fs/fuse/dir.c          | 15 +++++++++++----
 include/linux/dcache.h |  1 -
 3 files changed, 11 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..a7d9e7a4c283 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1899,33 +1899,6 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
 }
 EXPORT_SYMBOL(d_instantiate_new);
 
-/**
- * d_instantiate_no_diralias - instantiate a non-aliased dentry
- * @entry: dentry to complete
- * @inode: inode to attach to this dentry
- *
- * Fill in inode information in the entry.  If a directory alias is found, then
- * return an error (and drop inode).  Together with d_materialise_unique() this
- * guarantees that a directory inode may never have more than one alias.
- */
-int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
-{
-	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
-
-	security_d_instantiate(entry, inode);
-	spin_lock(&inode->i_lock);
-	if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
-		spin_unlock(&inode->i_lock);
-		iput(inode);
-		return -EBUSY;
-	}
-	__d_instantiate(entry, inode);
-	spin_unlock(&inode->i_lock);
-
-	return 0;
-}
-EXPORT_SYMBOL(d_instantiate_no_diralias);
-
 struct dentry *d_make_root(struct inode *root_inode)
 {
 	struct dentry *res = NULL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 56231b31f806..4bbae6ac75c3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -539,6 +539,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
 {
 	struct fuse_entry_out outarg;
 	struct inode *inode;
+	struct dentry *d;
 	int err;
 	struct fuse_forget_link *forget;
 
@@ -570,11 +571,17 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
 	}
 	kfree(forget);
 
-	err = d_instantiate_no_diralias(entry, inode);
-	if (err)
-		return err;
+	d_drop(entry);
+	d = d_splice_alias(inode, entry);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
 
-	fuse_change_entry_timeout(entry, &outarg);
+	if (d) {
+		fuse_change_entry_timeout(d, &outarg);
+		dput(d);
+	} else {
+		fuse_change_entry_timeout(entry, &outarg);
+	}
 	fuse_invalidate_attr(dir);
 	return 0;
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 66c6e17e61e5..0b83629a3d8f 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -227,7 +227,6 @@ extern void d_instantiate(struct dentry *, struct inode *);
 extern void d_instantiate_new(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *);
-extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
 extern void __d_drop(struct dentry *dentry);
 extern void d_drop(struct dentry *dentry);
 extern void d_delete(struct dentry *);
-- 
cgit v1.2.3


From 7acece71a517cad83a0842a94d94c13f271b680c Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Tue, 31 Jul 2018 05:41:38 -0500
Subject: serial: sh-sci: Remove SCIx_RZ_SCIFA_REGTYPE

There is no more need for SCIx_RZ_SCIFA_REGTYPE now that
SCIx_SH4_SCIF_REGTYPE can provide the same register/address definitions.

Also, R7S9210 no longer needs a special compatible since the standard
"renesas,scif" will work just fine.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 31 -------------------------------
 include/linux/serial_sci.h  |  1 -
 2 files changed, 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index c29244f76057..54ea58bbe3c9 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -293,33 +293,6 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = {
 		.error_clear = SCIF_ERROR_CLEAR,
 	},
 
-	/*
-	 * The "SCIFA" that is in RZ/T and RZ/A2.
-	 * It looks like a normal SCIF with FIFO data, but with a
-	 * compressed address space. Also, the break out of interrupts
-	 * are different: ERI/BRI, RXI, TXI, TEI, DRI.
-	 */
-	[SCIx_RZ_SCIFA_REGTYPE] = {
-		.regs = {
-			[SCSMR]		= { 0x00, 16 },
-			[SCBRR]		= { 0x02,  8 },
-			[SCSCR]		= { 0x04, 16 },
-			[SCxTDR]	= { 0x06,  8 },
-			[SCxSR]		= { 0x08, 16 },
-			[SCxRDR]	= { 0x0A,  8 },
-			[SCFCR]		= { 0x0C, 16 },
-			[SCFDR]		= { 0x0E, 16 },
-			[SCSPTR]	= { 0x10, 16 },
-			[SCLSR]		= { 0x12, 16 },
-		},
-		.fifosize = 16,
-		.overrun_reg = SCLSR,
-		.overrun_mask = SCLSR_ORER,
-		.sampling_rate_mask = SCI_SR(32),
-		.error_mask = SCIF_DEFAULT_ERROR_MASK,
-		.error_clear = SCIF_ERROR_CLEAR,
-	},
-
 	/*
 	 * Common SH-3 SCIF definitions.
 	 */
@@ -3148,10 +3121,6 @@ static const struct of_device_id of_sci_match[] = {
 		.compatible = "renesas,scif-r7s72100",
 		.data = SCI_OF_DATA(PORT_SCIF, SCIx_SH2_SCIF_FIFODATA_REGTYPE),
 	},
-	{
-		.compatible = "renesas,scif-r7s9210",
-		.data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE),
-	},
 	/* Family-specific types */
 	{
 		.compatible = "renesas,rcar-gen1-scif",
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 1c89611e0e06..c0e795d95477 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -36,7 +36,6 @@ enum {
 	SCIx_SH4_SCIF_FIFODATA_REGTYPE,
 	SCIx_SH7705_SCIF_REGTYPE,
 	SCIx_HSCIF_REGTYPE,
-	SCIx_RZ_SCIFA_REGTYPE,
 
 	SCIx_NR_REGTYPES,
 };
-- 
cgit v1.2.3


From d3b26dd7cb0e3433bfd3c1d4dcf74c6039bb49fb Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 2 Aug 2018 03:08:23 +0000
Subject: Drivers: hv: vmbus: Reset the channel callback in
 vmbus_onoffer_rescind()

Before setting channel->rescind in vmbus_rescind_cleanup(), we should make
sure the channel callback won't run any more, otherwise a high-level
driver like pci_hyperv, which may be infinitely waiting for the host VSP's
response and notices the channel has been rescinded, can't safely give
up: e.g., in hv_pci_protocol_negotiation() -> wait_for_response(), it's
unsafe to exit from wait_for_response() and proceed with the on-stack
variable "comp_pkt" popped. The issue was originally spotted by
Michael Kelley <mikelley@microsoft.com>.

In vmbus_close_internal(), the patch also minimizes the range protected by
disabling/enabling channel->callback_event: we don't really need that for
the whole function.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Cc: stable@vger.kernel.org
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c      | 40 ++++++++++++++++++++++++----------------
 drivers/hv/channel_mgmt.c |  6 ++++++
 include/linux/hyperv.h    |  2 ++
 3 files changed, 32 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index ba0a092ae085..c3949220b770 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -558,11 +558,8 @@ static void reset_channel_cb(void *arg)
 	channel->onchannel_callback = NULL;
 }
 
-static int vmbus_close_internal(struct vmbus_channel *channel)
+void vmbus_reset_channel_cb(struct vmbus_channel *channel)
 {
-	struct vmbus_channel_close_channel *msg;
-	int ret;
-
 	/*
 	 * vmbus_on_event(), running in the per-channel tasklet, can race
 	 * with vmbus_close_internal() in the case of SMP guest, e.g., when
@@ -572,6 +569,29 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	 */
 	tasklet_disable(&channel->callback_event);
 
+	channel->sc_creation_callback = NULL;
+
+	/* Stop the callback asap */
+	if (channel->target_cpu != get_cpu()) {
+		put_cpu();
+		smp_call_function_single(channel->target_cpu, reset_channel_cb,
+					 channel, true);
+	} else {
+		reset_channel_cb(channel);
+		put_cpu();
+	}
+
+	/* Re-enable tasklet for use on re-open */
+	tasklet_enable(&channel->callback_event);
+}
+
+static int vmbus_close_internal(struct vmbus_channel *channel)
+{
+	struct vmbus_channel_close_channel *msg;
+	int ret;
+
+	vmbus_reset_channel_cb(channel);
+
 	/*
 	 * In case a device driver's probe() fails (e.g.,
 	 * util_probe() -> vmbus_open() returns -ENOMEM) and the device is
@@ -585,16 +605,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	}
 
 	channel->state = CHANNEL_OPEN_STATE;
-	channel->sc_creation_callback = NULL;
-	/* Stop callback and cancel the timer asap */
-	if (channel->target_cpu != get_cpu()) {
-		put_cpu();
-		smp_call_function_single(channel->target_cpu, reset_channel_cb,
-					 channel, true);
-	} else {
-		reset_channel_cb(channel);
-		put_cpu();
-	}
 
 	/* Send a closing message */
 
@@ -639,8 +649,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
 
 out:
-	/* re-enable tasklet for use on re-open */
-	tasklet_enable(&channel->callback_event);
 	return ret;
 }
 
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index f3b551a50653..0f0e091c117c 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -892,6 +892,12 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 		return;
 	}
 
+	/*
+	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
+	 * should make sure the channel callback is not running any more.
+	 */
+	vmbus_reset_channel_cb(channel);
+
 	/*
 	 * Now wait for offer handling to complete.
 	 */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2330f08062c7..efda23cf32c7 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1061,6 +1061,8 @@ extern int vmbus_establish_gpadl(struct vmbus_channel *channel,
 extern int vmbus_teardown_gpadl(struct vmbus_channel *channel,
 				     u32 gpadl_handle);
 
+void vmbus_reset_channel_cb(struct vmbus_channel *channel);
+
 extern int vmbus_recvpacket(struct vmbus_channel *channel,
 				  void *buffer,
 				  u32 bufferlen,
-- 
cgit v1.2.3


From 72755eed6c1c5312425ad3d78bf0b916769e6125 Mon Sep 17 00:00:00 2001
From: Roman Kiryanov <rkir@google.com>
Date: Mon, 23 Jul 2018 15:47:26 -0700
Subject: goldfish: Add missing includes to goldfish.h

goldfish.h refers to external symbols such as
dma_addr_t and writel. This causes compilation errors
if this file is included before other header files.

The mentioned symbols are defined in types.h (dma_addr_t)
and io.h (writel).

Signed-off-by: Roman Kiryanov <rkir@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/goldfish.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h
index 2835c150c3ff..159b4191f15d 100644
--- a/include/linux/goldfish.h
+++ b/include/linux/goldfish.h
@@ -2,6 +2,9 @@
 #ifndef __LINUX_GOLDFISH_H
 #define __LINUX_GOLDFISH_H
 
+#include <linux/types.h>
+#include <linux/io.h>
+
 /* Helpers for Goldfish virtual platform */
 
 static inline void gf_write_ptr(const void *ptr, void __iomem *portl,
-- 
cgit v1.2.3


From 68275680ea12e9f378e51ff525dacbb76f006764 Mon Sep 17 00:00:00 2001
From: Roman Kiryanov <rkir@google.com>
Date: Mon, 23 Jul 2018 15:47:27 -0700
Subject: goldfish: Use dedicated macros instead of manual bit shifting

There are dedicated macros (lower_32_bits and upper_32_bits)
available to extract the lower and upper 32 bits. They provide
better readability and could prevent some compilation warnings.

Signed-off-by: Roman Kiryanov <rkir@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/goldfish.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h
index 159b4191f15d..265a099cd3b8 100644
--- a/include/linux/goldfish.h
+++ b/include/linux/goldfish.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_GOLDFISH_H
 #define __LINUX_GOLDFISH_H
 
+#include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/io.h>
 
@@ -10,9 +11,11 @@
 static inline void gf_write_ptr(const void *ptr, void __iomem *portl,
 				void __iomem *porth)
 {
-	writel((u32)(unsigned long)ptr, portl);
+	const unsigned long addr = (unsigned long)ptr;
+
+	writel(lower_32_bits(addr), portl);
 #ifdef CONFIG_64BIT
-	writel((unsigned long)ptr >> 32, porth);
+	writel(upper_32_bits(addr), porth);
 #endif
 }
 
@@ -20,9 +23,9 @@ static inline void gf_write_dma_addr(const dma_addr_t addr,
 				     void __iomem *portl,
 				     void __iomem *porth)
 {
-	writel((u32)addr, portl);
+	writel(lower_32_bits(addr), portl);
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	writel(addr >> 32, porth);
+	writel(upper_32_bits(addr), porth);
 #endif
 }
 
-- 
cgit v1.2.3


From 06bcb5168d7d49aa3ed449ff13a6d13c30afc3f0 Mon Sep 17 00:00:00 2001
From: Frieder Schrempf <frieder.schrempf@exceet.de>
Date: Thu, 2 Aug 2018 14:53:52 +0200
Subject: spi: spi-mem: Fix a typo in the documentation of struct spi_mem

Fix a typo in the @drvpriv description.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
Acked-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi-mem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index bb4bd15ae1f6..951a2e949d5f 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -122,7 +122,7 @@ struct spi_mem_op {
 /**
  * struct spi_mem - describes a SPI memory device
  * @spi: the underlying SPI device
- * @drvpriv: spi_mem_drviver private data
+ * @drvpriv: spi_mem_driver private data
  *
  * Extra information that describe the SPI memory device and may be needed by
  * the controller to properly handle this device should be placed here.
-- 
cgit v1.2.3


From 5d27a9c8ea9e967d00b92a76d4bb242bf7692f2b Mon Sep 17 00:00:00 2001
From: Frieder Schrempf <frieder.schrempf@exceet.de>
Date: Thu, 2 Aug 2018 14:53:53 +0200
Subject: spi: spi-mem: Extend the SPI mem interface to set a custom memory
 name

When porting (Q)SPI controller drivers from the MTD layer to the SPI
layer, the naming scheme for the memory devices changes. To be able
to keep compatibility with the old drivers naming scheme, a name
field is added to struct spi_mem and a hook is added to let controller
drivers set a custom name for the memory device.

Example for the FSL QSPI driver:

Name with the old driver: 21e0000.qspi,
or with multiple devices: 21e0000.qspi-0, 21e0000.qspi-1, ...

Name with the new driver without spi_mem_get_name: spi4.0

Suggested-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mem.c       | 28 ++++++++++++++++++++++++++++
 include/linux/spi/spi-mem.h | 12 ++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index 990770dfa5cf..e43842c7a31a 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -310,6 +310,24 @@ int spi_mem_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 }
 EXPORT_SYMBOL_GPL(spi_mem_exec_op);
 
+/**
+ * spi_mem_get_name() - Return the SPI mem device name to be used by the
+ *			upper layer if necessary
+ * @mem: the SPI memory
+ *
+ * This function allows SPI mem users to retrieve the SPI mem device name.
+ * It is useful if the upper layer needs to expose a custom name for
+ * compatibility reasons.
+ *
+ * Return: a string containing the name of the memory device to be used
+ *	   by the SPI mem user
+ */
+const char *spi_mem_get_name(struct spi_mem *mem)
+{
+	return mem->name;
+}
+EXPORT_SYMBOL_GPL(spi_mem_get_name);
+
 /**
  * spi_mem_adjust_op_size() - Adjust the data size of a SPI mem operation to
  *			      match controller limitations
@@ -344,6 +362,7 @@ static inline struct spi_mem_driver *to_spi_mem_drv(struct device_driver *drv)
 static int spi_mem_probe(struct spi_device *spi)
 {
 	struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
+	struct spi_controller *ctlr = spi->controller;
 	struct spi_mem *mem;
 
 	mem = devm_kzalloc(&spi->dev, sizeof(*mem), GFP_KERNEL);
@@ -351,6 +370,15 @@ static int spi_mem_probe(struct spi_device *spi)
 		return -ENOMEM;
 
 	mem->spi = spi;
+
+	if (ctlr->mem_ops && ctlr->mem_ops->get_name)
+		mem->name = ctlr->mem_ops->get_name(mem);
+	else
+		mem->name = dev_name(&spi->dev);
+
+	if (IS_ERR_OR_NULL(mem->name))
+		return PTR_ERR(mem->name);
+
 	spi_set_drvdata(spi, mem);
 
 	return memdrv->probe(mem);
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 951a2e949d5f..0d64ccc4e584 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -123,6 +123,7 @@ struct spi_mem_op {
  * struct spi_mem - describes a SPI memory device
  * @spi: the underlying SPI device
  * @drvpriv: spi_mem_driver private data
+ * @name: name of the SPI memory device
  *
  * Extra information that describe the SPI memory device and may be needed by
  * the controller to properly handle this device should be placed here.
@@ -133,6 +134,7 @@ struct spi_mem_op {
 struct spi_mem {
 	struct spi_device *spi;
 	void *drvpriv;
+	char *name;
 };
 
 /**
@@ -165,6 +167,13 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem)
  *		    limitations)
  * @supports_op: check if an operation is supported by the controller
  * @exec_op: execute a SPI memory operation
+ * @get_name: get a custom name for the SPI mem device from the controller.
+ *	      This might be needed if the controller driver has been ported
+ *	      to use the SPI mem layer and a custom name is used to keep
+ *	      mtdparts compatible.
+ *	      Note that if the implementation of this function allocates memory
+ *	      dynamically, then it should do so with devm_xxx(), as we don't
+ *	      have a ->free_name() function.
  *
  * This interface should be implemented by SPI controllers providing an
  * high-level interface to execute SPI memory operation, which is usually the
@@ -176,6 +185,7 @@ struct spi_controller_mem_ops {
 			    const struct spi_mem_op *op);
 	int (*exec_op)(struct spi_mem *mem,
 		       const struct spi_mem_op *op);
+	const char *(*get_name)(struct spi_mem *mem);
 };
 
 /**
@@ -234,6 +244,8 @@ bool spi_mem_supports_op(struct spi_mem *mem,
 int spi_mem_exec_op(struct spi_mem *mem,
 		    const struct spi_mem_op *op);
 
+const char *spi_mem_get_name(struct spi_mem *mem);
+
 int spi_mem_driver_register_with_owner(struct spi_mem_driver *drv,
 				       struct module *owner);
 
-- 
cgit v1.2.3


From 5a5ba10f44fa1cd081cec38389e1b47f438fe25b Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 26 Jul 2018 15:40:56 +0200
Subject: rtc: remove struct rtc_task

Include rtc_task members directly in rtc_timer member.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/interface.c | 8 ++++----
 include/linux/rtc.h     | 9 ++-------
 2 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index d0983ed6c842..a962540faf2e 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -912,8 +912,8 @@ again:
 		timerqueue_del(&rtc->timerqueue, &timer->node);
 		trace_rtc_timer_dequeue(timer);
 		timer->enabled = 0;
-		if (timer->task.func)
-			timer->task.func(timer->task.private_data);
+		if (timer->func)
+			timer->func(timer->private_data);
 
 		trace_rtc_timer_fired(timer);
 		/* Re-add/fwd periodic timers */
@@ -968,8 +968,8 @@ void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data)
 {
 	timerqueue_init(&timer->node);
 	timer->enabled = 0;
-	timer->task.func = f;
-	timer->task.private_data = data;
+	timer->func = f;
+	timer->private_data = data;
 }
 
 /* rtc_timer_start - Sets an rtc_timer to fire in the future
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index bf4d375025d1..6aedc30003e7 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -87,16 +87,11 @@ struct rtc_class_ops {
 	int (*set_offset)(struct device *, long offset);
 };
 
-typedef struct rtc_task {
-	void (*func)(void *private_data);
-	void *private_data;
-} rtc_task_t;
-
-
 struct rtc_timer {
-	struct rtc_task	task;
 	struct timerqueue_node node;
 	ktime_t period;
+	void (*func)(void *private_data);
+	void *private_data;
 	int enabled;
 };
 
-- 
cgit v1.2.3


From a83c0ea79d8b5705d09a39c73224332f9170a903 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Sat, 30 Jun 2018 17:54:59 +0300
Subject: docs/mm: bootmem: add kernel-doc description of 'struct bootmem_data'

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/bootmem.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 7942a96b1a9d..42515195d7d8 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -27,9 +27,20 @@ extern unsigned long max_pfn;
 extern unsigned long long max_possible_pfn;
 
 #ifndef CONFIG_NO_BOOTMEM
-/*
- * node_bootmem_map is a map pointer - the bits represent all physical 
- * memory pages (including holes) on the node.
+/**
+ * struct bootmem_data - per-node information used by the bootmem allocator
+ * @node_min_pfn: the starting physical address of the node's memory
+ * @node_low_pfn: the end physical address of the directly addressable memory
+ * @node_bootmem_map: is a bitmap pointer - the bits represent all physical
+ *		      memory pages (including holes) on the node.
+ * @last_end_off: the offset within the page of the end of the last allocation;
+ *                if 0, the page used is full
+ * @hint_idx: the PFN of the page used with the last allocation;
+ *            together with using this with the @last_end_offset field,
+ *            a test can be made to see if allocations can be merged
+ *            with the page used for the last allocation rather than
+ *            using up a full new page.
+ * @list: list entry in the linked list ordered by the memory addresses
  */
 typedef struct bootmem_data {
 	unsigned long node_min_pfn;
-- 
cgit v1.2.3


From e1720fee27246dfb84f7c433e35367170a2d4436 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Sat, 30 Jun 2018 17:55:01 +0300
Subject: mm/memblock: add a name for memblock flags enumeration

Since kernel-doc does not like anonymous enums the name is required for
adding documentation. While on it, I've also updated all the function
declarations to use 'enum memblock_flags' instead of unsigned long.

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/memblock.h | 22 +++++++++++-----------
 mm/memblock.c            | 37 +++++++++++++++++++++----------------
 mm/nobootmem.c           |  2 +-
 3 files changed, 33 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index ca59883c8364..8b8fbceffd4d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -21,7 +21,7 @@
 #define INIT_PHYSMEM_REGIONS	4
 
 /* Definition of memblock flags. */
-enum {
+enum memblock_flags {
 	MEMBLOCK_NONE		= 0x0,	/* No special request */
 	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
 	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
@@ -31,7 +31,7 @@ enum {
 struct memblock_region {
 	phys_addr_t base;
 	phys_addr_t size;
-	unsigned long flags;
+	enum memblock_flags flags;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	int nid;
 #endif
@@ -72,7 +72,7 @@ void memblock_discard(void);
 
 phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
 					phys_addr_t start, phys_addr_t end,
-					int nid, ulong flags);
+					int nid, enum memblock_flags flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
 void memblock_allow_resize(void);
@@ -89,19 +89,19 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
-ulong choose_memblock_flags(void);
+enum memblock_flags choose_memblock_flags(void);
 
 /* Low level functions */
 int memblock_add_range(struct memblock_type *type,
 		       phys_addr_t base, phys_addr_t size,
-		       int nid, unsigned long flags);
+		       int nid, enum memblock_flags flags);
 
-void __next_mem_range(u64 *idx, int nid, ulong flags,
+void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
 		      struct memblock_type *type_a,
 		      struct memblock_type *type_b, phys_addr_t *out_start,
 		      phys_addr_t *out_end, int *out_nid);
 
-void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
+void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags,
 			  struct memblock_type *type_a,
 			  struct memblock_type *type_b, phys_addr_t *out_start,
 			  phys_addr_t *out_end, int *out_nid);
@@ -253,13 +253,13 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 			   NUMA_NO_NODE, MEMBLOCK_NONE, p_start, p_end, NULL)
 
 static inline void memblock_set_region_flags(struct memblock_region *r,
-					     unsigned long flags)
+					     enum memblock_flags flags)
 {
 	r->flags |= flags;
 }
 
 static inline void memblock_clear_region_flags(struct memblock_region *r,
-					       unsigned long flags)
+					       enum memblock_flags flags)
 {
 	r->flags &= ~flags;
 }
@@ -317,10 +317,10 @@ static inline bool memblock_bottom_up(void)
 
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 					phys_addr_t start, phys_addr_t end,
-					ulong flags);
+					enum memblock_flags flags);
 phys_addr_t memblock_alloc_base_nid(phys_addr_t size,
 					phys_addr_t align, phys_addr_t max_addr,
-					int nid, ulong flags);
+					int nid, enum memblock_flags flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 				phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/mm/memblock.c b/mm/memblock.c
index 03d48d8835ba..20f48b49821a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -60,7 +60,7 @@ static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
 static int memblock_reserved_in_slab __initdata_memblock = 0;
 
-ulong __init_memblock choose_memblock_flags(void)
+enum memblock_flags __init_memblock choose_memblock_flags(void)
 {
 	return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
 }
@@ -109,7 +109,7 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
 static phys_addr_t __init_memblock
 __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
 				phys_addr_t size, phys_addr_t align, int nid,
-				ulong flags)
+				enum memblock_flags flags)
 {
 	phys_addr_t this_start, this_end, cand;
 	u64 i;
@@ -143,7 +143,7 @@ __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
 static phys_addr_t __init_memblock
 __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
 			       phys_addr_t size, phys_addr_t align, int nid,
-			       ulong flags)
+			       enum memblock_flags flags)
 {
 	phys_addr_t this_start, this_end, cand;
 	u64 i;
@@ -188,7 +188,8 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
  */
 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
 					phys_addr_t align, phys_addr_t start,
-					phys_addr_t end, int nid, ulong flags)
+					phys_addr_t end, int nid,
+					enum memblock_flags flags)
 {
 	phys_addr_t kernel_end, ret;
 
@@ -251,7 +252,7 @@ phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
 					phys_addr_t align)
 {
 	phys_addr_t ret;
-	ulong flags = choose_memblock_flags();
+	enum memblock_flags flags = choose_memblock_flags();
 
 again:
 	ret = memblock_find_in_range_node(size, align, start, end,
@@ -472,7 +473,8 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
 static void __init_memblock memblock_insert_region(struct memblock_type *type,
 						   int idx, phys_addr_t base,
 						   phys_addr_t size,
-						   int nid, unsigned long flags)
+						   int nid,
+						   enum memblock_flags flags)
 {
 	struct memblock_region *rgn = &type->regions[idx];
 
@@ -504,7 +506,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  */
 int __init_memblock memblock_add_range(struct memblock_type *type,
 				phys_addr_t base, phys_addr_t size,
-				int nid, unsigned long flags)
+				int nid, enum memblock_flags flags)
 {
 	bool insert = false;
 	phys_addr_t obase = base;
@@ -873,7 +875,8 @@ void __init_memblock __next_reserved_mem_region(u64 *idx,
  * As both region arrays are sorted, the function advances the two indices
  * in lockstep and returns each intersection.
  */
-void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
+void __init_memblock __next_mem_range(u64 *idx, int nid,
+				      enum memblock_flags flags,
 				      struct memblock_type *type_a,
 				      struct memblock_type *type_b,
 				      phys_addr_t *out_start,
@@ -982,7 +985,8 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
  *
  * Reverse of __next_mem_range().
  */
-void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
+void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
+					  enum memblock_flags flags,
 					  struct memblock_type *type_a,
 					  struct memblock_type *type_b,
 					  phys_addr_t *out_start,
@@ -1140,7 +1144,8 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 
 static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
 					phys_addr_t align, phys_addr_t start,
-					phys_addr_t end, int nid, ulong flags)
+					phys_addr_t end, int nid,
+					enum memblock_flags flags)
 {
 	phys_addr_t found;
 
@@ -1162,7 +1167,7 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
 
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 					phys_addr_t start, phys_addr_t end,
-					ulong flags)
+					enum memblock_flags flags)
 {
 	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
 					flags);
@@ -1170,14 +1175,14 @@ phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 
 phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
 					phys_addr_t align, phys_addr_t max_addr,
-					int nid, ulong flags)
+					int nid, enum memblock_flags flags)
 {
 	return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags);
 }
 
 phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
-	ulong flags = choose_memblock_flags();
+	enum memblock_flags flags = choose_memblock_flags();
 	phys_addr_t ret;
 
 again:
@@ -1258,7 +1263,7 @@ static void * __init memblock_virt_alloc_internal(
 {
 	phys_addr_t alloc;
 	void *ptr;
-	ulong flags = choose_memblock_flags();
+	enum memblock_flags flags = choose_memblock_flags();
 
 	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
 		nid = NUMA_NO_NODE;
@@ -1733,7 +1738,7 @@ phys_addr_t __init_memblock memblock_get_current_limit(void)
 static void __init_memblock memblock_dump(struct memblock_type *type)
 {
 	phys_addr_t base, end, size;
-	unsigned long flags;
+	enum memblock_flags flags;
 	int idx;
 	struct memblock_region *rgn;
 
@@ -1751,7 +1756,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
 			snprintf(nid_buf, sizeof(nid_buf), " on node %d",
 				 memblock_get_region_node(rgn));
 #endif
-		pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#lx\n",
+		pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n",
 			type->name, idx, &base, &end, &size, nid_buf, flags);
 	}
 }
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index c2cfa04f0231..439af3b765a7 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -42,7 +42,7 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 {
 	void *ptr;
 	u64 addr;
-	ulong flags = choose_memblock_flags();
+	enum memblock_flags flags = choose_memblock_flags();
 
 	if (limit > memblock.current_limit)
 		limit = memblock.current_limit;
-- 
cgit v1.2.3


From 47cec4432ab06c4ba90c241b142a016f878ac6da Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Sat, 30 Jun 2018 17:55:02 +0300
Subject: docs/mm: memblock: update kernel-doc comments

* make memblock_discard description kernel-doc compatible
* add brief description for memblock_setclr_flag and describe its
  parameters
* fixup return value descriptions

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/memblock.h | 17 +++++++---
 mm/memblock.c            | 84 +++++++++++++++++++++++++++---------------------
 2 files changed, 59 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 8b8fbceffd4d..63704c64583a 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -239,7 +239,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 /**
  * for_each_resv_unavail_range - iterate through reserved and unavailable memory
  * @i: u64 used as loop variable
- * @flags: pick from blocks based on memory attributes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  *
@@ -367,8 +366,10 @@ phys_addr_t memblock_get_current_limit(void);
  */
 
 /**
- * memblock_region_memory_base_pfn - Return the lowest pfn intersecting with the memory region
+ * memblock_region_memory_base_pfn - get the lowest pfn of the memory region
  * @reg: memblock_region structure
+ *
+ * Return: the lowest pfn intersecting with the memory region
  */
 static inline unsigned long memblock_region_memory_base_pfn(const struct memblock_region *reg)
 {
@@ -376,8 +377,10 @@ static inline unsigned long memblock_region_memory_base_pfn(const struct membloc
 }
 
 /**
- * memblock_region_memory_end_pfn - Return the end_pfn this region
+ * memblock_region_memory_end_pfn - get the end pfn of the memory region
  * @reg: memblock_region structure
+ *
+ * Return: the end_pfn of the reserved region
  */
 static inline unsigned long memblock_region_memory_end_pfn(const struct memblock_region *reg)
 {
@@ -385,8 +388,10 @@ static inline unsigned long memblock_region_memory_end_pfn(const struct memblock
 }
 
 /**
- * memblock_region_reserved_base_pfn - Return the lowest pfn intersecting with the reserved region
+ * memblock_region_reserved_base_pfn - get the lowest pfn of the reserved region
  * @reg: memblock_region structure
+ *
+ * Return: the lowest pfn intersecting with the reserved region
  */
 static inline unsigned long memblock_region_reserved_base_pfn(const struct memblock_region *reg)
 {
@@ -394,8 +399,10 @@ static inline unsigned long memblock_region_reserved_base_pfn(const struct membl
 }
 
 /**
- * memblock_region_reserved_end_pfn - Return the end_pfn this region
+ * memblock_region_reserved_end_pfn - get the end pfn of the reserved region
  * @reg: memblock_region structure
+ *
+ * Return: the end_pfn of the reserved region
  */
 static inline unsigned long memblock_region_reserved_end_pfn(const struct memblock_region *reg)
 {
diff --git a/mm/memblock.c b/mm/memblock.c
index 20f48b49821a..4b9dd76f6ae6 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -92,10 +92,11 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
 	return i < type->cnt;
 }
 
-/*
+/**
  * __memblock_find_range_bottom_up - find free area utility in bottom-up
  * @start: start of candidate range
- * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
+ *       %MEMBLOCK_ALLOC_ACCESSIBLE
  * @size: size of free area to find
  * @align: alignment of free area to find
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
@@ -103,7 +104,7 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
  *
  * Utility called from memblock_find_in_range_node(), find free area bottom-up.
  *
- * RETURNS:
+ * Return:
  * Found address on success, 0 on failure.
  */
 static phys_addr_t __init_memblock
@@ -129,7 +130,8 @@ __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
 /**
  * __memblock_find_range_top_down - find free area utility, in top-down
  * @start: start of candidate range
- * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
+ *       %MEMBLOCK_ALLOC_ACCESSIBLE
  * @size: size of free area to find
  * @align: alignment of free area to find
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
@@ -137,7 +139,7 @@ __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
  *
  * Utility called from memblock_find_in_range_node(), find free area top-down.
  *
- * RETURNS:
+ * Return:
  * Found address on success, 0 on failure.
  */
 static phys_addr_t __init_memblock
@@ -169,7 +171,8 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
  * @size: size of free area to find
  * @align: alignment of free area to find
  * @start: start of candidate range
- * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
+ *       %MEMBLOCK_ALLOC_ACCESSIBLE
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  * @flags: pick from blocks based on memory attributes
  *
@@ -183,7 +186,7 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
  *
  * If bottom-up allocation failed, will try to allocate memory top-down.
  *
- * RETURNS:
+ * Return:
  * Found address on success, 0 on failure.
  */
 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
@@ -238,13 +241,14 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
 /**
  * memblock_find_in_range - find free area in given range
  * @start: start of candidate range
- * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
+ *       %MEMBLOCK_ALLOC_ACCESSIBLE
  * @size: size of free area to find
  * @align: alignment of free area to find
  *
  * Find @size free area aligned to @align in the specified range.
  *
- * RETURNS:
+ * Return:
  * Found address on success, 0 on failure.
  */
 phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
@@ -288,7 +292,7 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
 /**
- * Discard memory and reserved arrays if they were allocated
+ * memblock_discard - discard memory and reserved arrays if they were allocated
  */
 void __init memblock_discard(void)
 {
@@ -318,11 +322,11 @@ void __init memblock_discard(void)
  *
  * Double the size of the @type regions array. If memblock is being used to
  * allocate memory for a new reserved regions array and there is a previously
- * allocated memory range [@new_area_start,@new_area_start+@new_area_size]
+ * allocated memory range [@new_area_start, @new_area_start + @new_area_size]
  * waiting to be reserved, ensure the memory used by the new array does
  * not overlap.
  *
- * RETURNS:
+ * Return:
  * 0 on success, -1 on failure.
  */
 static int __init_memblock memblock_double_array(struct memblock_type *type,
@@ -467,7 +471,7 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
  * @nid:	node id of the new region
  * @flags:	flags of the new region
  *
- * Insert new memblock region [@base,@base+@size) into @type at @idx.
+ * Insert new memblock region [@base, @base + @size) into @type at @idx.
  * @type must already have extra room to accommodate the new region.
  */
 static void __init_memblock memblock_insert_region(struct memblock_type *type,
@@ -496,12 +500,12 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  * @nid: nid of the new region
  * @flags: flags of the new region
  *
- * Add new memblock region [@base,@base+@size) into @type.  The new region
+ * Add new memblock region [@base, @base + @size) into @type.  The new region
  * is allowed to overlap with existing ones - overlaps don't affect already
  * existing regions.  @type is guaranteed to be minimal (all neighbouring
  * compatible regions are merged) after the addition.
  *
- * RETURNS:
+ * Return:
  * 0 on success, -errno on failure.
  */
 int __init_memblock memblock_add_range(struct memblock_type *type,
@@ -615,11 +619,11 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
  * @end_rgn: out parameter for the end of isolated region
  *
  * Walk @type and ensure that regions don't cross the boundaries defined by
- * [@base,@base+@size).  Crossing regions are split at the boundaries,
+ * [@base, @base + @size).  Crossing regions are split at the boundaries,
  * which may create at most two more regions.  The index of the first
  * region inside the range is returned in *@start_rgn and end in *@end_rgn.
  *
- * RETURNS:
+ * Return:
  * 0 on success, -errno on failure.
  */
 static int __init_memblock memblock_isolate_range(struct memblock_type *type,
@@ -730,10 +734,15 @@ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
 }
 
 /**
+ * memblock_setclr_flag - set or clear flag for a memory region
+ * @base: base address of the region
+ * @size: size of the region
+ * @set: set or clear the flag
+ * @flag: the flag to udpate
  *
  * This function isolates region [@base, @base + @size), and sets/clears flag
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 static int __init_memblock memblock_setclr_flag(phys_addr_t base,
 				phys_addr_t size, int set, int flag)
@@ -760,7 +769,7 @@ static int __init_memblock memblock_setclr_flag(phys_addr_t base,
  * @base: the base phys addr of the region
  * @size: the size of the region
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
 {
@@ -772,7 +781,7 @@ int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
  * @base: the base phys addr of the region
  * @size: the size of the region
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
 {
@@ -784,7 +793,7 @@ int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
  * @base: the base phys addr of the region
  * @size: the size of the region
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
 {
@@ -798,7 +807,7 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
  * @base: the base phys addr of the region
  * @size: the size of the region
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
 {
@@ -810,7 +819,7 @@ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
  * @base: the base phys addr of the region
  * @size: the size of the region
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
 {
@@ -971,9 +980,6 @@ void __init_memblock __next_mem_range(u64 *idx, int nid,
 /**
  * __next_mem_range_rev - generic next function for for_each_*_range_rev()
  *
- * Finds the next range from type_a which is not marked as unsuitable
- * in type_b.
- *
  * @idx: pointer to u64 loop variable
  * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @flags: pick from blocks based on memory attributes
@@ -983,6 +989,9 @@ void __init_memblock __next_mem_range(u64 *idx, int nid,
  * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @out_nid: ptr to int for nid of the range, can be %NULL
  *
+ * Finds the next range from type_a which is not marked as unsuitable
+ * in type_b.
+ *
  * Reverse of __next_mem_range().
  */
 void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
@@ -1118,10 +1127,10 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
  * @type: memblock type to set node ID for
  * @nid: node ID to set
  *
- * Set the nid of memblock @type regions in [@base,@base+@size) to @nid.
+ * Set the nid of memblock @type regions in [@base, @base + @size) to @nid.
  * Regions which cross the area boundaries are split as necessary.
  *
- * RETURNS:
+ * Return:
  * 0 on success, -errno on failure.
  */
 int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
@@ -1245,7 +1254,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
  * The allocation is performed from memory region limited by
  * memblock.current_limit if @max_addr == %BOOTMEM_ALLOC_ACCESSIBLE.
  *
- * The memory block is aligned on SMP_CACHE_BYTES if @align == 0.
+ * The memory block is aligned on %SMP_CACHE_BYTES if @align == 0.
  *
  * The phys address of allocated boot memory block is converted to virtual and
  * allocated memory is reset to 0.
@@ -1253,7 +1262,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
  * In addition, function sets the min_count to 0 using kmemleak_alloc for
  * allocated boot memory block, so that it is never reported as leaks.
  *
- * RETURNS:
+ * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
 static void * __init memblock_virt_alloc_internal(
@@ -1338,7 +1347,7 @@ done:
  * info), if enabled. Does not zero allocated memory, does not panic if request
  * cannot be satisfied.
  *
- * RETURNS:
+ * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
 void * __init memblock_virt_alloc_try_nid_raw(
@@ -1375,7 +1384,7 @@ void * __init memblock_virt_alloc_try_nid_raw(
  * Public function, provides additional debug information (including caller
  * info), if enabled. This function zeroes the allocated memory.
  *
- * RETURNS:
+ * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
 void * __init memblock_virt_alloc_try_nid_nopanic(
@@ -1411,7 +1420,7 @@ void * __init memblock_virt_alloc_try_nid_nopanic(
  * which provides debug information (including caller info), if enabled,
  * and panics if the request can not be satisfied.
  *
- * RETURNS:
+ * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
 void * __init memblock_virt_alloc_try_nid(
@@ -1668,9 +1677,9 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
  * @base: base of region to check
  * @size: size of region to check
  *
- * Check if the region [@base, @base+@size) is a subset of a memory block.
+ * Check if the region [@base, @base + @size) is a subset of a memory block.
  *
- * RETURNS:
+ * Return:
  * 0 if false, non-zero if true
  */
 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
@@ -1689,9 +1698,10 @@ bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t siz
  * @base: base of region to check
  * @size: size of region to check
  *
- * Check if the region [@base, @base+@size) intersects a reserved memory block.
+ * Check if the region [@base, @base + @size) intersects a reserved
+ * memory block.
  *
- * RETURNS:
+ * Return:
  * True if they intersect, false if not.
  */
 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
-- 
cgit v1.2.3


From 9a0de1bfe191220cb0437865261ab2f95cbb13ef Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Sat, 30 Jun 2018 17:55:04 +0300
Subject: docs/mm: memblock: add kernel-doc description for memblock types

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/memblock.h | 37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 63704c64583a..516920549378 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -20,7 +20,13 @@
 #define INIT_MEMBLOCK_REGIONS	128
 #define INIT_PHYSMEM_REGIONS	4
 
-/* Definition of memblock flags. */
+/**
+ * enum memblock_flags - definition of memory region attributes
+ * @MEMBLOCK_NONE: no special request
+ * @MEMBLOCK_HOTPLUG: hotpluggable region
+ * @MEMBLOCK_MIRROR: mirrored region
+ * @MEMBLOCK_NOMAP: don't add to kernel direct mapping
+ */
 enum memblock_flags {
 	MEMBLOCK_NONE		= 0x0,	/* No special request */
 	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
@@ -28,6 +34,13 @@ enum memblock_flags {
 	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
 };
 
+/**
+ * struct memblock_region - represents a memory region
+ * @base: physical address of the region
+ * @size: size of the region
+ * @flags: memory region attributes
+ * @nid: NUMA node id
+ */
 struct memblock_region {
 	phys_addr_t base;
 	phys_addr_t size;
@@ -37,14 +50,30 @@ struct memblock_region {
 #endif
 };
 
+/**
+ * struct memblock_type - collection of memory regions of certain type
+ * @cnt: number of regions
+ * @max: size of the allocated array
+ * @total_size: size of all regions
+ * @regions: array of regions
+ * @name: the memory type symbolic name
+ */
 struct memblock_type {
-	unsigned long cnt;	/* number of regions */
-	unsigned long max;	/* size of the allocated array */
-	phys_addr_t total_size;	/* size of all regions */
+	unsigned long cnt;
+	unsigned long max;
+	phys_addr_t total_size;
 	struct memblock_region *regions;
 	char *name;
 };
 
+/**
+ * struct memblock - memblock allocator metadata
+ * @bottom_up: is bottom up direction?
+ * @current_limit: physical address of the current allocation limit
+ * @memory: usabe memory regions
+ * @reserved: reserved memory regions
+ * @physmem: all physical memory
+ */
 struct memblock {
 	bool bottom_up;  /* is bottom up direction? */
 	phys_addr_t current_limit;
-- 
cgit v1.2.3


From 6d54228fd1f293d00576ab2c3d2e4992c7cce12f Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 25 Jun 2018 17:26:55 +0200
Subject: libceph: make ceph_osdc_notify{,_ack}() payload_len u32

The wire format dictates that payload_len fits into 4 bytes.  Take u32
instead of size_t to reflect that.

All callers pass a small integer, so no changes required.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h | 4 ++--
 net/ceph/osd_client.c           | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 0d6ee04b4c41..d6fe7e4df8cf 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -528,12 +528,12 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
 			 u64 notify_id,
 			 u64 cookie,
 			 void *payload,
-			 size_t payload_len);
+			 u32 payload_len);
 int ceph_osdc_notify(struct ceph_osd_client *osdc,
 		     struct ceph_object_id *oid,
 		     struct ceph_object_locator *oloc,
 		     void *payload,
-		     size_t payload_len,
+		     u32 payload_len,
 		     u32 timeout,
 		     struct page ***preply_pages,
 		     size_t *preply_len);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a00c74f1154e..93614cb5e03f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4591,7 +4591,7 @@ EXPORT_SYMBOL(ceph_osdc_unwatch);
 
 static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which,
 				      u64 notify_id, u64 cookie, void *payload,
-				      size_t payload_len)
+				      u32 payload_len)
 {
 	struct ceph_osd_req_op *op;
 	struct ceph_pagelist *pl;
@@ -4628,7 +4628,7 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
 			 u64 notify_id,
 			 u64 cookie,
 			 void *payload,
-			 size_t payload_len)
+			 u32 payload_len)
 {
 	struct ceph_osd_request *req;
 	int ret;
@@ -4661,7 +4661,7 @@ EXPORT_SYMBOL(ceph_osdc_notify_ack);
 
 static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
 				  u64 cookie, u32 prot_ver, u32 timeout,
-				  void *payload, size_t payload_len)
+				  void *payload, u32 payload_len)
 {
 	struct ceph_osd_req_op *op;
 	struct ceph_pagelist *pl;
@@ -4701,7 +4701,7 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
 		     struct ceph_object_id *oid,
 		     struct ceph_object_locator *oloc,
 		     void *payload,
-		     size_t payload_len,
+		     u32 payload_len,
 		     u32 timeout,
 		     struct page ***preply_pages,
 		     size_t *preply_len)
-- 
cgit v1.2.3


From c9ed51c9123ab5e8f79b7d53a9afd786b43d4fe6 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 27 Jun 2018 16:42:51 +0200
Subject: libceph: change ceph_pagelist_encode_string() to take u32

The wire format dictates that the length of string fits into 4 bytes.
Take u32 instead of size_t to reflect that.

We were already truncating len in ceph_pagelist_encode_32() -- this
just pushes that truncation one level up.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/pagelist.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
index 7edcded07641..d0223364349f 100644
--- a/include/linux/ceph/pagelist.h
+++ b/include/linux/ceph/pagelist.h
@@ -68,7 +68,7 @@ static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
 	return ceph_pagelist_append(pl, &v, 1);
 }
 static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
-					      char *s, size_t len)
+					      char *s, u32 len)
 {
 	int ret = ceph_pagelist_encode_32(pl, len);
 	if (ret)
-- 
cgit v1.2.3


From 473bd2d780d1699d81b25f57c0ec4de633a28eb8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jul 2018 22:18:34 +0200
Subject: libceph: use timespec64 in for keepalive2 and ticket validity

ceph_con_keepalive_expired() is the last user of timespec_add() and some
of the last uses of ktime_get_real_ts().  Replacing this with timespec64
based interfaces  lets us remove that deprecated API.

I'm introducing new ceph_encode_timespec64()/ceph_decode_timespec64()
here that take timespec64 structures and convert to/from ceph_timespec,
which is defined to have an unsigned 32-bit tv_sec member. This extends
the range of valid times to year 2106, avoiding the year 2038 overflow.

The ceph file system portion still uses the old functions for inode
timestamps, this will be done separately after the VFS layer is converted.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/decode.h    | 20 +++++++++++++++++++-
 include/linux/ceph/messenger.h |  2 +-
 net/ceph/auth_x.c              | 14 +++++++-------
 net/ceph/auth_x.h              |  2 +-
 net/ceph/cls_lock_client.c     |  4 ++--
 net/ceph/messenger.c           | 20 ++++++++++----------
 6 files changed, 40 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index d143ac8879c6..094b9b4a34f3 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -194,8 +194,26 @@ ceph_decode_skip_n(p, end, sizeof(u8), bad)
 	} while (0)
 
 /*
- * struct ceph_timespec <-> struct timespec
+ * struct ceph_timespec <-> struct timespec64
  */
+static inline void ceph_decode_timespec64(struct timespec64 *ts,
+					  const struct ceph_timespec *tv)
+{
+	/*
+	 * This will still overflow in year 2106.  We could extend
+	 * the protocol to steal two more bits from tv_nsec to
+	 * add three more 136 year epochs after that the way ext4
+	 * does if necessary.
+	 */
+	ts->tv_sec = (time64_t)le32_to_cpu(tv->tv_sec);
+	ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
+}
+static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
+					  const struct timespec64 *ts)
+{
+	tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
+	tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
+}
 static inline void ceph_decode_timespec(struct timespec *ts,
 					const struct ceph_timespec *tv)
 {
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c7dfcb8a1fb2..a718b877c597 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -330,7 +330,7 @@ struct ceph_connection {
 	int in_base_pos;     /* bytes read */
 	__le64 in_temp_ack;  /* for reading an ack */
 
-	struct timespec last_keepalive_ack; /* keepalive2 ack stamp */
+	struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
 
 	struct delayed_work work;	    /* send|recv work */
 	unsigned long       delay;          /* current delay interval */
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 2f4a1baf5f52..b05c3a540a5a 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -149,12 +149,12 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	void *dp, *dend;
 	int dlen;
 	char is_enc;
-	struct timespec validity;
+	struct timespec64 validity;
 	void *tp, *tpend;
 	void **ptp;
 	struct ceph_crypto_key new_session_key = { 0 };
 	struct ceph_buffer *new_ticket_blob;
-	unsigned long new_expires, new_renew_after;
+	time64_t new_expires, new_renew_after;
 	u64 new_secret_id;
 	int ret;
 
@@ -189,11 +189,11 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	if (ret)
 		goto out;
 
-	ceph_decode_timespec(&validity, dp);
+	ceph_decode_timespec64(&validity, dp);
 	dp += sizeof(struct ceph_timespec);
-	new_expires = get_seconds() + validity.tv_sec;
+	new_expires = ktime_get_real_seconds() + validity.tv_sec;
 	new_renew_after = new_expires - (validity.tv_sec / 4);
-	dout(" expires=%lu renew_after=%lu\n", new_expires,
+	dout(" expires=%llu renew_after=%llu\n", new_expires,
 	     new_renew_after);
 
 	/* ticket blob for service */
@@ -385,13 +385,13 @@ static bool need_key(struct ceph_x_ticket_handler *th)
 	if (!th->have_key)
 		return true;
 
-	return get_seconds() >= th->renew_after;
+	return ktime_get_real_seconds() >= th->renew_after;
 }
 
 static bool have_key(struct ceph_x_ticket_handler *th)
 {
 	if (th->have_key) {
-		if (get_seconds() >= th->expires)
+		if (ktime_get_real_seconds() >= th->expires)
 			th->have_key = false;
 	}
 
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 454cb54568af..57ba99f7736f 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -22,7 +22,7 @@ struct ceph_x_ticket_handler {
 	u64 secret_id;
 	struct ceph_buffer *ticket_blob;
 
-	unsigned long renew_after, expires;
+	time64_t renew_after, expires;
 };
 
 #define CEPHX_AU_ENC_BUF_LEN	128  /* big enough for encrypted blob */
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 8d2032b2f225..2105a6eaa66c 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -32,7 +32,7 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
 	int desc_len = strlen(desc);
 	void *p, *end;
 	struct page *lock_op_page;
-	struct timespec mtime;
+	struct timespec64 mtime;
 	int ret;
 
 	lock_op_buf_size = name_len + sizeof(__le32) +
@@ -63,7 +63,7 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
 	ceph_encode_string(&p, end, desc, desc_len);
 	/* only support infinite duration */
 	memset(&mtime, 0, sizeof(mtime));
-	ceph_encode_timespec(p, &mtime);
+	ceph_encode_timespec64(p, &mtime);
 	p += sizeof(struct ceph_timespec);
 	ceph_encode_8(&p, flags);
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index c6413c360771..3f6336248509 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1417,11 +1417,11 @@ static void prepare_write_keepalive(struct ceph_connection *con)
 	dout("prepare_write_keepalive %p\n", con);
 	con_out_kvec_reset(con);
 	if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
-		struct timespec now;
+		struct timespec64 now;
 
-		ktime_get_real_ts(&now);
+		ktime_get_real_ts64(&now);
 		con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
-		ceph_encode_timespec(&con->out_temp_keepalive2, &now);
+		ceph_encode_timespec64(&con->out_temp_keepalive2, &now);
 		con_out_kvec_add(con, sizeof(con->out_temp_keepalive2),
 				 &con->out_temp_keepalive2);
 	} else {
@@ -2555,7 +2555,7 @@ static int read_keepalive_ack(struct ceph_connection *con)
 	int ret = read_partial(con, size, size, &ceph_ts);
 	if (ret <= 0)
 		return ret;
-	ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts);
+	ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts);
 	prepare_read_tag(con);
 	return 1;
 }
@@ -3223,12 +3223,12 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con,
 {
 	if (interval > 0 &&
 	    (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
-		struct timespec now;
-		struct timespec ts;
-		ktime_get_real_ts(&now);
-		jiffies_to_timespec(interval, &ts);
-		ts = timespec_add(con->last_keepalive_ack, ts);
-		return timespec_compare(&now, &ts) >= 0;
+		struct timespec64 now;
+		struct timespec64 ts;
+		ktime_get_real_ts64(&now);
+		jiffies_to_timespec64(interval, &ts);
+		ts = timespec64_add(con->last_keepalive_ack, ts);
+		return timespec64_compare(&now, &ts) >= 0;
 	}
 	return false;
 }
-- 
cgit v1.2.3


From fac02ddf910814c24f5d9d969dfdab5227f6f3eb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jul 2018 22:18:37 +0200
Subject: libceph: use timespec64 for r_mtime

The request mtime field is used all over ceph, and is currently
represented as a 'timespec' structure in Linux. This changes it to
timespec64 to allow times beyond 2038, modifying all users at the
same time.

[ Remove now redundant ts variable in writepage_nounlock(). ]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c             |  2 +-
 fs/ceph/addr.c                  | 12 +++++-------
 fs/ceph/file.c                  |  8 ++++----
 include/linux/ceph/osd_client.h |  6 +++---
 net/ceph/osd_client.c           |  8 ++++----
 5 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c680de15fae0..11f9be10e3fa 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1466,7 +1466,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
 	struct ceph_osd_request *osd_req = obj_request->osd_req;
 
 	osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
-	ktime_get_real_ts(&osd_req->r_mtime);
+	ktime_get_real_ts64(&osd_req->r_mtime);
 	osd_req->r_data_offset = obj_request->ex.oe_off;
 }
 
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 292b3d72d725..afcc59ed7090 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -574,7 +574,6 @@ static u64 get_writepages_data_length(struct inode *inode,
  */
 static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 {
-	struct timespec ts;
 	struct inode *inode;
 	struct ceph_inode_info *ci;
 	struct ceph_fs_client *fsc;
@@ -625,12 +624,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
 
 	set_page_writeback(page);
-	ts = timespec64_to_timespec(inode->i_mtime);
 	err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode),
 				   &ci->i_layout, snapc, page_off, len,
 				   ceph_wbc.truncate_seq,
 				   ceph_wbc.truncate_size,
-				   &ts, &page, 1);
+				   &inode->i_mtime, &page, 1);
 	if (err < 0) {
 		struct writeback_control tmp_wbc;
 		if (!wbc)
@@ -1134,7 +1132,7 @@ new_request:
 			pages = NULL;
 		}
 
-		req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+		req->r_mtime = inode->i_mtime;
 		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
 		BUG_ON(rc);
 		req = NULL;
@@ -1734,7 +1732,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 		goto out;
 	}
 
-	req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+	req->r_mtime = inode->i_mtime;
 	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
 	if (!err)
 		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1776,7 +1774,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 			goto out_put;
 	}
 
-	req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+	req->r_mtime = inode->i_mtime;
 	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
 	if (!err)
 		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1937,7 +1935,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
 				     0, false, true);
 	err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
 
-	wr_req->r_mtime = timespec64_to_timespec(ci->vfs_inode.i_mtime);
+	wr_req->r_mtime = ci->vfs_inode.i_mtime;
 	err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
 
 	if (!err)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ad0bed99b1d5..2f3a30ca94bf 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -721,7 +721,7 @@ struct ceph_aio_request {
 	struct list_head osd_reqs;
 	unsigned num_reqs;
 	atomic_t pending_reqs;
-	struct timespec mtime;
+	struct timespec64 mtime;
 	struct ceph_cap_flush *prealloc_cf;
 };
 
@@ -923,7 +923,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 	int num_pages = 0;
 	int flags;
 	int ret;
-	struct timespec mtime = timespec64_to_timespec(current_time(inode));
+	struct timespec64 mtime = current_time(inode);
 	size_t count = iov_iter_count(iter);
 	loff_t pos = iocb->ki_pos;
 	bool write = iov_iter_rw(iter) == WRITE;
@@ -1131,7 +1131,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
 	int flags;
 	int ret;
 	bool check_caps = false;
-	struct timespec mtime = timespec64_to_timespec(current_time(inode));
+	struct timespec64 mtime = current_time(inode);
 	size_t count = iov_iter_count(from);
 
 	if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
@@ -1663,7 +1663,7 @@ static int ceph_zero_partial_object(struct inode *inode,
 		goto out;
 	}
 
-	req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+	req->r_mtime = inode->i_mtime;
 	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
 	if (!ret) {
 		ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index d6fe7e4df8cf..02096da01845 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -199,7 +199,7 @@ struct ceph_osd_request {
 	/* set by submitter */
 	u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
 	struct ceph_snap_context *r_snapc;    /* for writes */
-	struct timespec r_mtime;              /* ditto */
+	struct timespec64 r_mtime;            /* ditto */
 	u64 r_data_offset;                    /* ditto */
 	bool r_linger;                        /* don't resend on failure */
 
@@ -253,7 +253,7 @@ struct ceph_osd_linger_request {
 	struct ceph_osd_request_target t;
 	u32 map_dne_bound;
 
-	struct timespec mtime;
+	struct timespec64 mtime;
 
 	struct kref kref;
 	struct mutex lock;
@@ -508,7 +508,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
 				struct ceph_snap_context *sc,
 				u64 off, u64 len,
 				u32 truncate_seq, u64 truncate_size,
-				struct timespec *mtime,
+				struct timespec64 *mtime,
 				struct page **pages, int nr_pages);
 
 /* watch/notify */
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 93614cb5e03f..8002b8e9ce24 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1978,7 +1978,7 @@ static void encode_request_partial(struct ceph_osd_request *req,
 	p += sizeof(struct ceph_blkin_trace_info);
 
 	ceph_encode_32(&p, 0); /* client_inc, always 0 */
-	ceph_encode_timespec(p, &req->r_mtime);
+	ceph_encode_timespec64(p, &req->r_mtime);
 	p += sizeof(struct ceph_timespec);
 
 	encode_oloc(&p, end, &req->r_t.target_oloc);
@@ -4512,7 +4512,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
 	ceph_oid_copy(&lreq->t.base_oid, oid);
 	ceph_oloc_copy(&lreq->t.base_oloc, oloc);
 	lreq->t.flags = CEPH_OSD_FLAG_WRITE;
-	ktime_get_real_ts(&lreq->mtime);
+	ktime_get_real_ts64(&lreq->mtime);
 
 	lreq->reg_req = alloc_linger_request(lreq);
 	if (!lreq->reg_req) {
@@ -4570,7 +4570,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
 	ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
 	ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
 	req->r_flags = CEPH_OSD_FLAG_WRITE;
-	ktime_get_real_ts(&req->r_mtime);
+	ktime_get_real_ts64(&req->r_mtime);
 	osd_req_op_watch_init(req, 0, lreq->linger_id,
 			      CEPH_OSD_WATCH_OP_UNWATCH);
 
@@ -5136,7 +5136,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 			 struct ceph_snap_context *snapc,
 			 u64 off, u64 len,
 			 u32 truncate_seq, u64 truncate_size,
-			 struct timespec *mtime,
+			 struct timespec64 *mtime,
 			 struct page **pages, int num_pages)
 {
 	struct ceph_osd_request *req;
-- 
cgit v1.2.3


From f7e52d8efe8588c5d4b4c78eb33da81d89486c1a Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 23 Jul 2018 14:11:40 +0200
Subject: libceph: remove now unused ceph_{en,de}code_timespec()

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/decode.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 094b9b4a34f3..a6c2a48d42e0 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -214,18 +214,6 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
 	tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
 	tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
 }
-static inline void ceph_decode_timespec(struct timespec *ts,
-					const struct ceph_timespec *tv)
-{
-	ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec);
-	ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
-}
-static inline void ceph_encode_timespec(struct ceph_timespec *tv,
-					const struct timespec *ts)
-{
-	tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
-	tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
-}
 
 /*
  * sockaddr_storage <-> ceph_sockaddr
-- 
cgit v1.2.3


From 262614c4294d33b1f19e0d18c0091d9c329b544a Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 26 Jul 2018 15:17:46 +0200
Subject: libceph: store ceph_auth_handshake pointer in ceph_connection

We already copy authorizer_reply_buf and authorizer_reply_buf_len into
ceph_connection.  Factoring out __prepare_write_connect() requires two
more: authorizer_buf and authorizer_buf_len.  Store the pointer to the
handshake in con->auth rather than piling on.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/messenger.h |  3 +--
 net/ceph/messenger.c           | 54 ++++++++++++++++++++----------------------
 2 files changed, 27 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index a718b877c597..021718570b50 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -286,9 +286,8 @@ struct ceph_connection {
 				 attempt for this connection, client */
 	u32 peer_global_seq;  /* peer's global seq for this connection */
 
+	struct ceph_auth_handshake *auth;
 	int auth_retry;       /* true if we need a newer authorizer */
-	void *auth_reply_buf;   /* where to put the authorizer reply */
-	int auth_reply_buf_len;
 
 	struct mutex mutex;
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 3f6336248509..b6ebd2cc16a1 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1434,24 +1434,26 @@ static void prepare_write_keepalive(struct ceph_connection *con)
  * Connection negotiation.
  */
 
-static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
-						int *auth_proto)
+static int get_connect_authorizer(struct ceph_connection *con)
 {
 	struct ceph_auth_handshake *auth;
+	int auth_proto;
 
 	if (!con->ops->get_authorizer) {
+		con->auth = NULL;
 		con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
 		con->out_connect.authorizer_len = 0;
-		return NULL;
+		return 0;
 	}
 
-	auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
+	auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry);
 	if (IS_ERR(auth))
-		return auth;
+		return PTR_ERR(auth);
 
-	con->auth_reply_buf = auth->authorizer_reply_buf;
-	con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
-	return auth;
+	con->auth = auth;
+	con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
+	con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len);
+	return 0;
 }
 
 /*
@@ -1471,8 +1473,7 @@ static int prepare_write_connect(struct ceph_connection *con)
 {
 	unsigned int global_seq = get_global_seq(con->msgr, 0);
 	int proto;
-	int auth_proto;
-	struct ceph_auth_handshake *auth;
+	int ret;
 
 	switch (con->peer_name.type) {
 	case CEPH_ENTITY_TYPE_MON:
@@ -1499,20 +1500,15 @@ static int prepare_write_connect(struct ceph_connection *con)
 	con->out_connect.protocol_version = cpu_to_le32(proto);
 	con->out_connect.flags = 0;
 
-	auth_proto = CEPH_AUTH_UNKNOWN;
-	auth = get_connect_authorizer(con, &auth_proto);
-	if (IS_ERR(auth))
-		return PTR_ERR(auth);
-
-	con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
-	con->out_connect.authorizer_len = auth ?
-		cpu_to_le32(auth->authorizer_buf_len) : 0;
+	ret = get_connect_authorizer(con);
+	if (ret)
+		return ret;
 
 	con_out_kvec_add(con, sizeof (con->out_connect),
 					&con->out_connect);
-	if (auth && auth->authorizer_buf_len)
-		con_out_kvec_add(con, auth->authorizer_buf_len,
-					auth->authorizer_buf);
+	if (con->auth)
+		con_out_kvec_add(con, con->auth->authorizer_buf_len,
+				 con->auth->authorizer_buf);
 
 	con->out_more = 0;
 	con_flag_set(con, CON_FLAG_WRITE_PENDING);
@@ -1781,11 +1777,14 @@ static int read_partial_connect(struct ceph_connection *con)
 	if (ret <= 0)
 		goto out;
 
-	size = le32_to_cpu(con->in_reply.authorizer_len);
-	end += size;
-	ret = read_partial(con, end, size, con->auth_reply_buf);
-	if (ret <= 0)
-		goto out;
+	if (con->auth) {
+		size = le32_to_cpu(con->in_reply.authorizer_len);
+		end += size;
+		ret = read_partial(con, end, size,
+				   con->auth->authorizer_reply_buf);
+		if (ret <= 0)
+			goto out;
+	}
 
 	dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
 	     con, (int)con->in_reply.tag,
@@ -1793,7 +1792,6 @@ static int read_partial_connect(struct ceph_connection *con)
 	     le32_to_cpu(con->in_reply.global_seq));
 out:
 	return ret;
-
 }
 
 /*
@@ -2076,7 +2074,7 @@ static int process_connect(struct ceph_connection *con)
 
 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
 
-	if (con->auth_reply_buf) {
+	if (con->auth) {
 		/*
 		 * Any connection that defines ->get_authorizer()
 		 * should also define ->verify_authorizer_reply().
-- 
cgit v1.2.3


From 6daca13d2e72bedaaacfc08f873114c9307d5aea Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 27 Jul 2018 19:18:34 +0200
Subject: libceph: add authorizer challenge

When a client authenticates with a service, an authorizer is sent with
a nonce to the service (ceph_x_authorize_[ab]) and the service responds
with a mutation of that nonce (ceph_x_authorize_reply).  This lets the
client verify the service is who it says it is but it doesn't protect
against a replay: someone can trivially capture the exchange and reuse
the same authorizer to authenticate themselves.

Allow the service to reject an initial authorizer with a random
challenge (ceph_x_authorize_challenge).  The client then has to respond
with an updated authorizer proving they are able to decrypt the
service's challenge and that the new authorizer was produced for this
specific connection instance.

The accepting side requires this challenge and response unconditionally
if the client side advertises they have CEPHX_V2 feature bit.

This addresses CVE-2018-1128.

Link: http://tracker.ceph.com/issues/24836
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 fs/ceph/mds_client.c           | 11 +++++++
 include/linux/ceph/auth.h      |  8 +++++
 include/linux/ceph/messenger.h |  3 ++
 include/linux/ceph/msgr.h      |  2 +-
 net/ceph/auth.c                | 16 ++++++++++
 net/ceph/auth_x.c              | 72 +++++++++++++++++++++++++++++++++++++++---
 net/ceph/auth_x_protocol.h     |  7 ++++
 net/ceph/messenger.c           | 17 +++++++++-
 net/ceph/osd_client.c          | 11 +++++++
 9 files changed, 140 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f5a299daae95..4fc7582233db 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4154,6 +4154,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
 	return auth;
 }
 
+static int add_authorizer_challenge(struct ceph_connection *con,
+				    void *challenge_buf, int challenge_buf_len)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_mds_client *mdsc = s->s_mdsc;
+	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+
+	return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer,
+					    challenge_buf, challenge_buf_len);
+}
 
 static int verify_authorizer_reply(struct ceph_connection *con)
 {
@@ -4217,6 +4227,7 @@ static const struct ceph_connection_operations mds_con_ops = {
 	.put = con_put,
 	.dispatch = dispatch,
 	.get_authorizer = get_authorizer,
+	.add_authorizer_challenge = add_authorizer_challenge,
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.peer_reset = peer_reset,
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index e931da8424a4..6728c2ee0205 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -64,6 +64,10 @@ struct ceph_auth_client_ops {
 	/* ensure that an existing authorizer is up to date */
 	int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
 				 struct ceph_auth_handshake *auth);
+	int (*add_authorizer_challenge)(struct ceph_auth_client *ac,
+					struct ceph_authorizer *a,
+					void *challenge_buf,
+					int challenge_buf_len);
 	int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
 				       struct ceph_authorizer *a);
 	void (*invalidate_authorizer)(struct ceph_auth_client *ac,
@@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
 extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
 				       int peer_type,
 				       struct ceph_auth_handshake *a);
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
+				       struct ceph_authorizer *a,
+				       void *challenge_buf,
+				       int challenge_buf_len);
 extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
 					     struct ceph_authorizer *a);
 extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 021718570b50..fc2b4491ee0a 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -31,6 +31,9 @@ struct ceph_connection_operations {
 	struct ceph_auth_handshake *(*get_authorizer) (
 				struct ceph_connection *con,
 			       int *proto, int force_new);
+	int (*add_authorizer_challenge)(struct ceph_connection *con,
+					void *challenge_buf,
+					int challenge_buf_len);
 	int (*verify_authorizer_reply) (struct ceph_connection *con);
 	int (*invalidate_authorizer)(struct ceph_connection *con);
 
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index 73ae2a926548..9e50aede46c8 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -91,7 +91,7 @@ struct ceph_entity_inst {
 #define CEPH_MSGR_TAG_SEQ           13 /* 64-bit int follows with seen seq number */
 #define CEPH_MSGR_TAG_KEEPALIVE2    14 /* keepalive2 byte + ceph_timespec */
 #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
-
+#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16  /* cephx v2 doing server challenge */
 
 /*
  * connection negotiation
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index dbde2b3c3c15..fbeee068ea14 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
 }
 EXPORT_SYMBOL(ceph_auth_update_authorizer);
 
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
+				       struct ceph_authorizer *a,
+				       void *challenge_buf,
+				       int challenge_buf_len)
+{
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->add_authorizer_challenge)
+		ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf,
+							challenge_buf_len);
+	mutex_unlock(&ac->mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
+
 int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
 				      struct ceph_authorizer *a)
 {
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 61cccb93f653..512eed4291fe 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -295,7 +295,8 @@ bad:
  * authorizer.  The first part (ceph_x_authorize_a) should already be
  * encoded.
  */
-static int encrypt_authorizer(struct ceph_x_authorizer *au)
+static int encrypt_authorizer(struct ceph_x_authorizer *au,
+			      u64 *server_challenge)
 {
 	struct ceph_x_authorize_a *msg_a;
 	struct ceph_x_authorize_b *msg_b;
@@ -308,16 +309,28 @@ static int encrypt_authorizer(struct ceph_x_authorizer *au)
 	end = au->buf->vec.iov_base + au->buf->vec.iov_len;
 
 	msg_b = p + ceph_x_encrypt_offset();
-	msg_b->struct_v = 1;
+	msg_b->struct_v = 2;
 	msg_b->nonce = cpu_to_le64(au->nonce);
+	if (server_challenge) {
+		msg_b->have_challenge = 1;
+		msg_b->server_challenge_plus_one =
+		    cpu_to_le64(*server_challenge + 1);
+	} else {
+		msg_b->have_challenge = 0;
+		msg_b->server_challenge_plus_one = 0;
+	}
 
 	ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
 	if (ret < 0)
 		return ret;
 
 	p += ret;
-	WARN_ON(p > end);
-	au->buf->vec.iov_len = p - au->buf->vec.iov_base;
+	if (server_challenge) {
+		WARN_ON(p != end);
+	} else {
+		WARN_ON(p > end);
+		au->buf->vec.iov_len = p - au->buf->vec.iov_base;
+	}
 
 	return 0;
 }
@@ -382,7 +395,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 	     le64_to_cpu(msg_a->ticket_blob.secret_id));
 
 	get_random_bytes(&au->nonce, sizeof(au->nonce));
-	ret = encrypt_authorizer(au);
+	ret = encrypt_authorizer(au, NULL);
 	if (ret) {
 		pr_err("failed to encrypt authorizer: %d", ret);
 		goto out_au;
@@ -664,6 +677,54 @@ static int ceph_x_update_authorizer(
 	return 0;
 }
 
+static int decrypt_authorize_challenge(struct ceph_x_authorizer *au,
+				       void *challenge_buf,
+				       int challenge_buf_len,
+				       u64 *server_challenge)
+{
+	struct ceph_x_authorize_challenge *ch =
+	    challenge_buf + sizeof(struct ceph_x_encrypt_header);
+	int ret;
+
+	/* no leading len */
+	ret = __ceph_x_decrypt(&au->session_key, challenge_buf,
+			       challenge_buf_len);
+	if (ret < 0)
+		return ret;
+	if (ret < sizeof(*ch)) {
+		pr_err("bad size %d for ceph_x_authorize_challenge\n", ret);
+		return -EINVAL;
+	}
+
+	*server_challenge = le64_to_cpu(ch->server_challenge);
+	return 0;
+}
+
+static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
+					   struct ceph_authorizer *a,
+					   void *challenge_buf,
+					   int challenge_buf_len)
+{
+	struct ceph_x_authorizer *au = (void *)a;
+	u64 server_challenge;
+	int ret;
+
+	ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len,
+					  &server_challenge);
+	if (ret) {
+		pr_err("failed to decrypt authorize challenge: %d", ret);
+		return ret;
+	}
+
+	ret = encrypt_authorizer(au, &server_challenge);
+	if (ret) {
+		pr_err("failed to encrypt authorizer w/ challenge: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
 					  struct ceph_authorizer *a)
 {
@@ -816,6 +877,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
 	.handle_reply = ceph_x_handle_reply,
 	.create_authorizer = ceph_x_create_authorizer,
 	.update_authorizer = ceph_x_update_authorizer,
+	.add_authorizer_challenge = ceph_x_add_authorizer_challenge,
 	.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
 	.invalidate_authorizer = ceph_x_invalidate_authorizer,
 	.reset =  ceph_x_reset,
diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
index 32c13d763b9a..24b0b74564d0 100644
--- a/net/ceph/auth_x_protocol.h
+++ b/net/ceph/auth_x_protocol.h
@@ -70,6 +70,13 @@ struct ceph_x_authorize_a {
 struct ceph_x_authorize_b {
 	__u8 struct_v;
 	__le64 nonce;
+	__u8 have_challenge;
+	__le64 server_challenge_plus_one;
+} __attribute__ ((packed));
+
+struct ceph_x_authorize_challenge {
+	__u8 struct_v;
+	__le64 server_challenge;
 } __attribute__ ((packed));
 
 struct ceph_x_authorize_reply {
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 500cc3da586f..e915c8bce117 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2080,9 +2080,24 @@ static int process_connect(struct ceph_connection *con)
 	if (con->auth) {
 		/*
 		 * Any connection that defines ->get_authorizer()
-		 * should also define ->verify_authorizer_reply().
+		 * should also define ->add_authorizer_challenge() and
+		 * ->verify_authorizer_reply().
+		 *
 		 * See get_connect_authorizer().
 		 */
+		if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
+			ret = con->ops->add_authorizer_challenge(
+				    con, con->auth->authorizer_reply_buf,
+				    le32_to_cpu(con->in_reply.authorizer_len));
+			if (ret < 0)
+				return ret;
+
+			con_out_kvec_reset(con);
+			__prepare_write_connect(con);
+			prepare_read_connect(con);
+			return 0;
+		}
+
 		ret = con->ops->verify_authorizer_reply(con);
 		if (ret < 0) {
 			con->error_msg = "bad authorize reply";
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 8002b8e9ce24..60934bd8796c 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5393,6 +5393,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
 	return auth;
 }
 
+static int add_authorizer_challenge(struct ceph_connection *con,
+				    void *challenge_buf, int challenge_buf_len)
+{
+	struct ceph_osd *o = con->private;
+	struct ceph_osd_client *osdc = o->o_osdc;
+	struct ceph_auth_client *ac = osdc->client->monc.auth;
+
+	return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer,
+					    challenge_buf, challenge_buf_len);
+}
 
 static int verify_authorizer_reply(struct ceph_connection *con)
 {
@@ -5442,6 +5452,7 @@ static const struct ceph_connection_operations osd_con_ops = {
 	.put = put_osd_con,
 	.dispatch = dispatch,
 	.get_authorizer = get_authorizer,
+	.add_authorizer_challenge = add_authorizer_challenge,
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.alloc_msg = alloc_msg,
-- 
cgit v1.2.3


From cc255c76c70f7a87d97939621eae04b600d9f4a1 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 27 Jul 2018 19:25:32 +0200
Subject: libceph: implement CEPHX_V2 calculation mode

Derive the signature from the entire buffer (both AES cipher blocks)
instead of using just the first half of the first block, leaving out
data_crc entirely.

This addresses CVE-2018-1129.

Link: http://tracker.ceph.com/issues/24837
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h |  7 ++--
 net/ceph/auth_x.c                  | 73 +++++++++++++++++++++++++++++---------
 2 files changed, 60 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 3901927cf6a0..6b92b3395fa9 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
 DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
 DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
 DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
-DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING)  // *do not share this bit*
+DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit*
+DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2)             // *do not share this bit*
 
-DEFINE_CEPH_FEATURE(61, 1, RESERVED2)          // unused, but slow down!
 DEFINE_CEPH_FEATURE(62, 1, RESERVED)           // do not use; used as a sentinal
 DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
 
@@ -210,7 +210,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_SERVER_JEWEL |		\
 	 CEPH_FEATURE_MON_STATEFUL_SUB |	\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
-	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
+	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING |	\
+	 CEPH_FEATURE_CEPHX_V2)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 512eed4291fe..462786f571e7 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -9,6 +9,7 @@
 
 #include <linux/ceph/decode.h>
 #include <linux/ceph/auth.h>
+#include <linux/ceph/ceph_features.h>
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/messenger.h>
 
@@ -803,26 +804,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg,
 			  __le64 *psig)
 {
 	void *enc_buf = au->enc_buf;
-	struct {
-		__le32 len;
-		__le32 header_crc;
-		__le32 front_crc;
-		__le32 middle_crc;
-		__le32 data_crc;
-	} __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
 	int ret;
 
-	sigblock->len = cpu_to_le32(4*sizeof(u32));
-	sigblock->header_crc = msg->hdr.crc;
-	sigblock->front_crc = msg->footer.front_crc;
-	sigblock->middle_crc = msg->footer.middle_crc;
-	sigblock->data_crc =  msg->footer.data_crc;
-	ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN,
-			     sizeof(*sigblock));
-	if (ret < 0)
-		return ret;
+	if (!CEPH_HAVE_FEATURE(msg->con->peer_features, CEPHX_V2)) {
+		struct {
+			__le32 len;
+			__le32 header_crc;
+			__le32 front_crc;
+			__le32 middle_crc;
+			__le32 data_crc;
+		} __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
+
+		sigblock->len = cpu_to_le32(4*sizeof(u32));
+		sigblock->header_crc = msg->hdr.crc;
+		sigblock->front_crc = msg->footer.front_crc;
+		sigblock->middle_crc = msg->footer.middle_crc;
+		sigblock->data_crc =  msg->footer.data_crc;
+
+		ret = ceph_x_encrypt(&au->session_key, enc_buf,
+				     CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock));
+		if (ret < 0)
+			return ret;
+
+		*psig = *(__le64 *)(enc_buf + sizeof(u32));
+	} else {
+		struct {
+			__le32 header_crc;
+			__le32 front_crc;
+			__le32 front_len;
+			__le32 middle_crc;
+			__le32 middle_len;
+			__le32 data_crc;
+			__le32 data_len;
+			__le32 seq_lower_word;
+		} __packed *sigblock = enc_buf;
+		struct {
+			__le64 a, b, c, d;
+		} __packed *penc = enc_buf;
+		int ciphertext_len;
+
+		sigblock->header_crc = msg->hdr.crc;
+		sigblock->front_crc = msg->footer.front_crc;
+		sigblock->front_len = msg->hdr.front_len;
+		sigblock->middle_crc = msg->footer.middle_crc;
+		sigblock->middle_len = msg->hdr.middle_len;
+		sigblock->data_crc =  msg->footer.data_crc;
+		sigblock->data_len = msg->hdr.data_len;
+		sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq;
+
+		/* no leading len, no ceph_x_encrypt_header */
+		ret = ceph_crypt(&au->session_key, true, enc_buf,
+				 CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock),
+				 &ciphertext_len);
+		if (ret)
+			return ret;
+
+		*psig = penc->a ^ penc->b ^ penc->c ^ penc->d;
+	}
 
-	*psig = *(__le64 *)(enc_buf + sizeof(u32));
 	return 0;
 }
 
-- 
cgit v1.2.3


From 2afc9166f79b8f6da5f347f48515215ceee4ae37 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 2 Aug 2018 10:51:40 -0700
Subject: scsi: sysfs: Introduce sysfs_{un,}break_active_protection()

Introduce these two functions and export them such that the next patch
can add calls to these functions from the SCSI core.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 fs/sysfs/file.c       | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sysfs.h | 14 ++++++++++++++
 2 files changed, 58 insertions(+)

(limited to 'include/linux')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 5c13f29bfcdb..118fa197a35f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -405,6 +405,50 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
 }
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
+/**
+ * sysfs_break_active_protection - break "active" protection
+ * @kobj: The kernel object @attr is associated with.
+ * @attr: The attribute to break the "active" protection for.
+ *
+ * With sysfs, just like kernfs, deletion of an attribute is postponed until
+ * all active .show() and .store() callbacks have finished unless this function
+ * is called. Hence this function is useful in methods that implement self
+ * deletion.
+ */
+struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
+						  const struct attribute *attr)
+{
+	struct kernfs_node *kn;
+
+	kobject_get(kobj);
+	kn = kernfs_find_and_get(kobj->sd, attr->name);
+	if (kn)
+		kernfs_break_active_protection(kn);
+	return kn;
+}
+EXPORT_SYMBOL_GPL(sysfs_break_active_protection);
+
+/**
+ * sysfs_unbreak_active_protection - restore "active" protection
+ * @kn: Pointer returned by sysfs_break_active_protection().
+ *
+ * Undo the effects of sysfs_break_active_protection(). Since this function
+ * calls kernfs_put() on the kernfs node that corresponds to the 'attr'
+ * argument passed to sysfs_break_active_protection() that attribute may have
+ * been removed between the sysfs_break_active_protection() and
+ * sysfs_unbreak_active_protection() calls, it is not safe to access @kn after
+ * this function has returned.
+ */
+void sysfs_unbreak_active_protection(struct kernfs_node *kn)
+{
+	struct kobject *kobj = kn->parent->priv;
+
+	kernfs_unbreak_active_protection(kn);
+	kernfs_put(kn);
+	kobject_put(kobj);
+}
+EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection);
+
 /**
  * sysfs_remove_file_ns - remove an object attribute with a custom ns tag
  * @kobj: object we're acting for
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b8bfdc173ec0..3c12198c0103 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -237,6 +237,9 @@ int __must_check sysfs_create_files(struct kobject *kobj,
 				   const struct attribute **attr);
 int __must_check sysfs_chmod_file(struct kobject *kobj,
 				  const struct attribute *attr, umode_t mode);
+struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
+						  const struct attribute *attr);
+void sysfs_unbreak_active_protection(struct kernfs_node *kn);
 void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
 			  const void *ns);
 bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr);
@@ -350,6 +353,17 @@ static inline int sysfs_chmod_file(struct kobject *kobj,
 	return 0;
 }
 
+static inline struct kernfs_node *
+sysfs_break_active_protection(struct kobject *kobj,
+			      const struct attribute *attr)
+{
+	return NULL;
+}
+
+static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn)
+{
+}
+
 static inline void sysfs_remove_file_ns(struct kobject *kobj,
 					const struct attribute *attr,
 					const void *ns)
-- 
cgit v1.2.3


From e7d0748dd71695b94f3a35c8bdc05226a7f3d919 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 2 Aug 2018 15:22:13 -0600
Subject: block: Switch struct packet_command to use struct scsi_sense_hdr

There is a lot of needless struct request_sense usage in the CDROM
code. These can all be struct scsi_sense_hdr instead, to avoid any
confusion over their respective structure sizes. This patch is a lot
of noise changing "sense" to "sshdr", but the final code is more
readable to distinguish between "sense" meaning "struct request_sense"
and "sshdr" meaning "struct scsi_sense_hdr".

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/pktcdvd.c    | 36 ++++++++++++++++++------------------
 drivers/cdrom/cdrom.c      | 22 +++++++++++-----------
 drivers/ide/ide-cd.c       | 11 ++++++-----
 drivers/ide/ide-cd.h       |  4 ++--
 drivers/ide/ide-cd_ioctl.c | 30 +++++++++++++++---------------
 drivers/scsi/sr_ioctl.c    | 22 +++++++++-------------
 include/linux/cdrom.h      |  3 ++-
 7 files changed, 63 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 9bb7721c26fc..e285413d4a75 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -748,13 +748,13 @@ static const char *sense_key_string(__u8 index)
 static void pkt_dump_sense(struct pktcdvd_device *pd,
 			   struct packet_command *cgc)
 {
-	struct request_sense *sense = cgc->sense;
+	struct scsi_sense_hdr *sshdr = cgc->sshdr;
 
-	if (sense)
+	if (sshdr)
 		pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n",
 			CDROM_PACKET_SIZE, cgc->cmd,
-			sense->sense_key, sense->asc, sense->ascq,
-			sense_key_string(sense->sense_key));
+			sshdr->sense_key, sshdr->asc, sshdr->ascq,
+			sense_key_string(sshdr->sense_key));
 	else
 		pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
 }
@@ -787,11 +787,11 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
 				unsigned write_speed, unsigned read_speed)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	int ret;
 
 	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	cgc.cmd[0] = GPCMD_SET_SPEED;
 	cgc.cmd[2] = (read_speed >> 8) & 0xff;
 	cgc.cmd[3] = read_speed & 0xff;
@@ -1651,7 +1651,7 @@ static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
 static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	write_param_page *wp;
 	char buffer[128];
 	int ret, size;
@@ -1662,7 +1662,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
 
 	memset(buffer, 0, sizeof(buffer));
 	init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
 	if (ret) {
 		pkt_dump_sense(pd, &cgc);
@@ -1678,7 +1678,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
 	 * now get it all
 	 */
 	init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
 	if (ret) {
 		pkt_dump_sense(pd, &cgc);
@@ -1916,12 +1916,12 @@ static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
 						int set)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	unsigned char buf[64];
 	int ret;
 
 	init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	cgc.buflen = pd->mode_offset + 12;
 
 	/*
@@ -1962,14 +1962,14 @@ static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
 						unsigned *write_speed)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	unsigned char buf[256+18];
 	unsigned char *cap_buf;
 	int ret, offset;
 
 	cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset];
 	init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 
 	ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
 	if (ret) {
@@ -2023,13 +2023,13 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
 						unsigned *speed)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	unsigned char buf[64];
 	unsigned int size, st, sp;
 	int ret;
 
 	init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
 	cgc.cmd[1] = 2;
 	cgc.cmd[2] = 4; /* READ ATIP */
@@ -2044,7 +2044,7 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
 		size = sizeof(buf);
 
 	init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
 	cgc.cmd[1] = 2;
 	cgc.cmd[2] = 4;
@@ -2095,13 +2095,13 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
 static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	int ret;
 
 	pkt_dbg(2, pd, "Performing OPC\n");
 
 	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	cgc.timeout = 60*HZ;
 	cgc.cmd[0] = GPCMD_SEND_OPC;
 	cgc.cmd[1] = 1;
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index a78b8e7085e9..86619472d916 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -345,10 +345,10 @@ static LIST_HEAD(cdrom_list);
 int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
 			       struct packet_command *cgc)
 {
-	if (cgc->sense) {
-		cgc->sense->sense_key = 0x05;
-		cgc->sense->asc = 0x20;
-		cgc->sense->ascq = 0x00;
+	if (cgc->sshdr) {
+		cgc->sshdr->sense_key = 0x05;
+		cgc->sshdr->asc = 0x20;
+		cgc->sshdr->ascq = 0x00;
 	}
 
 	cgc->stat = -EIO;
@@ -2943,7 +2943,7 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi,
 					      struct packet_command *cgc,
 					      int cmd)
 {
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	struct cdrom_msf msf;
 	int blocksize = 0, format = 0, lba;
 	int ret;
@@ -2971,13 +2971,13 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi,
 	if (cgc->buffer == NULL)
 		return -ENOMEM;
 
-	memset(&sense, 0, sizeof(sense));
-	cgc->sense = &sense;
+	memset(&sshdr, 0, sizeof(sshdr));
+	cgc->sshdr = &sshdr;
 	cgc->data_direction = CGC_DATA_READ;
 	ret = cdrom_read_block(cdi, cgc, lba, 1, format, blocksize);
-	if (ret && sense.sense_key == 0x05 &&
-	    sense.asc == 0x20 &&
-	    sense.ascq == 0x00) {
+	if (ret && sshdr.sense_key == 0x05 &&
+	    sshdr.asc == 0x20 &&
+	    sshdr.ascq == 0x00) {
 		/*
 		 * SCSI-II devices are not required to support
 		 * READ_CD, so let's try switching block size
@@ -2986,7 +2986,7 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi,
 		ret = cdrom_switch_blocksize(cdi, blocksize);
 		if (ret)
 			goto out;
-		cgc->sense = NULL;
+		cgc->sshdr = NULL;
 		ret = cdrom_read_cd(cdi, cgc, lba, blocksize, 1);
 		ret |= cdrom_switch_blocksize(cdi, blocksize);
 	}
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index a37dd381d307..a24cdff01865 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -419,7 +419,7 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
 
 int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		    int write, void *buffer, unsigned *bufflen,
-		    struct request_sense *sense, int timeout,
+		    struct scsi_sense_hdr *sshdr, int timeout,
 		    req_flags_t rq_flags)
 {
 	struct cdrom_info *info = drive->driver_data;
@@ -456,8 +456,9 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 
 		if (buffer)
 			*bufflen = scsi_req(rq)->resid_len;
-		if (sense)
-			memcpy(sense, scsi_req(rq)->sense, sizeof(*sense));
+		if (sshdr)
+			scsi_normalize_sense(scsi_req(rq)->sense,
+					     scsi_req(rq)->sense_len, sshdr);
 
 		/*
 		 * FIXME: we should probably abort/retry or something in case of
@@ -864,7 +865,7 @@ static void msf_from_bcd(struct atapi_msf *msf)
 	msf->frame  = bcd2bin(msf->frame);
 }
 
-int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
+int cdrom_check_status(ide_drive_t *drive, struct scsi_sense_hdr *sshdr)
 {
 	struct cdrom_info *info = drive->driver_data;
 	struct cdrom_device_info *cdi;
@@ -886,7 +887,7 @@ int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
 	 */
 	cmd[7] = cdi->sanyo_slot % 3;
 
-	return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sense, 0, RQF_QUIET);
+	return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sshdr, 0, RQF_QUIET);
 }
 
 static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index fc162fbb6629..a69dc7f61c4d 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -98,11 +98,11 @@ void ide_cd_log_error(const char *, struct request *, struct request_sense *);
 
 /* ide-cd.c functions used by ide-cd_ioctl.c */
 int ide_cd_queue_pc(ide_drive_t *, const unsigned char *, int, void *,
-		    unsigned *, struct request_sense *, int, req_flags_t);
+		    unsigned *, struct scsi_sense_hdr *, int, req_flags_t);
 int ide_cd_read_toc(ide_drive_t *);
 int ide_cdrom_get_capabilities(ide_drive_t *, u8 *);
 void ide_cdrom_update_speed(ide_drive_t *, u8 *);
-int cdrom_check_status(ide_drive_t *, struct request_sense *);
+int cdrom_check_status(ide_drive_t *, struct scsi_sense_hdr *);
 
 /* ide-cd_ioctl.c */
 int ide_cdrom_open_real(struct cdrom_device_info *, int);
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 14540544413c..4a6e1a413ead 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -43,14 +43,14 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr)
 {
 	ide_drive_t *drive = cdi->handle;
 	struct media_event_desc med;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	int stat;
 
 	if (slot_nr != CDSL_CURRENT)
 		return -EINVAL;
 
-	stat = cdrom_check_status(drive, &sense);
-	if (!stat || sense.sense_key == UNIT_ATTENTION)
+	stat = cdrom_check_status(drive, &sshdr);
+	if (!stat || sshdr.sense_key == UNIT_ATTENTION)
 		return CDS_DISC_OK;
 
 	if (!cdrom_get_media_event(cdi, &med)) {
@@ -62,8 +62,8 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr)
 			return CDS_NO_DISC;
 	}
 
-	if (sense.sense_key == NOT_READY && sense.asc == 0x04
-			&& sense.ascq == 0x04)
+	if (sshdr.sense_key == NOT_READY && sshdr.asc == 0x04
+			&& sshdr.ascq == 0x04)
 		return CDS_DISC_OK;
 
 	/*
@@ -71,8 +71,8 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr)
 	 * just return TRAY_OPEN since ATAPI doesn't provide
 	 * any other way to detect this...
 	 */
-	if (sense.sense_key == NOT_READY) {
-		if (sense.asc == 0x3a && sense.ascq == 1)
+	if (sshdr.sense_key == NOT_READY) {
+		if (sshdr.asc == 0x3a && sshdr.ascq == 1)
 			return CDS_NO_DISC;
 		else
 			return CDS_TRAY_OPEN;
@@ -135,7 +135,7 @@ int cdrom_eject(ide_drive_t *drive, int ejectflag)
 static
 int ide_cd_lockdoor(ide_drive_t *drive, int lockflag)
 {
-	struct request_sense my_sense, *sense = &my_sense;
+	struct scsi_sense_hdr sshdr;
 	int stat;
 
 	/* If the drive cannot lock the door, just pretend. */
@@ -150,14 +150,14 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag)
 		cmd[4] = lockflag ? 1 : 0;
 
 		stat = ide_cd_queue_pc(drive, cmd, 0, NULL, NULL,
-				       sense, 0, 0);
+				       &sshdr, 0, 0);
 	}
 
 	/* If we got an illegal field error, the drive
 	   probably cannot lock the door. */
 	if (stat != 0 &&
-	    sense->sense_key == ILLEGAL_REQUEST &&
-	    (sense->asc == 0x24 || sense->asc == 0x20)) {
+	    sshdr.sense_key == ILLEGAL_REQUEST &&
+	    (sshdr.asc == 0x24 || sshdr.asc == 0x20)) {
 		printk(KERN_ERR "%s: door locking not supported\n",
 			drive->name);
 		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
@@ -165,7 +165,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag)
 	}
 
 	/* no medium, that's alright. */
-	if (stat != 0 && sense->sense_key == NOT_READY && sense->asc == 0x3a)
+	if (stat != 0 && sshdr.sense_key == NOT_READY && sshdr.asc == 0x3a)
 		stat = 0;
 
 	if (stat == 0) {
@@ -451,8 +451,8 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi,
 	   layer. the packet must be complete, as we do not
 	   touch it at all. */
 
-	if (cgc->sense)
-		memset(cgc->sense, 0, sizeof(struct request_sense));
+	if (cgc->sshdr)
+		memset(cgc->sshdr, 0, sizeof(*cgc->sshdr));
 
 	if (cgc->quiet)
 		flags |= RQF_QUIET;
@@ -460,7 +460,7 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi,
 	cgc->stat = ide_cd_queue_pc(drive, cgc->cmd,
 				    cgc->data_direction == CGC_DATA_WRITE,
 				    cgc->buffer, &len,
-				    cgc->sense, cgc->timeout, flags);
+				    cgc->sshdr, cgc->timeout, flags);
 	if (!cgc->stat)
 		cgc->buflen -= len;
 	return cgc->stat;
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index 35fab1e18adc..ffcf902da390 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -186,14 +186,13 @@ static int sr_play_trkind(struct cdrom_device_info *cdi,
 int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
 {
 	struct scsi_device *SDev;
-	struct scsi_sense_hdr sshdr;
+	struct scsi_sense_hdr local_sshdr, *sshdr = &local_sshdr;
 	int result, err = 0, retries = 0;
-	unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE], *senseptr = NULL;
 
 	SDev = cd->device;
 
-	if (cgc->sense)
-		senseptr = sense_buffer;
+	if (cgc->sshdr)
+		sshdr = cgc->sshdr;
 
       retry:
 	if (!scsi_block_when_processing_errors(SDev)) {
@@ -202,15 +201,12 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
 	}
 
 	result = scsi_execute(SDev, cgc->cmd, cgc->data_direction,
-			      cgc->buffer, cgc->buflen, senseptr, &sshdr,
+			      cgc->buffer, cgc->buflen, NULL, sshdr,
 			      cgc->timeout, IOCTL_RETRIES, 0, 0, NULL);
 
-	if (cgc->sense)
-		memcpy(cgc->sense, sense_buffer, sizeof(*cgc->sense));
-
 	/* Minimal error checking.  Ignore cases we know about, and report the rest. */
 	if (driver_byte(result) != 0) {
-		switch (sshdr.sense_key) {
+		switch (sshdr->sense_key) {
 		case UNIT_ATTENTION:
 			SDev->changed = 1;
 			if (!cgc->quiet)
@@ -221,8 +217,8 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
 			err = -ENOMEDIUM;
 			break;
 		case NOT_READY:	/* This happens if there is no disc in drive */
-			if (sshdr.asc == 0x04 &&
-			    sshdr.ascq == 0x01) {
+			if (sshdr->asc == 0x04 &&
+			    sshdr->ascq == 0x01) {
 				/* sense: Logical unit is in process of becoming ready */
 				if (!cgc->quiet)
 					sr_printk(KERN_INFO, cd,
@@ -245,8 +241,8 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
 			break;
 		case ILLEGAL_REQUEST:
 			err = -EIO;
-			if (sshdr.asc == 0x20 &&
-			    sshdr.ascq == 0x00)
+			if (sshdr->asc == 0x20 &&
+			    sshdr->ascq == 0x00)
 				/* sense: Invalid command operation code */
 				err = -EDRIVE_CANT_DO_THIS;
 			break;
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index e75dfd1f1dec..528271c60018 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -13,6 +13,7 @@
 
 #include <linux/fs.h>		/* not really needed, later.. */
 #include <linux/list.h>
+#include <scsi/scsi_common.h>
 #include <uapi/linux/cdrom.h>
 
 struct packet_command
@@ -21,7 +22,7 @@ struct packet_command
 	unsigned char 		*buffer;
 	unsigned int 		buflen;
 	int			stat;
-	struct request_sense	*sense;
+	struct scsi_sense_hdr	*sshdr;
 	unsigned char		data_direction;
 	int			quiet;
 	int			timeout;
-- 
cgit v1.2.3


From 9a47249d444d344051c7c0e909fad0e88515a5c2 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 31 Jul 2018 21:11:00 +0200
Subject: random: Make crng state queryable

It is very useful to be able to know whether or not get_random_bytes_wait
/ wait_for_random_bytes is going to block or not, or whether plain
get_random_bytes is going to return good randomness or bad randomness.

The particular use case is for mitigating certain attacks in WireGuard.
A handshake packet arrives and is queued up. Elsewhere a worker thread
takes items from the queue and processes them. In replying to these
items, it needs to use some random data, and it has to be good random
data. If we simply block until we can have good randomness, then it's
possible for an attacker to fill the queue up with packets waiting to be
processed. Upon realizing the queue is full, WireGuard will detect that
it's under a denial of service attack, and behave accordingly. A better
approach is just to drop incoming handshake packets if the crng is not
yet initialized.

This patch, therefore, makes that information directly accessible.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c  | 15 +++++++++++++++
 include/linux/random.h |  1 +
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 561082d46a82..bf5f99fc36f1 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1663,6 +1663,21 @@ int wait_for_random_bytes(void)
 }
 EXPORT_SYMBOL(wait_for_random_bytes);
 
+/*
+ * Returns whether or not the urandom pool has been seeded and thus guaranteed
+ * to supply cryptographically secure random numbers. This applies to: the
+ * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
+ * ,u64,int,long} family of functions.
+ *
+ * Returns: true if the urandom pool has been seeded.
+ *          false if the urandom pool has not been seeded.
+ */
+bool rng_is_initialized(void)
+{
+	return crng_ready();
+}
+EXPORT_SYMBOL(rng_is_initialized);
+
 /*
  * Add a callback function that will be invoked when the nonblocking
  * pool is initialised.
diff --git a/include/linux/random.h b/include/linux/random.h
index f1c9bc5cd231..445a0ea4ff49 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -36,6 +36,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
 
 extern void get_random_bytes(void *buf, int nbytes);
 extern int wait_for_random_bytes(void);
+extern bool rng_is_initialized(void);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
 extern int __must_check get_random_bytes_arch(void *buf, int nbytes);
-- 
cgit v1.2.3


From 0a4c58f5702858822621fa1177c7d3475f181ccb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:17 -0700
Subject: bpf: add ability to charge bpf maps memory dynamically

This commits extends existing bpf maps memory charging API
to support dynamic charging/uncharging.

This is required to account memory used by maps,
if all entries are created dynamically after
the map initialization.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h  |  2 ++
 kernel/bpf/syscall.c | 58 ++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 45 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5b5ad95cf339..5a4a256473c3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -435,6 +435,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
+int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
+void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
 void *bpf_map_area_alloc(size_t size, int numa_node);
 void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a31a1ba0f8ea..7958252a4d29 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -181,32 +181,60 @@ int bpf_map_precharge_memlock(u32 pages)
 	return 0;
 }
 
-static int bpf_map_charge_memlock(struct bpf_map *map)
+static int bpf_charge_memlock(struct user_struct *user, u32 pages)
 {
-	struct user_struct *user = get_current_user();
-	unsigned long memlock_limit;
+	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
-	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
+		atomic_long_sub(pages, &user->locked_vm);
+		return -EPERM;
+	}
+	return 0;
+}
 
-	atomic_long_add(map->pages, &user->locked_vm);
+static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
+{
+	atomic_long_sub(pages, &user->locked_vm);
+}
+
+static int bpf_map_init_memlock(struct bpf_map *map)
+{
+	struct user_struct *user = get_current_user();
+	int ret;
 
-	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
-		atomic_long_sub(map->pages, &user->locked_vm);
+	ret = bpf_charge_memlock(user, map->pages);
+	if (ret) {
 		free_uid(user);
-		return -EPERM;
+		return ret;
 	}
 	map->user = user;
-	return 0;
+	return ret;
 }
 
-static void bpf_map_uncharge_memlock(struct bpf_map *map)
+static void bpf_map_release_memlock(struct bpf_map *map)
 {
 	struct user_struct *user = map->user;
-
-	atomic_long_sub(map->pages, &user->locked_vm);
+	bpf_uncharge_memlock(user, map->pages);
 	free_uid(user);
 }
 
+int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
+{
+	int ret;
+
+	ret = bpf_charge_memlock(map->user, pages);
+	if (ret)
+		return ret;
+	map->pages += pages;
+	return ret;
+}
+
+void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
+{
+	bpf_uncharge_memlock(map->user, pages);
+	map->pages -= pages;
+}
+
 static int bpf_map_alloc_id(struct bpf_map *map)
 {
 	int id;
@@ -256,7 +284,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
 {
 	struct bpf_map *map = container_of(work, struct bpf_map, work);
 
-	bpf_map_uncharge_memlock(map);
+	bpf_map_release_memlock(map);
 	security_bpf_map_free(map);
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
@@ -492,7 +520,7 @@ static int map_create(union bpf_attr *attr)
 	if (err)
 		goto free_map_nouncharge;
 
-	err = bpf_map_charge_memlock(map);
+	err = bpf_map_init_memlock(map);
 	if (err)
 		goto free_map_sec;
 
@@ -515,7 +543,7 @@ static int map_create(union bpf_attr *attr)
 	return err;
 
 free_map:
-	bpf_map_uncharge_memlock(map);
+	bpf_map_release_memlock(map);
 free_map_sec:
 	security_bpf_map_free(map);
 free_map_nouncharge:
-- 
cgit v1.2.3


From de9cbbaadba5adf88a19e46df61f7054000838f6 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:18 -0700
Subject: bpf: introduce cgroup storage maps

This commit introduces BPF_MAP_TYPE_CGROUP_STORAGE maps:
a special type of maps which are implementing the cgroup storage.

>From the userspace point of view it's almost a generic
hash map with the (cgroup inode id, attachment type) pair
used as a key.

The only difference is that some operations are restricted:
  1) a user can't create new entries,
  2) a user can't remove existing entries.

The lookup from userspace is o(log(n)).

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h |  38 +++++
 include/linux/bpf.h        |   1 +
 include/linux/bpf_types.h  |   3 +
 include/uapi/linux/bpf.h   |   6 +
 kernel/bpf/Makefile        |   1 +
 kernel/bpf/local_storage.c | 376 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c       |   3 +
 kernel/bpf/verifier.c      |  12 ++
 8 files changed, 440 insertions(+)
 create mode 100644 kernel/bpf/local_storage.c

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index d50c2f0a655a..7d00d58869ed 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -4,19 +4,39 @@
 
 #include <linux/errno.h>
 #include <linux/jump_label.h>
+#include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>
 
 struct sock;
 struct sockaddr;
 struct cgroup;
 struct sk_buff;
+struct bpf_map;
+struct bpf_prog;
 struct bpf_sock_ops_kern;
+struct bpf_cgroup_storage;
 
 #ifdef CONFIG_CGROUP_BPF
 
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
+struct bpf_cgroup_storage_map;
+
+struct bpf_storage_buffer {
+	struct rcu_head rcu;
+	char data[0];
+};
+
+struct bpf_cgroup_storage {
+	struct bpf_storage_buffer *buf;
+	struct bpf_cgroup_storage_map *map;
+	struct bpf_cgroup_storage_key key;
+	struct list_head list;
+	struct rb_node node;
+	struct rcu_head rcu;
+};
+
 struct bpf_prog_list {
 	struct list_head node;
 	struct bpf_prog *prog;
@@ -77,6 +97,15 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum bpf_attach_type type);
 
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
+void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
+void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
+			     struct cgroup *cgroup,
+			     enum bpf_attach_type type);
+void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
+int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
+void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
@@ -221,6 +250,15 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
+static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
+					    struct bpf_map *map) { return 0; }
+static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
+					      struct bpf_map *map) {}
+static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
+	struct bpf_prog *prog) { return 0; }
+static inline void bpf_cgroup_storage_free(
+	struct bpf_cgroup_storage *storage) {}
+
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5a4a256473c3..9d1e4727495e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -282,6 +282,7 @@ struct bpf_prog_aux {
 	struct bpf_prog *prog;
 	struct user_struct *user;
 	u64 load_time; /* ns since boottime */
+	struct bpf_map *cgroup_storage;
 	char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
 	void *security;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index c5700c2d5549..add08be53b6f 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -37,6 +37,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
 #ifdef CONFIG_CGROUPS
 BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
 #endif
+#ifdef CONFIG_CGROUP_BPF
+BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
+#endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0ebaaf7f3568..b10118ee5afe 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -75,6 +75,11 @@ struct bpf_lpm_trie_key {
 	__u8	data[0];	/* Arbitrary size */
 };
 
+struct bpf_cgroup_storage_key {
+	__u64	cgroup_inode_id;	/* cgroup inode id */
+	__u32	attach_type;		/* program attach type */
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -120,6 +125,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_CPUMAP,
 	BPF_MAP_TYPE_XSKMAP,
 	BPF_MAP_TYPE_SOCKHASH,
+	BPF_MAP_TYPE_CGROUP_STORAGE,
 };
 
 enum bpf_prog_type {
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f27f5496d6fe..e8906cbad81f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,6 +3,7 @@ obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
new file mode 100644
index 000000000000..f23d3fdeba23
--- /dev/null
+++ b/kernel/bpf/local_storage.c
@@ -0,0 +1,376 @@
+//SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf-cgroup.h>
+#include <linux/bpf.h>
+#include <linux/bug.h>
+#include <linux/filter.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_CGROUP_BPF
+
+#define LOCAL_STORAGE_CREATE_FLAG_MASK					\
+	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+struct bpf_cgroup_storage_map {
+	struct bpf_map map;
+
+	spinlock_t lock;
+	struct bpf_prog *prog;
+	struct rb_root root;
+	struct list_head list;
+};
+
+static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
+{
+	return container_of(map, struct bpf_cgroup_storage_map, map);
+}
+
+static int bpf_cgroup_storage_key_cmp(
+	const struct bpf_cgroup_storage_key *key1,
+	const struct bpf_cgroup_storage_key *key2)
+{
+	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
+		return -1;
+	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
+		return 1;
+	else if (key1->attach_type < key2->attach_type)
+		return -1;
+	else if (key1->attach_type > key2->attach_type)
+		return 1;
+	return 0;
+}
+
+static struct bpf_cgroup_storage *cgroup_storage_lookup(
+	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
+	bool locked)
+{
+	struct rb_root *root = &map->root;
+	struct rb_node *node;
+
+	if (!locked)
+		spin_lock_bh(&map->lock);
+
+	node = root->rb_node;
+	while (node) {
+		struct bpf_cgroup_storage *storage;
+
+		storage = container_of(node, struct bpf_cgroup_storage, node);
+
+		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
+		case -1:
+			node = node->rb_left;
+			break;
+		case 1:
+			node = node->rb_right;
+			break;
+		default:
+			if (!locked)
+				spin_unlock_bh(&map->lock);
+			return storage;
+		}
+	}
+
+	if (!locked)
+		spin_unlock_bh(&map->lock);
+
+	return NULL;
+}
+
+static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
+				 struct bpf_cgroup_storage *storage)
+{
+	struct rb_root *root = &map->root;
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	while (*new) {
+		struct bpf_cgroup_storage *this;
+
+		this = container_of(*new, struct bpf_cgroup_storage, node);
+
+		parent = *new;
+		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
+		case -1:
+			new = &((*new)->rb_left);
+			break;
+		case 1:
+			new = &((*new)->rb_right);
+			break;
+		default:
+			return -EEXIST;
+		}
+	}
+
+	rb_link_node(&storage->node, parent, new);
+	rb_insert_color(&storage->node, root);
+
+	return 0;
+}
+
+static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	struct bpf_cgroup_storage_key *key = _key;
+	struct bpf_cgroup_storage *storage;
+
+	storage = cgroup_storage_lookup(map, key, false);
+	if (!storage)
+		return NULL;
+
+	return &READ_ONCE(storage->buf)->data[0];
+}
+
+static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
+				      void *value, u64 flags)
+{
+	struct bpf_cgroup_storage_key *key = _key;
+	struct bpf_cgroup_storage *storage;
+	struct bpf_storage_buffer *new;
+
+	if (flags & BPF_NOEXIST)
+		return -EINVAL;
+
+	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
+					key, false);
+	if (!storage)
+		return -ENOENT;
+
+	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
+			   map->value_size, __GFP_ZERO | GFP_USER,
+			   map->numa_node);
+	if (!new)
+		return -ENOMEM;
+
+	memcpy(&new->data[0], value, map->value_size);
+
+	new = xchg(&storage->buf, new);
+	kfree_rcu(new, rcu);
+
+	return 0;
+}
+
+static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
+				       void *_next_key)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	struct bpf_cgroup_storage_key *key = _key;
+	struct bpf_cgroup_storage_key *next = _next_key;
+	struct bpf_cgroup_storage *storage;
+
+	spin_lock_bh(&map->lock);
+
+	if (list_empty(&map->list))
+		goto enoent;
+
+	if (key) {
+		storage = cgroup_storage_lookup(map, key, true);
+		if (!storage)
+			goto enoent;
+
+		storage = list_next_entry(storage, list);
+		if (!storage)
+			goto enoent;
+	} else {
+		storage = list_first_entry(&map->list,
+					 struct bpf_cgroup_storage, list);
+	}
+
+	spin_unlock_bh(&map->lock);
+	next->attach_type = storage->key.attach_type;
+	next->cgroup_inode_id = storage->key.cgroup_inode_id;
+	return 0;
+
+enoent:
+	spin_unlock_bh(&map->lock);
+	return -ENOENT;
+}
+
+static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
+{
+	int numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_cgroup_storage_map *map;
+
+	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
+		return ERR_PTR(-EINVAL);
+
+	if (attr->value_size > PAGE_SIZE)
+		return ERR_PTR(-E2BIG);
+
+	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
+		/* reserved bits should not be used */
+		return ERR_PTR(-EINVAL);
+
+	if (attr->max_entries)
+		/* max_entries is not used and enforced to be 0 */
+		return ERR_PTR(-EINVAL);
+
+	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
+			   __GFP_ZERO | GFP_USER, numa_node);
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+
+	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
+				  PAGE_SIZE) >> PAGE_SHIFT;
+
+	/* copy mandatory map attributes */
+	bpf_map_init_from_attr(&map->map, attr);
+
+	spin_lock_init(&map->lock);
+	map->root = RB_ROOT;
+	INIT_LIST_HEAD(&map->list);
+
+	return &map->map;
+}
+
+static void cgroup_storage_map_free(struct bpf_map *_map)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+
+	WARN_ON(!RB_EMPTY_ROOT(&map->root));
+	WARN_ON(!list_empty(&map->list));
+
+	kfree(map);
+}
+
+static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
+{
+	return -EINVAL;
+}
+
+const struct bpf_map_ops cgroup_storage_map_ops = {
+	.map_alloc = cgroup_storage_map_alloc,
+	.map_free = cgroup_storage_map_free,
+	.map_get_next_key = cgroup_storage_get_next_key,
+	.map_lookup_elem = cgroup_storage_lookup_elem,
+	.map_update_elem = cgroup_storage_update_elem,
+	.map_delete_elem = cgroup_storage_delete_elem,
+};
+
+int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	int ret = -EBUSY;
+
+	spin_lock_bh(&map->lock);
+
+	if (map->prog && map->prog != prog)
+		goto unlock;
+	if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
+		goto unlock;
+
+	map->prog = prog;
+	prog->aux->cgroup_storage = _map;
+	ret = 0;
+unlock:
+	spin_unlock_bh(&map->lock);
+
+	return ret;
+}
+
+void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+
+	spin_lock_bh(&map->lock);
+	if (map->prog == prog) {
+		WARN_ON(prog->aux->cgroup_storage != _map);
+		map->prog = NULL;
+		prog->aux->cgroup_storage = NULL;
+	}
+	spin_unlock_bh(&map->lock);
+}
+
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
+{
+	struct bpf_cgroup_storage *storage;
+	struct bpf_map *map;
+	u32 pages;
+
+	map = prog->aux->cgroup_storage;
+	if (!map)
+		return NULL;
+
+	pages = round_up(sizeof(struct bpf_cgroup_storage) +
+			 sizeof(struct bpf_storage_buffer) +
+			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
+	if (bpf_map_charge_memlock(map, pages))
+		return ERR_PTR(-EPERM);
+
+	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
+			       __GFP_ZERO | GFP_USER, map->numa_node);
+	if (!storage) {
+		bpf_map_uncharge_memlock(map, pages);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
+				    map->value_size, __GFP_ZERO | GFP_USER,
+				    map->numa_node);
+	if (!storage->buf) {
+		bpf_map_uncharge_memlock(map, pages);
+		kfree(storage);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	storage->map = (struct bpf_cgroup_storage_map *)map;
+
+	return storage;
+}
+
+void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
+{
+	u32 pages;
+	struct bpf_map *map;
+
+	if (!storage)
+		return;
+
+	map = &storage->map->map;
+	pages = round_up(sizeof(struct bpf_cgroup_storage) +
+			 sizeof(struct bpf_storage_buffer) +
+			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
+	bpf_map_uncharge_memlock(map, pages);
+
+	kfree_rcu(storage->buf, rcu);
+	kfree_rcu(storage, rcu);
+}
+
+void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
+			     struct cgroup *cgroup,
+			     enum bpf_attach_type type)
+{
+	struct bpf_cgroup_storage_map *map;
+
+	if (!storage)
+		return;
+
+	storage->key.attach_type = type;
+	storage->key.cgroup_inode_id = cgroup->kn->id.id;
+
+	map = storage->map;
+
+	spin_lock_bh(&map->lock);
+	WARN_ON(cgroup_storage_insert(map, storage));
+	list_add(&storage->list, &map->list);
+	spin_unlock_bh(&map->lock);
+}
+
+void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
+{
+	struct bpf_cgroup_storage_map *map;
+	struct rb_root *root;
+
+	if (!storage)
+		return;
+
+	map = storage->map;
+
+	spin_lock_bh(&map->lock);
+	root = &map->root;
+	rb_erase(&storage->node, root);
+
+	list_del(&storage->list);
+	spin_unlock_bh(&map->lock);
+}
+
+#endif
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7958252a4d29..5af4e9e2722d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -957,6 +957,9 @@ static void free_used_maps(struct bpf_prog_aux *aux)
 {
 	int i;
 
+	if (aux->cgroup_storage)
+		bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
+
 	for (i = 0; i < aux->used_map_cnt; i++)
 		bpf_map_put(aux->used_maps[i]);
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e948303a0ea8..7e75434a9e54 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5154,6 +5154,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 			}
 			env->used_maps[env->used_map_cnt++] = map;
 
+			if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE &&
+			    bpf_cgroup_storage_assign(env->prog, map)) {
+				verbose(env,
+					"only one cgroup storage is allowed\n");
+				fdput(f);
+				return -EBUSY;
+			}
+
 			fdput(f);
 next_insn:
 			insn++;
@@ -5180,6 +5188,10 @@ static void release_maps(struct bpf_verifier_env *env)
 {
 	int i;
 
+	if (env->prog->aux->cgroup_storage)
+		bpf_cgroup_storage_release(env->prog,
+					   env->prog->aux->cgroup_storage);
+
 	for (i = 0; i < env->used_map_cnt; i++)
 		bpf_map_put(env->used_maps[i]);
 }
-- 
cgit v1.2.3


From aa0ad5b0391e268bdecf6cda31268388844f8afd Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:19 -0700
Subject: bpf: pass a pointer to a cgroup storage using pcpu variable

This commit introduces the bpf_cgroup_storage_set() helper,
which will be used to pass a pointer to a cgroup storage
to the bpf helper.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 15 +++++++++++++++
 kernel/bpf/local_storage.c |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 7d00d58869ed..9a144ddbbc8f 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -4,6 +4,7 @@
 
 #include <linux/errno.h>
 #include <linux/jump_label.h>
+#include <linux/percpu.h>
 #include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>
 
@@ -21,6 +22,8 @@ struct bpf_cgroup_storage;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
+DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+
 struct bpf_cgroup_storage_map;
 
 struct bpf_storage_buffer {
@@ -97,6 +100,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum bpf_attach_type type);
 
+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
+{
+	struct bpf_storage_buffer *buf;
+
+	if (!storage)
+		return;
+
+	buf = READ_ONCE(storage->buf);
+	this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
+}
+
 struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
@@ -250,6 +264,7 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
 					    struct bpf_map *map) { return 0; }
 static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index f23d3fdeba23..fc4e37f68f2a 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -7,6 +7,8 @@
 #include <linux/rbtree.h>
 #include <linux/slab.h>
 
+DEFINE_PER_CPU(void*, bpf_cgroup_storage);
+
 #ifdef CONFIG_CGROUP_BPF
 
 #define LOCAL_STORAGE_CREATE_FLAG_MASK					\
-- 
cgit v1.2.3


From d7bf2c10af053191e931b58704cd862fccb7f9de Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:20 -0700
Subject: bpf: allocate cgroup storage entries on attaching bpf programs

If a bpf program is using cgroup local storage, allocate
a bpf_cgroup_storage structure automatically on attaching the program
to a cgroup and save the pointer into the corresponding bpf_prog_list
entry.
Analogically, release the cgroup local storage on detaching
of the bpf program.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h |  1 +
 kernel/bpf/cgroup.c        | 35 +++++++++++++++++++++++++++++++----
 2 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 9a144ddbbc8f..f91b0f8ff3a9 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -43,6 +43,7 @@ struct bpf_cgroup_storage {
 struct bpf_prog_list {
 	struct list_head node;
 	struct bpf_prog *prog;
+	struct bpf_cgroup_storage *storage;
 };
 
 struct bpf_prog_array;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index badabb0b435c..935274c86bfe 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -34,6 +34,8 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 		list_for_each_entry_safe(pl, tmp, progs, node) {
 			list_del(&pl->node);
 			bpf_prog_put(pl->prog);
+			bpf_cgroup_storage_unlink(pl->storage);
+			bpf_cgroup_storage_free(pl->storage);
 			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
@@ -188,6 +190,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 {
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	struct bpf_prog *old_prog = NULL;
+	struct bpf_cgroup_storage *storage, *old_storage = NULL;
 	struct cgroup_subsys_state *css;
 	struct bpf_prog_list *pl;
 	bool pl_was_allocated;
@@ -210,31 +213,47 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
 		return -E2BIG;
 
+	storage = bpf_cgroup_storage_alloc(prog);
+	if (IS_ERR(storage))
+		return -ENOMEM;
+
 	if (flags & BPF_F_ALLOW_MULTI) {
-		list_for_each_entry(pl, progs, node)
-			if (pl->prog == prog)
+		list_for_each_entry(pl, progs, node) {
+			if (pl->prog == prog) {
 				/* disallow attaching the same prog twice */
+				bpf_cgroup_storage_free(storage);
 				return -EINVAL;
+			}
+		}
 
 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-		if (!pl)
+		if (!pl) {
+			bpf_cgroup_storage_free(storage);
 			return -ENOMEM;
+		}
+
 		pl_was_allocated = true;
 		pl->prog = prog;
+		pl->storage = storage;
 		list_add_tail(&pl->node, progs);
 	} else {
 		if (list_empty(progs)) {
 			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-			if (!pl)
+			if (!pl) {
+				bpf_cgroup_storage_free(storage);
 				return -ENOMEM;
+			}
 			pl_was_allocated = true;
 			list_add_tail(&pl->node, progs);
 		} else {
 			pl = list_first_entry(progs, typeof(*pl), node);
 			old_prog = pl->prog;
+			old_storage = pl->storage;
+			bpf_cgroup_storage_unlink(old_storage);
 			pl_was_allocated = false;
 		}
 		pl->prog = prog;
+		pl->storage = storage;
 	}
 
 	cgrp->bpf.flags[type] = flags;
@@ -257,10 +276,13 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 	}
 
 	static_branch_inc(&cgroup_bpf_enabled_key);
+	if (old_storage)
+		bpf_cgroup_storage_free(old_storage);
 	if (old_prog) {
 		bpf_prog_put(old_prog);
 		static_branch_dec(&cgroup_bpf_enabled_key);
 	}
+	bpf_cgroup_storage_link(storage, cgrp, type);
 	return 0;
 
 cleanup:
@@ -276,6 +298,9 @@ cleanup:
 
 	/* and cleanup the prog list */
 	pl->prog = old_prog;
+	bpf_cgroup_storage_free(pl->storage);
+	pl->storage = old_storage;
+	bpf_cgroup_storage_link(old_storage, cgrp, type);
 	if (pl_was_allocated) {
 		list_del(&pl->node);
 		kfree(pl);
@@ -356,6 +381,8 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 
 	/* now can actually delete it from this cgroup list */
 	list_del(&pl->node);
+	bpf_cgroup_storage_unlink(pl->storage);
+	bpf_cgroup_storage_free(pl->storage);
 	kfree(pl);
 	if (list_empty(progs))
 		/* last program was detached, reset flags to zero */
-- 
cgit v1.2.3


From 394e40a29788820c9c0526b1c3497c9e0ec2a126 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:21 -0700
Subject: bpf: extend bpf_prog_array to store pointers to the cgroup storage

This patch converts bpf_prog_array from an array of prog pointers
to the array of struct bpf_prog_array_item elements.

This allows to save a cgroup storage pointer for each bpf program
efficiently attached to a cgroup.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/media/rc/bpf-lirc.c | 10 +++---
 include/linux/bpf.h         | 19 ++++++++----
 kernel/bpf/cgroup.c         | 21 +++++++------
 kernel/bpf/core.c           | 76 +++++++++++++++++++++++----------------------
 4 files changed, 70 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index fcfab6635f9c..8c26df9b96c1 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -195,14 +195,16 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
  */
 void lirc_bpf_free(struct rc_dev *rcdev)
 {
-	struct bpf_prog **progs;
+	struct bpf_prog_array_item *item;
 
 	if (!rcdev->raw->progs)
 		return;
 
-	progs = rcu_dereference(rcdev->raw->progs)->progs;
-	while (*progs)
-		bpf_prog_put(*progs++);
+	item = rcu_dereference(rcdev->raw->progs)->items;
+	while (item->prog) {
+		bpf_prog_put(item->prog);
+		item++;
+	}
 
 	bpf_prog_array_free(rcdev->raw->progs);
 }
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9d1e4727495e..16be67888c30 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -349,9 +349,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
  * The 'struct bpf_prog_array *' should only be replaced with xchg()
  * since other cpus are walking the array of pointers in parallel.
  */
+struct bpf_prog_array_item {
+	struct bpf_prog *prog;
+	struct bpf_cgroup_storage *cgroup_storage;
+};
+
 struct bpf_prog_array {
 	struct rcu_head rcu;
-	struct bpf_prog *progs[0];
+	struct bpf_prog_array_item items[0];
 };
 
 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
@@ -372,7 +377,8 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 
 #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null)	\
 	({						\
-		struct bpf_prog **_prog, *__prog;	\
+		struct bpf_prog_array_item *_item;	\
+		struct bpf_prog *_prog;			\
 		struct bpf_prog_array *_array;		\
 		u32 _ret = 1;				\
 		preempt_disable();			\
@@ -380,10 +386,11 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 		_array = rcu_dereference(array);	\
 		if (unlikely(check_non_null && !_array))\
 			goto _out;			\
-		_prog = _array->progs;			\
-		while ((__prog = READ_ONCE(*_prog))) {	\
-			_ret &= func(__prog, ctx);	\
-			_prog++;			\
+		_item = &_array->items[0];		\
+		while ((_prog = READ_ONCE(_item->prog))) {		\
+			bpf_cgroup_storage_set(_item->cgroup_storage);	\
+			_ret &= func(_prog, ctx);	\
+			_item++;			\
 		}					\
 _out:							\
 		rcu_read_unlock();			\
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 935274c86bfe..ddfa6cc13e57 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -117,15 +117,18 @@ static int compute_effective_progs(struct cgroup *cgrp,
 	cnt = 0;
 	p = cgrp;
 	do {
-		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
-			list_for_each_entry(pl,
-					    &p->bpf.progs[type], node) {
-				if (!pl->prog)
-					continue;
-				progs->progs[cnt++] = pl->prog;
-			}
-		p = cgroup_parent(p);
-	} while (p);
+		if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+			continue;
+
+		list_for_each_entry(pl, &p->bpf.progs[type], node) {
+			if (!pl->prog)
+				continue;
+
+			progs->items[cnt].prog = pl->prog;
+			progs->items[cnt].cgroup_storage = pl->storage;
+			cnt++;
+		}
+	} while ((p = cgroup_parent(p)));
 
 	rcu_assign_pointer(*array, progs);
 	return 0;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 253aa8e79c7b..9abcf25ebf9f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1542,7 +1542,8 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
 {
 	if (prog_cnt)
 		return kzalloc(sizeof(struct bpf_prog_array) +
-			       sizeof(struct bpf_prog *) * (prog_cnt + 1),
+			       sizeof(struct bpf_prog_array_item) *
+			       (prog_cnt + 1),
 			       flags);
 
 	return &empty_prog_array.hdr;
@@ -1556,43 +1557,45 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
 	kfree_rcu(progs, rcu);
 }
 
-int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
+int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
 {
-	struct bpf_prog **prog;
+	struct bpf_prog_array_item *item;
 	u32 cnt = 0;
 
 	rcu_read_lock();
-	prog = rcu_dereference(progs)->progs;
-	for (; *prog; prog++)
-		if (*prog != &dummy_bpf_prog.prog)
+	item = rcu_dereference(array)->items;
+	for (; item->prog; item++)
+		if (item->prog != &dummy_bpf_prog.prog)
 			cnt++;
 	rcu_read_unlock();
 	return cnt;
 }
 
-static bool bpf_prog_array_copy_core(struct bpf_prog **prog,
+
+static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
 				     u32 *prog_ids,
 				     u32 request_cnt)
 {
+	struct bpf_prog_array_item *item;
 	int i = 0;
 
-	for (; *prog; prog++) {
-		if (*prog == &dummy_bpf_prog.prog)
+	item = rcu_dereference(array)->items;
+	for (; item->prog; item++) {
+		if (item->prog == &dummy_bpf_prog.prog)
 			continue;
-		prog_ids[i] = (*prog)->aux->id;
+		prog_ids[i] = item->prog->aux->id;
 		if (++i == request_cnt) {
-			prog++;
+			item++;
 			break;
 		}
 	}
 
-	return !!(*prog);
+	return !!(item->prog);
 }
 
-int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
+int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
 				__u32 __user *prog_ids, u32 cnt)
 {
-	struct bpf_prog **prog;
 	unsigned long err = 0;
 	bool nospc;
 	u32 *ids;
@@ -1611,8 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
 	if (!ids)
 		return -ENOMEM;
 	rcu_read_lock();
-	prog = rcu_dereference(progs)->progs;
-	nospc = bpf_prog_array_copy_core(prog, ids, cnt);
+	nospc = bpf_prog_array_copy_core(array, ids, cnt);
 	rcu_read_unlock();
 	err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
 	kfree(ids);
@@ -1623,14 +1625,14 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
 	return 0;
 }
 
-void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array,
 				struct bpf_prog *old_prog)
 {
-	struct bpf_prog **prog = progs->progs;
+	struct bpf_prog_array_item *item = array->items;
 
-	for (; *prog; prog++)
-		if (*prog == old_prog) {
-			WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
+	for (; item->prog; item++)
+		if (item->prog == old_prog) {
+			WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
 			break;
 		}
 }
@@ -1641,7 +1643,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 			struct bpf_prog_array **new_array)
 {
 	int new_prog_cnt, carry_prog_cnt = 0;
-	struct bpf_prog **existing_prog;
+	struct bpf_prog_array_item *existing;
 	struct bpf_prog_array *array;
 	bool found_exclude = false;
 	int new_prog_idx = 0;
@@ -1650,15 +1652,15 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 	 * the new array.
 	 */
 	if (old_array) {
-		existing_prog = old_array->progs;
-		for (; *existing_prog; existing_prog++) {
-			if (*existing_prog == exclude_prog) {
+		existing = old_array->items;
+		for (; existing->prog; existing++) {
+			if (existing->prog == exclude_prog) {
 				found_exclude = true;
 				continue;
 			}
-			if (*existing_prog != &dummy_bpf_prog.prog)
+			if (existing->prog != &dummy_bpf_prog.prog)
 				carry_prog_cnt++;
-			if (*existing_prog == include_prog)
+			if (existing->prog == include_prog)
 				return -EEXIST;
 		}
 	}
@@ -1684,15 +1686,17 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 
 	/* Fill in the new prog array */
 	if (carry_prog_cnt) {
-		existing_prog = old_array->progs;
-		for (; *existing_prog; existing_prog++)
-			if (*existing_prog != exclude_prog &&
-			    *existing_prog != &dummy_bpf_prog.prog)
-				array->progs[new_prog_idx++] = *existing_prog;
+		existing = old_array->items;
+		for (; existing->prog; existing++)
+			if (existing->prog != exclude_prog &&
+			    existing->prog != &dummy_bpf_prog.prog) {
+				array->items[new_prog_idx++].prog =
+					existing->prog;
+			}
 	}
 	if (include_prog)
-		array->progs[new_prog_idx++] = include_prog;
-	array->progs[new_prog_idx] = NULL;
+		array->items[new_prog_idx++].prog = include_prog;
+	array->items[new_prog_idx].prog = NULL;
 	*new_array = array;
 	return 0;
 }
@@ -1701,7 +1705,6 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
 			     u32 *prog_ids, u32 request_cnt,
 			     u32 *prog_cnt)
 {
-	struct bpf_prog **prog;
 	u32 cnt = 0;
 
 	if (array)
@@ -1714,8 +1717,7 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
 		return 0;
 
 	/* this function is called under trace/bpf_trace.c: bpf_event_mutex */
-	prog = rcu_dereference_check(array, 1)->progs;
-	return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC
+	return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC
 								     : 0;
 }
 
-- 
cgit v1.2.3


From 3e6a4b3e0289dc9540a2c1d8a20657f4707fbabb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:22 -0700
Subject: bpf/verifier: introduce BPF_PTR_TO_MAP_VALUE

BPF_MAP_TYPE_CGROUP_STORAGE maps are special in a way
that the access from the bpf program side is lookup-free.
That means the result is guaranteed to be a valid
pointer to the cgroup storage; no NULL-check is required.

This patch introduces BPF_PTR_TO_MAP_VALUE return type,
which is required to cause the verifier accept programs,
which are not checking the map value pointer for being NULL.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h   | 1 +
 kernel/bpf/verifier.c | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 16be67888c30..ca4ac2a39def 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -155,6 +155,7 @@ enum bpf_arg_type {
 enum bpf_return_type {
 	RET_INTEGER,			/* function returns integer */
 	RET_VOID,			/* function doesn't return anything */
+	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
 	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7e75434a9e54..1ede16c8bb40 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2545,8 +2545,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		mark_reg_unknown(env, regs, BPF_REG_0);
 	} else if (fn->ret_type == RET_VOID) {
 		regs[BPF_REG_0].type = NOT_INIT;
-	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
-		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
+		   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
+		if (fn->ret_type == RET_PTR_TO_MAP_VALUE)
+			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
+		else
+			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
 		/* There is no offset yet applied, variable or fixed */
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		regs[BPF_REG_0].off = 0;
-- 
cgit v1.2.3


From cd3394317653837e2eb5c5d0904a8996102af9fc Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:24 -0700
Subject: bpf: introduce the bpf_get_local_storage() helper function

The bpf_get_local_storage() helper function is used
to get a pointer to the bpf local storage from a bpf program.

It takes a pointer to a storage map and flags as arguments.
Right now it accepts only cgroup storage maps, and flags
argument has to be 0. Further it can be extended to support
other types of local storage: e.g. thread local storage etc.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h      |  2 ++
 include/uapi/linux/bpf.h | 21 ++++++++++++++++++++-
 kernel/bpf/cgroup.c      |  2 ++
 kernel/bpf/core.c        |  1 +
 kernel/bpf/helpers.c     | 20 ++++++++++++++++++++
 kernel/bpf/verifier.c    | 18 ++++++++++++++++++
 net/core/filter.c        | 23 ++++++++++++++++++++++-
 7 files changed, 85 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ca4ac2a39def..cd8790d2c6ed 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -788,6 +788,8 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
 
+extern const struct bpf_func_proto bpf_get_local_storage_proto;
+
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b10118ee5afe..dd5758dc35d3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2095,6 +2095,24 @@ union bpf_attr {
  * 	Return
  * 		A 64-bit integer containing the current cgroup id based
  * 		on the cgroup within which the current task is running.
+ *
+ * void* get_local_storage(void *map, u64 flags)
+ *	Description
+ *		Get the pointer to the local storage area.
+ *		The type and the size of the local storage is defined
+ *		by the *map* argument.
+ *		The *flags* meaning is specific for each map type,
+ *		and has to be 0 for cgroup local storage.
+ *
+ *		Depending on the bpf program type, a local storage area
+ *		can be shared between multiple instances of the bpf program,
+ *		running simultaneously.
+ *
+ *		A user should care about the synchronization by himself.
+ *		For example, by using the BPF_STX_XADD instruction to alter
+ *		the shared data.
+ *	Return
+ *		Pointer to the local storage area.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2177,7 +2195,8 @@ union bpf_attr {
 	FN(rc_repeat),			\
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
-	FN(get_current_cgroup_id),
+	FN(get_current_cgroup_id),	\
+	FN(get_local_storage),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ddfa6cc13e57..0a4fe5a7dc91 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -684,6 +684,8 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_map_delete_elem_proto;
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	case BPF_FUNC_trace_printk:
 		if (capable(CAP_SYS_ADMIN))
 			return bpf_get_trace_printk_proto();
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9abcf25ebf9f..4d09e610777f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1795,6 +1795,7 @@ const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 const struct bpf_func_proto bpf_sock_map_update_proto __weak;
 const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
+const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 73065e2d23c2..1991466b8327 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -193,4 +193,24 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 };
+
+DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+
+BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
+{
+	/* map and flags arguments are not used now,
+	 * but provide an ability to extend the API
+	 * for other types of local storages.
+	 * verifier checks that their values are correct.
+	 */
+	return (unsigned long) this_cpu_read(bpf_cgroup_storage);
+}
+
+const struct bpf_func_proto bpf_get_local_storage_proto = {
+	.func		= bpf_get_local_storage,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MAP_VALUE,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_ANYTHING,
+};
 #endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1ede16c8bb40..587468a9c37d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2127,6 +2127,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_current_task_under_cgroup)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_CGROUP_STORAGE:
+		if (func_id != BPF_FUNC_get_local_storage)
+			goto error;
+		break;
 	/* devmap returns a pointer to a live net_device ifindex that we cannot
 	 * allow to be modified from bpf side. So do not allow lookup elements
 	 * for now.
@@ -2209,6 +2213,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
 			goto error;
 		break;
+	case BPF_FUNC_get_local_storage:
+		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -2533,6 +2541,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	}
 
 	regs = cur_regs(env);
+
+	/* check that flags argument in get_local_storage(map, flags) is 0,
+	 * this is required because get_local_storage() can't return an error.
+	 */
+	if (func_id == BPF_FUNC_get_local_storage &&
+	    !register_is_null(&regs[BPF_REG_2])) {
+		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
+		return -EINVAL;
+	}
+
 	/* reset caller saved regs */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 		mark_reg_not_init(env, regs, caller_saved[i]);
diff --git a/net/core/filter.c b/net/core/filter.c
index 9bb9a4488e25..9f73aae2f089 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4820,6 +4820,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	 */
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -4844,6 +4846,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		}
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_sock_addr_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -4866,6 +4870,17 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+static const struct bpf_func_proto *
+cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
+	default:
+		return sk_filter_func_proto(func_id, prog);
+	}
+}
+
 static const struct bpf_func_proto *
 tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -4988,6 +5003,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sock_hash_update_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_sock_ops_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -5007,6 +5024,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_msg_cork_bytes_proto;
 	case BPF_FUNC_msg_pull_data:
 		return &bpf_msg_pull_data_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -5034,6 +5053,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_redirect_map_proto;
 	case BPF_FUNC_sk_redirect_hash:
 		return &bpf_sk_redirect_hash_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -6838,7 +6859,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 };
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
-	.get_func_proto		= sk_filter_func_proto,
+	.get_func_proto		= cg_skb_func_proto,
 	.is_valid_access	= sk_filter_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
 };
-- 
cgit v1.2.3


From 9f49a5b5c21d58aa84e16cfdc5e99e49faefcb7a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Sun, 29 Jul 2018 11:34:56 +0300
Subject: RDMA/netdev: Use priv_destructor for netdev cleanup

Now that the unregister_netdev flow for IPoIB no longer relies on external
code we can now introduce the use of priv_destructor and
needs_free_netdev.

The rdma_netdev flow is switched to use the netdev common priv_destructor
instead of the special free_rdma_netdev and the IPOIB ULP adjusted:
 - priv_destructor needs to switch to point to the ULP's destructor
   which will then call the rdma_ndev's in the right order
 - We need to be careful around the error unwind of register_netdev
   as it sometimes calls priv_destructor on failure
 - ULPs need to use ndo_init/uninit to ensure proper ordering
   of failures around register_netdev

Switching to priv_destructor is a necessary pre-requisite to using
the rtnl new_link mechanism.

The VNIC user for rdma_netdev should also be revised, but that is left for
another patch.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Denis Drozdov <denisd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                  |  10 --
 drivers/infiniband/ulp/ipoib/ipoib.h               |   2 +
 drivers/infiniband/ulp/ipoib/ipoib_main.c          | 101 +++++++++++++--------
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c          |  68 ++++++++------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  |  37 ++++----
 include/linux/mlx5/driver.h                        |   3 -
 include/rdma/ib_verbs.h                            |   6 +-
 7 files changed, 129 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 06d6309b719a..13744b4631b4 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5157,11 +5157,6 @@ done:
 	return num_counters;
 }
 
-static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
-{
-	return mlx5_rdma_netdev_free(netdev);
-}
-
 static struct net_device*
 mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
 			  u8 port_num,
@@ -5171,17 +5166,12 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
 			  void (*setup)(struct net_device *))
 {
 	struct net_device *netdev;
-	struct rdma_netdev *rn;
 
 	if (type != RDMA_NETDEV_IPOIB)
 		return ERR_PTR(-EOPNOTSUPP);
 
 	netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
 					name, setup);
-	if (likely(!IS_ERR_OR_NULL(netdev))) {
-		rn = netdev_priv(netdev);
-		rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev;
-	}
 	return netdev;
 }
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 02ad1a60dc80..d2cb0a8500e3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -323,6 +323,7 @@ struct ipoib_dev_priv {
 	spinlock_t lock;
 
 	struct net_device *dev;
+	void (*next_priv_destructor)(struct net_device *dev);
 
 	struct napi_struct send_napi;
 	struct napi_struct recv_napi;
@@ -481,6 +482,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
 	kref_put(&ah->ref, ipoib_free_ah);
 }
 int ipoib_open(struct net_device *dev);
+void ipoib_intf_free(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
 int ipoib_add_umcast_attr(struct net_device *dev);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 67ab52eec3e9..73d917d57f93 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2062,6 +2062,13 @@ void ipoib_setup_common(struct net_device *dev)
 	netif_keep_dst(dev);
 
 	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+
+	/*
+	 * unregister_netdev always frees the netdev, we use this mode
+	 * consistently to unify all the various unregister paths, including
+	 * those connected to rtnl_link_ops which require it.
+	 */
+	dev->needs_free_netdev = true;
 }
 
 static void ipoib_build_priv(struct net_device *dev)
@@ -2116,9 +2123,7 @@ static struct net_device
 	rn->send = ipoib_send;
 	rn->attach_mcast = ipoib_mcast_attach;
 	rn->detach_mcast = ipoib_mcast_detach;
-	rn->free_rdma_netdev = free_netdev;
 	rn->hca = hca;
-
 	dev->netdev_ops = &ipoib_netdev_default_pf;
 
 	return dev;
@@ -2173,6 +2178,15 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
 
 	rn = netdev_priv(dev);
 	rn->clnt_priv = priv;
+
+	/*
+	 * Only the child register_netdev flows can handle priv_destructor
+	 * being set, so we force it to NULL here and handle manually until it
+	 * is safe to turn on.
+	 */
+	priv->next_priv_destructor = dev->priv_destructor;
+	dev->priv_destructor = NULL;
+
 	ipoib_build_priv(dev);
 
 	return priv;
@@ -2181,6 +2195,27 @@ free_priv:
 	return NULL;
 }
 
+void ipoib_intf_free(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct rdma_netdev *rn = netdev_priv(dev);
+
+	dev->priv_destructor = priv->next_priv_destructor;
+	if (dev->priv_destructor)
+		dev->priv_destructor(dev);
+
+	/*
+	 * There are some error flows around register_netdev failing that may
+	 * attempt to call priv_destructor twice, prevent that from happening.
+	 */
+	dev->priv_destructor = NULL;
+
+	/* unregister/destroy is very complicated. Make bugs more obvious. */
+	rn->clnt_priv = NULL;
+
+	kfree(priv);
+}
+
 static ssize_t show_pkey(struct device *dev,
 			 struct device_attribute *attr, char *buf)
 {
@@ -2341,7 +2376,7 @@ static struct net_device *ipoib_add_port(const char *format,
 					 struct ib_device *hca, u8 port)
 {
 	struct ipoib_dev_priv *priv;
-	struct rdma_netdev *rn;
+	struct net_device *ndev;
 	int result;
 
 	priv = ipoib_intf_alloc(hca, port, format);
@@ -2349,6 +2384,7 @@ static struct net_device *ipoib_add_port(const char *format,
 		pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
 		return ERR_PTR(-ENOMEM);
 	}
+	ndev = priv->dev;
 
 	INIT_IB_EVENT_HANDLER(&priv->event_handler,
 			      priv->ca, ipoib_event);
@@ -2357,38 +2393,43 @@ static struct net_device *ipoib_add_port(const char *format,
 	/* call event handler to ensure pkey in sync */
 	queue_work(ipoib_workqueue, &priv->flush_heavy);
 
-	result = register_netdev(priv->dev);
+	result = register_netdev(ndev);
 	if (result) {
 		pr_warn("%s: couldn't register ipoib port %d; error %d\n",
 			hca->name, port, result);
-		ipoib_parent_unregister_pre(priv->dev);
-		goto device_init_failed;
+
+		ipoib_parent_unregister_pre(ndev);
+		ipoib_intf_free(ndev);
+		free_netdev(ndev);
+
+		return ERR_PTR(result);
 	}
 
-	result = -ENOMEM;
-	if (ipoib_cm_add_mode_attr(priv->dev))
+	/*
+	 * We cannot set priv_destructor before register_netdev because we
+	 * need priv to be always valid during the error flow to execute
+	 * ipoib_parent_unregister_pre(). Instead handle it manually and only
+	 * enter priv_destructor mode once we are completely registered.
+	 */
+	ndev->priv_destructor = ipoib_intf_free;
+
+	if (ipoib_cm_add_mode_attr(ndev))
 		goto sysfs_failed;
-	if (ipoib_add_pkey_attr(priv->dev))
+	if (ipoib_add_pkey_attr(ndev))
 		goto sysfs_failed;
-	if (ipoib_add_umcast_attr(priv->dev))
+	if (ipoib_add_umcast_attr(ndev))
 		goto sysfs_failed;
-	if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
+	if (device_create_file(&ndev->dev, &dev_attr_create_child))
 		goto sysfs_failed;
-	if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
+	if (device_create_file(&ndev->dev, &dev_attr_delete_child))
 		goto sysfs_failed;
 
-	return priv->dev;
+	return ndev;
 
 sysfs_failed:
-	ipoib_parent_unregister_pre(priv->dev);
-	unregister_netdev(priv->dev);
-
-device_init_failed:
-	rn = netdev_priv(priv->dev);
-	rn->free_rdma_netdev(priv->dev);
-	kfree(priv);
-
-	return ERR_PTR(result);
+	ipoib_parent_unregister_pre(ndev);
+	unregister_netdev(ndev);
+	return ERR_PTR(-ENOMEM);
 }
 
 static void ipoib_add_one(struct ib_device *device)
@@ -2426,33 +2467,19 @@ static void ipoib_add_one(struct ib_device *device)
 
 static void ipoib_remove_one(struct ib_device *device, void *client_data)
 {
-	struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
+	struct ipoib_dev_priv *priv, *tmp;
 	struct list_head *dev_list = client_data;
 
 	if (!dev_list)
 		return;
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
-		struct rdma_netdev *parent_rn = netdev_priv(priv->dev);
-
 		ipoib_parent_unregister_pre(priv->dev);
 
 		/* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */
 		mutex_lock(&priv->sysfs_mutex);
 		unregister_netdev(priv->dev);
 		mutex_unlock(&priv->sysfs_mutex);
-
-		parent_rn->free_rdma_netdev(priv->dev);
-
-		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
-			struct rdma_netdev *child_rn;
-
-			child_rn = netdev_priv(cpriv->dev);
-			child_rn->free_rdma_netdev(cpriv->dev);
-			kfree(cpriv);
-		}
-
-		kfree(priv);
 	}
 
 	kfree(dev_list);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 3103729a73fd..7776334cf8c5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -50,31 +50,53 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
 }
 static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
 
+/*
+ * NOTE: If this function fails then the priv->dev will remain valid, however
+ * priv can have been freed and must not be touched by caller in the error
+ * case.
+ *
+ * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to
+ * free the net_device (just as rtnl_newlink does) otherwise the net_device
+ * will be freed when the rtnl is unlocked.
+ */
 int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
 		     u16 pkey, int type)
 {
+	struct net_device *ndev = priv->dev;
 	int result;
 
+	ASSERT_RTNL();
+
 	priv->parent = ppriv->dev;
 	priv->pkey = pkey;
 	priv->child_type = type;
 
-	result = register_netdevice(priv->dev);
+	/* We do not need to touch priv if register_netdevice fails */
+	ndev->priv_destructor = ipoib_intf_free;
+
+	result = register_netdevice(ndev);
 	if (result) {
 		ipoib_warn(priv, "failed to initialize; error %i", result);
+
+		/*
+		 * register_netdevice sometimes calls priv_destructor,
+		 * sometimes not. Make sure it was done.
+		 */
+		if (ndev->priv_destructor)
+			ndev->priv_destructor(ndev);
 		return result;
 	}
 
 	/* RTNL childs don't need proprietary sysfs entries */
 	if (type == IPOIB_LEGACY_CHILD) {
-		if (ipoib_cm_add_mode_attr(priv->dev))
+		if (ipoib_cm_add_mode_attr(ndev))
 			goto sysfs_failed;
-		if (ipoib_add_pkey_attr(priv->dev))
+		if (ipoib_add_pkey_attr(ndev))
 			goto sysfs_failed;
-		if (ipoib_add_umcast_attr(priv->dev))
+		if (ipoib_add_umcast_attr(ndev))
 			goto sysfs_failed;
 
-		if (device_create_file(&priv->dev->dev, &dev_attr_parent))
+		if (device_create_file(&ndev->dev, &dev_attr_parent))
 			goto sysfs_failed;
 	}
 
@@ -91,6 +113,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 {
 	struct ipoib_dev_priv *ppriv, *priv;
 	char intf_name[IFNAMSIZ];
+	struct net_device *ndev;
 	struct ipoib_dev_priv *tpriv;
 	int result;
 
@@ -122,12 +145,6 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 		return restart_syscall();
 	}
 
-	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
-	if (!priv) {
-		result = -ENOMEM;
-		goto out;
-	}
-
 	/*
 	 * First ensure this isn't a duplicate. We check the parent device and
 	 * then all of the legacy child interfaces to make sure the Pkey
@@ -146,21 +163,23 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 		}
 	}
 
+	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
+	if (!priv) {
+		result = -ENOMEM;
+		goto out;
+	}
+	ndev = priv->dev;
+
 	result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
 
+	if (result && ndev->reg_state == NETREG_UNINITIALIZED)
+		free_netdev(ndev);
+
 out:
 	up_write(&ppriv->vlan_rwsem);
 	rtnl_unlock();
 	mutex_unlock(&ppriv->sysfs_mutex);
 
-	if (result && priv) {
-		struct rdma_netdev *rn;
-
-		rn = netdev_priv(priv->dev);
-		rn->free_rdma_netdev(priv->dev);
-		kfree(priv);
-	}
-
 	return result;
 }
 
@@ -212,14 +231,5 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 	rtnl_unlock();
 	mutex_unlock(&ppriv->sysfs_mutex);
 
-	if (dev) {
-		struct rdma_netdev *rn;
-
-		rn = netdev_priv(dev);
-		rn->free_rdma_netdev(priv->dev);
-		kfree(priv);
-		return 0;
-	}
-
-	return -ENODEV;
+	return (dev) ? 0 : -ENODEV;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index af3bb2f7a504..b8d150d2fd72 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -580,6 +580,22 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 	return 0;
 }
 
+static void mlx5_rdma_netdev_free(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+	struct mlx5i_priv *ipriv = priv->ppriv;
+	const struct mlx5e_profile *profile = priv->profile;
+
+	mlx5e_detach_netdev(priv);
+	profile->cleanup(priv);
+	destroy_workqueue(priv->wq);
+
+	if (!ipriv->sub_interface) {
+		mlx5i_pkey_qpn_ht_cleanup(netdev);
+		mlx5e_destroy_mdev_resources(priv->mdev);
+	}
+}
+
 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 					  struct ib_device *ibdev,
 					  const char *name,
@@ -653,6 +669,9 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 	rn->detach_mcast = mlx5i_detach_mcast;
 	rn->set_id = mlx5i_set_pkey_index;
 
+	netdev->priv_destructor = mlx5_rdma_netdev_free;
+	netdev->needs_free_netdev = 1;
+
 	return netdev;
 
 destroy_ht:
@@ -665,21 +684,3 @@ err_free_netdev:
 	return NULL;
 }
 EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
-
-void mlx5_rdma_netdev_free(struct net_device *netdev)
-{
-	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
-	struct mlx5i_priv *ipriv = priv->ppriv;
-	const struct mlx5e_profile *profile = priv->profile;
-
-	mlx5e_detach_netdev(priv);
-	profile->cleanup(priv);
-	destroy_workqueue(priv->wq);
-
-	if (!ipriv->sub_interface) {
-		mlx5i_pkey_qpn_ht_cleanup(netdev);
-		mlx5e_destroy_mdev_resources(priv->mdev);
-	}
-	free_netdev(netdev);
-}
-EXPORT_SYMBOL(mlx5_rdma_netdev_free);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 957199c20a0f..96498ff6beb6 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1218,14 +1218,11 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
-
-static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {}
 #else
 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 					  struct ib_device *ibdev,
 					  const char *name,
 					  void (*setup)(struct net_device *));
-void mlx5_rdma_netdev_free(struct net_device *netdev);
 #endif /* CONFIG_MLX5_CORE_IPOIB */
 
 struct mlx5_profile {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index dea770e5b9ae..4ffe3e11e8fb 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2203,7 +2203,11 @@ struct rdma_netdev {
 	struct ib_device  *hca;
 	u8                 port_num;
 
-	/* cleanup function must be specified */
+	/*
+	 * cleanup function must be specified.
+	 * FIXME: This is only used for OPA_VNIC and that usage should be
+	 * removed too.
+	 */
 	void (*free_rdma_netdev)(struct net_device *netdev);
 
 	/* control functions */
-- 
cgit v1.2.3


From 401c0d7712eb3189023efc9c0708a2ac984ed62e Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 3 Aug 2018 11:50:39 +0200
Subject: spi: spi-mem: Constify spi_mem->name

There is no reason to make spi_mem->name modifiable. Moreover,
spi_mem_ops->get_name() returns a const char *, which generates a gcc
warning when assigning the value returned by spi_mem_ops->get_name()
to spi_mem->name.

Fixes: 5d27a9c8ea9e ("spi: spi-mem: Extend the SPI mem interface to set a custom memory name")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi-mem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 0d64ccc4e584..62722fb7472d 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -134,7 +134,7 @@ struct spi_mem_op {
 struct spi_mem {
 	struct spi_device *spi;
 	void *drvpriv;
-	char *name;
+	const char *name;
 };
 
 /**
-- 
cgit v1.2.3


From 2ad5157650b446f14a719280ba3670303bc4f439 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 11 Jul 2018 18:42:11 -0500
Subject: mailbox/omap: switch to SPDX license identifier

Use the appropriate SPDX license identifier in the OMAP Mailbox
driver source files and drop the previous boilerplate license text.

Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/omap-mailbox.c | 10 +---------
 include/linux/omap-mailbox.h   |  5 +----
 2 files changed, 2 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c
index e1e2c085e68e..97c7d9b7f46f 100644
--- a/drivers/mailbox/omap-mailbox.c
+++ b/drivers/mailbox/omap-mailbox.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * OMAP mailbox driver
  *
@@ -6,15 +7,6 @@
  *
  * Contact: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
  *          Suman Anna <s-anna@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 
 #include <linux/interrupt.h>
diff --git a/include/linux/omap-mailbox.h b/include/linux/omap-mailbox.h
index c726bd833761..6dbcd2da0332 100644
--- a/include/linux/omap-mailbox.h
+++ b/include/linux/omap-mailbox.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * omap-mailbox: interprocessor communication module for OMAP
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef OMAP_MAILBOX_H
-- 
cgit v1.2.3


From 623a6143a845bd485b00ba684f0ccef11835edab Mon Sep 17 00:00:00 2001
From: Houlong Wei <houlong.wei@mediatek.com>
Date: Wed, 25 Jul 2018 09:26:40 +0800
Subject: mailbox: mediatek: Add Mediatek CMDQ driver

This patch is first version of Mediatek Command Queue(CMDQ) driver. The
CMDQ is used to help write registers with critical time limitation,
such as updating display configuration during the vblank. It controls
Global Command Engine (GCE) hardware to achieve this requirement.
Currently, CMDQ only supports display related hardwares, but we expect
it can be extended to other hardwares for future requirements.

Signed-off-by: Houlong Wei <houlong.wei@mediatek.com>
Signed-off-by: HS Liao <hs.liao@mediatek.com>
Signed-off-by: CK Hu <ck.hu@mediatek.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/Kconfig                  |  10 +
 drivers/mailbox/Makefile                 |   2 +
 drivers/mailbox/mtk-cmdq-mailbox.c       | 569 +++++++++++++++++++++++++++++++
 include/linux/mailbox/mtk-cmdq-mailbox.h |  77 +++++
 4 files changed, 658 insertions(+)
 create mode 100644 drivers/mailbox/mtk-cmdq-mailbox.c
 create mode 100644 include/linux/mailbox/mtk-cmdq-mailbox.h

(limited to 'include/linux')

diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index e63d29a95e76..2bbabc907add 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -189,4 +189,14 @@ config STM32_IPCC
 	  Mailbox implementation for STMicroelectonics STM32 family chips
 	  with hardware for Inter-Processor Communication Controller (IPCC)
 	  between processors. Say Y here if you want to have this support.
+
+config MTK_CMDQ_MBOX
+	tristate "MediaTek CMDQ Mailbox Support"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	select MTK_INFRACFG
+	help
+	  Say yes here to add support for the MediaTek Command Queue (CMDQ)
+	  mailbox driver. The CMDQ is used to help read/write registers with
+	  critical time limitation, such as updating display configuration
+	  during the vblank.
 endif
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 4d501bea7863..4b0080480661 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -40,3 +40,5 @@ obj-$(CONFIG_QCOM_APCS_IPC)	+= qcom-apcs-ipc-mailbox.o
 obj-$(CONFIG_TEGRA_HSP_MBOX)	+= tegra-hsp.o
 
 obj-$(CONFIG_STM32_IPCC) 	+= stm32-ipcc.o
+
+obj-$(CONFIG_MTK_CMDQ_MBOX)	+= mtk-cmdq-mailbox.o
diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
new file mode 100644
index 000000000000..6f92c5ee12f7
--- /dev/null
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2018 MediaTek Inc.
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/mailbox_controller.h>
+#include <linux/mailbox/mtk-cmdq-mailbox.h>
+#include <linux/of_device.h>
+
+#define CMDQ_OP_CODE_MASK		(0xff << CMDQ_OP_CODE_SHIFT)
+#define CMDQ_IRQ_MASK			0xffff
+#define CMDQ_NUM_CMD(t)			(t->cmd_buf_size / CMDQ_INST_SIZE)
+
+#define CMDQ_CURR_IRQ_STATUS		0x10
+#define CMDQ_THR_SLOT_CYCLES		0x30
+#define CMDQ_THR_BASE			0x100
+#define CMDQ_THR_SIZE			0x80
+#define CMDQ_THR_WARM_RESET		0x00
+#define CMDQ_THR_ENABLE_TASK		0x04
+#define CMDQ_THR_SUSPEND_TASK		0x08
+#define CMDQ_THR_CURR_STATUS		0x0c
+#define CMDQ_THR_IRQ_STATUS		0x10
+#define CMDQ_THR_IRQ_ENABLE		0x14
+#define CMDQ_THR_CURR_ADDR		0x20
+#define CMDQ_THR_END_ADDR		0x24
+#define CMDQ_THR_WAIT_TOKEN		0x30
+#define CMDQ_THR_PRIORITY		0x40
+
+#define CMDQ_THR_ACTIVE_SLOT_CYCLES	0x3200
+#define CMDQ_THR_ENABLED		0x1
+#define CMDQ_THR_DISABLED		0x0
+#define CMDQ_THR_SUSPEND		0x1
+#define CMDQ_THR_RESUME			0x0
+#define CMDQ_THR_STATUS_SUSPENDED	BIT(1)
+#define CMDQ_THR_DO_WARM_RESET		BIT(0)
+#define CMDQ_THR_IRQ_DONE		0x1
+#define CMDQ_THR_IRQ_ERROR		0x12
+#define CMDQ_THR_IRQ_EN			(CMDQ_THR_IRQ_ERROR | CMDQ_THR_IRQ_DONE)
+#define CMDQ_THR_IS_WAITING		BIT(31)
+
+#define CMDQ_JUMP_BY_OFFSET		0x10000000
+#define CMDQ_JUMP_BY_PA			0x10000001
+
+struct cmdq_thread {
+	struct mbox_chan	*chan;
+	void __iomem		*base;
+	struct list_head	task_busy_list;
+	u32			priority;
+	bool			atomic_exec;
+};
+
+struct cmdq_task {
+	struct cmdq		*cmdq;
+	struct list_head	list_entry;
+	dma_addr_t		pa_base;
+	struct cmdq_thread	*thread;
+	struct cmdq_pkt		*pkt; /* the packet sent from mailbox client */
+};
+
+struct cmdq {
+	struct mbox_controller	mbox;
+	void __iomem		*base;
+	u32			irq;
+	u32			thread_nr;
+	struct cmdq_thread	*thread;
+	struct clk		*clock;
+	bool			suspended;
+};
+
+static int cmdq_thread_suspend(struct cmdq *cmdq, struct cmdq_thread *thread)
+{
+	u32 status;
+
+	writel(CMDQ_THR_SUSPEND, thread->base + CMDQ_THR_SUSPEND_TASK);
+
+	/* If already disabled, treat as suspended successful. */
+	if (!(readl(thread->base + CMDQ_THR_ENABLE_TASK) & CMDQ_THR_ENABLED))
+		return 0;
+
+	if (readl_poll_timeout_atomic(thread->base + CMDQ_THR_CURR_STATUS,
+			status, status & CMDQ_THR_STATUS_SUSPENDED, 0, 10)) {
+		dev_err(cmdq->mbox.dev, "suspend GCE thread 0x%x failed\n",
+			(u32)(thread->base - cmdq->base));
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static void cmdq_thread_resume(struct cmdq_thread *thread)
+{
+	writel(CMDQ_THR_RESUME, thread->base + CMDQ_THR_SUSPEND_TASK);
+}
+
+static void cmdq_init(struct cmdq *cmdq)
+{
+	WARN_ON(clk_enable(cmdq->clock) < 0);
+	writel(CMDQ_THR_ACTIVE_SLOT_CYCLES, cmdq->base + CMDQ_THR_SLOT_CYCLES);
+	clk_disable(cmdq->clock);
+}
+
+static int cmdq_thread_reset(struct cmdq *cmdq, struct cmdq_thread *thread)
+{
+	u32 warm_reset;
+
+	writel(CMDQ_THR_DO_WARM_RESET, thread->base + CMDQ_THR_WARM_RESET);
+	if (readl_poll_timeout_atomic(thread->base + CMDQ_THR_WARM_RESET,
+			warm_reset, !(warm_reset & CMDQ_THR_DO_WARM_RESET),
+			0, 10)) {
+		dev_err(cmdq->mbox.dev, "reset GCE thread 0x%x failed\n",
+			(u32)(thread->base - cmdq->base));
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static void cmdq_thread_disable(struct cmdq *cmdq, struct cmdq_thread *thread)
+{
+	cmdq_thread_reset(cmdq, thread);
+	writel(CMDQ_THR_DISABLED, thread->base + CMDQ_THR_ENABLE_TASK);
+}
+
+/* notify GCE to re-fetch commands by setting GCE thread PC */
+static void cmdq_thread_invalidate_fetched_data(struct cmdq_thread *thread)
+{
+	writel(readl(thread->base + CMDQ_THR_CURR_ADDR),
+	       thread->base + CMDQ_THR_CURR_ADDR);
+}
+
+static void cmdq_task_insert_into_thread(struct cmdq_task *task)
+{
+	struct device *dev = task->cmdq->mbox.dev;
+	struct cmdq_thread *thread = task->thread;
+	struct cmdq_task *prev_task = list_last_entry(
+			&thread->task_busy_list, typeof(*task), list_entry);
+	u64 *prev_task_base = prev_task->pkt->va_base;
+
+	/* let previous task jump to this task */
+	dma_sync_single_for_cpu(dev, prev_task->pa_base,
+				prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE);
+	prev_task_base[CMDQ_NUM_CMD(prev_task->pkt) - 1] =
+		(u64)CMDQ_JUMP_BY_PA << 32 | task->pa_base;
+	dma_sync_single_for_device(dev, prev_task->pa_base,
+				   prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE);
+
+	cmdq_thread_invalidate_fetched_data(thread);
+}
+
+static bool cmdq_command_is_wfe(u64 cmd)
+{
+	u64 wfe_option = CMDQ_WFE_UPDATE | CMDQ_WFE_WAIT | CMDQ_WFE_WAIT_VALUE;
+	u64 wfe_op = (u64)(CMDQ_CODE_WFE << CMDQ_OP_CODE_SHIFT) << 32;
+	u64 wfe_mask = (u64)CMDQ_OP_CODE_MASK << 32 | 0xffffffff;
+
+	return ((cmd & wfe_mask) == (wfe_op | wfe_option));
+}
+
+/* we assume tasks in the same display GCE thread are waiting the same event. */
+static void cmdq_task_remove_wfe(struct cmdq_task *task)
+{
+	struct device *dev = task->cmdq->mbox.dev;
+	u64 *base = task->pkt->va_base;
+	int i;
+
+	dma_sync_single_for_cpu(dev, task->pa_base, task->pkt->cmd_buf_size,
+				DMA_TO_DEVICE);
+	for (i = 0; i < CMDQ_NUM_CMD(task->pkt); i++)
+		if (cmdq_command_is_wfe(base[i]))
+			base[i] = (u64)CMDQ_JUMP_BY_OFFSET << 32 |
+				  CMDQ_JUMP_PASS;
+	dma_sync_single_for_device(dev, task->pa_base, task->pkt->cmd_buf_size,
+				   DMA_TO_DEVICE);
+}
+
+static bool cmdq_thread_is_in_wfe(struct cmdq_thread *thread)
+{
+	return readl(thread->base + CMDQ_THR_WAIT_TOKEN) & CMDQ_THR_IS_WAITING;
+}
+
+static void cmdq_thread_wait_end(struct cmdq_thread *thread,
+				 unsigned long end_pa)
+{
+	struct device *dev = thread->chan->mbox->dev;
+	unsigned long curr_pa;
+
+	if (readl_poll_timeout_atomic(thread->base + CMDQ_THR_CURR_ADDR,
+			curr_pa, curr_pa == end_pa, 1, 20))
+		dev_err(dev, "GCE thread cannot run to end.\n");
+}
+
+static void cmdq_task_exec_done(struct cmdq_task *task, enum cmdq_cb_status sta)
+{
+	struct cmdq_task_cb *cb = &task->pkt->async_cb;
+	struct cmdq_cb_data data;
+
+	WARN_ON(cb->cb == (cmdq_async_flush_cb)NULL);
+	data.sta = sta;
+	data.data = cb->data;
+	cb->cb(data);
+
+	list_del(&task->list_entry);
+}
+
+static void cmdq_task_handle_error(struct cmdq_task *task)
+{
+	struct cmdq_thread *thread = task->thread;
+	struct cmdq_task *next_task;
+
+	dev_err(task->cmdq->mbox.dev, "task 0x%p error\n", task);
+	WARN_ON(cmdq_thread_suspend(task->cmdq, thread) < 0);
+	next_task = list_first_entry_or_null(&thread->task_busy_list,
+			struct cmdq_task, list_entry);
+	if (next_task)
+		writel(next_task->pa_base, thread->base + CMDQ_THR_CURR_ADDR);
+	cmdq_thread_resume(thread);
+}
+
+static void cmdq_thread_irq_handler(struct cmdq *cmdq,
+				    struct cmdq_thread *thread)
+{
+	struct cmdq_task *task, *tmp, *curr_task = NULL;
+	u32 curr_pa, irq_flag, task_end_pa;
+	bool err;
+
+	irq_flag = readl(thread->base + CMDQ_THR_IRQ_STATUS);
+	writel(~irq_flag, thread->base + CMDQ_THR_IRQ_STATUS);
+
+	/*
+	 * When ISR call this function, another CPU core could run
+	 * "release task" right before we acquire the spin lock, and thus
+	 * reset / disable this GCE thread, so we need to check the enable
+	 * bit of this GCE thread.
+	 */
+	if (!(readl(thread->base + CMDQ_THR_ENABLE_TASK) & CMDQ_THR_ENABLED))
+		return;
+
+	if (irq_flag & CMDQ_THR_IRQ_ERROR)
+		err = true;
+	else if (irq_flag & CMDQ_THR_IRQ_DONE)
+		err = false;
+	else
+		return;
+
+	curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR);
+
+	list_for_each_entry_safe(task, tmp, &thread->task_busy_list,
+				 list_entry) {
+		task_end_pa = task->pa_base + task->pkt->cmd_buf_size;
+		if (curr_pa >= task->pa_base && curr_pa < task_end_pa)
+			curr_task = task;
+
+		if (!curr_task || curr_pa == task_end_pa - CMDQ_INST_SIZE) {
+			cmdq_task_exec_done(task, CMDQ_CB_NORMAL);
+			kfree(task);
+		} else if (err) {
+			cmdq_task_exec_done(task, CMDQ_CB_ERROR);
+			cmdq_task_handle_error(curr_task);
+			kfree(task);
+		}
+
+		if (curr_task)
+			break;
+	}
+
+	if (list_empty(&thread->task_busy_list)) {
+		cmdq_thread_disable(cmdq, thread);
+		clk_disable(cmdq->clock);
+	}
+}
+
+static irqreturn_t cmdq_irq_handler(int irq, void *dev)
+{
+	struct cmdq *cmdq = dev;
+	unsigned long irq_status, flags = 0L;
+	int bit;
+
+	irq_status = readl(cmdq->base + CMDQ_CURR_IRQ_STATUS) & CMDQ_IRQ_MASK;
+	if (!(irq_status ^ CMDQ_IRQ_MASK))
+		return IRQ_NONE;
+
+	for_each_clear_bit(bit, &irq_status, fls(CMDQ_IRQ_MASK)) {
+		struct cmdq_thread *thread = &cmdq->thread[bit];
+
+		spin_lock_irqsave(&thread->chan->lock, flags);
+		cmdq_thread_irq_handler(cmdq, thread);
+		spin_unlock_irqrestore(&thread->chan->lock, flags);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int cmdq_suspend(struct device *dev)
+{
+	struct cmdq *cmdq = dev_get_drvdata(dev);
+	struct cmdq_thread *thread;
+	int i;
+	bool task_running = false;
+
+	cmdq->suspended = true;
+
+	for (i = 0; i < cmdq->thread_nr; i++) {
+		thread = &cmdq->thread[i];
+		if (!list_empty(&thread->task_busy_list)) {
+			task_running = true;
+			break;
+		}
+	}
+
+	if (task_running)
+		dev_warn(dev, "exist running task(s) in suspend\n");
+
+	clk_unprepare(cmdq->clock);
+
+	return 0;
+}
+
+static int cmdq_resume(struct device *dev)
+{
+	struct cmdq *cmdq = dev_get_drvdata(dev);
+
+	WARN_ON(clk_prepare(cmdq->clock) < 0);
+	cmdq->suspended = false;
+	return 0;
+}
+
+static int cmdq_remove(struct platform_device *pdev)
+{
+	struct cmdq *cmdq = platform_get_drvdata(pdev);
+
+	mbox_controller_unregister(&cmdq->mbox);
+	clk_unprepare(cmdq->clock);
+
+	if (cmdq->mbox.chans)
+		devm_kfree(&pdev->dev, cmdq->mbox.chans);
+
+	if (cmdq->thread)
+		devm_kfree(&pdev->dev, cmdq->thread);
+
+	devm_kfree(&pdev->dev, cmdq);
+
+	return 0;
+}
+
+static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data)
+{
+	struct cmdq_pkt *pkt = (struct cmdq_pkt *)data;
+	struct cmdq_thread *thread = (struct cmdq_thread *)chan->con_priv;
+	struct cmdq *cmdq = dev_get_drvdata(chan->mbox->dev);
+	struct cmdq_task *task;
+	unsigned long curr_pa, end_pa;
+
+	/* Client should not flush new tasks if suspended. */
+	WARN_ON(cmdq->suspended);
+
+	task = kzalloc(sizeof(*task), GFP_ATOMIC);
+	task->cmdq = cmdq;
+	INIT_LIST_HEAD(&task->list_entry);
+	task->pa_base = pkt->pa_base;
+	task->thread = thread;
+	task->pkt = pkt;
+
+	if (list_empty(&thread->task_busy_list)) {
+		WARN_ON(clk_enable(cmdq->clock) < 0);
+		WARN_ON(cmdq_thread_reset(cmdq, thread) < 0);
+
+		writel(task->pa_base, thread->base + CMDQ_THR_CURR_ADDR);
+		writel(task->pa_base + pkt->cmd_buf_size,
+		       thread->base + CMDQ_THR_END_ADDR);
+		writel(thread->priority, thread->base + CMDQ_THR_PRIORITY);
+		writel(CMDQ_THR_IRQ_EN, thread->base + CMDQ_THR_IRQ_ENABLE);
+		writel(CMDQ_THR_ENABLED, thread->base + CMDQ_THR_ENABLE_TASK);
+	} else {
+		WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0);
+		curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR);
+		end_pa = readl(thread->base + CMDQ_THR_END_ADDR);
+
+		/*
+		 * Atomic execution should remove the following wfe, i.e. only
+		 * wait event at first task, and prevent to pause when running.
+		 */
+		if (thread->atomic_exec) {
+			/* GCE is executing if command is not WFE */
+			if (!cmdq_thread_is_in_wfe(thread)) {
+				cmdq_thread_resume(thread);
+				cmdq_thread_wait_end(thread, end_pa);
+				WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0);
+				/* set to this task directly */
+				writel(task->pa_base,
+				       thread->base + CMDQ_THR_CURR_ADDR);
+			} else {
+				cmdq_task_insert_into_thread(task);
+				cmdq_task_remove_wfe(task);
+				smp_mb(); /* modify jump before enable thread */
+			}
+		} else {
+			/* check boundary */
+			if (curr_pa == end_pa - CMDQ_INST_SIZE ||
+			    curr_pa == end_pa) {
+				/* set to this task directly */
+				writel(task->pa_base,
+				       thread->base + CMDQ_THR_CURR_ADDR);
+			} else {
+				cmdq_task_insert_into_thread(task);
+				smp_mb(); /* modify jump before enable thread */
+			}
+		}
+		writel(task->pa_base + pkt->cmd_buf_size,
+		       thread->base + CMDQ_THR_END_ADDR);
+		cmdq_thread_resume(thread);
+	}
+	list_move_tail(&task->list_entry, &thread->task_busy_list);
+
+	return 0;
+}
+
+static int cmdq_mbox_startup(struct mbox_chan *chan)
+{
+	return 0;
+}
+
+static void cmdq_mbox_shutdown(struct mbox_chan *chan)
+{
+}
+
+static const struct mbox_chan_ops cmdq_mbox_chan_ops = {
+	.send_data = cmdq_mbox_send_data,
+	.startup = cmdq_mbox_startup,
+	.shutdown = cmdq_mbox_shutdown,
+};
+
+static struct mbox_chan *cmdq_xlate(struct mbox_controller *mbox,
+		const struct of_phandle_args *sp)
+{
+	int ind = sp->args[0];
+	struct cmdq_thread *thread;
+
+	if (ind >= mbox->num_chans)
+		return ERR_PTR(-EINVAL);
+
+	thread = (struct cmdq_thread *)mbox->chans[ind].con_priv;
+	thread->priority = sp->args[1];
+	thread->atomic_exec = (sp->args[2] != 0);
+	thread->chan = &mbox->chans[ind];
+
+	return &mbox->chans[ind];
+}
+
+static int cmdq_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	struct cmdq *cmdq;
+	int err, i;
+
+	cmdq = devm_kzalloc(dev, sizeof(*cmdq), GFP_KERNEL);
+	if (!cmdq)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	cmdq->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(cmdq->base)) {
+		dev_err(dev, "failed to ioremap gce\n");
+		return PTR_ERR(cmdq->base);
+	}
+
+	cmdq->irq = platform_get_irq(pdev, 0);
+	if (!cmdq->irq) {
+		dev_err(dev, "failed to get irq\n");
+		return -EINVAL;
+	}
+	err = devm_request_irq(dev, cmdq->irq, cmdq_irq_handler, IRQF_SHARED,
+			       "mtk_cmdq", cmdq);
+	if (err < 0) {
+		dev_err(dev, "failed to register ISR (%d)\n", err);
+		return err;
+	}
+
+	dev_dbg(dev, "cmdq device: addr:0x%p, va:0x%p, irq:%d\n",
+		dev, cmdq->base, cmdq->irq);
+
+	cmdq->clock = devm_clk_get(dev, "gce");
+	if (IS_ERR(cmdq->clock)) {
+		dev_err(dev, "failed to get gce clk\n");
+		return PTR_ERR(cmdq->clock);
+	}
+
+	cmdq->thread_nr = (u32)(unsigned long)of_device_get_match_data(dev);
+	cmdq->mbox.dev = dev;
+	cmdq->mbox.chans = devm_kcalloc(dev, cmdq->thread_nr,
+					sizeof(*cmdq->mbox.chans), GFP_KERNEL);
+	if (!cmdq->mbox.chans)
+		return -ENOMEM;
+
+	cmdq->mbox.num_chans = cmdq->thread_nr;
+	cmdq->mbox.ops = &cmdq_mbox_chan_ops;
+	cmdq->mbox.of_xlate = cmdq_xlate;
+
+	/* make use of TXDONE_BY_ACK */
+	cmdq->mbox.txdone_irq = false;
+	cmdq->mbox.txdone_poll = false;
+
+	cmdq->thread = devm_kcalloc(dev, cmdq->thread_nr,
+					sizeof(*cmdq->thread), GFP_KERNEL);
+	if (!cmdq->thread)
+		return -ENOMEM;
+
+	for (i = 0; i < cmdq->thread_nr; i++) {
+		cmdq->thread[i].base = cmdq->base + CMDQ_THR_BASE +
+				CMDQ_THR_SIZE * i;
+		INIT_LIST_HEAD(&cmdq->thread[i].task_busy_list);
+		cmdq->mbox.chans[i].con_priv = (void *)&cmdq->thread[i];
+	}
+
+	err = mbox_controller_register(&cmdq->mbox);
+	if (err < 0) {
+		dev_err(dev, "failed to register mailbox: %d\n", err);
+		return err;
+	}
+
+	platform_set_drvdata(pdev, cmdq);
+	WARN_ON(clk_prepare(cmdq->clock) < 0);
+
+	cmdq_init(cmdq);
+
+	return 0;
+}
+
+static const struct dev_pm_ops cmdq_pm_ops = {
+	.suspend = cmdq_suspend,
+	.resume = cmdq_resume,
+};
+
+static const struct of_device_id cmdq_of_ids[] = {
+	{.compatible = "mediatek,mt8173-gce", .data = (void *)16},
+	{}
+};
+
+static struct platform_driver cmdq_drv = {
+	.probe = cmdq_probe,
+	.remove = cmdq_remove,
+	.driver = {
+		.name = "mtk_cmdq",
+		.pm = &cmdq_pm_ops,
+		.of_match_table = cmdq_of_ids,
+	}
+};
+
+static int __init cmdq_drv_init(void)
+{
+	return platform_driver_register(&cmdq_drv);
+}
+
+static void __exit cmdq_drv_exit(void)
+{
+	platform_driver_unregister(&cmdq_drv);
+}
+
+subsys_initcall(cmdq_drv_init);
+module_exit(cmdq_drv_exit);
diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h
new file mode 100644
index 000000000000..ccb73422c2fa
--- /dev/null
+++ b/include/linux/mailbox/mtk-cmdq-mailbox.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 MediaTek Inc.
+ *
+ */
+
+#ifndef __MTK_CMDQ_MAILBOX_H__
+#define __MTK_CMDQ_MAILBOX_H__
+
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define CMDQ_INST_SIZE			8 /* instruction is 64-bit */
+#define CMDQ_SUBSYS_SHIFT		16
+#define CMDQ_OP_CODE_SHIFT		24
+#define CMDQ_JUMP_PASS			CMDQ_INST_SIZE
+
+#define CMDQ_WFE_UPDATE			BIT(31)
+#define CMDQ_WFE_WAIT			BIT(15)
+#define CMDQ_WFE_WAIT_VALUE		0x1
+
+/*
+ * CMDQ_CODE_MASK:
+ *   set write mask
+ *   format: op mask
+ * CMDQ_CODE_WRITE:
+ *   write value into target register
+ *   format: op subsys address value
+ * CMDQ_CODE_JUMP:
+ *   jump by offset
+ *   format: op offset
+ * CMDQ_CODE_WFE:
+ *   wait for event and clear
+ *   it is just clear if no wait
+ *   format: [wait]  op event update:1 to_wait:1 wait:1
+ *           [clear] op event update:1 to_wait:0 wait:0
+ * CMDQ_CODE_EOC:
+ *   end of command
+ *   format: op irq_flag
+ */
+enum cmdq_code {
+	CMDQ_CODE_MASK = 0x02,
+	CMDQ_CODE_WRITE = 0x04,
+	CMDQ_CODE_JUMP = 0x10,
+	CMDQ_CODE_WFE = 0x20,
+	CMDQ_CODE_EOC = 0x40,
+};
+
+enum cmdq_cb_status {
+	CMDQ_CB_NORMAL = 0,
+	CMDQ_CB_ERROR
+};
+
+struct cmdq_cb_data {
+	enum cmdq_cb_status	sta;
+	void			*data;
+};
+
+typedef void (*cmdq_async_flush_cb)(struct cmdq_cb_data data);
+
+struct cmdq_task_cb {
+	cmdq_async_flush_cb	cb;
+	void			*data;
+};
+
+struct cmdq_pkt {
+	void			*va_base;
+	dma_addr_t		pa_base;
+	size_t			cmd_buf_size; /* command occupied size */
+	size_t			buf_size; /* real buffer size */
+	struct cmdq_task_cb	cb;
+	struct cmdq_task_cb	async_cb;
+	void			*cl;
+};
+
+#endif /* __MTK_CMDQ_MAILBOX_H__ */
-- 
cgit v1.2.3


From 7cca1ed0bb248b8d5768d17f5afe297a832d66c0 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Tue, 31 Jul 2018 20:25:00 +0200
Subject: netfilter: nf_osf: move nf_osf_fingers to non-uapi header file

All warnings (new ones prefixed by >>):

>> ./usr/include/linux/netfilter/nf_osf.h:73: userspace cannot reference function or variable defined in the kernel

Fixes: f9324952088f ("netfilter: nfnetlink_osf: extract nfnetlink_subsystem code from xt_osf.c")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_osf.h      | 2 ++
 include/uapi/linux/netfilter/nf_osf.h | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h
index aee460fcbd31..3e455d6f94d5 100644
--- a/include/linux/netfilter/nf_osf.h
+++ b/include/linux/netfilter/nf_osf.h
@@ -25,6 +25,8 @@ enum osf_fmatch_states {
 	FMATCH_OPT_WRONG,
 };
 
+extern struct list_head nf_osf_fingers[2];
+
 struct nf_osf_finger {
 	struct rcu_head			rcu_head;
 	struct list_head		finger_entry;
diff --git a/include/uapi/linux/netfilter/nf_osf.h b/include/uapi/linux/netfilter/nf_osf.h
index cc2487ff74f6..3b93fbb9fc24 100644
--- a/include/uapi/linux/netfilter/nf_osf.h
+++ b/include/uapi/linux/netfilter/nf_osf.h
@@ -70,8 +70,6 @@ struct nf_osf_nlmsg {
 	struct tcphdr			tcp;
 };
 
-extern struct list_head nf_osf_fingers[2];
-
 /* Defines for IANA option kinds */
 enum iana_options {
 	OSFOPT_EOL = 0,		/* End of options */
-- 
cgit v1.2.3


From ddba40be59c9be4059288464f8e6f38fbba27495 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Tue, 31 Jul 2018 20:25:01 +0200
Subject: netfilter: nfnetlink_osf: rename nf_osf header file to nfnetlink_osf

The first client of the nf_osf.h userspace header is nft_osf, coming in
this batch, rename it to nfnetlink_osf.h as there are no userspace
clients for this yet, hence this looks consistent with other nfnetlink
subsystem.

Suggested-by: Jan Engelhardt <jengelh@inai.de>
Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_osf.h             |  44 -----------
 include/linux/netfilter/nfnetlink_osf.h      |  44 +++++++++++
 include/uapi/linux/netfilter/nf_osf.h        | 106 ---------------------------
 include/uapi/linux/netfilter/nfnetlink_osf.h | 106 +++++++++++++++++++++++++++
 include/uapi/linux/netfilter/xt_osf.h        |   2 +-
 net/netfilter/nfnetlink_osf.c                |   2 +-
 net/netfilter/nft_osf.c                      |   2 +-
 7 files changed, 153 insertions(+), 153 deletions(-)
 delete mode 100644 include/linux/netfilter/nf_osf.h
 create mode 100644 include/linux/netfilter/nfnetlink_osf.h
 delete mode 100644 include/uapi/linux/netfilter/nf_osf.h
 create mode 100644 include/uapi/linux/netfilter/nfnetlink_osf.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h
deleted file mode 100644
index 3e455d6f94d5..000000000000
--- a/include/linux/netfilter/nf_osf.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NFOSF_H
-#define _NFOSF_H
-
-#include <uapi/linux/netfilter/nf_osf.h>
-
-/* Initial window size option state machine: multiple of mss, mtu or
- * plain numeric value. Can also be made as plain numeric value which
- * is not a multiple of specified value.
- */
-enum nf_osf_window_size_options {
-	OSF_WSS_PLAIN   = 0,
-	OSF_WSS_MSS,
-	OSF_WSS_MTU,
-	OSF_WSS_MODULO,
-	OSF_WSS_MAX,
-};
-
-enum osf_fmatch_states {
-	/* Packet does not match the fingerprint */
-	FMATCH_WRONG = 0,
-	/* Packet matches the fingerprint */
-	FMATCH_OK,
-	/* Options do not match the fingerprint, but header does */
-	FMATCH_OPT_WRONG,
-};
-
-extern struct list_head nf_osf_fingers[2];
-
-struct nf_osf_finger {
-	struct rcu_head			rcu_head;
-	struct list_head		finger_entry;
-	struct nf_osf_user_finger	finger;
-};
-
-bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
-		  int hooknum, struct net_device *in, struct net_device *out,
-		  const struct nf_osf_info *info, struct net *net,
-		  const struct list_head *nf_osf_fingers);
-
-const char *nf_osf_find(const struct sk_buff *skb,
-                        const struct list_head *nf_osf_fingers);
-
-#endif /* _NFOSF_H */
diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h
new file mode 100644
index 000000000000..a7311bc03d3a
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink_osf.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NFOSF_H
+#define _NFOSF_H
+
+#include <uapi/linux/netfilter/nfnetlink_osf.h>
+
+/* Initial window size option state machine: multiple of mss, mtu or
+ * plain numeric value. Can also be made as plain numeric value which
+ * is not a multiple of specified value.
+ */
+enum nf_osf_window_size_options {
+	OSF_WSS_PLAIN   = 0,
+	OSF_WSS_MSS,
+	OSF_WSS_MTU,
+	OSF_WSS_MODULO,
+	OSF_WSS_MAX,
+};
+
+enum osf_fmatch_states {
+	/* Packet does not match the fingerprint */
+	FMATCH_WRONG = 0,
+	/* Packet matches the fingerprint */
+	FMATCH_OK,
+	/* Options do not match the fingerprint, but header does */
+	FMATCH_OPT_WRONG,
+};
+
+extern struct list_head nf_osf_fingers[2];
+
+struct nf_osf_finger {
+	struct rcu_head			rcu_head;
+	struct list_head		finger_entry;
+	struct nf_osf_user_finger	finger;
+};
+
+bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
+		  int hooknum, struct net_device *in, struct net_device *out,
+		  const struct nf_osf_info *info, struct net *net,
+		  const struct list_head *nf_osf_fingers);
+
+const char *nf_osf_find(const struct sk_buff *skb,
+                        const struct list_head *nf_osf_fingers);
+
+#endif /* _NFOSF_H */
diff --git a/include/uapi/linux/netfilter/nf_osf.h b/include/uapi/linux/netfilter/nf_osf.h
deleted file mode 100644
index 3b93fbb9fc24..000000000000
--- a/include/uapi/linux/netfilter/nf_osf.h
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef _NF_OSF_H
-#define _NF_OSF_H
-
-#include <linux/types.h>
-
-#define MAXGENRELEN	32
-
-#define NF_OSF_GENRE	(1 << 0)
-#define NF_OSF_TTL	(1 << 1)
-#define NF_OSF_LOG	(1 << 2)
-#define NF_OSF_INVERT	(1 << 3)
-
-#define NF_OSF_LOGLEVEL_ALL		0	/* log all matched fingerprints */
-#define NF_OSF_LOGLEVEL_FIRST		1	/* log only the first matced fingerprint */
-#define NF_OSF_LOGLEVEL_ALL_KNOWN	2	/* do not log unknown packets */
-
-#define NF_OSF_TTL_TRUE			0	/* True ip and fingerprint TTL comparison */
-
-/* Check if ip TTL is less than fingerprint one */
-#define NF_OSF_TTL_LESS			1
-
-/* Do not compare ip and fingerprint TTL at all */
-#define NF_OSF_TTL_NOCHECK		2
-
-#define NF_OSF_FLAGMASK		(NF_OSF_GENRE | NF_OSF_TTL | \
-				 NF_OSF_LOG | NF_OSF_INVERT)
-/* Wildcard MSS (kind of).
- * It is used to implement a state machine for the different wildcard values
- * of the MSS and window sizes.
- */
-struct nf_osf_wc {
-	__u32	wc;
-	__u32	val;
-};
-
-/* This struct represents IANA options
- * http://www.iana.org/assignments/tcp-parameters
- */
-struct nf_osf_opt {
-	__u16			kind, length;
-	struct nf_osf_wc	wc;
-};
-
-struct nf_osf_info {
-	char	genre[MAXGENRELEN];
-	__u32	len;
-	__u32	flags;
-	__u32	loglevel;
-	__u32	ttl;
-};
-
-struct nf_osf_user_finger {
-	struct nf_osf_wc	wss;
-
-	__u8	ttl, df;
-	__u16	ss, mss;
-	__u16	opt_num;
-
-	char	genre[MAXGENRELEN];
-	char	version[MAXGENRELEN];
-	char	subtype[MAXGENRELEN];
-
-	/* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
-	struct nf_osf_opt	opt[MAX_IPOPTLEN];
-};
-
-struct nf_osf_nlmsg {
-	struct nf_osf_user_finger	f;
-	struct iphdr			ip;
-	struct tcphdr			tcp;
-};
-
-/* Defines for IANA option kinds */
-enum iana_options {
-	OSFOPT_EOL = 0,		/* End of options */
-	OSFOPT_NOP,		/* NOP */
-	OSFOPT_MSS,		/* Maximum segment size */
-	OSFOPT_WSO,		/* Window scale option */
-	OSFOPT_SACKP,		/* SACK permitted */
-	OSFOPT_SACK,		/* SACK */
-	OSFOPT_ECHO,
-	OSFOPT_ECHOREPLY,
-	OSFOPT_TS,		/* Timestamp option */
-	OSFOPT_POCP,		/* Partial Order Connection Permitted */
-	OSFOPT_POSP,		/* Partial Order Service Profile */
-
-	/* Others are not used in the current OSF */
-	OSFOPT_EMPTY = 255,
-};
-
-enum nf_osf_attr_type {
-	OSF_ATTR_UNSPEC,
-	OSF_ATTR_FINGER,
-	OSF_ATTR_MAX,
-};
-
-/*
- * Add/remove fingerprint from the kernel.
- */
-enum nf_osf_msg_types {
-	OSF_MSG_ADD,
-	OSF_MSG_REMOVE,
-	OSF_MSG_MAX,
-};
-
-#endif /* _NF_OSF_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink_osf.h b/include/uapi/linux/netfilter/nfnetlink_osf.h
new file mode 100644
index 000000000000..3b93fbb9fc24
--- /dev/null
+++ b/include/uapi/linux/netfilter/nfnetlink_osf.h
@@ -0,0 +1,106 @@
+#ifndef _NF_OSF_H
+#define _NF_OSF_H
+
+#include <linux/types.h>
+
+#define MAXGENRELEN	32
+
+#define NF_OSF_GENRE	(1 << 0)
+#define NF_OSF_TTL	(1 << 1)
+#define NF_OSF_LOG	(1 << 2)
+#define NF_OSF_INVERT	(1 << 3)
+
+#define NF_OSF_LOGLEVEL_ALL		0	/* log all matched fingerprints */
+#define NF_OSF_LOGLEVEL_FIRST		1	/* log only the first matced fingerprint */
+#define NF_OSF_LOGLEVEL_ALL_KNOWN	2	/* do not log unknown packets */
+
+#define NF_OSF_TTL_TRUE			0	/* True ip and fingerprint TTL comparison */
+
+/* Check if ip TTL is less than fingerprint one */
+#define NF_OSF_TTL_LESS			1
+
+/* Do not compare ip and fingerprint TTL at all */
+#define NF_OSF_TTL_NOCHECK		2
+
+#define NF_OSF_FLAGMASK		(NF_OSF_GENRE | NF_OSF_TTL | \
+				 NF_OSF_LOG | NF_OSF_INVERT)
+/* Wildcard MSS (kind of).
+ * It is used to implement a state machine for the different wildcard values
+ * of the MSS and window sizes.
+ */
+struct nf_osf_wc {
+	__u32	wc;
+	__u32	val;
+};
+
+/* This struct represents IANA options
+ * http://www.iana.org/assignments/tcp-parameters
+ */
+struct nf_osf_opt {
+	__u16			kind, length;
+	struct nf_osf_wc	wc;
+};
+
+struct nf_osf_info {
+	char	genre[MAXGENRELEN];
+	__u32	len;
+	__u32	flags;
+	__u32	loglevel;
+	__u32	ttl;
+};
+
+struct nf_osf_user_finger {
+	struct nf_osf_wc	wss;
+
+	__u8	ttl, df;
+	__u16	ss, mss;
+	__u16	opt_num;
+
+	char	genre[MAXGENRELEN];
+	char	version[MAXGENRELEN];
+	char	subtype[MAXGENRELEN];
+
+	/* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
+	struct nf_osf_opt	opt[MAX_IPOPTLEN];
+};
+
+struct nf_osf_nlmsg {
+	struct nf_osf_user_finger	f;
+	struct iphdr			ip;
+	struct tcphdr			tcp;
+};
+
+/* Defines for IANA option kinds */
+enum iana_options {
+	OSFOPT_EOL = 0,		/* End of options */
+	OSFOPT_NOP,		/* NOP */
+	OSFOPT_MSS,		/* Maximum segment size */
+	OSFOPT_WSO,		/* Window scale option */
+	OSFOPT_SACKP,		/* SACK permitted */
+	OSFOPT_SACK,		/* SACK */
+	OSFOPT_ECHO,
+	OSFOPT_ECHOREPLY,
+	OSFOPT_TS,		/* Timestamp option */
+	OSFOPT_POCP,		/* Partial Order Connection Permitted */
+	OSFOPT_POSP,		/* Partial Order Service Profile */
+
+	/* Others are not used in the current OSF */
+	OSFOPT_EMPTY = 255,
+};
+
+enum nf_osf_attr_type {
+	OSF_ATTR_UNSPEC,
+	OSF_ATTR_FINGER,
+	OSF_ATTR_MAX,
+};
+
+/*
+ * Add/remove fingerprint from the kernel.
+ */
+enum nf_osf_msg_types {
+	OSF_MSG_ADD,
+	OSF_MSG_REMOVE,
+	OSF_MSG_MAX,
+};
+
+#endif /* _NF_OSF_H */
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index a90e90c27cef..c56c59605c2b 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -23,7 +23,7 @@
 #include <linux/types.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
-#include <linux/netfilter/nf_osf.h>
+#include <linux/netfilter/nfnetlink_osf.h>
 
 #define XT_OSF_GENRE		NF_OSF_GENRE
 #define XT_OSF_INVERT		NF_OSF_INVERT
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index ba0fa11869ce..f9dba62c450f 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -18,7 +18,7 @@
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/x_tables.h>
 #include <net/netfilter/nf_log.h>
-#include <linux/netfilter/nf_osf.h>
+#include <linux/netfilter/nfnetlink_osf.h>
 
 /*
  * Indexed by dont-fragment bit.
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index bdacc4cffba4..9b2f3de7be4f 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -2,7 +2,7 @@
 #include <net/tcp.h>
 
 #include <net/netfilter/nf_tables.h>
-#include <linux/netfilter/nf_osf.h>
+#include <linux/netfilter/nfnetlink_osf.h>
 
 #define OSF_GENRE_SIZE		32
 
-- 
cgit v1.2.3


From 94276fa8a2a4c08ccb2e9d55e88b95dc972ccea3 Mon Sep 17 00:00:00 2001
From: Máté Eckl <ecklm94@gmail.com>
Date: Fri, 3 Aug 2018 13:36:13 +0200
Subject: netfilter: bridge: Expose nf_tables bridge hook priorities through
 uapi
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Netfilter exposes standard hook priorities in case of ipv4, ipv6 and
arp but not in case of bridge.

This patch exposes the hook priority values of the bridge family (which are
different from the formerly mentioned) via uapi so that they can be used by
user-space applications just like the others.

Signed-off-by: Máté Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h      | 11 -----------
 include/uapi/linux/netfilter_bridge.h | 11 +++++++++++
 net/bridge/br_netfilter_hooks.c       |  1 +
 net/bridge/netfilter/ebtable_filter.c |  1 +
 net/bridge/netfilter/ebtable_nat.c    |  1 +
 5 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index b671fdfd212b..fa0686500970 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -5,17 +5,6 @@
 #include <uapi/linux/netfilter_bridge.h>
 #include <linux/skbuff.h>
 
-enum nf_br_hook_priorities {
-	NF_BR_PRI_FIRST = INT_MIN,
-	NF_BR_PRI_NAT_DST_BRIDGED = -300,
-	NF_BR_PRI_FILTER_BRIDGED = -200,
-	NF_BR_PRI_BRNF = 0,
-	NF_BR_PRI_NAT_DST_OTHER = 100,
-	NF_BR_PRI_FILTER_OTHER = 200,
-	NF_BR_PRI_NAT_SRC = 300,
-	NF_BR_PRI_LAST = INT_MAX,
-};
-
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 
 int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/include/uapi/linux/netfilter_bridge.h b/include/uapi/linux/netfilter_bridge.h
index 12fb77633f83..156ccd089df1 100644
--- a/include/uapi/linux/netfilter_bridge.h
+++ b/include/uapi/linux/netfilter_bridge.h
@@ -26,4 +26,15 @@
 #define NF_BR_BROUTING		5
 #define NF_BR_NUMHOOKS		6
 
+enum nf_br_hook_priorities {
+	NF_BR_PRI_FIRST = INT_MIN,
+	NF_BR_PRI_NAT_DST_BRIDGED = -300,
+	NF_BR_PRI_FILTER_BRIDGED = -200,
+	NF_BR_PRI_BRNF = 0,
+	NF_BR_PRI_NAT_DST_OTHER = 100,
+	NF_BR_PRI_FILTER_OTHER = 200,
+	NF_BR_PRI_NAT_SRC = 300,
+	NF_BR_PRI_LAST = INT_MAX,
+};
+
 #endif /* _UAPI__LINUX_BRIDGE_NETFILTER_H */
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 9b16eaf33819..6e0dc6bcd32a 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -26,6 +26,7 @@
 #include <linux/if_pppox.h>
 #include <linux/ppp_defs.h>
 #include <linux/netfilter_bridge.h>
+#include <uapi/linux/netfilter_bridge.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_arp.h>
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index c41da5fac84f..550324c516ee 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/netfilter_bridge/ebtables.h>
+#include <uapi/linux/netfilter_bridge.h>
 #include <linux/module.h>
 
 #define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 08df7406ecb3..c0fb3ca518af 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/netfilter_bridge/ebtables.h>
+#include <uapi/linux/netfilter_bridge.h>
 #include <linux/module.h>
 
 #define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \
-- 
cgit v1.2.3


From c2b6d621c4ffe9936adf7a55c8b1c769672c306f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Jun 2018 15:53:17 -0400
Subject: new primitive: discard_new_inode()

	We don't want open-by-handle picking half-set-up in-core
struct inode from e.g. mkdir() having failed halfway through.
In other words, we don't want such inodes returned by iget_locked()
on their way to extinction.  However, we can't just have them
unhashed - otherwise open-by-handle immediately *after* that would've
ended up creating a new in-core inode over the on-disk one that
is in process of being freed right under us.

	Solution: new flag (I_CREATING) set by insert_inode_locked() and
removed by unlock_new_inode() and a new primitive (discard_new_inode())
to be used by such halfway-through-setup failure exits instead of
unlock_new_inode() / iput() combinations.  That primitive unlocks new
inode, but leaves I_CREATING in place.

	iget_locked() treats finding an I_CREATING inode as failure
(-ESTALE, once we sort out the error propagation).
	insert_inode_locked() treats the same as instant -EBUSY.
	ilookup() treats those as icache miss.

[Fix by Dan Carpenter <dan.carpenter@oracle.com> folded in]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c        |  2 +-
 fs/inode.c         | 45 +++++++++++++++++++++++++++++++++++++++++----
 include/linux/fs.h |  6 +++++-
 3 files changed, 47 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index a7d9e7a4c283..11b753d29409 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1892,7 +1892,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
 	spin_lock(&inode->i_lock);
 	__d_instantiate(entry, inode);
 	WARN_ON(!(inode->i_state & I_NEW));
-	inode->i_state &= ~I_NEW;
+	inode->i_state &= ~I_NEW & ~I_CREATING;
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_NEW);
 	spin_unlock(&inode->i_lock);
diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..6cd2e7ba9f4d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -804,6 +804,10 @@ repeat:
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
+		if (unlikely(inode->i_state & I_CREATING)) {
+			spin_unlock(&inode->i_lock);
+			return ERR_PTR(-ESTALE);
+		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
 		return inode;
@@ -831,6 +835,10 @@ repeat:
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
+		if (unlikely(inode->i_state & I_CREATING)) {
+			spin_unlock(&inode->i_lock);
+			return ERR_PTR(-ESTALE);
+		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
 		return inode;
@@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode)
 	lockdep_annotate_inode_mutex_key(inode);
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
-	inode->i_state &= ~I_NEW;
+	inode->i_state &= ~I_NEW & ~I_CREATING;
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_NEW);
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(unlock_new_inode);
 
+void discard_new_inode(struct inode *inode)
+{
+	lockdep_annotate_inode_mutex_key(inode);
+	spin_lock(&inode->i_lock);
+	WARN_ON(!(inode->i_state & I_NEW));
+	inode->i_state &= ~I_NEW;
+	smp_mb();
+	wake_up_bit(&inode->i_state, __I_NEW);
+	spin_unlock(&inode->i_lock);
+	iput(inode);
+}
+EXPORT_SYMBOL(discard_new_inode);
+
 /**
  * lock_two_nondirectories - take two i_mutexes on non-directory objects
  *
@@ -1039,6 +1060,8 @@ again:
 		 * Use the old inode instead of the preallocated one.
 		 */
 		spin_unlock(&inode_hash_lock);
+		if (IS_ERR(old))
+			return NULL;
 		wait_on_inode(old);
 		if (unlikely(inode_unhashed(old))) {
 			iput(old);
@@ -1128,6 +1151,8 @@ again:
 	inode = find_inode_fast(sb, head, ino);
 	spin_unlock(&inode_hash_lock);
 	if (inode) {
+		if (IS_ERR(inode))
+			return NULL;
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
 			iput(inode);
@@ -1165,6 +1190,8 @@ again:
 		 */
 		spin_unlock(&inode_hash_lock);
 		destroy_inode(inode);
+		if (IS_ERR(old))
+			return NULL;
 		inode = old;
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
@@ -1282,7 +1309,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
 	inode = find_inode(sb, head, test, data);
 	spin_unlock(&inode_hash_lock);
 
-	return inode;
+	return IS_ERR(inode) ? NULL : inode;
 }
 EXPORT_SYMBOL(ilookup5_nowait);
 
@@ -1338,6 +1365,8 @@ again:
 	spin_unlock(&inode_hash_lock);
 
 	if (inode) {
+		if (IS_ERR(inode))
+			return NULL;
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
 			iput(inode);
@@ -1421,12 +1450,17 @@ int insert_inode_locked(struct inode *inode)
 		}
 		if (likely(!old)) {
 			spin_lock(&inode->i_lock);
-			inode->i_state |= I_NEW;
+			inode->i_state |= I_NEW | I_CREATING;
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&inode_hash_lock);
 			return 0;
 		}
+		if (unlikely(old->i_state & I_CREATING)) {
+			spin_unlock(&old->i_lock);
+			spin_unlock(&inode_hash_lock);
+			return -EBUSY;
+		}
 		__iget(old);
 		spin_unlock(&old->i_lock);
 		spin_unlock(&inode_hash_lock);
@@ -1443,7 +1477,10 @@ EXPORT_SYMBOL(insert_inode_locked);
 int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
-	struct inode *old = inode_insert5(inode, hashval, test, NULL, data);
+	struct inode *old;
+
+	inode->i_state |= I_CREATING;
+	old = inode_insert5(inode, hashval, test, NULL, data);
 
 	if (old != inode) {
 		iput(old);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..a42600565925 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2016,6 +2016,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
  * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
  *			and work dirs among overlayfs mounts.
  *
+ * I_CREATING		New object's inode in the middle of setting up.
+ *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
 #define I_DIRTY_SYNC		(1 << 0)
@@ -2036,7 +2038,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 #define __I_DIRTY_TIME_EXPIRED	12
 #define I_DIRTY_TIME_EXPIRED	(1 << __I_DIRTY_TIME_EXPIRED)
 #define I_WB_SWITCH		(1 << 13)
-#define I_OVL_INUSE			(1 << 14)
+#define I_OVL_INUSE		(1 << 14)
+#define I_CREATING		(1 << 15)
 
 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
 #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@ -2919,6 +2922,7 @@ extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
 static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
 #endif
 extern void unlock_new_inode(struct inode *);
+extern void discard_new_inode(struct inode *);
 extern unsigned int get_next_ino(void);
 extern void evict_inodes(struct super_block *sb);
 
-- 
cgit v1.2.3


From 5bef915104f32c9d0bb5df6e86a98e31cb524e9a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 29 Jun 2018 19:36:57 -0400
Subject: new helper: inode_fake_hash()

open-coded in a quite a few places...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfs/inode.c     |  2 +-
 fs/jfs/jfs_imap.c  |  8 +-------
 fs/jfs/super.c     |  2 +-
 fs/xfs/xfs_iops.c  |  2 +-
 include/linux/fs.h | 11 +++++++++++
 5 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2a16111d312f..a2dfa1b2a89c 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -541,7 +541,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
 	HFS_I(inode)->rsrc_inode = dir;
 	HFS_I(dir)->rsrc_inode = inode;
 	igrab(dir);
-	hlist_add_fake(&inode->i_hash);
+	inode_fake_hash(inode);
 	mark_inode_dirty(inode);
 	dont_mount(dentry);
 out:
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index f36ef68905a7..93e8c590ff5c 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -491,13 +491,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 	/* release the page */
 	release_metapage(mp);
 
-	/*
-	 * __mark_inode_dirty expects inodes to be hashed.  Since we don't
-	 * want special inodes in the fileset inode space, we make them
-	 * appear hashed, but do not put on any lists.  hlist_del()
-	 * will work fine and require no locking.
-	 */
-	hlist_add_fake(&ip->i_hash);
+	inode_fake_hash(ip);
 
 	return (ip);
 }
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1b9264fd54b6..5403ece57dba 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -581,7 +581,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	inode->i_ino = 0;
 	inode->i_size = i_size_read(sb->s_bdev->bd_inode);
 	inode->i_mapping->a_ops = &jfs_metapage_aops;
-	hlist_add_fake(&inode->i_hash);
+	inode_fake_hash(inode);
 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
 
 	sbi->direct_inode = inode;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 0fa29f39d658..3a75de777843 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1253,7 +1253,7 @@ xfs_setup_inode(
 
 	inode_sb_list_add(inode);
 	/* make the inode look hashed for the writeback code */
-	hlist_add_fake(&inode->i_hash);
+	inode_fake_hash(inode);
 
 	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
 	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a42600565925..43941e230e2b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -684,6 +684,17 @@ static inline int inode_unhashed(struct inode *inode)
 	return hlist_unhashed(&inode->i_hash);
 }
 
+/*
+ * __mark_inode_dirty expects inodes to be hashed.  Since we don't
+ * want special inodes in the fileset inode space, we make them
+ * appear hashed, but do not put on any lists.  hlist_del()
+ * will work fine and require no locking.
+ */
+static inline void inode_fake_hash(struct inode *inode)
+{
+	hlist_add_fake(&inode->i_hash);
+}
+
 /*
  * inode->i_mutex nesting subclasses for the lock validator:
  *
-- 
cgit v1.2.3


From 088fe47ce952542389e604e83f533811750aaf7c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 23 Jul 2018 17:26:49 -0500
Subject: signal: Add calculate_sigpending()

Add a function calculate_sigpending to test to see if any signals are
pending for a new task immediately following fork.  Signals have to
happen either before or after fork.  Today our practice is to push
all of the signals to before the fork, but that has the downside that
frequent or periodic signals can make fork take much much longer than
normal or prevent fork from completing entirely.

So we need move signals that we can after the fork to prevent that.

This updates the code to set TIF_SIGPENDING on a new task if there
are signals or other activities that have moved so that they appear
to happen after the fork.

As the code today restarts if it sees any such activity this won't
immediately have an effect, as there will be no reason for it
to set TIF_SIGPENDING immediately after the fork.

Adding calculate_sigpending means the code in fork can safely be
changed to not always restart if a signal is pending.

The new calculate_sigpending function sets sigpending if there
are pending bits in jobctl, pending signals, the freezer needs
to freeze the new task or the live kernel patching framework
need the new thread to take the slow path to userspace.

I have verified that setting TIF_SIGPENDING does make a new process
take the slow path to userspace before it executes it's first userspace
instruction.

I have looked at the callers of signal_wake_up and the code paths
setting TIF_SIGPENDING and I don't see anything else that needs to be
handled.  The code probably doesn't need to set TIF_SIGPENDING for the
kernel live patching as it uses a separate thread flag as well.  But
at this point it seems safer reuse the recalc_sigpending logic and get
the kernel live patching folks to sort out their story later.

V2: I have moved the test into schedule_tail where siglock can
    be grabbed and recalc_sigpending can be reused directly.
    Further as the last action of setting up a new task this
    guarantees that TIF_SIGPENDING will be properly set in the
    new process.

    The helper calculate_sigpending takes the siglock and
    uncontitionally sets TIF_SIGPENDING and let's recalc_sigpending
    clear TIF_SIGPENDING if it is unnecessary.  This allows reusing
    the existing code and keeps maintenance of the conditions simple.

    Oleg Nesterov <oleg@redhat.com>  suggested the movement
    and pointed out the need to take siglock if this code
    was going to be called while the new task is discoverable.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  1 +
 kernel/sched/core.c          |  2 ++
 kernel/signal.c              | 11 +++++++++++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 94558ffa82ab..b55fd293c1e5 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -372,6 +372,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
  */
 extern void recalc_sigpending_and_wake(struct task_struct *t);
 extern void recalc_sigpending(void);
+extern void calculate_sigpending(void);
 
 extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78d8facba456..3e4ed4b7aa2d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2813,6 +2813,8 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
 
 	if (current->set_child_tid)
 		put_user(task_pid_vnr(current), current->set_child_tid);
+
+	calculate_sigpending();
 }
 
 /*
diff --git a/kernel/signal.c b/kernel/signal.c
index dddbea558455..1e06f1eba363 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -172,6 +172,17 @@ void recalc_sigpending(void)
 
 }
 
+void calculate_sigpending(void)
+{
+	/* Have any signals or users of TIF_SIGPENDING been delayed
+	 * until after fork?
+	 */
+	spin_lock_irq(&current->sighand->siglock);
+	set_tsk_thread_flag(current, TIF_SIGPENDING);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+}
+
 /* Given the mask, find the first available signal that should be serviced. */
 
 #define SYNCHRONOUS_MASK \
-- 
cgit v1.2.3


From 4390e9eadbbb6774b7ba03fde0a0fdf3f07db4cd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 3 Aug 2018 20:10:54 -0500
Subject: fork: Skip setting TIF_SIGPENDING in ptrace_init_task

The code in calculate_sigpending will now handle this so
it is just redundant and possibly a little confusing
to continue setting TIF_SIGPENDING in ptrace_init_task.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/ptrace.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 037bf0ef1ae9..4f36431c380b 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -214,8 +214,6 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 			task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
 		else
 			sigaddset(&child->pending.signal, SIGSTOP);
-
-		set_tsk_thread_flag(child, TIF_SIGPENDING);
 	}
 	else
 		child->ptracer_cred = NULL;
-- 
cgit v1.2.3


From 924de3b8c9410c404c6eda7abffd282b97b3ff7f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 23 Jul 2018 13:38:00 -0500
Subject: fork: Have new threads join on-going signal group stops

There are only two signals that are delivered to every member of a
signal group: SIGSTOP and SIGKILL.  Signal delivery requires every
signal appear to be delivered either before or after a clone syscall.
SIGKILL terminates the clone so does not need to be considered.  Which
leaves only SIGSTOP that needs to be considered when creating new
threads.

Today in the event of a group stop TIF_SIGPENDING will get set and the
fork will restart ensuring the fork syscall participates in the group
stop.

A fork (especially of a process with a lot of memory) is one of the
most expensive system so we really only want to restart a fork when
necessary.

It is easy so check to see if a SIGSTOP is ongoing and have the new
thread join it immediate after the clone completes.  Making it appear
the clone completed happened just before the SIGSTOP.

The calculate_sigpending function will see the bits set in jobctl and
set TIF_SIGPENDING to ensure the new task takes the slow path to userspace.

V2: The call to task_join_group_stop was moved before the new task is
    added to the thread group list.  This should not matter as
    sighand->siglock is held over both the addition of the threads,
    the call to task_join_group_stop and do_signal_stop.  But the change
    is trivial and it is one less thing to worry about when reading
    the code.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  2 ++
 kernel/fork.c                | 27 +++++++++++++++------------
 kernel/signal.c              | 14 ++++++++++++++
 3 files changed, 31 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b55fd293c1e5..ae2b0b81be25 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -385,6 +385,8 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
 	signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
 }
 
+void task_join_group_stop(struct task_struct *task);
+
 #ifdef TIF_RESTORE_SIGMASK
 /*
  * Legacy restore_sigmask accessors.  These are inefficient on
diff --git a/kernel/fork.c b/kernel/fork.c
index 22d4cdb9a7ca..ab731e15a600 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1934,18 +1934,20 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_cancel_cgroup;
 	}
 
-	/*
-	 * Process group and session signals need to be delivered to just the
-	 * parent before the fork or both the parent and the child after the
-	 * fork. Restart if a signal comes in before we add the new process to
-	 * it's process group.
-	 * A fatal signal pending means that current will exit, so the new
-	 * thread can't slip out of an OOM kill (or normal SIGKILL).
-	*/
-	recalc_sigpending();
-	if (signal_pending(current)) {
-		retval = -ERESTARTNOINTR;
-		goto bad_fork_cancel_cgroup;
+	if (!(clone_flags & CLONE_THREAD)) {
+		/*
+		 * Process group and session signals need to be delivered to just the
+		 * parent before the fork or both the parent and the child after the
+		 * fork. Restart if a signal comes in before we add the new process to
+		 * it's process group.
+		 * A fatal signal pending means that current will exit, so the new
+		 * thread can't slip out of an OOM kill (or normal SIGKILL).
+		 */
+		recalc_sigpending();
+		if (signal_pending(current)) {
+			retval = -ERESTARTNOINTR;
+			goto bad_fork_cancel_cgroup;
+		}
 	}
 
 
@@ -1982,6 +1984,7 @@ static __latent_entropy struct task_struct *copy_process(
 			current->signal->nr_threads++;
 			atomic_inc(&current->signal->live);
 			atomic_inc(&current->signal->sigcnt);
+			task_join_group_stop(p);
 			list_add_tail_rcu(&p->thread_group,
 					  &p->group_leader->thread_group);
 			list_add_tail_rcu(&p->thread_node,
diff --git a/kernel/signal.c b/kernel/signal.c
index 1e06f1eba363..9f0eafb6d474 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -373,6 +373,20 @@ static bool task_participate_group_stop(struct task_struct *task)
 	return false;
 }
 
+void task_join_group_stop(struct task_struct *task)
+{
+	/* Have the new thread join an on-going signal group stop */
+	unsigned long jobctl = current->jobctl;
+	if (jobctl & JOBCTL_STOP_PENDING) {
+		struct signal_struct *sig = current->signal;
+		unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
+		unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
+		if (task_set_jobctl_pending(task, signr | gstop)) {
+			sig->group_stop_count++;
+		}
+	}
+}
+
 /*
  * allocate a new signal queue record
  * - this may be called without locks if and only if t == current, otherwise an
-- 
cgit v1.2.3


From d9cfe2ce246845b9cca0ec1b881e826965893c58 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Mon, 23 Jul 2018 22:26:05 +0200
Subject: i2c: quirks: add zero length checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some adapters do not support a message length of 0. Add this as a quirk
so drivers don't have to open code it.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Tested-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 6 ++++++
 include/linux/i2c.h         | 4 ++++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 02d6f27b19e4..a26b3e9cc441 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1839,9 +1839,15 @@ static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs,
 		if (msgs[i].flags & I2C_M_RD) {
 			if (do_len_check && i2c_quirk_exceeded(len, q->max_read_len))
 				return i2c_quirk_error(adap, &msgs[i], "msg too long");
+
+			if (q->flags & I2C_AQ_NO_ZERO_LEN_READ && len == 0)
+				return i2c_quirk_error(adap, &msgs[i], "no zero length");
 		} else {
 			if (do_len_check && i2c_quirk_exceeded(len, q->max_write_len))
 				return i2c_quirk_error(adap, &msgs[i], "msg too long");
+
+			if (q->flags & I2C_AQ_NO_ZERO_LEN_WRITE && len == 0)
+				return i2c_quirk_error(adap, &msgs[i], "no zero length");
 		}
 	}
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index bc8d42f8544f..2a98d0886d2e 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -661,6 +661,10 @@ struct i2c_adapter_quirks {
 					 I2C_AQ_COMB_READ_SECOND | I2C_AQ_COMB_SAME_ADDR)
 /* clock stretching is not supported */
 #define I2C_AQ_NO_CLK_STRETCH		BIT(4)
+/* message cannot have length of 0 */
+#define I2C_AQ_NO_ZERO_LEN_READ		BIT(5)
+#define I2C_AQ_NO_ZERO_LEN_WRITE	BIT(6)
+#define I2C_AQ_NO_ZERO_LEN		(I2C_AQ_NO_ZERO_LEN_READ | I2C_AQ_NO_ZERO_LEN_WRITE)
 
 /*
  * i2c_adapter is the structure used to identify a physical i2c bus along
-- 
cgit v1.2.3


From 91305f2812624c0cf7ccbb44133b66d3b24676e4 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Wed, 1 Aug 2018 18:05:54 +0800
Subject: ptp_qoriq: support automatic configuration for ptp timer

This patch is to support automatic configuration for ptp timer.
If required ptp dts properties are not provided, driver could
try to calculate a set of default configurations to initialize
the ptp timer. This makes the driver work for many boards which
don't have the required ptp dts properties in current kernel.
Also the users could set dts properties by themselves according
to their requirement.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_qoriq.c       | 111 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/fsl/ptp_qoriq.h |   6 ++-
 2 files changed, 113 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index a14c317b5a38..095c18532dc7 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -29,6 +29,7 @@
 #include <linux/of_platform.h>
 #include <linux/timex.h>
 #include <linux/slab.h>
+#include <linux/clk.h>
 
 #include <linux/fsl/ptp_qoriq.h>
 
@@ -317,6 +318,105 @@ static const struct ptp_clock_info ptp_qoriq_caps = {
 	.enable		= ptp_qoriq_enable,
 };
 
+/**
+ * qoriq_ptp_nominal_freq - calculate nominal frequency according to
+ *			    reference clock frequency
+ *
+ * @clk_src: reference clock frequency
+ *
+ * The nominal frequency is the desired clock frequency.
+ * It should be less than the reference clock frequency.
+ * It should be a factor of 1000MHz.
+ *
+ * Return the nominal frequency
+ */
+static u32 qoriq_ptp_nominal_freq(u32 clk_src)
+{
+	u32 remainder = 0;
+
+	clk_src /= 1000000;
+	remainder = clk_src % 100;
+	if (remainder) {
+		clk_src -= remainder;
+		clk_src += 100;
+	}
+
+	do {
+		clk_src -= 100;
+
+	} while (1000 % clk_src);
+
+	return clk_src * 1000000;
+}
+
+/**
+ * qoriq_ptp_auto_config - calculate a set of default configurations
+ *
+ * @qoriq_ptp: pointer to qoriq_ptp
+ * @node: pointer to device_node
+ *
+ * If below dts properties are not provided, this function will be
+ * called to calculate a set of default configurations for them.
+ *   "fsl,tclk-period"
+ *   "fsl,tmr-prsc"
+ *   "fsl,tmr-add"
+ *   "fsl,tmr-fiper1"
+ *   "fsl,tmr-fiper2"
+ *   "fsl,max-adj"
+ *
+ * Return 0 if success
+ */
+static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp,
+				 struct device_node *node)
+{
+	struct clk *clk;
+	u64 freq_comp;
+	u64 max_adj;
+	u32 nominal_freq;
+	u32 clk_src = 0;
+
+	qoriq_ptp->cksel = DEFAULT_CKSEL;
+
+	clk = of_clk_get(node, 0);
+	if (!IS_ERR(clk)) {
+		clk_src = clk_get_rate(clk);
+		clk_put(clk);
+	}
+
+	if (clk_src <= 100000000UL) {
+		pr_err("error reference clock value, or lower than 100MHz\n");
+		return -EINVAL;
+	}
+
+	nominal_freq = qoriq_ptp_nominal_freq(clk_src);
+	if (!nominal_freq)
+		return -EINVAL;
+
+	qoriq_ptp->tclk_period = 1000000000UL / nominal_freq;
+	qoriq_ptp->tmr_prsc = DEFAULT_TMR_PRSC;
+
+	/* Calculate initial frequency compensation value for TMR_ADD register.
+	 * freq_comp = ceil(2^32 / freq_ratio)
+	 * freq_ratio = reference_clock_freq / nominal_freq
+	 */
+	freq_comp = ((u64)1 << 32) * nominal_freq;
+	if (do_div(freq_comp, clk_src))
+		freq_comp++;
+
+	qoriq_ptp->tmr_add = freq_comp;
+	qoriq_ptp->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - qoriq_ptp->tclk_period;
+	qoriq_ptp->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - qoriq_ptp->tclk_period;
+
+	/* max_adj = 1000000000 * (freq_ratio - 1.0) - 1
+	 * freq_ratio = reference_clock_freq / nominal_freq
+	 */
+	max_adj = 1000000000ULL * (clk_src - nominal_freq);
+	max_adj = max_adj / nominal_freq - 1;
+	qoriq_ptp->caps.max_adj = max_adj;
+
+	return 0;
+}
+
 static int qoriq_ptp_probe(struct platform_device *dev)
 {
 	struct device_node *node = dev->dev.of_node;
@@ -332,7 +432,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 	if (!qoriq_ptp)
 		goto no_memory;
 
-	err = -ENODEV;
+	err = -EINVAL;
 
 	qoriq_ptp->caps = ptp_qoriq_caps;
 
@@ -351,10 +451,14 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 				 "fsl,tmr-fiper2", &qoriq_ptp->tmr_fiper2) ||
 	    of_property_read_u32(node,
 				 "fsl,max-adj", &qoriq_ptp->caps.max_adj)) {
-		pr_err("device tree node missing required elements\n");
-		goto no_node;
+		pr_warn("device tree node missing required elements, try automatic configuration\n");
+
+		if (qoriq_ptp_auto_config(qoriq_ptp, node))
+			goto no_config;
 	}
 
+	err = -ENODEV;
+
 	qoriq_ptp->irq = platform_get_irq(dev, 0);
 
 	if (qoriq_ptp->irq < 0) {
@@ -436,6 +540,7 @@ no_ioremap:
 	release_resource(qoriq_ptp->rsrc);
 no_resource:
 	free_irq(qoriq_ptp->irq, qoriq_ptp);
+no_config:
 no_node:
 	kfree(qoriq_ptp);
 no_memory:
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index dc3dac40f069..c1f003aadcce 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -127,9 +127,13 @@ struct qoriq_ptp_registers {
 
 
 #define DRIVER		"ptp_qoriq"
-#define DEFAULT_CKSEL	1
 #define N_EXT_TS	2
 
+#define DEFAULT_CKSEL		1
+#define DEFAULT_TMR_PRSC	2
+#define DEFAULT_FIPER1_PERIOD	1000000000
+#define DEFAULT_FIPER2_PERIOD	100000
+
 struct qoriq_ptp {
 	void __iomem *base;
 	struct qoriq_ptp_registers regs;
-- 
cgit v1.2.3


From 385114dec8a49b5e5945e77ba7de6356106713f4 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Thu, 2 Aug 2018 23:34:38 +0000
Subject: net: modify skb_rbtree_purge to return the truesize of all purged
 skbs.

Tested: see the next patch is the series.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 net/core/skbuff.c      | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fd3cb1b247df..47848367c816 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2585,7 +2585,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 		kfree_skb(skb);
 }
 
-void skb_rbtree_purge(struct rb_root *root);
+unsigned int skb_rbtree_purge(struct rb_root *root);
 
 void *netdev_alloc_frag(unsigned int fragsz);
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 51b0a9126e12..8d574a88125d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2858,23 +2858,27 @@ EXPORT_SYMBOL(skb_queue_purge);
 /**
  *	skb_rbtree_purge - empty a skb rbtree
  *	@root: root of the rbtree to empty
+ *	Return value: the sum of truesizes of all purged skbs.
  *
  *	Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
  *	the list and one reference dropped. This function does not take
  *	any lock. Synchronization should be handled by the caller (e.g., TCP
  *	out-of-order queue is protected by the socket lock).
  */
-void skb_rbtree_purge(struct rb_root *root)
+unsigned int skb_rbtree_purge(struct rb_root *root)
 {
 	struct rb_node *p = rb_first(root);
+	unsigned int sum = 0;
 
 	while (p) {
 		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
 
 		p = rb_next(p);
 		rb_erase(&skb->rbnode, root);
+		sum += skb->truesize;
 		kfree_skb(skb);
 	}
+	return sum;
 }
 
 /**
-- 
cgit v1.2.3


From fa0f527358bd900ef92f925878ed6bfbd51305cc Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Thu, 2 Aug 2018 23:34:39 +0000
Subject: ip: use rb trees for IP frag queue.

Similar to TCP OOO RX queue, it makes sense to use rb trees to store
IP fragments, so that OOO fragments are inserted faster.

Tested:

- a follow-up patch contains a rather comprehensive ip defrag
  self-test (functional)
- ran neper `udp_stream -c -H <host> -F 100 -l 300 -T 20`:
    netstat --statistics
    Ip:
        282078937 total packets received
        0 forwarded
        0 incoming packets discarded
        946760 incoming packets delivered
        18743456 requests sent out
        101 fragments dropped after timeout
        282077129 reassemblies required
        944952 packets reassembled ok
        262734239 packet reassembles failed
   (The numbers/stats above are somewhat better re:
    reassemblies vs a kernel without this patchset. More
    comprehensive performance testing TBD).

Reported-by: Jann Horn <jannh@google.com>
Reported-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                  |   9 +-
 include/net/inet_frag.h                 |   3 +-
 net/ipv4/inet_fragment.c                |  16 +--
 net/ipv4/ip_fragment.c                  | 182 ++++++++++++++++++--------------
 net/ipv6/netfilter/nf_conntrack_reasm.c |   1 +
 net/ipv6/reassembly.c                   |   1 +
 6 files changed, 121 insertions(+), 91 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 47848367c816..7ebdf158a795 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -676,13 +676,16 @@ struct sk_buff {
 				 * UDP receive path is one user.
 				 */
 				unsigned long		dev_scratch;
-				int			ip_defrag_offset;
 			};
 		};
-		struct rb_node		rbnode; /* used in netem & tcp stack */
+		struct rb_node		rbnode; /* used in netem, ip4 defrag, and tcp stack */
 		struct list_head	list;
 	};
-	struct sock		*sk;
+
+	union {
+		struct sock		*sk;
+		int			ip_defrag_offset;
+	};
 
 	union {
 		ktime_t		tstamp;
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index f4272a29dc44..b86d14528188 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -75,7 +75,8 @@ struct inet_frag_queue {
 	struct timer_list	timer;
 	spinlock_t		lock;
 	refcount_t		refcnt;
-	struct sk_buff		*fragments;
+	struct sk_buff		*fragments;  /* Used in IPv6. */
+	struct rb_root		rb_fragments; /* Used in IPv4. */
 	struct sk_buff		*fragments_tail;
 	ktime_t			stamp;
 	int			len;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index ccd140e4082d..6d258a5669e7 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -137,12 +137,16 @@ void inet_frag_destroy(struct inet_frag_queue *q)
 	fp = q->fragments;
 	nf = q->net;
 	f = nf->f;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		sum_truesize += fp->truesize;
-		kfree_skb(fp);
-		fp = xp;
+	if (fp) {
+		do {
+			struct sk_buff *xp = fp->next;
+
+			sum_truesize += fp->truesize;
+			kfree_skb(fp);
+			fp = xp;
+		} while (fp);
+	} else {
+		sum_truesize = skb_rbtree_purge(&q->rb_fragments);
 	}
 	sum = sum_truesize + f->qsize;
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 960bf5eab59f..0e8f8de77e71 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -136,7 +136,7 @@ static void ip_expire(struct timer_list *t)
 {
 	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	const struct iphdr *iph;
-	struct sk_buff *head;
+	struct sk_buff *head = NULL;
 	struct net *net;
 	struct ipq *qp;
 	int err;
@@ -152,14 +152,31 @@ static void ip_expire(struct timer_list *t)
 
 	ipq_kill(qp);
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
-
-	head = qp->q.fragments;
-
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
 
-	if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head)
+	if (!qp->q.flags & INET_FRAG_FIRST_IN)
 		goto out;
 
+	/* sk_buff::dev and sk_buff::rbnode are unionized. So we
+	 * pull the head out of the tree in order to be able to
+	 * deal with head->dev.
+	 */
+	if (qp->q.fragments) {
+		head = qp->q.fragments;
+		qp->q.fragments = head->next;
+	} else {
+		head = skb_rb_first(&qp->q.rb_fragments);
+		if (!head)
+			goto out;
+		rb_erase(&head->rbnode, &qp->q.rb_fragments);
+		memset(&head->rbnode, 0, sizeof(head->rbnode));
+		barrier();
+	}
+	if (head == qp->q.fragments_tail)
+		qp->q.fragments_tail = NULL;
+
+	sub_frag_mem_limit(qp->q.net, head->truesize);
+
 	head->dev = dev_get_by_index_rcu(net, qp->iif);
 	if (!head->dev)
 		goto out;
@@ -179,16 +196,16 @@ static void ip_expire(struct timer_list *t)
 	    (skb_rtable(head)->rt_type != RTN_LOCAL))
 		goto out;
 
-	skb_get(head);
 	spin_unlock(&qp->q.lock);
 	icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-	kfree_skb(head);
 	goto out_rcu_unlock;
 
 out:
 	spin_unlock(&qp->q.lock);
 out_rcu_unlock:
 	rcu_read_unlock();
+	if (head)
+		kfree_skb(head);
 	ipq_put(qp);
 }
 
@@ -231,7 +248,7 @@ static int ip_frag_too_far(struct ipq *qp)
 	end = atomic_inc_return(&peer->rid);
 	qp->rid = end;
 
-	rc = qp->q.fragments && (end - start) > max;
+	rc = qp->q.fragments_tail && (end - start) > max;
 
 	if (rc) {
 		struct net *net;
@@ -245,7 +262,6 @@ static int ip_frag_too_far(struct ipq *qp)
 
 static int ip_frag_reinit(struct ipq *qp)
 {
-	struct sk_buff *fp;
 	unsigned int sum_truesize = 0;
 
 	if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
@@ -253,20 +269,14 @@ static int ip_frag_reinit(struct ipq *qp)
 		return -ETIMEDOUT;
 	}
 
-	fp = qp->q.fragments;
-	do {
-		struct sk_buff *xp = fp->next;
-
-		sum_truesize += fp->truesize;
-		kfree_skb(fp);
-		fp = xp;
-	} while (fp);
+	sum_truesize = skb_rbtree_purge(&qp->q.rb_fragments);
 	sub_frag_mem_limit(qp->q.net, sum_truesize);
 
 	qp->q.flags = 0;
 	qp->q.len = 0;
 	qp->q.meat = 0;
 	qp->q.fragments = NULL;
+	qp->q.rb_fragments = RB_ROOT;
 	qp->q.fragments_tail = NULL;
 	qp->iif = 0;
 	qp->ecn = 0;
@@ -278,7 +288,8 @@ static int ip_frag_reinit(struct ipq *qp)
 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
 	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
-	struct sk_buff *prev, *next;
+	struct rb_node **rbn, *parent;
+	struct sk_buff *skb1;
 	struct net_device *dev;
 	unsigned int fragsize;
 	int flags, offset;
@@ -341,58 +352,58 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	if (err)
 		goto err;
 
-	/* Find out which fragments are in front and at the back of us
-	 * in the chain of fragments so far.  We must know where to put
-	 * this fragment, right?
-	 */
-	prev = qp->q.fragments_tail;
-	if (!prev || prev->ip_defrag_offset < offset) {
-		next = NULL;
-		goto found;
-	}
-	prev = NULL;
-	for (next = qp->q.fragments; next != NULL; next = next->next) {
-		if (next->ip_defrag_offset >= offset)
-			break;	/* bingo! */
-		prev = next;
-	}
+	/* Note : skb->rbnode and skb->dev share the same location. */
+	dev = skb->dev;
+	/* Makes sure compiler wont do silly aliasing games */
+	barrier();
 
-found:
 	/* RFC5722, Section 4, amended by Errata ID : 3089
 	 *                          When reassembling an IPv6 datagram, if
 	 *   one or more its constituent fragments is determined to be an
 	 *   overlapping fragment, the entire datagram (and any constituent
 	 *   fragments) MUST be silently discarded.
 	 *
-	 * We do the same here for IPv4.
+	 * We do the same here for IPv4 (and increment an snmp counter).
 	 */
 
-	/* Is there an overlap with the previous fragment? */
-	if (prev &&
-	    (prev->ip_defrag_offset + prev->len) > offset)
-		goto discard_qp;
-
-	/* Is there an overlap with the next fragment? */
-	if (next && next->ip_defrag_offset < end)
-		goto discard_qp;
+	/* Find out where to put this fragment.  */
+	skb1 = qp->q.fragments_tail;
+	if (!skb1) {
+		/* This is the first fragment we've received. */
+		rb_link_node(&skb->rbnode, NULL, &qp->q.rb_fragments.rb_node);
+		qp->q.fragments_tail = skb;
+	} else if ((skb1->ip_defrag_offset + skb1->len) < end) {
+		/* This is the common/special case: skb goes to the end. */
+		/* Detect and discard overlaps. */
+		if (offset < (skb1->ip_defrag_offset + skb1->len))
+			goto discard_qp;
+		/* Insert after skb1. */
+		rb_link_node(&skb->rbnode, &skb1->rbnode, &skb1->rbnode.rb_right);
+		qp->q.fragments_tail = skb;
+	} else {
+		/* Binary search. Note that skb can become the first fragment, but
+		 * not the last (covered above). */
+		rbn = &qp->q.rb_fragments.rb_node;
+		do {
+			parent = *rbn;
+			skb1 = rb_to_skb(parent);
+			if (end <= skb1->ip_defrag_offset)
+				rbn = &parent->rb_left;
+			else if (offset >= skb1->ip_defrag_offset + skb1->len)
+				rbn = &parent->rb_right;
+			else /* Found an overlap with skb1. */
+				goto discard_qp;
+		} while (*rbn);
+		/* Here we have parent properly set, and rbn pointing to
+		 * one of its NULL left/right children. Insert skb. */
+		rb_link_node(&skb->rbnode, parent, rbn);
+	}
+	rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
 
-	/* Note : skb->ip_defrag_offset and skb->dev share the same location */
-	dev = skb->dev;
 	if (dev)
 		qp->iif = dev->ifindex;
-	/* Makes sure compiler wont do silly aliasing games */
-	barrier();
 	skb->ip_defrag_offset = offset;
 
-	/* Insert this fragment in the chain of fragments. */
-	skb->next = next;
-	if (!next)
-		qp->q.fragments_tail = skb;
-	if (prev)
-		prev->next = skb;
-	else
-		qp->q.fragments = skb;
-
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
 	qp->ecn |= ecn;
@@ -414,7 +425,7 @@ found:
 		unsigned long orefdst = skb->_skb_refdst;
 
 		skb->_skb_refdst = 0UL;
-		err = ip_frag_reasm(qp, prev, dev);
+		err = ip_frag_reasm(qp, skb, dev);
 		skb->_skb_refdst = orefdst;
 		return err;
 	}
@@ -431,15 +442,15 @@ err:
 	return err;
 }
 
-
 /* Build a new IP datagram from all its fragments. */
-
-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 			 struct net_device *dev)
 {
 	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
 	struct iphdr *iph;
-	struct sk_buff *fp, *head = qp->q.fragments;
+	struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
+	struct sk_buff **nextp; /* To build frag_list. */
+	struct rb_node *rbn;
 	int len;
 	int ihlen;
 	int err;
@@ -453,25 +464,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		goto out_fail;
 	}
 	/* Make the one we just received the head. */
-	if (prev) {
-		head = prev->next;
-		fp = skb_clone(head, GFP_ATOMIC);
+	if (head != skb) {
+		fp = skb_clone(skb, GFP_ATOMIC);
 		if (!fp)
 			goto out_nomem;
-
-		fp->next = head->next;
-		if (!fp->next)
+		rb_replace_node(&skb->rbnode, &fp->rbnode, &qp->q.rb_fragments);
+		if (qp->q.fragments_tail == skb)
 			qp->q.fragments_tail = fp;
-		prev->next = fp;
-
-		skb_morph(head, qp->q.fragments);
-		head->next = qp->q.fragments->next;
-
-		consume_skb(qp->q.fragments);
-		qp->q.fragments = head;
+		skb_morph(skb, head);
+		rb_replace_node(&head->rbnode, &skb->rbnode,
+				&qp->q.rb_fragments);
+		consume_skb(head);
+		head = skb;
 	}
 
-	WARN_ON(!head);
 	WARN_ON(head->ip_defrag_offset != 0);
 
 	/* Allocate a new buffer for the datagram. */
@@ -496,24 +502,35 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		clone = alloc_skb(0, GFP_ATOMIC);
 		if (!clone)
 			goto out_nomem;
-		clone->next = head->next;
-		head->next = clone;
 		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
 		skb_frag_list_init(head);
 		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
 			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
 		clone->len = clone->data_len = head->data_len - plen;
-		head->data_len -= clone->len;
-		head->len -= clone->len;
+		skb->truesize += clone->truesize;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
 		add_frag_mem_limit(qp->q.net, clone->truesize);
+		skb_shinfo(head)->frag_list = clone;
+		nextp = &clone->next;
+	} else {
+		nextp = &skb_shinfo(head)->frag_list;
 	}
 
-	skb_shinfo(head)->frag_list = head->next;
 	skb_push(head, head->data - skb_network_header(head));
 
-	for (fp=head->next; fp; fp = fp->next) {
+	/* Traverse the tree in order, to build frag_list. */
+	rbn = rb_next(&head->rbnode);
+	rb_erase(&head->rbnode, &qp->q.rb_fragments);
+	while (rbn) {
+		struct rb_node *rbnext = rb_next(rbn);
+		fp = rb_to_skb(rbn);
+		rb_erase(rbn, &qp->q.rb_fragments);
+		rbn = rbnext;
+		*nextp = fp;
+		nextp = &fp->next;
+		fp->prev = NULL;
+		memset(&fp->rbnode, 0, sizeof(fp->rbnode));
 		head->data_len += fp->len;
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
@@ -524,7 +541,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	}
 	sub_frag_mem_limit(qp->q.net, head->truesize);
 
+	*nextp = NULL;
 	head->next = NULL;
+	head->prev = NULL;
 	head->dev = dev;
 	head->tstamp = qp->q.stamp;
 	IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
@@ -552,6 +571,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
+	qp->q.rb_fragments = RB_ROOT;
 	qp->q.fragments_tail = NULL;
 	return 0;
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 0610bdab721c..38d69ef516d5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -463,6 +463,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct net_devic
 					  head->csum);
 
 	fq->q.fragments = NULL;
+	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
 
 	return true;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6edd2ac8ae4b..b4e558ab39fa 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -405,6 +405,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
 	fq->q.fragments = NULL;
+	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
 	return 1;
 
-- 
cgit v1.2.3


From 55f2503c3b69328735e88031ff8d6ba291bd952b Mon Sep 17 00:00:00 2001
From: Pingfan Liu <kernelfans@gmail.com>
Date: Tue, 31 Jul 2018 16:51:32 +0800
Subject: PM / reboot: Eliminate race between reboot and suspend

At present, "systemctl suspend" and "shutdown" can run in parrallel. A
system can suspend after devices_shutdown(), and resume. Then the shutdown
task goes on to power off. This causes many devices are not really shut
off. Hence replacing reboot_mutex with system_transition_mutex (renamed
from pm_mutex) to achieve the exclusion. The renaming of pm_mutex as
system_transition_mutex can be better to reflect the purpose of the mutex.

Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/freezing-of-tasks.txt      | 12 ++++++------
 Documentation/power/suspend-and-cpuhotplug.txt |  6 +++---
 include/linux/suspend.h                        |  2 +-
 kernel/freezer.c                               |  4 +++-
 kernel/power/hibernate.c                       | 15 ++++++++-------
 kernel/power/main.c                            | 12 ++++++------
 kernel/power/suspend.c                         |  4 ++--
 kernel/power/user.c                            |  4 ++--
 kernel/reboot.c                                |  6 +++---
 mm/page_alloc.c                                | 11 ++++++-----
 10 files changed, 40 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index af005770e767..cd283190855a 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -204,26 +204,26 @@ VI. Are there any precautions to be taken to prevent freezing failures?
 
 Yes, there are.
 
-First of all, grabbing the 'pm_mutex' lock to mutually exclude a piece of code
+First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a piece of code
 from system-wide sleep such as suspend/hibernation is not encouraged.
 If possible, that piece of code must instead hook onto the suspend/hibernation
 notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code
 (kernel/cpu.c) for an example.
 
-However, if that is not feasible, and grabbing 'pm_mutex' is deemed necessary,
-it is strongly discouraged to directly call mutex_[un]lock(&pm_mutex) since
+However, if that is not feasible, and grabbing 'system_transition_mutex' is deemed necessary,
+it is strongly discouraged to directly call mutex_[un]lock(&system_transition_mutex) since
 that could lead to freezing failures, because if the suspend/hibernate code
-successfully acquired the 'pm_mutex' lock, and hence that other entity failed
+successfully acquired the 'system_transition_mutex' lock, and hence that other entity failed
 to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE
 state. As a consequence, the freezer would not be able to freeze that task,
 leading to freezing failure.
 
 However, the [un]lock_system_sleep() APIs are safe to use in this scenario,
 since they ask the freezer to skip freezing this task, since it is anyway
-"frozen enough" as it is blocked on 'pm_mutex', which will be released
+"frozen enough" as it is blocked on 'system_transition_mutex', which will be released
 only after the entire suspend/hibernation sequence is complete.
 So, to summarize, use [un]lock_system_sleep() instead of directly using
-mutex_[un]lock(&pm_mutex). That would prevent freezing failures.
+mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures.
 
 V. Miscellaneous
 /sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt
index 6f55eb960a6d..a8751b8df10e 100644
--- a/Documentation/power/suspend-and-cpuhotplug.txt
+++ b/Documentation/power/suspend-and-cpuhotplug.txt
@@ -32,7 +32,7 @@ More details follow:
                                     sysfs file
                                         |
                                         v
-                               Acquire pm_mutex lock
+                               Acquire system_transition_mutex lock
                                         |
                                         v
                              Send PM_SUSPEND_PREPARE
@@ -96,10 +96,10 @@ execution during resume):
 
 * thaw tasks
 * send PM_POST_SUSPEND notifications
-* Release pm_mutex lock.
+* Release system_transition_mutex lock.
 
 
-It is to be noted here that the pm_mutex lock is acquired at the very
+It is to be noted here that the system_transition_mutex lock is acquired at the very
 beginning, when we are just starting out to suspend, and then released only
 after the entire cycle is complete (i.e., suspend + resume).
 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 440b62f7502e..5a28ac9284f0 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -414,7 +414,7 @@ static inline bool hibernation_available(void) { return false; }
 #define PM_RESTORE_PREPARE	0x0005 /* Going to restore a saved image */
 #define PM_POST_RESTORE		0x0006 /* Restore failed */
 
-extern struct mutex pm_mutex;
+extern struct mutex system_transition_mutex;
 
 #ifdef CONFIG_PM_SLEEP
 void save_processor_state(void);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 6f56a9e219fa..b162b74611e4 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -15,7 +15,9 @@
 atomic_t system_freezing_cnt = ATOMIC_INIT(0);
 EXPORT_SYMBOL(system_freezing_cnt);
 
-/* indicate whether PM freezing is in effect, protected by pm_mutex */
+/* indicate whether PM freezing is in effect, protected by
+ * system_transition_mutex
+ */
 bool pm_freezing;
 bool pm_nosig_freezing;
 
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 38f3f96b4b12..abef759de7c8 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -338,7 +338,7 @@ static int create_image(int platform_mode)
  * hibernation_snapshot - Quiesce devices and create a hibernation image.
  * @platform_mode: If set, use platform driver to prepare for the transition.
  *
- * This routine must be called with pm_mutex held.
+ * This routine must be called with system_transition_mutex held.
  */
 int hibernation_snapshot(int platform_mode)
 {
@@ -500,8 +500,9 @@ static int resume_target_kernel(bool platform_mode)
  * hibernation_restore - Quiesce devices and restore from a hibernation image.
  * @platform_mode: If set, use platform driver to prepare for the transition.
  *
- * This routine must be called with pm_mutex held.  If it is successful, control
- * reappears in the restored target kernel in hibernation_snapshot().
+ * This routine must be called with system_transition_mutex held.  If it is
+ * successful, control reappears in the restored target kernel in
+ * hibernation_snapshot().
  */
 int hibernation_restore(int platform_mode)
 {
@@ -806,13 +807,13 @@ static int software_resume(void)
 	 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
 	 * is configured into the kernel. Since the regular hibernate
 	 * trigger path is via sysfs which takes a buffer mutex before
-	 * calling hibernate functions (which take pm_mutex) this can
-	 * cause lockdep to complain about a possible ABBA deadlock
+	 * calling hibernate functions (which take system_transition_mutex)
+	 * this can cause lockdep to complain about a possible ABBA deadlock
 	 * which cannot happen since we're in the boot code here and
 	 * sysfs can't be invoked yet. Therefore, we use a subclass
 	 * here to avoid lockdep complaining.
 	 */
-	mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING);
 
 	if (swsusp_resume_device)
 		goto Check_image;
@@ -900,7 +901,7 @@ static int software_resume(void)
 	atomic_inc(&snapshot_device_available);
 	/* For success case, the suspend path will release the lock */
  Unlock:
-	mutex_unlock(&pm_mutex);
+	mutex_unlock(&system_transition_mutex);
 	pm_pr_dbg("Hibernation image not present or could not be loaded.\n");
 	return error;
  Close_Finish:
diff --git a/kernel/power/main.c b/kernel/power/main.c
index d9706da10930..35b50823d83b 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -15,17 +15,16 @@
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/suspend.h>
 
 #include "power.h"
 
-DEFINE_MUTEX(pm_mutex);
-
 #ifdef CONFIG_PM_SLEEP
 
 void lock_system_sleep(void)
 {
 	current->flags |= PF_FREEZER_SKIP;
-	mutex_lock(&pm_mutex);
+	mutex_lock(&system_transition_mutex);
 }
 EXPORT_SYMBOL_GPL(lock_system_sleep);
 
@@ -37,8 +36,9 @@ void unlock_system_sleep(void)
 	 *
 	 * Reason:
 	 * Fundamentally, we just don't need it, because freezing condition
-	 * doesn't come into effect until we release the pm_mutex lock,
-	 * since the freezer always works with pm_mutex held.
+	 * doesn't come into effect until we release the
+	 * system_transition_mutex lock, since the freezer always works with
+	 * system_transition_mutex held.
 	 *
 	 * More importantly, in the case of hibernation,
 	 * unlock_system_sleep() gets called in snapshot_read() and
@@ -47,7 +47,7 @@ void unlock_system_sleep(void)
 	 * enter the refrigerator, thus causing hibernation to lockup.
 	 */
 	current->flags &= ~PF_FREEZER_SKIP;
-	mutex_unlock(&pm_mutex);
+	mutex_unlock(&system_transition_mutex);
 }
 EXPORT_SYMBOL_GPL(unlock_system_sleep);
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 87331565e505..9e13afe65a14 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -556,7 +556,7 @@ static int enter_state(suspend_state_t state)
 	} else if (!valid_state(state)) {
 		return -EINVAL;
 	}
-	if (!mutex_trylock(&pm_mutex))
+	if (!mutex_trylock(&system_transition_mutex))
 		return -EBUSY;
 
 	if (state == PM_SUSPEND_TO_IDLE)
@@ -590,7 +590,7 @@ static int enter_state(suspend_state_t state)
 	pm_pr_dbg("Finishing wakeup.\n");
 	suspend_finish();
  Unlock:
-	mutex_unlock(&pm_mutex);
+	mutex_unlock(&system_transition_mutex);
 	return error;
 }
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index abd225550271..2d8b60a3c86b 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -216,7 +216,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!mutex_trylock(&pm_mutex))
+	if (!mutex_trylock(&system_transition_mutex))
 		return -EBUSY;
 
 	lock_device_hotplug();
@@ -394,7 +394,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 	}
 
 	unlock_device_hotplug();
-	mutex_unlock(&pm_mutex);
+	mutex_unlock(&system_transition_mutex);
 
 	return error;
 }
diff --git a/kernel/reboot.c b/kernel/reboot.c
index e4ced883d8de..8fb44dec9ad7 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -294,7 +294,7 @@ void kernel_power_off(void)
 }
 EXPORT_SYMBOL_GPL(kernel_power_off);
 
-static DEFINE_MUTEX(reboot_mutex);
+DEFINE_MUTEX(system_transition_mutex);
 
 /*
  * Reboot system call: for obvious reasons only root may call it,
@@ -338,7 +338,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
 		cmd = LINUX_REBOOT_CMD_HALT;
 
-	mutex_lock(&reboot_mutex);
+	mutex_lock(&system_transition_mutex);
 	switch (cmd) {
 	case LINUX_REBOOT_CMD_RESTART:
 		kernel_restart(NULL);
@@ -389,7 +389,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 		ret = -EINVAL;
 		break;
 	}
-	mutex_unlock(&reboot_mutex);
+	mutex_unlock(&system_transition_mutex);
 	return ret;
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1521100f1e63..e47f9e467408 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -155,16 +155,17 @@ static inline void set_pcppage_migratetype(struct page *page, int migratetype)
  * The following functions are used by the suspend/hibernate code to temporarily
  * change gfp_allowed_mask in order to avoid using I/O during memory allocations
  * while devices are suspended.  To avoid races with the suspend/hibernate code,
- * they should always be called with pm_mutex held (gfp_allowed_mask also should
- * only be modified with pm_mutex held, unless the suspend/hibernate code is
- * guaranteed not to run in parallel with that modification).
+ * they should always be called with system_transition_mutex held
+ * (gfp_allowed_mask also should only be modified with system_transition_mutex
+ * held, unless the suspend/hibernate code is guaranteed not to run in parallel
+ * with that modification).
  */
 
 static gfp_t saved_gfp_mask;
 
 void pm_restore_gfp_mask(void)
 {
-	WARN_ON(!mutex_is_locked(&pm_mutex));
+	WARN_ON(!mutex_is_locked(&system_transition_mutex));
 	if (saved_gfp_mask) {
 		gfp_allowed_mask = saved_gfp_mask;
 		saved_gfp_mask = 0;
@@ -173,7 +174,7 @@ void pm_restore_gfp_mask(void)
 
 void pm_restrict_gfp_mask(void)
 {
-	WARN_ON(!mutex_is_locked(&pm_mutex));
+	WARN_ON(!mutex_is_locked(&system_transition_mutex));
 	WARN_ON(saved_gfp_mask);
 	saved_gfp_mask = gfp_allowed_mask;
 	gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
-- 
cgit v1.2.3


From 86dafed50e2b9b18198cc663775ece819ff113ba Mon Sep 17 00:00:00 2001
From: KarimAllah Ahmed <karahmed@amazon.de>
Date: Tue, 10 Jul 2018 11:27:19 +0200
Subject: KVM: Switch 'requests' to be 64-bit (explicitly)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switch 'requests' to be explicitly 64-bit and update BUILD_BUG_ON check to
use the size of "requests" instead of the hard-coded '32'.

That gives us a bit more room again for arch-specific requests as we
already ran out of space for x86 due to the hard-coded check.

The only exception here is ARM32 as it is still 32-bits.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim KrÄmÃ¡Å™ <rkrcmar@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5f138b40e433..2840ac74a3d7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -130,7 +130,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQUEST_ARCH_BASE     8
 
 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
-	BUILD_BUG_ON((unsigned)(nr) >= 32 - KVM_REQUEST_ARCH_BASE); \
+	BUILD_BUG_ON((unsigned)(nr) >= (FIELD_SIZEOF(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
 	(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
 })
 #define KVM_ARCH_REQ(nr)           KVM_ARCH_REQ_FLAGS(nr, 0)
@@ -224,7 +224,7 @@ struct kvm_vcpu {
 	int vcpu_id;
 	int srcu_idx;
 	int mode;
-	unsigned long requests;
+	u64 requests;
 	unsigned long guest_debug;
 
 	int pre_pcpu;
@@ -1131,7 +1131,7 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 	 * caller.  Paired with the smp_mb__after_atomic in kvm_check_request.
 	 */
 	smp_wmb();
-	set_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
+	set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
 }
 
 static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
@@ -1141,12 +1141,12 @@ static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
 
 static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu)
 {
-	return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
+	return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
 }
 
 static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu)
 {
-	clear_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
+	clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
 }
 
 static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From b08660e59dbdb600c55953787ed2265a0b510f77 Mon Sep 17 00:00:00 2001
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Date: Thu, 19 Jul 2018 08:40:17 +0000
Subject: KVM: x86: Add tlb remote flush callback in kvm_x86_ops.

This patch is to provide a way for platforms to register hv tlb remote
flush callback and this helps to optimize operation of tlb flush
among vcpus for nested virtualization case.

Signed-off-by: Lan Tianyu <Tianyu.Lan@microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 11 +++++++++++
 include/linux/kvm_host.h        |  7 +++++++
 virt/kvm/kvm_main.c             |  3 ++-
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 75ab578c3f6a..150937e64f63 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -985,6 +985,7 @@ struct kvm_x86_ops {
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
 
 	void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+	int  (*tlb_remote_flush)(struct kvm *kvm);
 
 	/*
 	 * Flush any TLB entries associated with the given GVA.
@@ -1145,6 +1146,16 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 	return kvm_x86_ops->vm_free(kvm);
 }
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
+static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+{
+	if (kvm_x86_ops->tlb_remote_flush &&
+	    !kvm_x86_ops->tlb_remote_flush(kvm))
+		return 0;
+	else
+		return -ENOTSUPP;
+}
+
 int kvm_mmu_module_init(void);
 void kvm_mmu_module_exit(void);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2840ac74a3d7..7c7362dd2faa 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -834,6 +834,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
+static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+{
+	return -ENOTSUPP;
+}
+#endif
+
 #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
 void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
 void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8f461e0ed382..74544d20bbf8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -273,7 +273,8 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
 	 * barrier here.
 	 */
-	if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+	if (!kvm_arch_flush_remote_tlb(kvm)
+	    || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
-- 
cgit v1.2.3


From eac7e072d7e99fad1b6e817c608b03c48205241e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 6 Aug 2018 10:22:35 -0700
Subject: Revert "ata: ahci_platform: allow disabling of hotplug to save power"

This reverts commit aece27a2f01be4bb7683790f69cd1bed3a0929a2.

Causes boot failure on some devices.

 http://lore.kernel.org/r/CA+G9fYuKW_jCFZPqG4tz=QY9ROfHO38KiCp9XTA+KaDOFVtcqQ@mail.gmail.com

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_platform.c    | 11 ++----
 drivers/ata/libahci_platform.c | 82 +++++++++---------------------------------
 include/linux/ahci_platform.h  |  2 --
 3 files changed, 19 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 757729376eda..99f9a895a459 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -68,13 +68,8 @@ disable_resources:
 	return rc;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static const struct dev_pm_ops ahci_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(ahci_platform_suspend, ahci_platform_resume)
-	SET_RUNTIME_PM_OPS(ahci_platform_runtime_suspend,
-			ahci_platform_runtime_resume, NULL)
-};
-#endif
+static SIMPLE_DEV_PM_OPS(ahci_pm_ops, ahci_platform_suspend,
+			 ahci_platform_resume);
 
 static const struct of_device_id ahci_of_match[] = {
 	{ .compatible = "generic-ahci", },
@@ -103,9 +98,7 @@ static struct platform_driver ahci_driver = {
 		.name = DRV_NAME,
 		.of_match_table = ahci_of_match,
 		.acpi_match_table = ahci_acpi_match,
-#ifdef CONFIG_PM_SLEEP
 		.pm = &ahci_pm_ops,
-#endif
 	},
 };
 module_platform_driver(ahci_driver);
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 2ceebaf2ed56..8fbb532b62dd 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -257,7 +257,7 @@ static void ahci_platform_put_resources(struct device *dev, void *res)
 	int c;
 
 	if (hpriv->got_runtime_pm) {
-		pm_runtime_allow(dev);
+		pm_runtime_put_sync(dev);
 		pm_runtime_disable(dev);
 	}
 
@@ -477,10 +477,8 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev)
 		if (rc == -EPROBE_DEFER)
 			goto err_out;
 	}
-
-	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
-	pm_runtime_forbid(dev);
+	pm_runtime_get_sync(dev);
 	hpriv->got_runtime_pm = true;
 
 	devres_remove_group(dev, NULL);
@@ -709,21 +707,6 @@ int ahci_platform_resume_host(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(ahci_platform_resume_host);
 
-static int _ahci_platform_suspend(struct device *dev)
-{
-	struct ata_host *host = dev_get_drvdata(dev);
-	struct ahci_host_priv *hpriv = host->private_data;
-	int rc;
-
-	rc = ahci_platform_suspend_host(dev);
-	if (rc)
-		return rc;
-
-	ahci_platform_disable_resources(hpriv);
-
-	return 0;
-}
-
 /**
  * ahci_platform_suspend - Suspend an ahci-platform device
  * @dev: the platform device to suspend
@@ -735,45 +718,20 @@ static int _ahci_platform_suspend(struct device *dev)
  * 0 on success otherwise a negative error code
  */
 int ahci_platform_suspend(struct device *dev)
-{
-	return _ahci_platform_suspend(dev);
-}
-EXPORT_SYMBOL_GPL(ahci_platform_suspend);
-
-/**
- * ahci_platform_runtime_suspend - Runtime suspend an ahci-platform device
- * @dev: the platform device to suspend
- *
- * This function suspends the host associated with the device, followed by
- * disabling all the resources of the device.
- *
- * RETURNS:
- * 0 on success otherwise a negative error code
- */
-int ahci_platform_runtime_suspend(struct device *dev)
-{
-	return _ahci_platform_suspend(dev);
-}
-EXPORT_SYMBOL_GPL(ahci_platform_runtime_suspend);
-
-static int _ahci_platform_resume(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
 
-	rc = ahci_platform_enable_resources(hpriv);
+	rc = ahci_platform_suspend_host(dev);
 	if (rc)
 		return rc;
 
-	rc = ahci_platform_resume_host(dev);
-	if (rc) {
-		ahci_platform_disable_resources(hpriv);
-		return rc;
-	}
+	ahci_platform_disable_resources(hpriv);
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ahci_platform_suspend);
 
 /**
  * ahci_platform_resume - Resume an ahci-platform device
@@ -787,37 +745,31 @@ static int _ahci_platform_resume(struct device *dev)
  */
 int ahci_platform_resume(struct device *dev)
 {
+	struct ata_host *host = dev_get_drvdata(dev);
+	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
 
-	rc = _ahci_platform_resume(dev);
+	rc = ahci_platform_enable_resources(hpriv);
 	if (rc)
 		return rc;
 
+	rc = ahci_platform_resume_host(dev);
+	if (rc)
+		goto disable_resources;
+
 	/* We resumed so update PM runtime state */
 	pm_runtime_disable(dev);
 	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
 
 	return 0;
-}
-EXPORT_SYMBOL_GPL(ahci_platform_resume);
 
-/**
- * ahci_platform_runtime_resume - Runtime resume an ahci-platform device
- * @dev: the platform device to resume
- *
- * This function enables all the resources of the device followed by
- * resuming the host associated with the device.
- *
- * RETURNS:
- * 0 on success otherwise a negative error code
- */
-int ahci_platform_runtime_resume(struct device *dev)
-{
-	return _ahci_platform_resume(dev);
-}
-EXPORT_SYMBOL_GPL(ahci_platform_runtime_resume);
+disable_resources:
+	ahci_platform_disable_resources(hpriv);
 
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ahci_platform_resume);
 #endif
 
 MODULE_DESCRIPTION("AHCI SATA platform library");
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 6396e6982103..1b0a17b22cd3 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -42,7 +42,5 @@ int ahci_platform_suspend_host(struct device *dev);
 int ahci_platform_resume_host(struct device *dev);
 int ahci_platform_suspend(struct device *dev);
 int ahci_platform_resume(struct device *dev);
-int ahci_platform_runtime_suspend(struct device *dev);
-int ahci_platform_runtime_resume(struct device *dev);
 
 #endif /* _AHCI_PLATFORM_H */
-- 
cgit v1.2.3


From bc2d8d262cba5736332cbc866acb11b1c5748aa9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 7 Aug 2018 08:19:57 +0200
Subject: cpu/hotplug: Fix SMT supported evaluation

Josh reported that the late SMT evaluation in cpu_smt_state_init() sets
cpu_smt_control to CPU_SMT_NOT_SUPPORTED in case that 'nosmt' was supplied
on the kernel command line as it cannot differentiate between SMT disabled
by BIOS and SMT soft disable via 'nosmt'. That wreckages the state and
makes the sysfs interface unusable.

Rework this so that during bringup of the non boot CPUs the availability of
SMT is determined in cpu_smt_allowed(). If a newly booted CPU is not a
'primary' thread then set the local cpu_smt_available marker and evaluate
this explicitely right after the initial SMP bringup has finished.

SMT evaulation on x86 is a trainwreck as the firmware has all the
information _before_ booting the kernel, but there is no interface to query
it.

Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS")
Reported-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/bugs.c |  2 +-
 include/linux/cpu.h        |  2 ++
 kernel/cpu.c               | 41 ++++++++++++++++++++++++++++-------------
 kernel/smp.c               |  2 ++
 4 files changed, 33 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 86f4549b7c9f..be79b7599dda 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -63,7 +63,7 @@ void __init check_bugs(void)
 	 * identify_boot_cpu() initialized SMT support information, let the
 	 * core code know.
 	 */
-	cpu_smt_check_topology();
+	cpu_smt_check_topology_early();
 
 	if (!IS_ENABLED(CONFIG_SMP)) {
 		pr_info("CPU: ");
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index ccdf3a67ce56..b216bd5bfd20 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -178,10 +178,12 @@ enum cpuhp_smt_control {
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
+extern void cpu_smt_check_topology_early(void);
 extern void cpu_smt_check_topology(void);
 #else
 # define cpu_smt_control		(CPU_SMT_ENABLED)
 static inline void cpu_smt_disable(bool force) { }
+static inline void cpu_smt_check_topology_early(void) { }
 static inline void cpu_smt_check_topology(void) { }
 #endif
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2bc3d2f5b2a5..9d2512dd263c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -347,6 +347,8 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 EXPORT_SYMBOL_GPL(cpu_smt_control);
 
+static bool cpu_smt_available __read_mostly;
+
 void __init cpu_smt_disable(bool force)
 {
 	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
@@ -363,14 +365,28 @@ void __init cpu_smt_disable(bool force)
 
 /*
  * The decision whether SMT is supported can only be done after the full
- * CPU identification. Called from architecture code.
+ * CPU identification. Called from architecture code before non boot CPUs
+ * are brought up.
  */
-void __init cpu_smt_check_topology(void)
+void __init cpu_smt_check_topology_early(void)
 {
 	if (!topology_smt_supported())
 		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 }
 
+/*
+ * If SMT was disabled by BIOS, detect it here, after the CPUs have been
+ * brought online. This ensures the smt/l1tf sysfs entries are consistent
+ * with reality. cpu_smt_available is set to true during the bringup of non
+ * boot CPUs when a SMT sibling is detected. Note, this may overwrite
+ * cpu_smt_control's previous setting.
+ */
+void __init cpu_smt_check_topology(void)
+{
+	if (!cpu_smt_available)
+		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+}
+
 static int __init smt_cmdline_disable(char *str)
 {
 	cpu_smt_disable(str && !strcmp(str, "force"));
@@ -380,10 +396,18 @@ early_param("nosmt", smt_cmdline_disable);
 
 static inline bool cpu_smt_allowed(unsigned int cpu)
 {
-	if (cpu_smt_control == CPU_SMT_ENABLED)
+	if (topology_is_primary_thread(cpu))
 		return true;
 
-	if (topology_is_primary_thread(cpu))
+	/*
+	 * If the CPU is not a 'primary' thread and the booted_once bit is
+	 * set then the processor has SMT support. Store this information
+	 * for the late check of SMT support in cpu_smt_check_topology().
+	 */
+	if (per_cpu(cpuhp_state, cpu).booted_once)
+		cpu_smt_available = true;
+
+	if (cpu_smt_control == CPU_SMT_ENABLED)
 		return true;
 
 	/*
@@ -2125,15 +2149,6 @@ static const struct attribute_group cpuhp_smt_attr_group = {
 
 static int __init cpu_smt_state_init(void)
 {
-	/*
-	 * If SMT was disabled by BIOS, detect it here, after the CPUs have
-	 * been brought online.  This ensures the smt/l1tf sysfs entries are
-	 * consistent with reality.  Note this may overwrite cpu_smt_control's
-	 * previous setting.
-	 */
-	if (topology_max_smt_threads() == 1)
-		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-
 	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
 				  &cpuhp_smt_attr_group);
 }
diff --git a/kernel/smp.c b/kernel/smp.c
index 084c8b3a2681..d86eec5f51c1 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -584,6 +584,8 @@ void __init smp_init(void)
 		num_nodes, (num_nodes > 1 ? "s" : ""),
 		num_cpus,  (num_cpus  > 1 ? "s" : ""));
 
+	/* Final decision about SMT support */
+	cpu_smt_check_topology();
 	/* Any cleanup work */
 	smp_cpus_done(setup_max_cpus);
 }
-- 
cgit v1.2.3


From 4231aba000f5a4583dd9f67057aadb68c3eca99d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 27 Jul 2018 21:48:17 +1000
Subject: powerpc/64s: Fix page table fragment refcount race vs speculative
 references

The page table fragment allocator uses the main page refcount racily
with respect to speculative references. A customer observed a BUG due
to page table page refcount underflow in the fragment allocator. This
can be caused by the fragment allocator set_page_count stomping on a
speculative reference, and then the speculative failure handler
decrements the new reference, and the underflow eventually pops when
the page tables are freed.

Fix this by using a dedicated field in the struct page for the page
table fragment allocator.

Fixes: 5c1f6ee9a31c ("powerpc: Reduce PTE table memory wastage")
Cc: stable@vger.kernel.org # v3.10+
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mmu_context_book3s64.c |  8 ++++----
 arch/powerpc/mm/pgtable-book3s64.c     | 17 +++++++++++------
 include/linux/mm_types.h               |  5 ++++-
 3 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 8b24168ea8c4..4a892d894a0f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -200,9 +200,9 @@ static void pte_frag_destroy(void *pte_frag)
 	/* drop all the pending references */
 	count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
 	/* We allow PTE_FRAG_NR fragments from a PTE page */
-	if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
+	if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
 		pgtable_page_dtor(page);
-		free_unref_page(page);
+		__free_page(page);
 	}
 }
 
@@ -215,9 +215,9 @@ static void pmd_frag_destroy(void *pmd_frag)
 	/* drop all the pending references */
 	count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
 	/* We allow PTE_FRAG_NR fragments from a PTE page */
-	if (page_ref_sub_and_test(page, PMD_FRAG_NR - count)) {
+	if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
 		pgtable_pmd_page_dtor(page);
-		free_unref_page(page);
+		__free_page(page);
 	}
 }
 
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 4afbfbb64bfd..78d0b3d5ebad 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -270,6 +270,8 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
 		return NULL;
 	}
 
+	atomic_set(&page->pt_frag_refcount, 1);
+
 	ret = page_address(page);
 	/*
 	 * if we support only one fragment just return the
@@ -285,7 +287,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
 	 * count.
 	 */
 	if (likely(!mm->context.pmd_frag)) {
-		set_page_count(page, PMD_FRAG_NR);
+		atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
 		mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -308,9 +310,10 @@ void pmd_fragment_free(unsigned long *pmd)
 {
 	struct page *page = virt_to_page(pmd);
 
-	if (put_page_testzero(page)) {
+	BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+	if (atomic_dec_and_test(&page->pt_frag_refcount)) {
 		pgtable_pmd_page_dtor(page);
-		free_unref_page(page);
+		__free_page(page);
 	}
 }
 
@@ -352,6 +355,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 			return NULL;
 	}
 
+	atomic_set(&page->pt_frag_refcount, 1);
 
 	ret = page_address(page);
 	/*
@@ -367,7 +371,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 	 * count.
 	 */
 	if (likely(!mm->context.pte_frag)) {
-		set_page_count(page, PTE_FRAG_NR);
+		atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
 		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -390,10 +394,11 @@ void pte_fragment_free(unsigned long *table, int kernel)
 {
 	struct page *page = virt_to_page(table);
 
-	if (put_page_testzero(page)) {
+	BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+	if (atomic_dec_and_test(&page->pt_frag_refcount)) {
 		if (!kernel)
 			pgtable_page_dtor(page);
-		free_unref_page(page);
+		__free_page(page);
 	}
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 99ce070e7dcb..22651e124071 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -139,7 +139,10 @@ struct page {
 			unsigned long _pt_pad_1;	/* compound_head */
 			pgtable_t pmd_huge_pte; /* protected by page->ptl */
 			unsigned long _pt_pad_2;	/* mapping */
-			struct mm_struct *pt_mm;	/* x86 pgds only */
+			union {
+				struct mm_struct *pt_mm; /* x86 pgds only */
+				atomic_t pt_frag_refcount; /* powerpc */
+			};
 #if ALLOC_SPLIT_PTLOCKS
 			spinlock_t *ptl;
 #else
-- 
cgit v1.2.3


From 6fdecfe32f903d9f5f35ee4464fd32ede470dcad Mon Sep 17 00:00:00 2001
From: Arun Parameswaran <arun.parameswaran@broadcom.com>
Date: Tue, 7 Aug 2018 10:02:44 -0700
Subject: net: phy: Add support for Broadcom Omega internal Combo GPHY

Add support for the Broadcom Omega SoC internal Combo Ethernet
GPHY to the bcm7xxx phy driver.

Signed-off-by: Arun Parameswaran <arun.parameswaran@broadcom.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm7xxx.c | 2 ++
 include/linux/brcmphy.h   | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 01d2ff2f6241..b2b6307d64a4 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -229,6 +229,7 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev)
 	phy_read(phydev, MII_BMSR);
 
 	switch (rev) {
+	case 0xa0:
 	case 0xb0:
 		ret = bcm7xxx_28nm_b0_afe_config_init(phydev);
 		break;
@@ -659,6 +660,7 @@ static struct phy_driver bcm7xxx_driver[] = {
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"),
+	BCM7XXX_28NM_GPHY(PHY_ID_BCM_OMEGA, "Broadcom Omega Combo GPHY"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7346, "Broadcom BCM7346"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7362, "Broadcom BCM7362"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7425, "Broadcom BCM7425"),
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index daa9234a9baf..949e9af8d9d6 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -45,6 +45,7 @@
 #define PHY_ID_BCM7445			0x600d8510
 
 #define PHY_ID_BCM_CYGNUS		0xae025200
+#define PHY_ID_BCM_OMEGA		0xae025100
 
 #define PHY_BCM_OUI_MASK		0xfffffc00
 #define PHY_BCM_OUI_1			0x00206000
-- 
cgit v1.2.3


From d88e61faad526a5850e9330c846641b91cf971e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:36:26 +0200
Subject: iommu: Remove the ->map_sg indirection

All iommu drivers use the default_iommu_map_sg implementation, and there
is no good reason to ever override it.  Just expose it as iommu_map_sg
directly and remove the indirection, specially in our post-spectre world
where indirect calls are horribly expensive.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c      |  1 -
 drivers/iommu/arm-smmu-v3.c    |  1 -
 drivers/iommu/arm-smmu.c       |  1 -
 drivers/iommu/exynos-iommu.c   |  1 -
 drivers/iommu/intel-iommu.c    |  1 -
 drivers/iommu/iommu.c          |  6 +++---
 drivers/iommu/ipmmu-vmsa.c     |  1 -
 drivers/iommu/msm_iommu.c      |  1 -
 drivers/iommu/mtk_iommu.c      |  1 -
 drivers/iommu/mtk_iommu_v1.c   |  1 -
 drivers/iommu/omap-iommu.c     |  1 -
 drivers/iommu/qcom_iommu.c     |  1 -
 drivers/iommu/rockchip-iommu.c |  1 -
 drivers/iommu/tegra-gart.c     |  1 -
 drivers/iommu/tegra-smmu.c     |  1 -
 include/linux/iommu.h          | 16 ++--------------
 16 files changed, 5 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 596b95c50051..a23c6a4014a5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3192,7 +3192,6 @@ const struct iommu_ops amd_iommu_ops = {
 	.detach_dev = amd_iommu_detach_device,
 	.map = amd_iommu_map,
 	.unmap = amd_iommu_unmap,
-	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = amd_iommu_iova_to_phys,
 	.add_device = amd_iommu_add_device,
 	.remove_device = amd_iommu_remove_device,
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 1d647104bccc..f1dc294f8e08 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1997,7 +1997,6 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
-	.map_sg			= default_iommu_map_sg,
 	.flush_iotlb_all	= arm_smmu_iotlb_sync,
 	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f7a96bcf94a6..644fd7ec8ac7 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1562,7 +1562,6 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
-	.map_sg			= default_iommu_map_sg,
 	.flush_iotlb_all	= arm_smmu_iotlb_sync,
 	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 85879cfec52f..19e55cf6a9dd 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1332,7 +1332,6 @@ static const struct iommu_ops exynos_iommu_ops = {
 	.detach_dev = exynos_iommu_detach_device,
 	.map = exynos_iommu_map,
 	.unmap = exynos_iommu_unmap,
-	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = exynos_iommu_iova_to_phys,
 	.device_group = generic_device_group,
 	.add_device = exynos_iommu_add_device,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 14e4b3722428..afb6c872f203 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5308,7 +5308,6 @@ const struct iommu_ops intel_iommu_ops = {
 	.detach_dev		= intel_iommu_detach_device,
 	.map			= intel_iommu_map,
 	.unmap			= intel_iommu_unmap,
-	.map_sg			= default_iommu_map_sg,
 	.iova_to_phys		= intel_iommu_iova_to_phys,
 	.add_device		= intel_iommu_add_device,
 	.remove_device		= intel_iommu_remove_device,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f3698006cb53..8c15c5980299 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1673,8 +1673,8 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
 }
 EXPORT_SYMBOL_GPL(iommu_unmap_fast);
 
-size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
-			 struct scatterlist *sg, unsigned int nents, int prot)
+size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+		    struct scatterlist *sg, unsigned int nents, int prot)
 {
 	struct scatterlist *s;
 	size_t mapped = 0;
@@ -1714,7 +1714,7 @@ out_err:
 	return 0;
 
 }
-EXPORT_SYMBOL_GPL(default_iommu_map_sg);
+EXPORT_SYMBOL_GPL(iommu_map_sg);
 
 int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
 			       phys_addr_t paddr, u64 size, int prot)
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 40ae6e87cb88..c9ce27cfa7e8 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -889,7 +889,6 @@ static const struct iommu_ops ipmmu_ops = {
 	.unmap = ipmmu_unmap,
 	.flush_iotlb_all = ipmmu_iotlb_sync,
 	.iotlb_sync = ipmmu_iotlb_sync,
-	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = ipmmu_iova_to_phys,
 	.add_device = ipmmu_add_device,
 	.remove_device = ipmmu_remove_device,
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 0d3350463a3f..cdf6ab22ede9 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -708,7 +708,6 @@ static struct iommu_ops msm_iommu_ops = {
 	.detach_dev = msm_iommu_detach_dev,
 	.map = msm_iommu_map,
 	.unmap = msm_iommu_unmap,
-	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = msm_iommu_iova_to_phys,
 	.add_device = msm_iommu_add_device,
 	.remove_device = msm_iommu_remove_device,
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index f2832a10fcea..f9f69f7111a9 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -495,7 +495,6 @@ static struct iommu_ops mtk_iommu_ops = {
 	.detach_dev	= mtk_iommu_detach_device,
 	.map		= mtk_iommu_map,
 	.unmap		= mtk_iommu_unmap,
-	.map_sg		= default_iommu_map_sg,
 	.flush_iotlb_all = mtk_iommu_iotlb_sync,
 	.iotlb_sync	= mtk_iommu_iotlb_sync,
 	.iova_to_phys	= mtk_iommu_iova_to_phys,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index a7c2a973784f..676c029494e4 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -531,7 +531,6 @@ static struct iommu_ops mtk_iommu_ops = {
 	.detach_dev	= mtk_iommu_detach_device,
 	.map		= mtk_iommu_map,
 	.unmap		= mtk_iommu_unmap,
-	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= mtk_iommu_iova_to_phys,
 	.add_device	= mtk_iommu_add_device,
 	.remove_device	= mtk_iommu_remove_device,
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index af4a8e7fcd27..86d64ff507fa 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1548,7 +1548,6 @@ static const struct iommu_ops omap_iommu_ops = {
 	.detach_dev	= omap_iommu_detach_dev,
 	.map		= omap_iommu_map,
 	.unmap		= omap_iommu_unmap,
-	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= omap_iommu_iova_to_phys,
 	.add_device	= omap_iommu_add_device,
 	.remove_device	= omap_iommu_remove_device,
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index fe88a4880d3a..6e914b3a2b4b 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -590,7 +590,6 @@ static const struct iommu_ops qcom_iommu_ops = {
 	.detach_dev	= qcom_iommu_detach_dev,
 	.map		= qcom_iommu_map,
 	.unmap		= qcom_iommu_unmap,
-	.map_sg		= default_iommu_map_sg,
 	.flush_iotlb_all = qcom_iommu_iotlb_sync,
 	.iotlb_sync	= qcom_iommu_iotlb_sync,
 	.iova_to_phys	= qcom_iommu_iova_to_phys,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 054cd2c8e9c8..90625cdd3b66 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1110,7 +1110,6 @@ static const struct iommu_ops rk_iommu_ops = {
 	.detach_dev = rk_iommu_detach_device,
 	.map = rk_iommu_map,
 	.unmap = rk_iommu_unmap,
-	.map_sg = default_iommu_map_sg,
 	.add_device = rk_iommu_add_device,
 	.remove_device = rk_iommu_remove_device,
 	.iova_to_phys = rk_iommu_iova_to_phys,
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index a004f6da35f2..7b1361d57a17 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -377,7 +377,6 @@ static const struct iommu_ops gart_iommu_ops = {
 	.remove_device	= gart_iommu_remove_device,
 	.device_group	= generic_device_group,
 	.map		= gart_iommu_map,
-	.map_sg		= default_iommu_map_sg,
 	.unmap		= gart_iommu_unmap,
 	.iova_to_phys	= gart_iommu_iova_to_phys,
 	.pgsize_bitmap	= GART_IOMMU_PGSIZES,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 44d40bc771b5..0d03341317c4 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -876,7 +876,6 @@ static const struct iommu_ops tegra_smmu_ops = {
 	.device_group = tegra_smmu_device_group,
 	.map = tegra_smmu_map,
 	.unmap = tegra_smmu_unmap,
-	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = tegra_smmu_iova_to_phys,
 	.of_xlate = tegra_smmu_of_xlate,
 	.pgsize_bitmap = SZ_4K,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7447b0b0579a..87994c265bf5 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -166,8 +166,6 @@ struct iommu_resv_region {
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
- * @map_sg: map a scatter-gather list of physically contiguous memory chunks
- *          to an iommu domain
  * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain
  * @tlb_range_add: Add a given iova range to the flush queue for this domain
  * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -201,8 +199,6 @@ struct iommu_ops {
 		   phys_addr_t paddr, size_t size, int prot);
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 		     size_t size);
-	size_t (*map_sg)(struct iommu_domain *domain, unsigned long iova,
-			 struct scatterlist *sg, unsigned int nents, int prot);
 	void (*flush_iotlb_all)(struct iommu_domain *domain);
 	void (*iotlb_range_add)(struct iommu_domain *domain,
 				unsigned long iova, size_t size);
@@ -303,9 +299,8 @@ extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 			  size_t size);
 extern size_t iommu_unmap_fast(struct iommu_domain *domain,
 			       unsigned long iova, size_t size);
-extern size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
-				struct scatterlist *sg,unsigned int nents,
-				int prot);
+extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+			   struct scatterlist *sg,unsigned int nents, int prot);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
@@ -378,13 +373,6 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain)
 		domain->ops->iotlb_sync(domain);
 }
 
-static inline size_t iommu_map_sg(struct iommu_domain *domain,
-				  unsigned long iova, struct scatterlist *sg,
-				  unsigned int nents, int prot)
-{
-	return domain->ops->map_sg(domain, iova, sg, nents, prot);
-}
-
 /* PCI device grouping function */
 extern struct iommu_group *pci_device_group(struct device *dev);
 /* Generic device grouping function */
-- 
cgit v1.2.3


From 8b92d0e3d400390660a26ef7f475524700fb86cf Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.com>
Date: Wed, 8 Aug 2018 08:35:29 +0200
Subject: nvme.h: fixup ANA group descriptor format

ANA Phase 3 draft had the 'reserved' field in the group descriptor
format set to '23:17' (so that the first namespace identifier started
at byte 24), but that got move with the approved TP to '31:17'
(so that the first namespace identifier started at byte 32).

Signed-off-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 64c9175723de..a661861e9d56 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -446,7 +446,7 @@ struct nvme_ana_group_desc {
 	__le32	nnsids;
 	__le64	chgcnt;
 	__u8	state;
-	__u8	rsvd17[7];
+	__u8	rsvd17[15];
 	__le32	nsids[];
 };
 
-- 
cgit v1.2.3


From 93045d5942da60801e71764597d448cf37a798c1 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Tue, 7 Aug 2018 23:01:05 -0700
Subject: nvme.h: add support for ns write protect definitions

Add various definitions from NVMe 1.3 TP 4005.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index a661861e9d56..68e91ef5494c 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -259,7 +259,7 @@ struct nvme_id_ctrl {
 	__le16			awun;
 	__le16			awupf;
 	__u8			nvscc;
-	__u8			rsvd531;
+	__u8			nwpc;
 	__le16			acwu;
 	__u8			rsvd534[2];
 	__le32			sgls;
@@ -320,7 +320,9 @@ struct nvme_id_ns {
 	__u8			nvmcap[16];
 	__u8			rsvd64[28];
 	__le32			anagrpid;
-	__u8			rsvd96[8];
+	__u8			rsvd96[3];
+	__u8			nsattr;
+	__u8			rsvd100[4];
 	__u8			nguid[16];
 	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
@@ -794,6 +796,7 @@ enum {
 	NVME_FEAT_HOST_ID	= 0x81,
 	NVME_FEAT_RESV_MASK	= 0x82,
 	NVME_FEAT_RESV_PERSIST	= 0x83,
+	NVME_FEAT_WRITE_PROTECT	= 0x84,
 	NVME_LOG_ERROR		= 0x01,
 	NVME_LOG_SMART		= 0x02,
 	NVME_LOG_FW_SLOT	= 0x03,
@@ -807,6 +810,14 @@ enum {
 	NVME_FWACT_ACTV		= (2 << 3),
 };
 
+/* NVMe Namespace Write Protect State */
+enum {
+	NVME_NS_NO_WRITE_PROTECT = 0,
+	NVME_NS_WRITE_PROTECT,
+	NVME_NS_WRITE_PROTECT_POWER_CYCLE,
+	NVME_NS_WRITE_PROTECT_PERMANENT,
+};
+
 #define NVME_MAX_CHANGED_NAMESPACES	1024
 
 struct nvme_identify {
@@ -1153,6 +1164,8 @@ enum {
 	NVME_SC_SGL_INVALID_OFFSET	= 0x16,
 	NVME_SC_SGL_INVALID_SUBTYPE	= 0x17,
 
+	NVME_SC_NS_WRITE_PROTECTED	= 0x20,
+
 	NVME_SC_LBA_RANGE		= 0x80,
 	NVME_SC_CAP_EXCEEDED		= 0x81,
 	NVME_SC_NS_NOT_READY		= 0x82,
-- 
cgit v1.2.3


From 0c66847793d1982d1083dc6f7adad60fa265ce9c Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 1 Aug 2018 14:25:39 -0700
Subject: overflow.h: Add arithmetic shift helper

Add shift_overflow() helper to assist driver authors in ensuring that
shift operations don't cause overflows or other odd conditions.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
[kees: tweaked comments and commit log, dropped unneeded assignment]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/overflow.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 8712ff70995f..40b48e2133cb 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -202,6 +202,37 @@
 
 #endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
 
+/** check_shl_overflow() - Calculate a left-shifted value and check overflow
+ *
+ * @a: Value to be shifted
+ * @s: How many bits left to shift
+ * @d: Pointer to where to store the result
+ *
+ * Computes *@d = (@a << @s)
+ *
+ * Returns true if '*d' cannot hold the result or when 'a << s' doesn't
+ * make sense. Example conditions:
+ * - 'a << s' causes bits to be lost when stored in *d.
+ * - 's' is garbage (e.g. negative) or so large that the result of
+ *   'a << s' is guaranteed to be 0.
+ * - 'a' is negative.
+ * - 'a << s' sets the sign bit, if any, in '*d'.
+ *
+ * '*d' will hold the results of the attempted shift, but is not
+ * considered "safe for use" if false is returned.
+ */
+#define check_shl_overflow(a, s, d) ({					\
+	typeof(a) _a = a;						\
+	typeof(s) _s = s;						\
+	typeof(d) _d = d;						\
+	u64 _a_full = _a;						\
+	unsigned int _to_shift =					\
+		_s >= 0 && _s < 8 * sizeof(*d) ? _s : 0;		\
+	*_d = (_a_full << _to_shift);					\
+	(_to_shift != _s || *_d < 0 || _a < 0 ||			\
+		(*_d >> _to_shift) != _a);				\
+})
+
 /**
  * array_size() - Calculate size of 2-dimensional array.
  *
-- 
cgit v1.2.3


From 212dfd909ea8b630e5d6fa4d25aeec9c4b4b14a5 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Tue, 7 Aug 2018 22:06:52 +0200
Subject: netfilter: nfnetlink_osf: add missing enum in nfnetlink_osf uapi
 header

xt_osf_window_size_options was originally part of
include/uapi/linux/netfilter/xt_osf.h, restore it.

Fixes: bfb15f2a95cb ("netfilter: extract Passive OS fingerprint infrastructure from xt_osf")
Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink_osf.h      | 12 ------------
 include/uapi/linux/netfilter/nfnetlink_osf.h | 12 ++++++++++++
 include/uapi/linux/netfilter/xt_osf.h        |  1 +
 3 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h
index a7311bc03d3a..ecf7dab81e9e 100644
--- a/include/linux/netfilter/nfnetlink_osf.h
+++ b/include/linux/netfilter/nfnetlink_osf.h
@@ -4,18 +4,6 @@
 
 #include <uapi/linux/netfilter/nfnetlink_osf.h>
 
-/* Initial window size option state machine: multiple of mss, mtu or
- * plain numeric value. Can also be made as plain numeric value which
- * is not a multiple of specified value.
- */
-enum nf_osf_window_size_options {
-	OSF_WSS_PLAIN   = 0,
-	OSF_WSS_MSS,
-	OSF_WSS_MTU,
-	OSF_WSS_MODULO,
-	OSF_WSS_MAX,
-};
-
 enum osf_fmatch_states {
 	/* Packet does not match the fingerprint */
 	FMATCH_WRONG = 0,
diff --git a/include/uapi/linux/netfilter/nfnetlink_osf.h b/include/uapi/linux/netfilter/nfnetlink_osf.h
index 3b93fbb9fc24..76a3527df5dd 100644
--- a/include/uapi/linux/netfilter/nfnetlink_osf.h
+++ b/include/uapi/linux/netfilter/nfnetlink_osf.h
@@ -88,6 +88,18 @@ enum iana_options {
 	OSFOPT_EMPTY = 255,
 };
 
+/* Initial window size option state machine: multiple of mss, mtu or
+ * plain numeric value. Can also be made as plain numeric value which
+ * is not a multiple of specified value.
+ */
+enum nf_osf_window_size_options {
+	OSF_WSS_PLAIN	= 0,
+	OSF_WSS_MSS,
+	OSF_WSS_MTU,
+	OSF_WSS_MODULO,
+	OSF_WSS_MAX,
+};
+
 enum nf_osf_attr_type {
 	OSF_ATTR_UNSPEC,
 	OSF_ATTR_FINGER,
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index c56c59605c2b..24102b5286ec 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -46,6 +46,7 @@
 #define xt_osf_finger		nf_osf_finger
 #define xt_osf_nlmsg		nf_osf_nlmsg
 
+#define xt_osf_window_size_options	nf_osf_window_size_options
 #define xt_osf_attr_type	nf_osf_attr_type
 #define xt_osf_msg_types	nf_osf_msg_types
 
-- 
cgit v1.2.3


From 4717be73c2843a3d6d8546872177a19358f6b7b5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 25 Jul 2018 17:39:25 +0300
Subject: i2c: core: Parse SDA hold time from firmware

There are two drivers already using the SDA hold time setting.
It might be more in the future, thus, make I2C core to parse the setting
for us if provided by firmware.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 2 ++
 include/linux/i2c.h         | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index a26b3e9cc441..043c4aadaa44 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1576,6 +1576,8 @@ void i2c_parse_fw_timings(struct device *dev, struct i2c_timings *t, bool use_de
 	ret = device_property_read_u32(dev, "i2c-sda-falling-time-ns", &t->sda_fall_ns);
 	if (ret && use_defaults)
 		t->sda_fall_ns = t->scl_fall_ns;
+
+	device_property_read_u32(dev, "i2c-sda-hold-time-ns", &t->sda_hold_ns);
 }
 EXPORT_SYMBOL_GPL(i2c_parse_fw_timings);
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 2a98d0886d2e..36f357ecdf67 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -564,6 +564,7 @@ struct i2c_lock_operations {
  * @scl_fall_ns: time SCL signal takes to fall in ns; t(f) in the I2C specification
  * @scl_int_delay_ns: time IP core additionally needs to setup SCL in ns
  * @sda_fall_ns: time SDA signal takes to fall in ns; t(f) in the I2C specification
+ * @sda_hold_ns: time IP core additionally needs to hold SDA in ns
  */
 struct i2c_timings {
 	u32 bus_freq_hz;
@@ -571,6 +572,7 @@ struct i2c_timings {
 	u32 scl_fall_ns;
 	u32 scl_int_delay_ns;
 	u32 sda_fall_ns;
+	u32 sda_hold_ns;
 };
 
 /**
-- 
cgit v1.2.3


From 342ac8448f1fb213908656ae5581d0f37a5954e8 Mon Sep 17 00:00:00 2001
From: Denis Drozdov <denisd@mellanox.com>
Date: Wed, 8 Aug 2018 16:23:48 -0700
Subject: net/mlx5: Use max_num_eqs for calculation of required MSIX vectors

New firmware has defined new HCA capability field called "max_num_eqs",
that is the number of available EQs after subtracting reserved FW EQs.

Before this capability the FW reported the EQ number in "log_max_eqs",
the reported value also contained FW reserved EQs, but the driver might
be failing to load on 320 cpus systems due to the fact that FW
reserved EQs were not available to the driver.

Now the driver has to obtain max_num_eqs value from new FW to get real
number of EQs available.

Signed-off-by: Denis Drozdov <denisd@mellanox.com>
Reviewed-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 +++-
 include/linux/mlx5/mlx5_ifc.h                  | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 03b9c6733eed..cf3e4a659052 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -323,7 +323,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 {
 	struct mlx5_priv *priv = &dev->priv;
 	struct mlx5_eq_table *table = &priv->eq_table;
-	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
+	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+		      MLX5_CAP_GEN(dev, max_num_eqs) :
+		      1 << MLX5_CAP_GEN(dev, log_max_eq);
 	int nvec;
 	int err;
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 60c2308fe062..63e1ce4c4826 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1133,7 +1133,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         general_obj_types[0x40];
 
-	u8         reserved_at_440[0x40];
+	u8         reserved_at_440[0x20];
+
+	u8         reserved_at_460[0x10];
+	u8         max_num_eqs[0x10];
 
 	u8         reserved_at_480[0x3];
 	u8         log_max_l2_table[0x5];
-- 
cgit v1.2.3


From cc9c82a8668cf98748bfbaa83c5c092d5115c25b Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Wed, 8 Aug 2018 16:23:49 -0700
Subject: net/mlx5: Rename modify/query_vport state related enums

Modify and query vport state commands share the same admin_state and
op_mod values, rename the enums to fit them both.

In addition, remove the esw prefix from the admin state enum as this
also applied for vnic.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c     | 12 ++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c      |  8 ++++----
 drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c     | 10 +++++-----
 include/linux/mlx5/device.h                           |  6 +++---
 include/linux/mlx5/mlx5_ifc.h                         |  4 ++--
 6 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a2fb21ca5767..2731ba2d8049 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -228,7 +228,7 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv)
 	u8 port_state;
 
 	port_state = mlx5_query_vport_state(mdev,
-					    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT,
+					    MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
 					    0);
 
 	if (port_state == VPORT_STATE_UP) {
@@ -3916,9 +3916,9 @@ static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
 static int mlx5_vport_link2ifla(u8 esw_link)
 {
 	switch (esw_link) {
-	case MLX5_ESW_VPORT_ADMIN_STATE_DOWN:
+	case MLX5_VPORT_ADMIN_STATE_DOWN:
 		return IFLA_VF_LINK_STATE_DISABLE;
-	case MLX5_ESW_VPORT_ADMIN_STATE_UP:
+	case MLX5_VPORT_ADMIN_STATE_UP:
 		return IFLA_VF_LINK_STATE_ENABLE;
 	}
 	return IFLA_VF_LINK_STATE_AUTO;
@@ -3928,11 +3928,11 @@ static int mlx5_ifla_link2vport(u8 ifla_link)
 {
 	switch (ifla_link) {
 	case IFLA_VF_LINK_STATE_DISABLE:
-		return MLX5_ESW_VPORT_ADMIN_STATE_DOWN;
+		return MLX5_VPORT_ADMIN_STATE_DOWN;
 	case IFLA_VF_LINK_STATE_ENABLE:
-		return MLX5_ESW_VPORT_ADMIN_STATE_UP;
+		return MLX5_VPORT_ADMIN_STATE_UP;
 	}
-	return MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
+	return MLX5_VPORT_ADMIN_STATE_AUTO;
 }
 
 static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 8e3c5b4b90ab..c9cc9747d21d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -698,8 +698,8 @@ static int mlx5e_rep_open(struct net_device *dev)
 		goto unlock;
 
 	if (!mlx5_modify_vport_admin_state(priv->mdev,
-					   MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-					   rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP))
+					   MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+					   rep->vport, MLX5_VPORT_ADMIN_STATE_UP))
 		netif_carrier_on(dev);
 
 unlock:
@@ -716,8 +716,8 @@ static int mlx5e_rep_close(struct net_device *dev)
 
 	mutex_lock(&priv->state_lock);
 	mlx5_modify_vport_admin_state(priv->mdev,
-				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-				      rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
+				      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+				      rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN);
 	ret = mlx5e_close_locked(dev);
 	mutex_unlock(&priv->state_lock);
 	return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 4d316cc9b008..35ded91203f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -74,7 +74,7 @@ static int mlx5e_test_link_state(struct mlx5e_priv *priv)
 	if (!netif_carrier_ok(priv->netdev))
 		return 1;
 
-	port_state = mlx5_query_vport_state(priv->mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
+	port_state = mlx5_query_vport_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0);
 	return port_state == VPORT_STATE_UP ? 0 : 1;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 40dba9e8af92..cd0760a8d45a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1469,7 +1469,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
 		return;
 
 	mlx5_modify_vport_admin_state(esw->dev,
-				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+				      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
 				      vport_num,
 				      vport->info.link_state);
 	mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
@@ -1582,9 +1582,9 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
 	esw_vport_disable_qos(esw, vport_num);
 	if (vport_num && esw->mode == SRIOV_LEGACY) {
 		mlx5_modify_vport_admin_state(esw->dev,
-					      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+					      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
 					      vport_num,
-					      MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
+					      MLX5_VPORT_ADMIN_STATE_DOWN);
 		esw_vport_disable_egress_acl(esw, vport);
 		esw_vport_disable_ingress_acl(esw, vport);
 		esw_vport_destroy_drop_counters(vport);
@@ -1736,7 +1736,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 		struct mlx5_vport *vport = &esw->vports[vport_num];
 
 		vport->vport = vport_num;
-		vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
+		vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
 		vport->dev = dev;
 		INIT_WORK(&vport->vport_change_handler,
 			  esw_vport_change_handler);
@@ -1860,7 +1860,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
 	evport = &esw->vports[vport];
 
 	err = mlx5_modify_vport_admin_state(esw->dev,
-					    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+					    MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
 					    vport, link_state);
 	if (err) {
 		mlx5_core_warn(esw->dev,
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index d489494b0a84..11fa4e66afc5 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -946,9 +946,9 @@ enum {
 };
 
 enum {
-	MLX5_ESW_VPORT_ADMIN_STATE_DOWN  = 0x0,
-	MLX5_ESW_VPORT_ADMIN_STATE_UP    = 0x1,
-	MLX5_ESW_VPORT_ADMIN_STATE_AUTO  = 0x2,
+	MLX5_VPORT_ADMIN_STATE_DOWN  = 0x0,
+	MLX5_VPORT_ADMIN_STATE_UP    = 0x1,
+	MLX5_VPORT_ADMIN_STATE_AUTO  = 0x2,
 };
 
 enum {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 63e1ce4c4826..6ead9c1a5396 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -3764,8 +3764,8 @@ struct mlx5_ifc_query_vport_state_out_bits {
 };
 
 enum {
-	MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT  = 0x0,
-	MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT   = 0x1,
+	MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT  = 0x0,
+	MLX5_VPORT_STATE_OP_MOD_ESW_VPORT   = 0x1,
 };
 
 struct mlx5_ifc_query_vport_state_in_bits {
-- 
cgit v1.2.3


From 29d8ebd44de8999e292dbb4de76744d4a41039ec Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 8 Aug 2018 16:23:51 -0700
Subject: net/mlx5: Remove unused mlx5_query_vport_admin_state

mlx5_query_vport_admin_state() is not used anywhere. Remove it.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/vport.c | 10 ----------
 include/linux/mlx5/vport.h                      |  2 --
 2 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 7eecd5b07bb1..fa6fa171b94c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -64,16 +64,6 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
 }
 EXPORT_SYMBOL_GPL(mlx5_query_vport_state);
 
-u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
-{
-	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};
-
-	_mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out));
-
-	return MLX5_GET(query_vport_state_out, out, admin_state);
-}
-EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state);
-
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				  u16 vport, u8 state)
 {
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 9208cb8809ac..7e7c6dfcfb09 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -43,8 +43,6 @@ enum {
 };
 
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
-u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
-				u16 vport);
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				  u16 vport, u8 state);
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
-- 
cgit v1.2.3


From 74fe7b551f3385fa585d92616c85b3a575b2b2cb Mon Sep 17 00:00:00 2001
From: Crestez Dan Leonard <leonard.crestez@intel.com>
Date: Tue, 7 Aug 2018 17:52:17 +0300
Subject: regmap: Add regmap_noinc_read API

The regmap API usually assumes that bulk read operations will read a
range of registers but some I2C/SPI devices have certain registers for
which a such a read operation will return data from an internal FIFO
instead. Add an explicit API to support bulk read without range semantics.

Some linux drivers use regmap_bulk_read or regmap_raw_read for such
registers, for example mpu6050 or bmi150 from IIO. This only happens to
work because when caching is disabled a single regmap read op will map
to a single bus read op (as desired). This breaks if caching is enabled and
reg+1 happens to be a cacheable register.

Without regmap support refactoring a driver to enable regmap caching
requires separate I2C and SPI paths. This is exactly what regmap is
supposed to help avoid.

Suggested-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Crestez Dan Leonard <leonard.crestez@intel.com>
Signed-off-by: Stefan Popa <stefan.popa@analog.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/internal.h |  3 ++
 drivers/base/regmap/regmap.c   | 79 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/regmap.h         | 19 ++++++++++
 3 files changed, 100 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 53785e0e297a..a6bf34d6394e 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -94,10 +94,12 @@ struct regmap {
 	bool (*readable_reg)(struct device *dev, unsigned int reg);
 	bool (*volatile_reg)(struct device *dev, unsigned int reg);
 	bool (*precious_reg)(struct device *dev, unsigned int reg);
+	bool (*readable_noinc_reg)(struct device *dev, unsigned int reg);
 	const struct regmap_access_table *wr_table;
 	const struct regmap_access_table *rd_table;
 	const struct regmap_access_table *volatile_table;
 	const struct regmap_access_table *precious_table;
+	const struct regmap_access_table *rd_noinc_table;
 
 	int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
 	int (*reg_write)(void *context, unsigned int reg, unsigned int val);
@@ -181,6 +183,7 @@ bool regmap_writeable(struct regmap *map, unsigned int reg);
 bool regmap_readable(struct regmap *map, unsigned int reg);
 bool regmap_volatile(struct regmap *map, unsigned int reg);
 bool regmap_precious(struct regmap *map, unsigned int reg);
+bool regmap_readable_noinc(struct regmap *map, unsigned int reg);
 
 int _regmap_write(struct regmap *map, unsigned int reg,
 		  unsigned int val);
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 3bc84885eb91..0360a90ad6b6 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -168,6 +168,17 @@ bool regmap_precious(struct regmap *map, unsigned int reg)
 	return false;
 }
 
+bool regmap_readable_noinc(struct regmap *map, unsigned int reg)
+{
+	if (map->readable_noinc_reg)
+		return map->readable_noinc_reg(map->dev, reg);
+
+	if (map->rd_noinc_table)
+		return regmap_check_range_table(map, reg, map->rd_noinc_table);
+
+	return true;
+}
+
 static bool regmap_volatile_range(struct regmap *map, unsigned int reg,
 	size_t num)
 {
@@ -766,10 +777,12 @@ struct regmap *__regmap_init(struct device *dev,
 	map->rd_table = config->rd_table;
 	map->volatile_table = config->volatile_table;
 	map->precious_table = config->precious_table;
+	map->rd_noinc_table = config->rd_noinc_table;
 	map->writeable_reg = config->writeable_reg;
 	map->readable_reg = config->readable_reg;
 	map->volatile_reg = config->volatile_reg;
 	map->precious_reg = config->precious_reg;
+	map->readable_noinc_reg = config->readable_noinc_reg;
 	map->cache_type = config->cache_type;
 
 	spin_lock_init(&map->async_lock);
@@ -1285,6 +1298,7 @@ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config)
 	map->readable_reg = config->readable_reg;
 	map->volatile_reg = config->volatile_reg;
 	map->precious_reg = config->precious_reg;
+	map->readable_noinc_reg = config->readable_noinc_reg;
 	map->cache_type = config->cache_type;
 
 	regmap_debugfs_init(map, config->name);
@@ -2564,7 +2578,70 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 EXPORT_SYMBOL_GPL(regmap_raw_read);
 
 /**
- * regmap_field_read() - Read a value to a single register field
+ * regmap_noinc_read(): Read data from a register without incrementing the
+ *			register number
+ *
+ * @map: Register map to read from
+ * @reg: Register to read from
+ * @val: Pointer to data buffer
+ * @val_len: Length of output buffer in bytes.
+ *
+ * The regmap API usually assumes that bulk bus read operations will read a
+ * range of registers. Some devices have certain registers for which a read
+ * operation read will read from an internal FIFO.
+ *
+ * The target register must be volatile but registers after it can be
+ * completely unrelated cacheable registers.
+ *
+ * This will attempt multiple reads as required to read val_len bytes.
+ *
+ * A value of zero will be returned on success, a negative errno will be
+ * returned in error cases.
+ */
+int regmap_noinc_read(struct regmap *map, unsigned int reg,
+		      void *val, size_t val_len)
+{
+	size_t read_len;
+	int ret;
+
+	if (!map->bus)
+		return -EINVAL;
+	if (!map->bus->read)
+		return -ENOTSUPP;
+	if (val_len % map->format.val_bytes)
+		return -EINVAL;
+	if (!IS_ALIGNED(reg, map->reg_stride))
+		return -EINVAL;
+	if (val_len == 0)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+
+	if (!regmap_volatile(map, reg) || !regmap_readable_noinc(map, reg)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	while (val_len) {
+		if (map->max_raw_read && map->max_raw_read < val_len)
+			read_len = map->max_raw_read;
+		else
+			read_len = val_len;
+		ret = _regmap_raw_read(map, reg, val, read_len);
+		if (ret)
+			goto out_unlock;
+		val = ((u8 *)val) + read_len;
+		val_len -= read_len;
+	}
+
+out_unlock:
+	map->unlock(map->lock_arg);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_noinc_read);
+
+/**
+ * regmap_field_read(): Read a value to a single register field
  *
  * @field: Register field to read from
  * @val: Pointer to store read value
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 4f38068ffb71..19df946499d2 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -268,6 +268,13 @@ typedef void (*regmap_unlock)(void *);
  *                field is NULL but precious_table (see below) is not, the
  *                check is performed on such table (a register is precious if
  *                it belongs to one of the ranges specified by precious_table).
+ * @readable_noinc_reg: Optional callback returning true if the register
+ *			supports multiple read operations without incrementing
+ *			the register number. If this field is NULL but
+ *			rd_noinc_table (see below) is not, the check is
+ *			performed on such table (a register is no increment
+ *			readable if it belongs to one of the ranges specified
+ *			by rd_noinc_table).
  * @disable_locking: This regmap is either protected by external means or
  *                   is guaranteed not be be accessed from multiple threads.
  *                   Don't use any locking mechanisms.
@@ -295,6 +302,7 @@ typedef void (*regmap_unlock)(void *);
  * @rd_table:     As above, for read access.
  * @volatile_table: As above, for volatile registers.
  * @precious_table: As above, for precious registers.
+ * @rd_noinc_table: As above, for no increment readable registers.
  * @reg_defaults: Power on reset values for registers (for use with
  *                register cache support).
  * @num_reg_defaults: Number of elements in reg_defaults.
@@ -344,6 +352,7 @@ struct regmap_config {
 	bool (*readable_reg)(struct device *dev, unsigned int reg);
 	bool (*volatile_reg)(struct device *dev, unsigned int reg);
 	bool (*precious_reg)(struct device *dev, unsigned int reg);
+	bool (*readable_noinc_reg)(struct device *dev, unsigned int reg);
 
 	bool disable_locking;
 	regmap_lock lock;
@@ -360,6 +369,7 @@ struct regmap_config {
 	const struct regmap_access_table *rd_table;
 	const struct regmap_access_table *volatile_table;
 	const struct regmap_access_table *precious_table;
+	const struct regmap_access_table *rd_noinc_table;
 	const struct reg_default *reg_defaults;
 	unsigned int num_reg_defaults;
 	enum regcache_type cache_type;
@@ -946,6 +956,8 @@ int regmap_raw_write_async(struct regmap *map, unsigned int reg,
 int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val);
 int regmap_raw_read(struct regmap *map, unsigned int reg,
 		    void *val, size_t val_len);
+int regmap_noinc_read(struct regmap *map, unsigned int reg,
+		      void *val, size_t val_len);
 int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 		     size_t val_count);
 int regmap_update_bits_base(struct regmap *map, unsigned int reg,
@@ -1196,6 +1208,13 @@ static inline int regmap_raw_read(struct regmap *map, unsigned int reg,
 	return -EINVAL;
 }
 
+static inline int regmap_noinc_read(struct regmap *map, unsigned int reg,
+				    void *val, size_t val_len)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_bulk_read(struct regmap *map, unsigned int reg,
 				   void *val, size_t val_count)
 {
-- 
cgit v1.2.3


From b1f4267cc5448d20ae0c515a74141e74365e78a3 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 9 Aug 2018 07:47:28 -0700
Subject: block: Remove two superfluous #include directives

Commit 12f5b9314545 ("blk-mq: Remove generation seqeunce") removed the
only seqcount_t and u64_stats_sync instances from <linux/blkdev.h> but
did not remove the corresponding #include directives. Since these
include directives are no longer needed, remove them.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Cc: Hannes Reinecke <hare@suse.com>,
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 050d599f5ea9..d6869e0e2b64 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,8 +27,6 @@
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
-#include <linux/seqlock.h>
-#include <linux/u64_stats_sync.h>
 
 struct module;
 struct scsi_ioctl_command;
-- 
cgit v1.2.3


From 6bad9b210a228d2fe0e0efe26d9b115348529cee Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 9 Aug 2018 07:53:36 -0700
Subject: blkcg: Introduce blkg_root_lookup()

This new function will be used in a later patch to verify whether a
queue has been dissociated from the cgroup controller before being
released.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Alexandru Moise <00moses.alexander00@gmail.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index f7b910768306..1361cfc9b878 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -341,6 +341,23 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 	return __blkg_lookup(blkcg, q, false);
 }
 
+/**
+ * blkg_lookup - look up blkg for the specified request queue
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for @q at the root level. See also blkg_lookup().
+ */
+static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q)
+{
+	struct blkcg_gq *blkg;
+
+	rcu_read_lock();
+	blkg = blkg_lookup(&blkcg_root, q);
+	rcu_read_unlock();
+
+	return blkg;
+}
+
 /**
  * blkg_to_pdata - get policy private data
  * @blkg: blkg of interest
@@ -864,6 +881,7 @@ static inline bool blk_cgroup_congested(void) { return false; }
 static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
 
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q) { return NULL; }
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
 static inline void blkcg_exit_queue(struct request_queue *q) { }
-- 
cgit v1.2.3


From 209b43759d65b2cc99ce7757249aacc82b03c4e2 Mon Sep 17 00:00:00 2001
From: Michael Büsch <m@bues.ch>
Date: Tue, 31 Jul 2018 22:15:09 +0200
Subject: ssb: Remove SSB_WARN_ON, SSB_BUG_ON and SSB_DEBUG

Use the standard WARN_ON instead.
If a small kernel is desired, WARN_ON can be disabled globally.

Also remove SSB_DEBUG. Besides WARN_ON it only adds a tiny debug check.
Include this check unconditionally.

Signed-off-by: Michael Buesch <m@bues.ch>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 arch/mips/configs/bcm47xx_defconfig |  1 -
 arch/powerpc/configs/wii_defconfig  |  1 -
 drivers/ssb/Kconfig                 |  9 ---------
 drivers/ssb/driver_chipcommon.c     |  8 ++++----
 drivers/ssb/driver_chipcommon_pmu.c | 10 +++++-----
 drivers/ssb/driver_gpio.c           |  4 ++--
 drivers/ssb/driver_pcicore.c        |  6 +++---
 drivers/ssb/embedded.c              | 10 +++++-----
 drivers/ssb/host_soc.c              | 12 ++++++------
 drivers/ssb/main.c                  | 38 +++++++++++++++++--------------------
 drivers/ssb/pci.c                   | 19 ++++++-------------
 drivers/ssb/pcmcia.c                | 14 +++++++-------
 drivers/ssb/scan.c                  |  4 ++--
 drivers/ssb/sdio.c                  | 12 ++++++------
 drivers/ssb/ssb_private.h           |  9 ---------
 include/linux/ssb/ssb.h             |  2 --
 16 files changed, 63 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig
index fad8e964f14c..ba800a892384 100644
--- a/arch/mips/configs/bcm47xx_defconfig
+++ b/arch/mips/configs/bcm47xx_defconfig
@@ -66,7 +66,6 @@ CONFIG_HW_RANDOM=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_WATCHDOG=y
 CONFIG_BCM47XX_WDT=y
-CONFIG_SSB_DEBUG=y
 CONFIG_SSB_DRIVER_GIGE=y
 CONFIG_BCMA_DRIVER_GMAC_CMN=y
 CONFIG_USB=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 10940533da71..f5c366b02828 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -78,7 +78,6 @@ CONFIG_GPIO_HLWD=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_GPIO=y
 # CONFIG_HWMON is not set
-CONFIG_SSB_DEBUG=y
 CONFIG_FB=y
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index 6c438c819eb9..df30e1323252 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -89,15 +89,6 @@ config SSB_HOST_SOC
 
 	  If unsure, say N
 
-config SSB_DEBUG
-	bool "SSB debugging"
-	depends on SSB
-	help
-	  This turns on additional runtime checks and debugging
-	  messages. Turn this on for SSB troubleshooting.
-
-	  If unsure, say N
-
 config SSB_SERIAL
 	bool
 	depends on SSB
diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c
index 48050c6fd847..99a4656d113d 100644
--- a/drivers/ssb/driver_chipcommon.c
+++ b/drivers/ssb/driver_chipcommon.c
@@ -56,7 +56,7 @@ void ssb_chipco_set_clockmode(struct ssb_chipcommon *cc,
 
 	if (cc->capabilities & SSB_CHIPCO_CAP_PMU)
 		return; /* PMU controls clockmode, separated function needed */
-	SSB_WARN_ON(ccdev->id.revision >= 20);
+	WARN_ON(ccdev->id.revision >= 20);
 
 	/* chipcommon cores prior to rev6 don't support dynamic clock control */
 	if (ccdev->id.revision < 6)
@@ -111,7 +111,7 @@ void ssb_chipco_set_clockmode(struct ssb_chipcommon *cc,
 		}
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 }
 
@@ -164,7 +164,7 @@ static int chipco_pctl_clockfreqlimit(struct ssb_chipcommon *cc, int get_max)
 			divisor = 32;
 			break;
 		default:
-			SSB_WARN_ON(1);
+			WARN_ON(1);
 		}
 	} else if (cc->dev->id.revision < 10) {
 		switch (clocksrc) {
@@ -277,7 +277,7 @@ static void calc_fast_powerup_delay(struct ssb_chipcommon *cc)
 	minfreq = chipco_pctl_clockfreqlimit(cc, 0);
 	pll_on_delay = chipco_read32(cc, SSB_CHIPCO_PLLONDELAY);
 	tmp = (((pll_on_delay + 2) * 1000000) + (minfreq - 1)) / minfreq;
-	SSB_WARN_ON(tmp & ~0xFFFF);
+	WARN_ON(tmp & ~0xFFFF);
 
 	cc->fast_pwrup_delay = tmp;
 }
diff --git a/drivers/ssb/driver_chipcommon_pmu.c b/drivers/ssb/driver_chipcommon_pmu.c
index e28682a53cdf..0f60e90ded26 100644
--- a/drivers/ssb/driver_chipcommon_pmu.c
+++ b/drivers/ssb/driver_chipcommon_pmu.c
@@ -128,7 +128,7 @@ static void ssb_pmu0_pllinit_r0(struct ssb_chipcommon *cc,
 			      ~(1 << SSB_PMURES_5354_BB_PLL_PU));
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	for (i = 1500; i; i--) {
 		tmp = chipco_read32(cc, SSB_CHIPCO_CLKCTLST);
@@ -265,7 +265,7 @@ static void ssb_pmu1_pllinit_r0(struct ssb_chipcommon *cc,
 		buffer_strength = 0x222222;
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	for (i = 1500; i; i--) {
 		tmp = chipco_read32(cc, SSB_CHIPCO_CLKCTLST);
@@ -501,7 +501,7 @@ static void ssb_pmu_resources_init(struct ssb_chipcommon *cc)
 					      ~(depend_tab[i].depend));
 				break;
 			default:
-				SSB_WARN_ON(1);
+				WARN_ON(1);
 			}
 		}
 	}
@@ -568,12 +568,12 @@ void ssb_pmu_set_ldo_voltage(struct ssb_chipcommon *cc,
 			mask = 0x3F;
 			break;
 		default:
-			SSB_WARN_ON(1);
+			WARN_ON(1);
 			return;
 		}
 		break;
 	case 0x4312:
-		if (SSB_WARN_ON(id != LDO_PAREF))
+		if (WARN_ON(id != LDO_PAREF))
 			return;
 		addr = 0;
 		shift = 21;
diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c
index 6ce4abf7d473..e809dae4c470 100644
--- a/drivers/ssb/driver_gpio.c
+++ b/drivers/ssb/driver_gpio.c
@@ -461,7 +461,7 @@ int ssb_gpio_init(struct ssb_bus *bus)
 	else if (ssb_extif_available(&bus->extif))
 		return ssb_gpio_extif_init(bus);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 
 	return -1;
 }
@@ -473,7 +473,7 @@ int ssb_gpio_unregister(struct ssb_bus *bus)
 		gpiochip_remove(&bus->gpio);
 		return 0;
 	} else {
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 
 	return -1;
diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c
index ae80b3171523..6a5622e0ded5 100644
--- a/drivers/ssb/driver_pcicore.c
+++ b/drivers/ssb/driver_pcicore.c
@@ -115,7 +115,7 @@ static int ssb_extpci_read_config(struct ssb_pcicore *pc,
 	u32 addr, val;
 	void __iomem *mmio;
 
-	SSB_WARN_ON(!pc->hostmode);
+	WARN_ON(!pc->hostmode);
 	if (unlikely(len != 1 && len != 2 && len != 4))
 		goto out;
 	addr = get_cfgspace_addr(pc, bus, dev, func, off);
@@ -161,7 +161,7 @@ static int ssb_extpci_write_config(struct ssb_pcicore *pc,
 	u32 addr, val = 0;
 	void __iomem *mmio;
 
-	SSB_WARN_ON(!pc->hostmode);
+	WARN_ON(!pc->hostmode);
 	if (unlikely(len != 1 && len != 2 && len != 4))
 		goto out;
 	addr = get_cfgspace_addr(pc, bus, dev, func, off);
@@ -702,7 +702,7 @@ int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc,
 		/* Calculate the "coremask" for the device. */
 		coremask = (1 << dev->core_index);
 
-		SSB_WARN_ON(bus->bustype != SSB_BUSTYPE_PCI);
+		WARN_ON(bus->bustype != SSB_BUSTYPE_PCI);
 		err = pci_read_config_dword(bus->host_pci, SSB_PCI_IRQMASK, &tmp);
 		if (err)
 			goto out;
diff --git a/drivers/ssb/embedded.c b/drivers/ssb/embedded.c
index c39ddf8988c1..8254ed25e063 100644
--- a/drivers/ssb/embedded.c
+++ b/drivers/ssb/embedded.c
@@ -77,7 +77,7 @@ u32 ssb_gpio_in(struct ssb_bus *bus, u32 mask)
 	else if (ssb_extif_available(&bus->extif))
 		res = ssb_extif_gpio_in(&bus->extif, mask);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	spin_unlock_irqrestore(&bus->gpio_lock, flags);
 
 	return res;
@@ -95,7 +95,7 @@ u32 ssb_gpio_out(struct ssb_bus *bus, u32 mask, u32 value)
 	else if (ssb_extif_available(&bus->extif))
 		res = ssb_extif_gpio_out(&bus->extif, mask, value);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	spin_unlock_irqrestore(&bus->gpio_lock, flags);
 
 	return res;
@@ -113,7 +113,7 @@ u32 ssb_gpio_outen(struct ssb_bus *bus, u32 mask, u32 value)
 	else if (ssb_extif_available(&bus->extif))
 		res = ssb_extif_gpio_outen(&bus->extif, mask, value);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	spin_unlock_irqrestore(&bus->gpio_lock, flags);
 
 	return res;
@@ -145,7 +145,7 @@ u32 ssb_gpio_intmask(struct ssb_bus *bus, u32 mask, u32 value)
 	else if (ssb_extif_available(&bus->extif))
 		res = ssb_extif_gpio_intmask(&bus->extif, mask, value);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	spin_unlock_irqrestore(&bus->gpio_lock, flags);
 
 	return res;
@@ -163,7 +163,7 @@ u32 ssb_gpio_polarity(struct ssb_bus *bus, u32 mask, u32 value)
 	else if (ssb_extif_available(&bus->extif))
 		res = ssb_extif_gpio_polarity(&bus->extif, mask, value);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	spin_unlock_irqrestore(&bus->gpio_lock, flags);
 
 	return res;
diff --git a/drivers/ssb/host_soc.c b/drivers/ssb/host_soc.c
index eadaedf466f6..3b438480515c 100644
--- a/drivers/ssb/host_soc.c
+++ b/drivers/ssb/host_soc.c
@@ -61,7 +61,7 @@ static void ssb_host_soc_block_read(struct ssb_device *dev, void *buffer,
 	case sizeof(u16): {
 		__le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		while (count) {
 			*buf = (__force __le16)__raw_readw(addr);
 			buf++;
@@ -72,7 +72,7 @@ static void ssb_host_soc_block_read(struct ssb_device *dev, void *buffer,
 	case sizeof(u32): {
 		__le32 *buf = buffer;
 
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		while (count) {
 			*buf = (__force __le32)__raw_readl(addr);
 			buf++;
@@ -81,7 +81,7 @@ static void ssb_host_soc_block_read(struct ssb_device *dev, void *buffer,
 		break;
 	}
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 }
 #endif /* CONFIG_SSB_BLOCKIO */
@@ -134,7 +134,7 @@ static void ssb_host_soc_block_write(struct ssb_device *dev, const void *buffer,
 	case sizeof(u16): {
 		const __le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		while (count) {
 			__raw_writew((__force u16)(*buf), addr);
 			buf++;
@@ -145,7 +145,7 @@ static void ssb_host_soc_block_write(struct ssb_device *dev, const void *buffer,
 	case sizeof(u32): {
 		const __le32 *buf = buffer;
 
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		while (count) {
 			__raw_writel((__force u32)(*buf), addr);
 			buf++;
@@ -154,7 +154,7 @@ static void ssb_host_soc_block_write(struct ssb_device *dev, const void *buffer,
 		break;
 	}
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 }
 #endif /* CONFIG_SSB_BLOCKIO */
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 9da56d2fdbd3..0a26984acb2c 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -209,7 +209,7 @@ int ssb_devices_freeze(struct ssb_bus *bus, struct ssb_freeze_context *ctx)
 
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->bus = bus;
-	SSB_WARN_ON(bus->nr_devices > ARRAY_SIZE(ctx->device_frozen));
+	WARN_ON(bus->nr_devices > ARRAY_SIZE(ctx->device_frozen));
 
 	for (i = 0; i < bus->nr_devices; i++) {
 		sdev = ssb_device_get(&bus->devices[i]);
@@ -220,7 +220,7 @@ int ssb_devices_freeze(struct ssb_bus *bus, struct ssb_freeze_context *ctx)
 			continue;
 		}
 		sdrv = drv_to_ssb_drv(sdev->dev->driver);
-		if (SSB_WARN_ON(!sdrv->remove))
+		if (WARN_ON(!sdrv->remove))
 			continue;
 		sdrv->remove(sdev);
 		ctx->device_frozen[i] = 1;
@@ -248,10 +248,10 @@ int ssb_devices_thaw(struct ssb_freeze_context *ctx)
 			continue;
 		sdev = &bus->devices[i];
 
-		if (SSB_WARN_ON(!sdev->dev || !sdev->dev->driver))
+		if (WARN_ON(!sdev->dev || !sdev->dev->driver))
 			continue;
 		sdrv = drv_to_ssb_drv(sdev->dev->driver);
-		if (SSB_WARN_ON(!sdrv || !sdrv->probe))
+		if (WARN_ON(!sdrv || !sdrv->probe))
 			continue;
 
 		err = sdrv->probe(sdev, &sdev->id);
@@ -861,13 +861,13 @@ u32 ssb_calc_clock_rate(u32 plltype, u32 n, u32 m)
 	case SSB_PLLTYPE_2: /* 48Mhz, 4 dividers */
 		n1 += SSB_CHIPCO_CLK_T2_BIAS;
 		n2 += SSB_CHIPCO_CLK_T2_BIAS;
-		SSB_WARN_ON(!((n1 >= 2) && (n1 <= 7)));
-		SSB_WARN_ON(!((n2 >= 5) && (n2 <= 23)));
+		WARN_ON(!((n1 >= 2) && (n1 <= 7)));
+		WARN_ON(!((n2 >= 5) && (n2 <= 23)));
 		break;
 	case SSB_PLLTYPE_5: /* 25Mhz, 4 dividers */
 		return 100000000;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 
 	switch (plltype) {
@@ -916,9 +916,9 @@ u32 ssb_calc_clock_rate(u32 plltype, u32 n, u32 m)
 		m1 += SSB_CHIPCO_CLK_T2_BIAS;
 		m2 += SSB_CHIPCO_CLK_T2M2_BIAS;
 		m3 += SSB_CHIPCO_CLK_T2_BIAS;
-		SSB_WARN_ON(!((m1 >= 2) && (m1 <= 7)));
-		SSB_WARN_ON(!((m2 >= 3) && (m2 <= 10)));
-		SSB_WARN_ON(!((m3 >= 2) && (m3 <= 7)));
+		WARN_ON(!((m1 >= 2) && (m1 <= 7)));
+		WARN_ON(!((m2 >= 3) && (m2 <= 10)));
+		WARN_ON(!((m3 >= 2) && (m3 <= 7)));
 
 		if (!(mc & SSB_CHIPCO_CLK_T2MC_M1BYP))
 			clock /= m1;
@@ -928,7 +928,7 @@ u32 ssb_calc_clock_rate(u32 plltype, u32 n, u32 m)
 			clock /= m3;
 		return clock;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	return 0;
 }
@@ -1169,9 +1169,7 @@ int ssb_bus_may_powerdown(struct ssb_bus *bus)
 	if (err)
 		goto error;
 out:
-#ifdef CONFIG_SSB_DEBUG
 	bus->powered_up = 0;
-#endif
 	return err;
 error:
 	pr_err("Bus powerdown failed\n");
@@ -1188,9 +1186,7 @@ int ssb_bus_powerup(struct ssb_bus *bus, bool dynamic_pctl)
 	if (err)
 		goto error;
 
-#ifdef CONFIG_SSB_DEBUG
 	bus->powered_up = 1;
-#endif
 
 	mode = dynamic_pctl ? SSB_CLKMODE_DYNAMIC : SSB_CLKMODE_FAST;
 	ssb_chipco_set_clockmode(&bus->chipco, mode);
@@ -1242,15 +1238,15 @@ u32 ssb_admatch_base(u32 adm)
 		base = (adm & SSB_ADM_BASE0);
 		break;
 	case SSB_ADM_TYPE1:
-		SSB_WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
+		WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
 		base = (adm & SSB_ADM_BASE1);
 		break;
 	case SSB_ADM_TYPE2:
-		SSB_WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
+		WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
 		base = (adm & SSB_ADM_BASE2);
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 
 	return base;
@@ -1266,15 +1262,15 @@ u32 ssb_admatch_size(u32 adm)
 		size = ((adm & SSB_ADM_SZ0) >> SSB_ADM_SZ0_SHIFT);
 		break;
 	case SSB_ADM_TYPE1:
-		SSB_WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
+		WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
 		size = ((adm & SSB_ADM_SZ1) >> SSB_ADM_SZ1_SHIFT);
 		break;
 	case SSB_ADM_TYPE2:
-		SSB_WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
+		WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
 		size = ((adm & SSB_ADM_SZ2) >> SSB_ADM_SZ2_SHIFT);
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	size = (1 << (size + 1));
 
diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index ad4308529eba..84807a9b4b13 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -946,7 +946,6 @@ out:
 	return err;
 }
 
-#ifdef CONFIG_SSB_DEBUG
 static int ssb_pci_assert_buspower(struct ssb_bus *bus)
 {
 	if (likely(bus->powered_up))
@@ -960,12 +959,6 @@ static int ssb_pci_assert_buspower(struct ssb_bus *bus)
 
 	return -ENODEV;
 }
-#else /* DEBUG */
-static inline int ssb_pci_assert_buspower(struct ssb_bus *bus)
-{
-	return 0;
-}
-#endif /* DEBUG */
 
 static u8 ssb_pci_read8(struct ssb_device *dev, u16 offset)
 {
@@ -1024,15 +1017,15 @@ static void ssb_pci_block_read(struct ssb_device *dev, void *buffer,
 		ioread8_rep(addr, buffer, count);
 		break;
 	case sizeof(u16):
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		ioread16_rep(addr, buffer, count >> 1);
 		break;
 	case sizeof(u32):
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		ioread32_rep(addr, buffer, count >> 2);
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 
 	return;
@@ -1098,15 +1091,15 @@ static void ssb_pci_block_write(struct ssb_device *dev, const void *buffer,
 		iowrite8_rep(addr, buffer, count);
 		break;
 	case sizeof(u16):
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		iowrite16_rep(addr, buffer, count >> 1);
 		break;
 	case sizeof(u32):
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		iowrite32_rep(addr, buffer, count >> 2);
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 }
 #endif /* CONFIG_SSB_BLOCKIO */
diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c
index 20f63cc88e70..567013f8a8be 100644
--- a/drivers/ssb/pcmcia.c
+++ b/drivers/ssb/pcmcia.c
@@ -169,7 +169,7 @@ int ssb_pcmcia_switch_segment(struct ssb_bus *bus, u8 seg)
 	int err;
 	u8 val;
 
-	SSB_WARN_ON((seg != 0) && (seg != 1));
+	WARN_ON((seg != 0) && (seg != 1));
 	while (1) {
 		err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_MEMSEG, seg);
 		if (err)
@@ -299,7 +299,7 @@ static void ssb_pcmcia_block_read(struct ssb_device *dev, void *buffer,
 	case sizeof(u16): {
 		__le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		while (count) {
 			*buf = (__force __le16)__raw_readw(addr);
 			buf++;
@@ -310,7 +310,7 @@ static void ssb_pcmcia_block_read(struct ssb_device *dev, void *buffer,
 	case sizeof(u32): {
 		__le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		while (count) {
 			*buf = (__force __le16)__raw_readw(addr);
 			buf++;
@@ -321,7 +321,7 @@ static void ssb_pcmcia_block_read(struct ssb_device *dev, void *buffer,
 		break;
 	}
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 unlock:
 	spin_unlock_irqrestore(&bus->bar_lock, flags);
@@ -399,7 +399,7 @@ static void ssb_pcmcia_block_write(struct ssb_device *dev, const void *buffer,
 	case sizeof(u16): {
 		const __le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		while (count) {
 			__raw_writew((__force u16)(*buf), addr);
 			buf++;
@@ -410,7 +410,7 @@ static void ssb_pcmcia_block_write(struct ssb_device *dev, const void *buffer,
 	case sizeof(u32): {
 		const __le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		while (count) {
 			__raw_writew((__force u16)(*buf), addr);
 			buf++;
@@ -421,7 +421,7 @@ static void ssb_pcmcia_block_write(struct ssb_device *dev, const void *buffer,
 		break;
 	}
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 unlock:
 	mmiowb();
diff --git a/drivers/ssb/scan.c b/drivers/ssb/scan.c
index 71fbb4b3eb6a..6ceee98ed6ff 100644
--- a/drivers/ssb/scan.c
+++ b/drivers/ssb/scan.c
@@ -210,7 +210,7 @@ void ssb_iounmap(struct ssb_bus *bus)
 #ifdef CONFIG_SSB_PCIHOST
 		pci_iounmap(bus->host_pci, bus->mmio);
 #else
-		SSB_BUG_ON(1); /* Can't reach this code. */
+		WARN_ON(1); /* Can't reach this code. */
 #endif
 		break;
 	case SSB_BUSTYPE_SDIO:
@@ -236,7 +236,7 @@ static void __iomem *ssb_ioremap(struct ssb_bus *bus,
 #ifdef CONFIG_SSB_PCIHOST
 		mmio = pci_iomap(bus->host_pci, 0, ~0UL);
 #else
-		SSB_BUG_ON(1); /* Can't reach this code. */
+		WARN_ON(1); /* Can't reach this code. */
 #endif
 		break;
 	case SSB_BUSTYPE_SDIO:
diff --git a/drivers/ssb/sdio.c b/drivers/ssb/sdio.c
index 1aedc5fbb62f..7fe0afb42234 100644
--- a/drivers/ssb/sdio.c
+++ b/drivers/ssb/sdio.c
@@ -316,18 +316,18 @@ static void ssb_sdio_block_read(struct ssb_device *dev, void *buffer,
 		break;
 	}
 	case sizeof(u16): {
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		error = sdio_readsb(bus->host_sdio, buffer, offset, count);
 		break;
 	}
 	case sizeof(u32): {
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		offset |= SBSDIO_SB_ACCESS_2_4B_FLAG;	/* 32 bit data access */
 		error = sdio_readsb(bus->host_sdio, buffer, offset, count);
 		break;
 	}
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	if (!error)
 		goto out;
@@ -423,18 +423,18 @@ static void ssb_sdio_block_write(struct ssb_device *dev, const void *buffer,
 				     (void *)buffer, count);
 		break;
 	case sizeof(u16):
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		error = sdio_writesb(bus->host_sdio, offset,
 				     (void *)buffer, count);
 		break;
 	case sizeof(u32):
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		offset |= SBSDIO_SB_ACCESS_2_4B_FLAG;	/* 32 bit data access */
 		error = sdio_writesb(bus->host_sdio, offset,
 				     (void *)buffer, count);
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	if (!error)
 		goto out;
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index 885e278c4487..5f31bdfbe77f 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -9,15 +9,6 @@
 #include <linux/types.h>
 #include <linux/bcm47xx_wdt.h>
 
-#ifdef CONFIG_SSB_DEBUG
-# define SSB_WARN_ON(x)		WARN_ON(x)
-# define SSB_BUG_ON(x)		BUG_ON(x)
-#else
-static inline int __ssb_do_nothing(int x) { return x; }
-# define SSB_WARN_ON(x)		__ssb_do_nothing(unlikely(!!(x)))
-# define SSB_BUG_ON(x)		__ssb_do_nothing(unlikely(!!(x)))
-#endif
-
 
 /* pci.c */
 #ifdef CONFIG_SSB_PCIHOST
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 3b43655cabe6..0d5a2691e7e9 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -499,11 +499,9 @@ struct ssb_bus {
 
 	/* Internal-only stuff follows. Do not touch. */
 	struct list_head list;
-#ifdef CONFIG_SSB_DEBUG
 	/* Is the bus already powered up? */
 	bool powered_up;
 	int power_warn_count;
-#endif /* DEBUG */
 };
 
 enum ssb_quirks {
-- 
cgit v1.2.3


From cb95deea0b4aa5c7c7423f4e075a3ddcd59e710b Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:29 -0400
Subject: NFS OFFLOAD_CANCEL xdr

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs42xdr.c         | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4proc.c         |  1 +
 fs/nfs/nfs4xdr.c          |  1 +
 include/linux/nfs4.h      |  1 +
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   | 12 +++++++++
 6 files changed, 84 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 5966e1e7b1f5..09a540d035b8 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -26,6 +26,9 @@
 					 NFS42_WRITE_RES_SIZE + \
 					 1 /* cr_consecutive */ + \
 					 1 /* cr_synchronous */)
+#define encode_offload_cancel_maxsz	(op_encode_hdr_maxsz + \
+					 XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_offload_cancel_maxsz	(op_decode_hdr_maxsz)
 #define encode_deallocate_maxsz		(op_encode_hdr_maxsz + \
 					 encode_fallocate_maxsz)
 #define decode_deallocate_maxsz		(op_decode_hdr_maxsz)
@@ -75,6 +78,12 @@
 					 decode_putfh_maxsz + \
 					 decode_copy_maxsz + \
 					 decode_commit_maxsz)
+#define NFS4_enc_offload_cancel_sz	(compound_encode_hdr_maxsz + \
+					 encode_putfh_maxsz + \
+					 encode_offload_cancel_maxsz)
+#define NFS4_dec_offload_cancel_sz	(compound_decode_hdr_maxsz + \
+					 decode_putfh_maxsz + \
+					 decode_offload_cancel_maxsz)
 #define NFS4_enc_deallocate_sz		(compound_encode_hdr_maxsz + \
 					 encode_putfh_maxsz + \
 					 encode_deallocate_maxsz + \
@@ -145,6 +154,14 @@ static void encode_copy(struct xdr_stream *xdr,
 	encode_uint32(xdr, 0); /* src server list */
 }
 
+static void encode_offload_cancel(struct xdr_stream *xdr,
+				  const struct nfs42_offload_status_args *args,
+				  struct compound_hdr *hdr)
+{
+	encode_op_hdr(xdr, OP_OFFLOAD_CANCEL, decode_offload_cancel_maxsz, hdr);
+	encode_nfs4_stateid(xdr, &args->osa_stateid);
+}
+
 static void encode_deallocate(struct xdr_stream *xdr,
 			      const struct nfs42_falloc_args *args,
 			      struct compound_hdr *hdr)
@@ -260,6 +277,25 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
 	encode_nops(&hdr);
 }
 
+/*
+ * Encode OFFLOAD_CANEL request
+ */
+static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					const void *data)
+{
+	const struct nfs42_offload_status_args *args = data;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->osa_seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->osa_seq_args, &hdr);
+	encode_putfh(xdr, args->osa_src_fh, &hdr);
+	encode_offload_cancel(xdr, args, &hdr);
+	encode_nops(&hdr);
+}
+
 /*
  * Encode DEALLOCATE request
  */
@@ -413,6 +449,12 @@ static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res)
 	return decode_copy_requirements(xdr, res);
 }
 
+static int decode_offload_cancel(struct xdr_stream *xdr,
+				 struct nfs42_offload_status_res *res)
+{
+	return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL);
+}
+
 static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
 {
 	return decode_op_hdr(xdr, OP_DEALLOCATE);
@@ -512,6 +554,32 @@ out:
 	return status;
 }
 
+/*
+ * Decode OFFLOAD_CANCEL response
+ */
+static int nfs4_xdr_dec_offload_cancel(struct rpc_rqst *rqstp,
+				       struct xdr_stream *xdr,
+				       void *data)
+{
+	struct nfs42_offload_status_res *res = data;
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->osr_seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_offload_cancel(xdr, res);
+
+out:
+	return status;
+}
+
 /*
  * Decode DEALLOCATE request
  */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 124a9f97c366..b22b41eff9c0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -9620,6 +9620,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
 		| NFS_CAP_LGOPEN
 		| NFS_CAP_ALLOCATE
 		| NFS_CAP_COPY
+		| NFS_CAP_OFFLOAD_CANCEL
 		| NFS_CAP_DEALLOCATE
 		| NFS_CAP_SEEK
 		| NFS_CAP_LAYOUTSTATS
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index cd41d2577a04..b7bde12d8cd5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7789,6 +7789,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
 	PROC42(LAYOUTSTATS,	enc_layoutstats,	dec_layoutstats),
 	PROC42(CLONE,		enc_clone,		dec_clone),
 	PROC42(COPY,		enc_copy,		dec_copy),
+	PROC42(OFFLOAD_CANCEL,	enc_offload_cancel,	dec_offload_cancel),
 	PROC(LOOKUPP,		enc_lookupp,		dec_lookupp),
 };
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 57ffaa20d564..c44b87293229 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -527,6 +527,7 @@ enum {
 	NFSPROC4_CLNT_LAYOUTSTATS,
 	NFSPROC4_CLNT_CLONE,
 	NFSPROC4_CLNT_COPY,
+	NFSPROC4_CLNT_OFFLOAD_CANCEL,
 
 	NFSPROC4_CLNT_LOOKUPP,
 };
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 2c18d618604e..fbc735f08d7e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -255,5 +255,6 @@ struct nfs_server {
 #define NFS_CAP_LAYOUTSTATS	(1U << 22)
 #define NFS_CAP_CLONE		(1U << 23)
 #define NFS_CAP_COPY		(1U << 24)
+#define NFS_CAP_OFFLOAD_CANCEL	(1U << 25)
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 3b7325cfb291..85e928a56cef 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1403,6 +1403,18 @@ struct nfs42_copy_res {
 	struct nfs_commitres		commit_res;
 };
 
+struct nfs42_offload_status_args {
+	struct nfs4_sequence_args	osa_seq_args;
+	struct nfs_fh			*osa_src_fh;
+	nfs4_stateid			osa_stateid;
+};
+
+struct nfs42_offload_status_res {
+	struct nfs4_sequence_res	osr_seq_res;
+	uint64_t			osr_count;
+	int				osr_status;
+};
+
 struct nfs42_seek_args {
 	struct nfs4_sequence_args	seq_args;
 
-- 
cgit v1.2.3


From 67aa7444c4beb40aafedd8d2c60bbcc54987adda Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:30 -0400
Subject: NFS COPY xdr handle async reply

If server returns async reply, it must include a callback stateid,
wr_callback_id in the write_response4.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs42xdr.c       | 22 ++++++++++++----------
 include/linux/nfs_xdr.h |  1 +
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 09a540d035b8..205c3567ea08 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -389,21 +389,23 @@ static int decode_write_response(struct xdr_stream *xdr,
 				 struct nfs42_write_res *res)
 {
 	__be32 *p;
+	int status, count;
 
-	p = xdr_inline_decode(xdr, 4 + 8 + 4);
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(!p))
 		goto out_overflow;
-
-	/*
-	 * We never use asynchronous mode, so warn if a server returns
-	 * a stateid.
-	 */
-	if (unlikely(*p != 0)) {
-		pr_err_once("%s: server has set unrequested "
-				"asynchronous mode\n", __func__);
+	count = be32_to_cpup(p);
+	if (count > 1)
 		return -EREMOTEIO;
+	else if (count == 1) {
+		status = decode_opaque_fixed(xdr, &res->stateid,
+				NFS4_STATEID_SIZE);
+		if (unlikely(status))
+			goto out_overflow;
 	}
-	p++;
+	p = xdr_inline_decode(xdr, 8 + 4);
+	if (unlikely(!p))
+		goto out_overflow;
 	p = xdr_decode_hyper(p, &res->count);
 	res->verifier.committed = be32_to_cpup(p);
 	return decode_verifier(xdr, &res->verifier.verifier);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 85e928a56cef..06ddfa31cbef 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1391,6 +1391,7 @@ struct nfs42_copy_args {
 };
 
 struct nfs42_write_res {
+	nfs4_stateid		stateid;
 	u64			count;
 	struct nfs_writeverf	verifier;
 };
-- 
cgit v1.2.3


From 62164f317972fcd36590578888f33a1994dda519 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:31 -0400
Subject: NFS add support for asynchronous COPY

Change xdr to always send COPY asynchronously.

Keep the list copies send in a list under a server structure.
Once copy is sent, it waits on a completion structure that will
be signalled by the callback thread that receives CB_OFFLOAD.

If CB_OFFLOAD returned an error and even if it returned partial
bytes, ignore them (as we can't commit without a verifier to
match) and return an error.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/callback_proc.c    | 38 +++++++++++++++++++++++++++++++-
 fs/nfs/client.c           |  1 +
 fs/nfs/nfs42proc.c        | 55 ++++++++++++++++++++++++++++++++++++++++++-----
 fs/nfs/nfs42xdr.c         |  8 ++++---
 include/linux/nfs_fs.h    |  9 ++++++++
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   |  1 +
 7 files changed, 104 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d6f45bd176a9..acdda259912e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -662,9 +662,45 @@ __be32 nfs4_callback_notify_lock(void *argp, void *resp,
 }
 #endif /* CONFIG_NFS_V4_1 */
 #ifdef CONFIG_NFS_V4_2
-__be32 nfs4_callback_offload(void *args, void *dummy,
+static void nfs4_copy_cb_args(struct nfs4_copy_state *cp_state,
+				struct cb_offloadargs *args)
+{
+	cp_state->count = args->wr_count;
+	cp_state->error = args->error;
+	if (!args->error) {
+		cp_state->verf.committed = args->wr_writeverf.committed;
+		memcpy(&cp_state->verf.verifier.data[0],
+			&args->wr_writeverf.verifier.data[0],
+			NFS4_VERIFIER_SIZE);
+	}
+}
+
+__be32 nfs4_callback_offload(void *data, void *dummy,
 			     struct cb_process_state *cps)
 {
+	struct cb_offloadargs *args = data;
+	struct nfs_server *server;
+	struct nfs4_copy_state *copy;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &cps->clp->cl_superblocks,
+				client_link) {
+		spin_lock(&server->nfs_client->cl_lock);
+		list_for_each_entry(copy, &server->ss_copies, copies) {
+			if (memcmp(args->coa_stateid.other,
+					copy->stateid.other,
+					sizeof(args->coa_stateid.other)))
+				continue;
+			nfs4_copy_cb_args(copy, args);
+			complete(&copy->completion);
+			spin_unlock(&server->nfs_client->cl_lock);
+			goto out;
+		}
+		spin_unlock(&server->nfs_client->cl_lock);
+	}
+out:
+	rcu_read_unlock();
+
 	return 0;
 }
 #endif /* CONFIG_NFS_V4_2 */
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 377a61654a88..96d5f8135eb9 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -886,6 +886,7 @@ struct nfs_server *nfs_alloc_server(void)
 	INIT_LIST_HEAD(&server->delegations);
 	INIT_LIST_HEAD(&server->layouts);
 	INIT_LIST_HEAD(&server->state_owners_lru);
+	INIT_LIST_HEAD(&server->ss_copies);
 
 	atomic_set(&server->active, 0);
 
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 5f59b6f65a42..023aea8f6cf1 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -130,6 +130,37 @@ out_unlock:
 	return err;
 }
 
+static int handle_async_copy(struct nfs42_copy_res *res,
+			     struct nfs_server *server,
+			     struct file *src,
+			     struct file *dst,
+			     nfs4_stateid *src_stateid)
+{
+	struct nfs4_copy_state *copy;
+	int status = NFS4_OK;
+
+	copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+	if (!copy)
+		return -ENOMEM;
+	memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
+	init_completion(&copy->completion);
+
+	spin_lock(&server->nfs_client->cl_lock);
+	list_add_tail(&copy->copies, &server->ss_copies);
+	spin_unlock(&server->nfs_client->cl_lock);
+
+	wait_for_completion_interruptible(&copy->completion);
+	spin_lock(&server->nfs_client->cl_lock);
+	list_del_init(&copy->copies);
+	spin_unlock(&server->nfs_client->cl_lock);
+	res->write_res.count = copy->count;
+	memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
+	status = -copy->error;
+
+	kfree(copy);
+	return status;
+}
+
 static ssize_t _nfs42_proc_copy(struct file *src,
 				struct nfs_lock_context *src_lock,
 				struct file *dst,
@@ -168,9 +199,13 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 	if (status)
 		return status;
 
-	res->commit_res.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
-	if (!res->commit_res.verf)
-		return -ENOMEM;
+	res->commit_res.verf = NULL;
+	if (args->sync) {
+		res->commit_res.verf =
+			kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
+		if (!res->commit_res.verf)
+			return -ENOMEM;
+	}
 	status = nfs4_call_sync(server->client, server, &msg,
 				&args->seq_args, &res->seq_res, 0);
 	if (status == -ENOTSUPP)
@@ -178,18 +213,27 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 	if (status)
 		goto out;
 
-	if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
+	if (args->sync &&
+		nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
 				    &res->commit_res.verf->verifier)) {
 		status = -EAGAIN;
 		goto out;
 	}
 
+	if (!res->synchronous) {
+		status = handle_async_copy(res, server, src, dst,
+				&args->src_stateid);
+		if (status)
+			return status;
+	}
+
 	truncate_pagecache_range(dst_inode, pos_dst,
 				 pos_dst + res->write_res.count);
 
 	status = res->write_res.count;
 out:
-	kfree(res->commit_res.verf);
+	if (args->sync)
+		kfree(res->commit_res.verf);
 	return status;
 }
 
@@ -206,6 +250,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
 		.dst_fh		= NFS_FH(file_inode(dst)),
 		.dst_pos	= pos_dst,
 		.count		= count,
+		.sync		= false,
 	};
 	struct nfs42_copy_res res;
 	struct nfs4_exception src_exception = {
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 205c3567ea08..69f72ed2bf87 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -150,7 +150,7 @@ static void encode_copy(struct xdr_stream *xdr,
 	encode_uint64(xdr, args->count);
 
 	encode_uint32(xdr, 1); /* consecutive = true */
-	encode_uint32(xdr, 1); /* synchronous = true */
+	encode_uint32(xdr, args->sync);
 	encode_uint32(xdr, 0); /* src server list */
 }
 
@@ -273,7 +273,8 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
 	encode_savefh(xdr, &hdr);
 	encode_putfh(xdr, args->dst_fh, &hdr);
 	encode_copy(xdr, args, &hdr);
-	encode_copy_commit(xdr, args, &hdr);
+	if (args->sync)
+		encode_copy_commit(xdr, args, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -551,7 +552,8 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
 	status = decode_copy(xdr, res);
 	if (status)
 		goto out;
-	status = decode_commit(xdr, &res->commit_res);
+	if (res->commit_res.verf)
+		status = decode_commit(xdr, &res->commit_res);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2f129bbfaae8..645ad8e342f6 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -185,6 +185,15 @@ struct nfs_inode {
 	struct inode		vfs_inode;
 };
 
+struct nfs4_copy_state {
+	struct list_head	copies;
+	nfs4_stateid		stateid;
+	struct completion	completion;
+	uint64_t		count;
+	struct nfs_writeverf	verf;
+	int			error;
+};
+
 /*
  * Access bit flags
  */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index fbc735f08d7e..f88952d7b9fb 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -208,6 +208,7 @@ struct nfs_server {
 	struct list_head	state_owners_lru;
 	struct list_head	layouts;
 	struct list_head	delegations;
+	struct list_head	ss_copies;
 
 	unsigned long		mig_gen;
 	unsigned long		mig_status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 06ddfa31cbef..bd1c889a9ed9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1388,6 +1388,7 @@ struct nfs42_copy_args {
 	u64				dst_pos;
 
 	u64				count;
+	bool				sync;
 };
 
 struct nfs42_write_res {
-- 
cgit v1.2.3


From bc0c9079b48ddcf1f8a6e1aaa277288b263c78d8 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:32 -0400
Subject: NFS handle COPY reply CB_OFFLOAD call race

It's possible that server replies back with CB_OFFLOAD call and
COPY reply at the same time such that client will process
CB_OFFLOAD before reply to COPY. For that keep a list of pending
callback stateids received and then before waiting on completion
check the pending list.

Cleanup any pending copies on the client shutdown.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/callback_proc.c    | 17 ++++++++++++++---
 fs/nfs/nfs42proc.c        | 22 ++++++++++++++++++++--
 fs/nfs/nfs4client.c       | 15 +++++++++++++++
 include/linux/nfs_fs_sb.h |  1 +
 4 files changed, 50 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index acdda259912e..cd733649646b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -681,11 +681,12 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
 	struct cb_offloadargs *args = data;
 	struct nfs_server *server;
 	struct nfs4_copy_state *copy;
+	bool found = false;
 
+	spin_lock(&cps->clp->cl_lock);
 	rcu_read_lock();
 	list_for_each_entry_rcu(server, &cps->clp->cl_superblocks,
 				client_link) {
-		spin_lock(&server->nfs_client->cl_lock);
 		list_for_each_entry(copy, &server->ss_copies, copies) {
 			if (memcmp(args->coa_stateid.other,
 					copy->stateid.other,
@@ -693,13 +694,23 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
 				continue;
 			nfs4_copy_cb_args(copy, args);
 			complete(&copy->completion);
-			spin_unlock(&server->nfs_client->cl_lock);
+			found = true;
 			goto out;
 		}
-		spin_unlock(&server->nfs_client->cl_lock);
 	}
 out:
 	rcu_read_unlock();
+	if (!found) {
+		copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+		if (!copy) {
+			spin_unlock(&cps->clp->cl_lock);
+			return htonl(NFS4ERR_SERVERFAULT);
+		}
+		memcpy(&copy->stateid, &args->coa_stateid, NFS4_STATEID_SIZE);
+		nfs4_copy_cb_args(copy, args);
+		list_add_tail(&copy->copies, &cps->clp->pending_cb_stateids);
+	}
+	spin_unlock(&cps->clp->cl_lock);
 
 	return 0;
 }
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 023aea8f6cf1..c7d31f72070e 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -138,14 +138,31 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 {
 	struct nfs4_copy_state *copy;
 	int status = NFS4_OK;
+	bool found_pending = false;
+
+	spin_lock(&server->nfs_client->cl_lock);
+	list_for_each_entry(copy, &server->nfs_client->pending_cb_stateids,
+				copies) {
+		if (memcmp(&res->write_res.stateid, &copy->stateid,
+				NFS4_STATEID_SIZE))
+			continue;
+		found_pending = true;
+		list_del(&copy->copies);
+		break;
+	}
+	if (found_pending) {
+		spin_unlock(&server->nfs_client->cl_lock);
+		goto out;
+	}
 
 	copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
-	if (!copy)
+	if (!copy) {
+		spin_unlock(&server->nfs_client->cl_lock);
 		return -ENOMEM;
+	}
 	memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
 	init_completion(&copy->completion);
 
-	spin_lock(&server->nfs_client->cl_lock);
 	list_add_tail(&copy->copies, &server->ss_copies);
 	spin_unlock(&server->nfs_client->cl_lock);
 
@@ -153,6 +170,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 	spin_lock(&server->nfs_client->cl_lock);
 	list_del_init(&copy->copies);
 	spin_unlock(&server->nfs_client->cl_lock);
+out:
 	res->write_res.count = copy->count;
 	memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
 	status = -copy->error;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d7124fb12041..146e30862234 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -156,9 +156,23 @@ nfs4_shutdown_ds_clients(struct nfs_client *clp)
 	}
 }
 
+static void
+nfs4_cleanup_callback(struct nfs_client *clp)
+{
+	struct nfs4_copy_state *cp_state;
+
+	while (!list_empty(&clp->pending_cb_stateids)) {
+		cp_state = list_entry(clp->pending_cb_stateids.next,
+					struct nfs4_copy_state, copies);
+		list_del(&cp_state->copies);
+		kfree(cp_state);
+	}
+}
+
 void nfs41_shutdown_client(struct nfs_client *clp)
 {
 	if (nfs4_has_session(clp)) {
+		nfs4_cleanup_callback(clp);
 		nfs4_shutdown_ds_clients(clp);
 		nfs4_destroy_session(clp->cl_session);
 		nfs4_destroy_clientid(clp);
@@ -202,6 +216,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
 #if IS_ENABLED(CONFIG_NFS_V4_1)
 	init_waitqueue_head(&clp->cl_lock_waitq);
 #endif
+	INIT_LIST_HEAD(&clp->pending_cb_stateids);
 	return clp;
 
 error:
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index f88952d7b9fb..bf39d9c92201 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -121,6 +121,7 @@ struct nfs_client {
 #endif
 
 	struct net		*cl_net;
+	struct list_head	pending_cb_stateids;
 };
 
 /*
-- 
cgit v1.2.3


From c3ad2c3b02e953ead2b8d52a0c9e70312930c3d0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 23 Jul 2018 15:20:37 -0500
Subject: signal: Don't restart fork when signals come in.

Wen Yang <wen.yang99@zte.com.cn> and majiang <ma.jiang@zte.com.cn>
report that a periodic signal received during fork can cause fork to
continually restart preventing an application from making progress.

The code was being overly pessimistic.  Fork needs to guarantee that a
signal sent to multiple processes is logically delivered before the
fork and just to the forking process or logically delivered after the
fork to both the forking process and it's newly spawned child.  For
signals like periodic timers that are always delivered to a single
process fork can safely complete and let them appear to logically
delivered after the fork().

While examining this issue I also discovered that fork today will miss
signals delivered to multiple processes during the fork and handled by
another thread.  Similarly the current code will also miss blocked
signals that are delivered to multiple process, as those signals will
not appear pending during fork.

Add a list of each thread that is currently forking, and keep on that
list a signal set that records all of the signals sent to multiple
processes.  When fork completes initialize the new processes
shared_pending signal set with it.  The calculate_sigpending function
will see those signals and set TIF_SIGPENDING causing the new task to
take the slow path to userspace to handle those signals.  Making it
appear as if those signals were received immediately after the fork.

It is not possible to send real time signals to multiple processes and
exceptions don't go to multiple processes, which means that that are
no signals sent to multiple processes that require siginfo.  This
means it is safe to not bother collecting siginfo on signals sent
during fork.

The sigaction of a child of fork is initially the same as the
sigaction of the parent process.  So a signal the parent ignores the
child will also initially ignore.  Therefore it is safe to ignore
signals sent to multiple processes and ignored by the forking process.

Signals sent to only a single process or only a single thread and delivered
during fork are treated as if they are received after the fork, and generally
not dealt with.  They won't cause any problems.

V2: Added removal from the multiprocess list on failure.
V3: Use -ERESTARTNOINTR directly
V4: - Don't queue both SIGCONT and SIGSTOP
    - Initialize signal_struct.multiprocess in init_task
    - Move setting of shared_pending to before the new task
      is visible to signals.  This prevents signals from comming
      in before shared_pending.signal is set to delayed.signal
      and being lost.
V5: - rework list add and delete to account for idle threads
v6: - Use sigdelsetmask when removing stop signals

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200447
Reported-by: Wen Yang <wen.yang99@zte.com.cn> and
Reported-by: majiang <ma.jiang@zte.com.cn>
Fixes: 4a2c7a7837da ("[PATCH] make fork() atomic wrt pgrp/session signals")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  8 ++++++++
 init/init_task.c             |  1 +
 kernel/fork.c                | 43 +++++++++++++++++++++++++------------------
 kernel/signal.c              | 15 +++++++++++++++
 4 files changed, 49 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ae2b0b81be25..4e9b77fb702d 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -69,6 +69,11 @@ struct thread_group_cputimer {
 	bool checking_timer;
 };
 
+struct multiprocess_signals {
+	sigset_t signal;
+	struct hlist_node node;
+};
+
 /*
  * NOTE! "signal_struct" does not have its own
  * locking, because a shared signal_struct always
@@ -90,6 +95,9 @@ struct signal_struct {
 	/* shared signal handling: */
 	struct sigpending	shared_pending;
 
+	/* For collecting multiprocess signals during fork */
+	struct hlist_head	multiprocess;
+
 	/* thread group exit support */
 	int			group_exit_code;
 	/* overloaded:
diff --git a/init/init_task.c b/init/init_task.c
index 4f97846256d7..5aebe3be4d7c 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -22,6 +22,7 @@ static struct signal_struct init_signals = {
 		.list = LIST_HEAD_INIT(init_signals.shared_pending.list),
 		.signal =  {{0}}
 	},
+	.multiprocess	= HLIST_HEAD_INIT,
 	.rlim		= INIT_RLIMITS,
 	.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
 #ifdef CONFIG_POSIX_TIMERS
diff --git a/kernel/fork.c b/kernel/fork.c
index ab731e15a600..411e34acace7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1456,6 +1456,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	init_waitqueue_head(&sig->wait_chldexit);
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
+	INIT_HLIST_HEAD(&sig->multiprocess);
 	seqlock_init(&sig->stats_lock);
 	prev_cputime_init(&sig->prev_cputime);
 
@@ -1602,6 +1603,7 @@ static __latent_entropy struct task_struct *copy_process(
 {
 	int retval;
 	struct task_struct *p;
+	struct multiprocess_signals delayed;
 
 	/*
 	 * Don't allow sharing the root directory with processes in a different
@@ -1649,6 +1651,24 @@ static __latent_entropy struct task_struct *copy_process(
 			return ERR_PTR(-EINVAL);
 	}
 
+	/*
+	 * Force any signals received before this point to be delivered
+	 * before the fork happens.  Collect up signals sent to multiple
+	 * processes that happen during the fork and delay them so that
+	 * they appear to happen after the fork.
+	 */
+	sigemptyset(&delayed.signal);
+	INIT_HLIST_NODE(&delayed.node);
+
+	spin_lock_irq(&current->sighand->siglock);
+	if (!(clone_flags & CLONE_THREAD))
+		hlist_add_head(&delayed.node, &current->signal->multiprocess);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+	retval = -ERESTARTNOINTR;
+	if (signal_pending(current))
+		goto fork_out;
+
 	retval = -ENOMEM;
 	p = dup_task_struct(current, node);
 	if (!p)
@@ -1934,22 +1954,6 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_cancel_cgroup;
 	}
 
-	if (!(clone_flags & CLONE_THREAD)) {
-		/*
-		 * Process group and session signals need to be delivered to just the
-		 * parent before the fork or both the parent and the child after the
-		 * fork. Restart if a signal comes in before we add the new process to
-		 * it's process group.
-		 * A fatal signal pending means that current will exit, so the new
-		 * thread can't slip out of an OOM kill (or normal SIGKILL).
-		 */
-		recalc_sigpending();
-		if (signal_pending(current)) {
-			retval = -ERESTARTNOINTR;
-			goto bad_fork_cancel_cgroup;
-		}
-	}
-
 
 	init_task_pid_links(p);
 	if (likely(p->pid)) {
@@ -1965,7 +1969,7 @@ static __latent_entropy struct task_struct *copy_process(
 				ns_of_pid(pid)->child_reaper = p;
 				p->signal->flags |= SIGNAL_UNKILLABLE;
 			}
-
+			p->signal->shared_pending.signal = delayed.signal;
 			p->signal->tty = tty_kref_get(current->signal->tty);
 			/*
 			 * Inherit has_child_subreaper flag under the same
@@ -1993,8 +1997,8 @@ static __latent_entropy struct task_struct *copy_process(
 		attach_pid(p, PIDTYPE_PID);
 		nr_threads++;
 	}
-
 	total_forks++;
+	hlist_del_init(&delayed.node);
 	spin_unlock(&current->sighand->siglock);
 	syscall_tracepoint_update(p);
 	write_unlock_irq(&tasklist_lock);
@@ -2059,6 +2063,9 @@ bad_fork_free:
 	put_task_stack(p);
 	free_task(p);
 fork_out:
+	spin_lock_irq(&current->sighand->siglock);
+	hlist_del_init(&delayed.node);
+	spin_unlock_irq(&current->sighand->siglock);
 	return ERR_PTR(retval);
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 9f0eafb6d474..cfa9d10e731a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1121,6 +1121,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
 out_set:
 	signalfd_notify(t, sig);
 	sigaddset(&pending->signal, sig);
+
+	/* Let multiprocess signals appear after on-going forks */
+	if (type > PIDTYPE_TGID) {
+		struct multiprocess_signals *delayed;
+		hlist_for_each_entry(delayed, &t->signal->multiprocess, node) {
+			sigset_t *signal = &delayed->signal;
+			/* Can't queue both a stop and a continue signal */
+			if (sig == SIGCONT)
+				sigdelsetmask(signal, SIG_KERNEL_STOP_MASK);
+			else if (sig_kernel_stop(sig))
+				sigdelset(signal, SIGCONT);
+			sigaddset(signal, sig);
+		}
+	}
+
 	complete_signal(sig, t, type);
 ret:
 	trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result);
-- 
cgit v1.2.3


From 624c0f0239f04cce78c86afa95eb2841c84fbab1 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 9 Aug 2018 15:38:38 +0200
Subject: phylink: add helper for configuring 2500BaseX modes

Add a helper for MAC drivers to use in their validate callback to deal
with 2500BaseX vs 1000BaseX modes, where the hardware supports both
but it is not possible to automatically select between them.

This helper defaults to 1000BaseX, as that is the 802.3 standard, and
will allow users to select 2500BaseX either by forcing the speed if
AN is disabled, or by changing the advertising mask if AN is enabled.
Disabling AN is not recommended as it is only the speed that we're
interested in controlling, not the duplex or pause mode parameters.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 30 ++++++++++++++++++++++++++++++
 include/linux/phylink.h   |  1 +
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index af4dc4425be2..3ba5cf2a8a5f 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1688,4 +1688,34 @@ static const struct sfp_upstream_ops sfp_phylink_ops = {
 	.disconnect_phy = phylink_sfp_disconnect_phy,
 };
 
+/* Helpers for MAC drivers */
+
+/**
+ * phylink_helper_basex_speed() - 1000BaseX/2500BaseX helper
+ * @state: a pointer to a &struct phylink_link_state
+ *
+ * Inspect the interface mode, advertising mask or forced speed and
+ * decide whether to run at 2.5Gbit or 1Gbit appropriately, switching
+ * the interface mode to suit.  @state->interface is appropriately
+ * updated, and the advertising mask has the "other" baseX_Full flag
+ * cleared.
+ */
+void phylink_helper_basex_speed(struct phylink_link_state *state)
+{
+	if (phy_interface_mode_is_8023z(state->interface)) {
+		bool want_2500 = state->an_enabled ?
+			phylink_test(state->advertising, 2500baseX_Full) :
+			state->speed == SPEED_2500;
+
+		if (want_2500) {
+			phylink_clear(state->advertising, 1000baseX_Full);
+			state->interface = PHY_INTERFACE_MODE_2500BASEX;
+		} else {
+			phylink_clear(state->advertising, 2500baseX_Full);
+			state->interface = PHY_INTERFACE_MODE_1000BASEX;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(phylink_helper_basex_speed);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 50eeae025f1e..021fc6595856 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -234,5 +234,6 @@ int phylink_mii_ioctl(struct phylink *, struct ifreq *, int);
 #define phylink_test(bm, mode)	__phylink_do_bit(test_bit, bm, mode)
 
 void phylink_set_port_modes(unsigned long *bits);
+void phylink_helper_basex_speed(struct phylink_link_state *state);
 
 #endif
-- 
cgit v1.2.3


From 48ae5554a076c1bca31448d60263e4038def9f6f Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Wed, 8 Aug 2018 09:04:29 +0100
Subject: net: stmmac: Add XGMAC 2.10 HWIF entry

Add a new entry to HWIF table for XGMAC 2.10. For now we fill it with
empty callbacks which will be added in posterior patches.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h | 14 +++++++------
 drivers/net/ethernet/stmicro/stmmac/hwif.c   | 31 ++++++++++++++++++++++++++--
 include/linux/stmmac.h                       |  1 +
 3 files changed, 38 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 78fd0f8b8e81..3fb81acbd274 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -36,12 +36,14 @@
 #include "mmc.h"
 
 /* Synopsys Core versions */
-#define	DWMAC_CORE_3_40	0x34
-#define	DWMAC_CORE_3_50	0x35
-#define	DWMAC_CORE_4_00	0x40
-#define DWMAC_CORE_4_10	0x41
-#define DWMAC_CORE_5_00 0x50
-#define DWMAC_CORE_5_10 0x51
+#define	DWMAC_CORE_3_40		0x34
+#define	DWMAC_CORE_3_50		0x35
+#define	DWMAC_CORE_4_00		0x40
+#define DWMAC_CORE_4_10		0x41
+#define DWMAC_CORE_5_00		0x50
+#define DWMAC_CORE_5_10		0x51
+#define DWXGMAC_CORE_2_10	0x21
+
 #define STMMAC_CHAN0	0	/* Always supported and default for all chips */
 
 /* These need to be power of two, and >= 4 */
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
index 1f50e83cafb2..24f5ff175aa4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -72,6 +72,7 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv)
 static const struct stmmac_hwif_entry {
 	bool gmac;
 	bool gmac4;
+	bool xgmac;
 	u32 min_id;
 	const struct stmmac_regs_off regs;
 	const void *desc;
@@ -87,6 +88,7 @@ static const struct stmmac_hwif_entry {
 	{
 		.gmac = false,
 		.gmac4 = false,
+		.xgmac = false,
 		.min_id = 0,
 		.regs = {
 			.ptp_off = PTP_GMAC3_X_OFFSET,
@@ -103,6 +105,7 @@ static const struct stmmac_hwif_entry {
 	}, {
 		.gmac = true,
 		.gmac4 = false,
+		.xgmac = false,
 		.min_id = 0,
 		.regs = {
 			.ptp_off = PTP_GMAC3_X_OFFSET,
@@ -119,6 +122,7 @@ static const struct stmmac_hwif_entry {
 	}, {
 		.gmac = false,
 		.gmac4 = true,
+		.xgmac = false,
 		.min_id = 0,
 		.regs = {
 			.ptp_off = PTP_GMAC4_OFFSET,
@@ -135,6 +139,7 @@ static const struct stmmac_hwif_entry {
 	}, {
 		.gmac = false,
 		.gmac4 = true,
+		.xgmac = false,
 		.min_id = DWMAC_CORE_4_00,
 		.regs = {
 			.ptp_off = PTP_GMAC4_OFFSET,
@@ -151,6 +156,7 @@ static const struct stmmac_hwif_entry {
 	}, {
 		.gmac = false,
 		.gmac4 = true,
+		.xgmac = false,
 		.min_id = DWMAC_CORE_4_10,
 		.regs = {
 			.ptp_off = PTP_GMAC4_OFFSET,
@@ -167,6 +173,7 @@ static const struct stmmac_hwif_entry {
 	}, {
 		.gmac = false,
 		.gmac4 = true,
+		.xgmac = false,
 		.min_id = DWMAC_CORE_5_10,
 		.regs = {
 			.ptp_off = PTP_GMAC4_OFFSET,
@@ -180,11 +187,29 @@ static const struct stmmac_hwif_entry {
 		.tc = &dwmac510_tc_ops,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
-	}
+	}, {
+		.gmac = false,
+		.gmac4 = false,
+		.xgmac = true,
+		.min_id = DWXGMAC_CORE_2_10,
+		.regs = {
+			.ptp_off = 0,
+			.mmc_off = 0,
+		},
+		.desc = NULL,
+		.dma = NULL,
+		.mac = NULL,
+		.hwtimestamp = NULL,
+		.mode = NULL,
+		.tc = NULL,
+		.setup = NULL,
+		.quirks = NULL,
+	},
 };
 
 int stmmac_hwif_init(struct stmmac_priv *priv)
 {
+	bool needs_xgmac = priv->plat->has_xgmac;
 	bool needs_gmac4 = priv->plat->has_gmac4;
 	bool needs_gmac = priv->plat->has_gmac;
 	const struct stmmac_hwif_entry *entry;
@@ -195,7 +220,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
 
 	if (needs_gmac) {
 		id = stmmac_get_id(priv, GMAC_VERSION);
-	} else if (needs_gmac4) {
+	} else if (needs_gmac4 || needs_xgmac) {
 		id = stmmac_get_id(priv, GMAC4_VERSION);
 	} else {
 		id = 0;
@@ -229,6 +254,8 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
 			continue;
 		if (needs_gmac4 ^ entry->gmac4)
 			continue;
+		if (needs_xgmac ^ entry->xgmac)
+			continue;
 		/* Use synopsys_id var because some setups can override this */
 		if (priv->synopsys_id < entry->min_id)
 			continue;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 32feac5bbd75..c43e9a01b892 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -190,5 +190,6 @@ struct plat_stmmacenet_data {
 	bool tso_en;
 	int mac_port_sel_speed;
 	bool en_tx_lpi_clockgating;
+	int has_xgmac;
 };
 #endif
-- 
cgit v1.2.3


From 64bed6cbe38bc95689fb9399872d9ce250192f90 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 13 Jul 2018 17:22:24 +0300
Subject: nfsd: fix leaked file lock with nfs exported overlayfs

nfsd and lockd call vfs_lock_file() to lock/unlock the inode
returned by locks_inode(file).

Many places in nfsd/lockd code use the inode returned by
file_inode(file) for lock manipulation. With Overlayfs, file_inode()
(the underlying inode) is not the same object as locks_inode() (the
overlay inode). This can result in "Leaked POSIX lock" messages
and eventually to a kernel crash as reported by Eddie Horng:
https://marc.info/?l=linux-unionfs&m=153086643202072&w=2

Fix all the call sites in nfsd/lockd that should use locks_inode().
This is a correctness bug that manifested when overlayfs gained
NFS export support in v4.16.

Reported-by: Eddie Horng <eddiehorng.tw@gmail.com>
Tested-by: Eddie Horng <eddiehorng.tw@gmail.com>
Cc: Jeff Layton <jlayton@kernel.org>
Fixes: 8383f1748829 ("ovl: wire up NFS export operations")
Cc: stable@vger.kernel.org
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/clntlock.c         |  2 +-
 fs/lockd/clntproc.c         |  2 +-
 fs/lockd/svclock.c          | 16 ++++++++--------
 fs/lockd/svcsubs.c          |  4 ++--
 fs/nfsd/nfs4state.c         |  2 +-
 include/linux/lockd/lockd.h |  4 ++--
 6 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 96c1d14c18f1..c2a128678e6e 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -187,7 +187,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
 			continue;
 		if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
 			continue;
-		if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)) ,fh) != 0)
+		if (nfs_compare_fh(NFS_FH(locks_inode(fl_blocked->fl_file)), fh) != 0)
 			continue;
 		/* Alright, we found a lock. Set the return status
 		 * and wake up the caller
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index a2c0dfc6fdc0..d20b92f271c2 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -128,7 +128,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 	char *nodename = req->a_host->h_rpcclnt->cl_nodename;
 
 	nlmclnt_next_cookie(&argp->cookie);
-	memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
+	memcpy(&lock->fh, NFS_FH(locks_inode(fl->fl_file)), sizeof(struct nfs_fh));
 	lock->caller  = nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 3701bccab478..74330daeab71 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -405,8 +405,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	__be32			ret;
 
 	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
-				file_inode(file->f_file)->i_sb->s_id,
-				file_inode(file->f_file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_type, lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end,
@@ -511,8 +511,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 	__be32			ret;
 
 	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
-				file_inode(file->f_file)->i_sb->s_id,
-				file_inode(file->f_file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_type,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -566,8 +566,8 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
 	int	error;
 
 	dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
-				file_inode(file->f_file)->i_sb->s_id,
-				file_inode(file->f_file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -595,8 +595,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
 	int status = 0;
 
 	dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
-				file_inode(file->f_file)->i_sb->s_id,
-				file_inode(file->f_file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 4ec3d6e03e76..899360ba3b84 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -44,7 +44,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
 
 static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
 {
-	struct inode *inode = file_inode(file->f_file);
+	struct inode *inode = locks_inode(file->f_file);
 
 	dprintk("lockd: %s %s/%ld\n",
 		msg, inode->i_sb->s_id, inode->i_ino);
@@ -414,7 +414,7 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
 {
 	struct super_block *sb = datap;
 
-	return sb == file_inode(file->f_file)->i_sb;
+	return sb == locks_inode(file->f_file)->i_sb;
 }
 
 /**
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 19c4d29917ee..c8e4a8f42bbe 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6322,7 +6322,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 		return status;
 	}
 
-	inode = file_inode(filp);
+	inode = locks_inode(filp);
 	flctx = inode->i_flctx;
 
 	if (flctx && !list_empty_careful(&flctx->flc_posix)) {
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 4fd95dbeb52f..b065ef406770 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -299,7 +299,7 @@ int           nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
 
 static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
 {
-	return file_inode(file->f_file);
+	return locks_inode(file->f_file);
 }
 
 static inline int __nlm_privileged_request4(const struct sockaddr *sap)
@@ -359,7 +359,7 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
 static inline int nlm_compare_locks(const struct file_lock *fl1,
 				    const struct file_lock *fl2)
 {
-	return file_inode(fl1->fl_file) == file_inode(fl2->fl_file)
+	return locks_inode(fl1->fl_file) == locks_inode(fl2->fl_file)
 	     && fl1->fl_pid   == fl2->fl_pid
 	     && fl1->fl_owner == fl2->fl_owner
 	     && fl1->fl_start == fl2->fl_start
-- 
cgit v1.2.3


From 3fd9557aec919e2db99365ad5a2c00d04ae8893c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 27 Jul 2018 11:19:05 -0400
Subject: NFSD: Refactor the generic write vector fill helper

fill_in_write_vector() is nearly the same logic as
svc_fill_write_vector(), but there are a few differences so that
the former can handle multiple WRITE payloads in a single COMPOUND.

svc_fill_write_vector() can be adjusted so that it can be used in
the NFSv4 WRITE code path too. Instead of assuming the pages are
coming from rq_args.pages, have the caller pass in the page list.

The immediate benefit is a reduction of code duplication. It also
prevents the NFSv4 WRITE decoder from passing an empty vector
element when the transport has provided the payload in the xdr_buf's
page array.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs3proc.c         |  3 ++-
 fs/nfsd/nfs4proc.c         | 23 ++++-------------------
 fs/nfsd/nfsproc.c          |  3 ++-
 include/linux/sunrpc/svc.h |  1 +
 net/sunrpc/svc.c           | 11 ++++-------
 5 files changed, 13 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 6259a4b8579f..8d1c2d1a159b 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -202,7 +202,8 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
 
 	fh_copy(&resp->fh, &argp->fh);
 	resp->committed = argp->stable;
-	nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+	nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
+				      &argp->first, cnt);
 	if (!nvecs)
 		RETURN_STATUS(nfserr_io);
 	nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3652f9b1fb68..b7bc6e1a85ac 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -986,24 +986,6 @@ out:
 	return status;
 }
 
-static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
-{
-        int i = 1;
-        int buflen = write->wr_buflen;
-
-        vec[0].iov_base = write->wr_head.iov_base;
-        vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len);
-        buflen -= vec[0].iov_len;
-
-        while (buflen) {
-                vec[i].iov_base = page_address(write->wr_pagelist[i - 1]);
-                vec[i].iov_len = min_t(int, PAGE_SIZE, buflen);
-                buflen -= vec[i].iov_len;
-                i++;
-        }
-        return i;
-}
-
 static __be32
 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    union nfsd4_op_u *u)
@@ -1031,7 +1013,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	write->wr_how_written = write->wr_stable_how;
 	gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
 
-	nvecs = fill_in_write_vector(rqstp->rq_vec, write);
+	nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
+				      &write->wr_head, write->wr_buflen);
+	if (!nvecs)
+		return nfserr_io;
 	WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
 
 	status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index f107f9fa8e15..a6faee562b31 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -218,7 +218,8 @@ nfsd_proc_write(struct svc_rqst *rqstp)
 		SVCFH_fmt(&argp->fh),
 		argp->len, argp->offset);
 
-	nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+	nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
+				      &argp->first, cnt);
 	if (!nvecs)
 		return nfserr_io;
 	nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 574368e8a16f..43f88bd7b601 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -496,6 +496,7 @@ void		   svc_reserve(struct svc_rqst *rqstp, int space);
 struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
 char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
 unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
+					 struct page **pages,
 					 struct kvec *first, size_t total);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
 					     struct kvec *first, size_t total);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 30a4226baf03..2194ed507991 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1537,16 +1537,16 @@ EXPORT_SYMBOL_GPL(svc_max_payload);
 /**
  * svc_fill_write_vector - Construct data argument for VFS write call
  * @rqstp: svc_rqst to operate on
+ * @pages: list of pages containing data payload
  * @first: buffer containing first section of write payload
  * @total: total number of bytes of write payload
  *
- * Returns the number of elements populated in the data argument array.
+ * Fills in rqstp::rq_vec, and returns the number of elements.
  */
-unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
-				   size_t total)
+unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages,
+				   struct kvec *first, size_t total)
 {
 	struct kvec *vec = rqstp->rq_vec;
-	struct page **pages;
 	unsigned int i;
 
 	/* Some types of transport can present the write payload
@@ -1560,14 +1560,11 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
 		++i;
 	}
 
-	WARN_ON_ONCE(rqstp->rq_arg.page_base != 0);
-	pages = rqstp->rq_arg.pages;
 	while (total) {
 		vec[i].iov_base = page_address(*pages);
 		vec[i].iov_len = min_t(size_t, total, PAGE_SIZE);
 		total -= vec[i].iov_len;
 		++i;
-
 		++pages;
 	}
 
-- 
cgit v1.2.3


From 11b4d66ea3313d9b03a83b80458ddee64990e3c3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 27 Jul 2018 11:19:10 -0400
Subject: NFSD: Handle full-length symlinks

I've given up on the idea of zero-copy handling of SYMLINK on the
server side. This is because the Linux VFS symlink API requires the
symlink pathname to be in a NUL-terminated kmalloc'd buffer. The
NUL-termination is going to be problematic (watching out for
landing on a page boundary and dealing with a 4096-byte pathname).

I don't believe that SYMLINK creation is on a performance path or is
requested frequently enough that it will cause noticeable CPU cache
pollution due to data copies.

There will be two places where a transport callout will be necessary
to fill in the rqstp: one will be in the svc_fill_symlink_pathname()
helper that is used by NFSv2 and NFSv3, and the other will be in
nfsd4_decode_create().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs3proc.c         |  2 ++
 fs/nfsd/nfsproc.c          |  2 ++
 include/linux/sunrpc/svc.h |  3 ++-
 net/sunrpc/svc.c           | 67 +++++++++++++++++-----------------------------
 4 files changed, 31 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 8d1c2d1a159b..9eb8086ea841 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -290,6 +290,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
 		RETURN_STATUS(nfserr_nametoolong);
 
 	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+						page_address(rqstp->rq_arg.pages[0]),
 						argp->tlen);
 	if (IS_ERR(argp->tname))
 		RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
@@ -303,6 +304,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
 	fh_init(&resp->fh, NFS3_FHSIZE);
 	nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
 						   argp->tname, &resp->fh);
+	kfree(argp->tname);
 	RETURN_STATUS(nfserr);
 }
 
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a6faee562b31..0d20fd161225 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -454,6 +454,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
 		return nfserr_nametoolong;
 
 	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+						page_address(rqstp->rq_arg.pages[0]),
 						argp->tlen);
 	if (IS_ERR(argp->tname))
 		return nfserrno(PTR_ERR(argp->tname));
@@ -466,6 +467,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
 	nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
 						 argp->tname, &newfh);
 
+	kfree(argp->tname);
 	fh_put(&argp->ffh);
 	fh_put(&newfh);
 	return nfserr;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 43f88bd7b601..73e130a840ce 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -499,7 +499,8 @@ unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
 					 struct page **pages,
 					 struct kvec *first, size_t total);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
-					     struct kvec *first, size_t total);
+					     struct kvec *first, void *p,
+					     size_t total);
 
 #define	RPC_MAX_ADDRBUFLEN	(63U)
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2194ed507991..d13e05f1a990 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1577,65 +1577,48 @@ EXPORT_SYMBOL_GPL(svc_fill_write_vector);
  * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
  * @rqstp: svc_rqst to operate on
  * @first: buffer containing first section of pathname
+ * @p: buffer containing remaining section of pathname
  * @total: total length of the pathname argument
  *
- * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is
- * released automatically when @rqstp is recycled.
+ * The VFS symlink API demands a NUL-terminated pathname in mapped memory.
+ * Returns pointer to a NUL-terminated string, or an ERR_PTR. Caller must free
+ * the returned string.
  */
 char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
-				size_t total)
+				void *p, size_t total)
 {
-	struct xdr_buf *arg = &rqstp->rq_arg;
-	struct page **pages;
-	char *result;
-
-	/* VFS API demands a NUL-terminated pathname. This function
-	 * uses a page from @rqstp as the pathname buffer, to enable
-	 * direct placement. Thus the total buffer size is PAGE_SIZE.
-	 * Space in this buffer for NUL-termination requires that we
-	 * cap the size of the returned symlink pathname just a
-	 * little early.
-	 */
-	if (total > PAGE_SIZE - 1)
-		return ERR_PTR(-ENAMETOOLONG);
+	size_t len, remaining;
+	char *result, *dst;
 
-	/* Some types of transport can present the pathname entirely
-	 * in rq_arg.pages. If not, then copy the pathname into one
-	 * page.
-	 */
-	pages = arg->pages;
-	WARN_ON_ONCE(arg->page_base != 0);
-	if (first->iov_base == 0) {
-		result = page_address(*pages);
-		result[total] = '\0';
-	} else {
-		size_t len, remaining;
-		char *dst;
+	result = kmalloc(total + 1, GFP_KERNEL);
+	if (!result)
+		return ERR_PTR(-ESERVERFAULT);
 
-		result = page_address(*(rqstp->rq_next_page++));
-		dst = result;
-		remaining = total;
+	dst = result;
+	remaining = total;
 
-		len = min_t(size_t, total, first->iov_len);
+	len = min_t(size_t, total, first->iov_len);
+	if (len) {
 		memcpy(dst, first->iov_base, len);
 		dst += len;
 		remaining -= len;
+	}
 
-		/* No more than one page left */
-		if (remaining) {
-			len = min_t(size_t, remaining, PAGE_SIZE);
-			memcpy(dst, page_address(*pages), len);
-			dst += len;
-		}
-
-		*dst = '\0';
+	if (remaining) {
+		len = min_t(size_t, remaining, PAGE_SIZE);
+		memcpy(dst, p, len);
+		dst += len;
 	}
 
-	/* Sanity check: we don't allow the pathname argument to
+	*dst = '\0';
+
+	/* Sanity check: Linux doesn't allow the pathname argument to
 	 * contain a NUL byte.
 	 */
-	if (strlen(result) != total)
+	if (strlen(result) != total) {
+		kfree(result);
 		return ERR_PTR(-EINVAL);
+	}
 	return result;
 }
 EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
-- 
cgit v1.2.3


From 2d2917f7747805a1f4188672f308d82a8ba01700 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 9 Aug 2018 14:04:14 -0600
Subject: PCI: Export pcie_has_flr()

pcie_flr() suggests pcie_has_flr() to ensure that PCIe FLR support is
present prior to calling.  pcie_flr() is exported while pcie_has_flr()
is not.  Resolve this.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.c   | 3 ++-
 include/linux/pci.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d123c2b173da..85e5b80a69a7 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4039,7 +4039,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
  * Returns true if the device advertises support for PCIe function level
  * resets.
  */
-static bool pcie_has_flr(struct pci_dev *dev)
+bool pcie_has_flr(struct pci_dev *dev)
 {
 	u32 cap;
 
@@ -4049,6 +4049,7 @@ static bool pcie_has_flr(struct pci_dev *dev)
 	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
 	return cap & PCI_EXP_DEVCAP_FLR;
 }
+EXPORT_SYMBOL_GPL(pcie_has_flr);
 
 /**
  * pcie_flr - initiate a PCIe function level reset
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 307b336496f6..bace761deff2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1090,6 +1090,7 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev,
 			     enum pci_bus_speed *speed,
 			     enum pcie_link_width *width);
 void pcie_print_link_status(struct pci_dev *dev);
+bool pcie_has_flr(struct pci_dev *dev);
 int pcie_flr(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
-- 
cgit v1.2.3


From 5e7baf0fcb2a3aef7329f3c7543d4695a46bd321 Mon Sep 17 00:00:00 2001
From: Manish Chopra <manish.chopra@cavium.com>
Date: Thu, 9 Aug 2018 11:13:49 -0700
Subject: qed/qede: Multi CoS support.

This patch adds support for tc mqprio offload,
using this different traffic classes on the adapter
can be utilized based on configured priority to tc map.

For example -

tc qdisc add dev eth0 root mqprio num_tc 4 map 0 1 2 3

This will cause SKBs with priority 0,1,2,3 to transmit
over tc 0,1,2,3 hardware queues respectively.

Signed-off-by: Manish Chopra <manish.chopra@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_l2.c        |   9 +-
 drivers/net/ethernet/qlogic/qed/qed_main.c      |   5 +-
 drivers/net/ethernet/qlogic/qede/qede.h         |  13 +++
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c |  48 ++++++--
 drivers/net/ethernet/qlogic/qede/qede_fp.c      |  29 +++--
 drivers/net/ethernet/qlogic/qede/qede_main.c    | 139 +++++++++++++++++++-----
 include/linux/qed/qed_eth_if.h                  |   6 +
 7 files changed, 200 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 5ede6408649d..82a1bd1f8a8c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2188,16 +2188,17 @@ out:
 static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 				 struct qed_dev_eth_info *info)
 {
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
 	int i;
 
 	memset(info, 0, sizeof(*info));
 
-	info->num_tc = 1;
-
 	if (IS_PF(cdev)) {
 		int max_vf_vlan_filters = 0;
 		int max_vf_mac_filters = 0;
 
+		info->num_tc = p_hwfn->hw_info.num_hw_tc;
+
 		if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
 			u16 num_queues = 0;
 
@@ -2248,6 +2249,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 	} else {
 		u16 total_cids = 0;
 
+		info->num_tc = 1;
+
 		/* Determine queues &  XDP support */
 		for_each_hwfn(cdev, i) {
 			struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
@@ -2554,7 +2557,7 @@ static int qed_start_txq(struct qed_dev *cdev,
 
 	rc = qed_eth_tx_queue_start(p_hwfn,
 				    p_hwfn->hw_info.opaque_fid,
-				    p_params, 0,
+				    p_params, p_params->tc,
 				    pbl_addr, pbl_size, ret_params);
 
 	if (rc) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index dbe81310c0b6..2094d86a7a08 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -948,13 +948,14 @@ static void qed_update_pf_params(struct qed_dev *cdev,
 		params->eth_pf_params.num_arfs_filters = 0;
 
 	/* In case we might support RDMA, don't allow qede to be greedy
-	 * with the L2 contexts. Allow for 64 queues [rx, tx, xdp] per hwfn.
+	 * with the L2 contexts. Allow for 64 queues [rx, tx cos, xdp]
+	 * per hwfn.
 	 */
 	if (QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev))) {
 		u16 *num_cons;
 
 		num_cons = &params->eth_pf_params.num_cons;
-		*num_cons = min_t(u16, *num_cons, 192);
+		*num_cons = min_t(u16, *num_cons, QED_MAX_L2_CONS);
 	}
 
 	for (i = 0; i < cdev->num_hwfns; i++) {
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index d7ed0d3dbf71..e90c60a8fb01 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -386,6 +386,15 @@ struct qede_tx_queue {
 #define QEDE_TXQ_XDP_TO_IDX(edev, txq)	((txq)->index - \
 					 QEDE_MAX_TSS_CNT(edev))
 #define QEDE_TXQ_IDX_TO_XDP(edev, idx)	((idx) + QEDE_MAX_TSS_CNT(edev))
+#define QEDE_NDEV_TXQ_ID_TO_FP_ID(edev, idx)	((edev)->fp_num_rx + \
+						 ((idx) % QEDE_TSS_COUNT(edev)))
+#define QEDE_NDEV_TXQ_ID_TO_TXQ_COS(edev, idx)	((idx) / QEDE_TSS_COUNT(edev))
+#define QEDE_TXQ_TO_NDEV_TXQ_ID(edev, txq)	((QEDE_TSS_COUNT(edev) * \
+						 (txq)->cos) + (txq)->index)
+#define QEDE_NDEV_TXQ_ID_TO_TXQ(edev, idx)	\
+	(&((edev)->fp_array[QEDE_NDEV_TXQ_ID_TO_FP_ID(edev, idx)].txq \
+	[QEDE_NDEV_TXQ_ID_TO_TXQ_COS(edev, idx)]))
+#define QEDE_FP_TC0_TXQ(fp)	(&((fp)->txq[0]))
 
 	/* Regular Tx requires skb + metadata for release purpose,
 	 * while XDP requires the pages and the mapped address.
@@ -399,6 +408,8 @@ struct qede_tx_queue {
 
 	/* Slowpath; Should be kept in end [unless missing padding] */
 	void *handle;
+	u16 cos;
+	u16 ndev_txq_id;
 };
 
 #define BD_UNMAP_ADDR(bd)		HILO_U64(le32_to_cpu((bd)->addr.hi), \
@@ -541,5 +552,7 @@ void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq);
 #define QEDE_RX_HDR_SIZE		256
 #define QEDE_MAX_JUMBO_PACKET_SIZE	9600
 #define	for_each_queue(i) for (i = 0; i < edev->num_queues; i++)
+#define for_each_cos_in_txq(edev, var) \
+	for ((var) = 0; (var) < (edev)->dev_info.num_tc; (var)++)
 
 #endif /* _QEDE_H_ */
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index b37857f3f950..2bd84d6d9b15 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -222,7 +222,7 @@ static void qede_get_strings_stats_txq(struct qede_dev *edev,
 				QEDE_TXQ_XDP_TO_IDX(edev, txq),
 				qede_tqstats_arr[i].string);
 		else
-			sprintf(*buf, "%d: %s", txq->index,
+			sprintf(*buf, "%d_%d: %s", txq->index, txq->cos,
 				qede_tqstats_arr[i].string);
 		*buf += ETH_GSTRING_LEN;
 	}
@@ -262,8 +262,13 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf)
 		if (fp->type & QEDE_FASTPATH_XDP)
 			qede_get_strings_stats_txq(edev, fp->xdp_tx, &buf);
 
-		if (fp->type & QEDE_FASTPATH_TX)
-			qede_get_strings_stats_txq(edev, fp->txq, &buf);
+		if (fp->type & QEDE_FASTPATH_TX) {
+			int cos;
+
+			for_each_cos_in_txq(edev, cos)
+				qede_get_strings_stats_txq(edev,
+							   &fp->txq[cos], &buf);
+		}
 	}
 
 	/* Account for non-queue statistics */
@@ -338,8 +343,12 @@ static void qede_get_ethtool_stats(struct net_device *dev,
 		if (fp->type & QEDE_FASTPATH_XDP)
 			qede_get_ethtool_stats_txq(fp->xdp_tx, &buf);
 
-		if (fp->type & QEDE_FASTPATH_TX)
-			qede_get_ethtool_stats_txq(fp->txq, &buf);
+		if (fp->type & QEDE_FASTPATH_TX) {
+			int cos;
+
+			for_each_cos_in_txq(edev, cos)
+				qede_get_ethtool_stats_txq(&fp->txq[cos], &buf);
+		}
 	}
 
 	for (i = 0; i < QEDE_NUM_STATS; i++) {
@@ -366,7 +375,8 @@ static int qede_get_sset_count(struct net_device *dev, int stringset)
 				num_stats--;
 
 		/* Account for the Regular Tx statistics */
-		num_stats += QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS;
+		num_stats += QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS *
+				edev->dev_info.num_tc;
 
 		/* Account for the Regular Rx statistics */
 		num_stats += QEDE_RSS_COUNT(edev) * QEDE_NUM_RQSTATS;
@@ -741,9 +751,17 @@ static int qede_get_coalesce(struct net_device *dev,
 		}
 
 		for_each_queue(i) {
+			struct qede_tx_queue *txq;
+
 			fp = &edev->fp_array[i];
+
+			/* All TX queues of given fastpath uses same
+			 * coalescing value, so no need to iterate over
+			 * all TCs, TC0 txq should suffice.
+			 */
 			if (fp->type & QEDE_FASTPATH_TX) {
-				tx_handle = fp->txq->handle;
+				txq = QEDE_FP_TC0_TXQ(fp);
+				tx_handle = txq->handle;
 				break;
 			}
 		}
@@ -801,9 +819,17 @@ static int qede_set_coalesce(struct net_device *dev,
 		}
 
 		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+			struct qede_tx_queue *txq;
+
+			/* All TX queues of given fastpath uses same
+			 * coalescing value, so no need to iterate over
+			 * all TCs, TC0 txq should suffice.
+			 */
+			txq = QEDE_FP_TC0_TXQ(fp);
+
 			rc = edev->ops->common->set_coalesce(edev->cdev,
 							     0, txc,
-							     fp->txq->handle);
+							     txq->handle);
 			if (rc) {
 				DP_INFO(edev,
 					"Set TX coalesce error, rc = %d\n", rc);
@@ -1385,8 +1411,10 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev,
 	u16 val;
 
 	for_each_queue(i) {
-		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
-			txq = edev->fp_array[i].txq;
+		struct qede_fastpath *fp = &edev->fp_array[i];
+
+		if (fp->type & QEDE_FASTPATH_TX) {
+			txq = QEDE_FP_TC0_TXQ(fp);
 			break;
 		}
 	}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 8c9e95ba9917..1a78027de071 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -408,12 +408,12 @@ static void qede_xdp_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 
 static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
+	unsigned int pkts_compl = 0, bytes_compl = 0;
 	struct netdev_queue *netdev_txq;
 	u16 hw_bd_cons;
-	unsigned int pkts_compl = 0, bytes_compl = 0;
 	int rc;
 
-	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->index);
+	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
 
 	hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
 	barrier();
@@ -1365,9 +1365,14 @@ static bool qede_poll_is_more_work(struct qede_fastpath *fp)
 		if (qede_txq_has_work(fp->xdp_tx))
 			return true;
 
-	if (likely(fp->type & QEDE_FASTPATH_TX))
-		if (qede_txq_has_work(fp->txq))
-			return true;
+	if (likely(fp->type & QEDE_FASTPATH_TX)) {
+		int cos;
+
+		for_each_cos_in_txq(fp->edev, cos) {
+			if (qede_txq_has_work(&fp->txq[cos]))
+				return true;
+		}
+	}
 
 	return false;
 }
@@ -1382,8 +1387,14 @@ int qede_poll(struct napi_struct *napi, int budget)
 	struct qede_dev *edev = fp->edev;
 	int rx_work_done = 0;
 
-	if (likely(fp->type & QEDE_FASTPATH_TX) && qede_txq_has_work(fp->txq))
-		qede_tx_int(edev, fp->txq);
+	if (likely(fp->type & QEDE_FASTPATH_TX)) {
+		int cos;
+
+		for_each_cos_in_txq(fp->edev, cos) {
+			if (qede_txq_has_work(&fp->txq[cos]))
+				qede_tx_int(edev, &fp->txq[cos]);
+		}
+	}
 
 	if ((fp->type & QEDE_FASTPATH_XDP) && qede_txq_has_work(fp->xdp_tx))
 		qede_xdp_tx_int(edev, fp->xdp_tx);
@@ -1444,8 +1455,8 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
 	/* Get tx-queue context and netdev index */
 	txq_index = skb_get_queue_mapping(skb);
-	WARN_ON(txq_index >= QEDE_TSS_COUNT(edev));
-	txq = edev->fp_array[edev->fp_num_rx + txq_index].txq;
+	WARN_ON(txq_index >= QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc);
+	txq = QEDE_NDEV_TXQ_ID_TO_TXQ(edev, txq_index);
 	netdev_txq = netdev_get_tx_queue(ndev, txq_index);
 
 	WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1));
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 6a796040a32c..d7299afa902c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -536,6 +536,43 @@ static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return 0;
 }
 
+int qede_setup_tc(struct net_device *ndev, u8 num_tc)
+{
+	struct qede_dev *edev = netdev_priv(ndev);
+	int cos, count, offset;
+
+	if (num_tc > edev->dev_info.num_tc)
+		return -EINVAL;
+
+	netdev_reset_tc(ndev);
+	netdev_set_num_tc(ndev, num_tc);
+
+	for_each_cos_in_txq(edev, cos) {
+		count = QEDE_TSS_COUNT(edev);
+		offset = cos * QEDE_TSS_COUNT(edev);
+		netdev_set_tc_queue(ndev, cos, count, offset);
+	}
+
+	return 0;
+}
+
+static int
+qede_setup_tc_offload(struct net_device *dev, enum tc_setup_type type,
+		      void *type_data)
+{
+	struct tc_mqprio_qopt *mqprio;
+
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		mqprio = type_data;
+
+		mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+		return qede_setup_tc(dev, mqprio->num_tc);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops qede_netdev_ops = {
 	.ndo_open = qede_open,
 	.ndo_stop = qede_close,
@@ -568,6 +605,7 @@ static const struct net_device_ops qede_netdev_ops = {
 #ifdef CONFIG_RFS_ACCEL
 	.ndo_rx_flow_steer = qede_rx_flow_steer,
 #endif
+	.ndo_setup_tc = qede_setup_tc_offload,
 };
 
 static const struct net_device_ops qede_netdev_vf_ops = {
@@ -621,7 +659,8 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
 	struct qede_dev *edev;
 
 	ndev = alloc_etherdev_mqs(sizeof(*edev),
-				  info->num_queues, info->num_queues);
+				  info->num_queues * info->num_tc,
+				  info->num_queues);
 	if (!ndev) {
 		pr_err("etherdev allocation failed\n");
 		return NULL;
@@ -830,7 +869,8 @@ static int qede_alloc_fp_array(struct qede_dev *edev)
 		}
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			fp->txq = kzalloc(sizeof(*fp->txq), GFP_KERNEL);
+			fp->txq = kcalloc(edev->dev_info.num_tc,
+					  sizeof(*fp->txq), GFP_KERNEL);
 			if (!fp->txq)
 				goto err;
 		}
@@ -879,10 +919,15 @@ static void qede_sp_task(struct work_struct *work)
 static void qede_update_pf_params(struct qed_dev *cdev)
 {
 	struct qed_pf_params pf_params;
+	u16 num_cons;
 
 	/* 64 rx + 64 tx + 64 XDP */
 	memset(&pf_params, 0, sizeof(struct qed_pf_params));
-	pf_params.eth_pf_params.num_cons = (MAX_SB_PER_PF_MIMD - 1) * 3;
+
+	/* 1 rx + 1 xdp + max tx cos */
+	num_cons = QED_MIN_L2_CONS;
+
+	pf_params.eth_pf_params.num_cons = (MAX_SB_PER_PF_MIMD - 1) * num_cons;
 
 	/* Same for VFs - make sure they'll have sufficient connections
 	 * to support XDP Tx queues.
@@ -1363,8 +1408,12 @@ static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 	if (fp->type & QEDE_FASTPATH_XDP)
 		qede_free_mem_txq(edev, fp->xdp_tx);
 
-	if (fp->type & QEDE_FASTPATH_TX)
-		qede_free_mem_txq(edev, fp->txq);
+	if (fp->type & QEDE_FASTPATH_TX) {
+		int cos;
+
+		for_each_cos_in_txq(edev, cos)
+			qede_free_mem_txq(edev, &fp->txq[cos]);
+	}
 }
 
 /* This function allocates all memory needed for a single fp (i.e. an entity
@@ -1391,9 +1440,13 @@ static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 	}
 
 	if (fp->type & QEDE_FASTPATH_TX) {
-		rc = qede_alloc_mem_txq(edev, fp->txq);
-		if (rc)
-			goto out;
+		int cos;
+
+		for_each_cos_in_txq(edev, cos) {
+			rc = qede_alloc_mem_txq(edev, &fp->txq[cos]);
+			if (rc)
+				goto out;
+		}
 	}
 
 out:
@@ -1466,10 +1519,23 @@ static void qede_init_fp(struct qede_dev *edev)
 		}
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			fp->txq->index = txq_index++;
-			if (edev->dev_info.is_legacy)
-				fp->txq->is_legacy = 1;
-			fp->txq->dev = &edev->pdev->dev;
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				struct qede_tx_queue *txq = &fp->txq[cos];
+				u16 ndev_tx_id;
+
+				txq->cos = cos;
+				txq->index = txq_index;
+				ndev_tx_id = QEDE_TXQ_TO_NDEV_TXQ_ID(edev, txq);
+				txq->ndev_txq_id = ndev_tx_id;
+
+				if (edev->dev_info.is_legacy)
+					txq->is_legacy = 1;
+				txq->dev = &edev->pdev->dev;
+			}
+
+			txq_index++;
 		}
 
 		snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
@@ -1483,7 +1549,9 @@ static int qede_set_real_num_queues(struct qede_dev *edev)
 {
 	int rc = 0;
 
-	rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_COUNT(edev));
+	rc = netif_set_real_num_tx_queues(edev->ndev,
+					  QEDE_TSS_COUNT(edev) *
+					  edev->dev_info.num_tc);
 	if (rc) {
 		DP_NOTICE(edev, "Failed to set real number of Tx queues\n");
 		return rc;
@@ -1685,9 +1753,13 @@ static int qede_stop_queues(struct qede_dev *edev)
 		fp = &edev->fp_array[i];
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			rc = qede_drain_txq(edev, fp->txq, true);
-			if (rc)
-				return rc;
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				rc = qede_drain_txq(edev, &fp->txq[cos], true);
+				if (rc)
+					return rc;
+			}
 		}
 
 		if (fp->type & QEDE_FASTPATH_XDP) {
@@ -1703,9 +1775,13 @@ static int qede_stop_queues(struct qede_dev *edev)
 
 		/* Stop the Tx Queue(s) */
 		if (fp->type & QEDE_FASTPATH_TX) {
-			rc = qede_stop_txq(edev, fp->txq, i);
-			if (rc)
-				return rc;
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				rc = qede_stop_txq(edev, &fp->txq[cos], i);
+				if (rc)
+					return rc;
+			}
 		}
 
 		/* Stop the Rx Queue */
@@ -1758,6 +1834,7 @@ static int qede_start_txq(struct qede_dev *edev,
 
 	params.p_sb = fp->sb_info;
 	params.sb_idx = sb_idx;
+	params.tc = txq->cos;
 
 	rc = edev->ops->q_tx_start(edev->cdev, rss_id, &params, phys_table,
 				   page_cnt, &ret_params);
@@ -1877,9 +1954,14 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 		}
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			rc = qede_start_txq(edev, fp, fp->txq, i, TX_PI(0));
-			if (rc)
-				goto out;
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				rc = qede_start_txq(edev, fp, &fp->txq[cos], i,
+						    TX_PI(cos));
+				if (rc)
+					goto out;
+			}
 		}
 	}
 
@@ -1973,6 +2055,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
 		     bool is_locked)
 {
 	struct qed_link_params link_params;
+	u8 num_tc;
 	int rc;
 
 	DP_INFO(edev, "Starting qede load\n");
@@ -2019,6 +2102,10 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
 		goto err4;
 	DP_INFO(edev, "Start VPORT, RXQ and TXQ succeeded\n");
 
+	num_tc = netdev_get_num_tc(edev->ndev);
+	num_tc = num_tc ? num_tc : edev->dev_info.num_tc;
+	qede_setup_tc(edev->ndev, num_tc);
+
 	/* Program un-configured VLANs */
 	qede_configure_vlan_filters(edev);
 
@@ -2143,7 +2230,7 @@ static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	struct netdev_queue *netdev_txq;
 
-	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->index);
+	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
 	if (netif_xmit_stopped(netdev_txq))
 		return true;
 
@@ -2208,9 +2295,11 @@ static void qede_get_eth_tlv_data(void *dev, void *data)
 	for_each_queue(i) {
 		fp = &edev->fp_array[i];
 		if (fp->type & QEDE_FASTPATH_TX) {
-			if (fp->txq->sw_tx_cons != fp->txq->sw_tx_prod)
+			struct qede_tx_queue *txq = QEDE_FP_TC0_TXQ(fp);
+
+			if (txq->sw_tx_cons != txq->sw_tx_prod)
 				etlv->txqs_empty = false;
-			if (qede_is_txq_full(edev, fp->txq))
+			if (qede_is_txq_full(edev, txq))
 				etlv->num_txqs_full++;
 		}
 		if (fp->type & QEDE_FASTPATH_RX) {
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 2978fa4add42..a1310482c4ed 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -39,6 +39,10 @@
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_iov_if.h>
 
+/* 64 max queues * (1 rx + 4 tx-cos + 1 xdp) */
+#define QED_MIN_L2_CONS (2 + NUM_PHYS_TCS_4PORT_K2)
+#define QED_MAX_L2_CONS (64 * (QED_MIN_L2_CONS))
+
 struct qed_queue_start_common_params {
 	/* Should always be relative to entity sending this. */
 	u8 vport_id;
@@ -49,6 +53,8 @@ struct qed_queue_start_common_params {
 
 	struct qed_sb_info *p_sb;
 	u8 sb_idx;
+
+	u8 tc;
 };
 
 struct qed_rxq_start_ret_params {
-- 
cgit v1.2.3


From 15693fd37fc3a49da356164ed15b571c175efc34 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 8 Aug 2018 19:40:31 +0800
Subject: net: skbuff.h: fix using plain integer as NULL warning

Fixes the following sparse warning:
./include/linux/skbuff.h:2365:58: warning: Using plain integer as NULL pointer

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ebdf158a795..7e237a63a70c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2362,7 +2362,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
 	if (skb_transport_header_was_set(skb))
 		return;
 
-	if (skb_flow_dissect_flow_keys_basic(skb, &keys, 0, 0, 0, 0, 0))
+	if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
 		skb_set_transport_header(skb, keys.control.thoff);
 	else
 		skb_set_transport_header(skb, offset_hint);
-- 
cgit v1.2.3


From bd2e9567db72e37f7f4b90faa5133bc7365b5f65 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 9 Aug 2018 16:19:52 -0500
Subject: PCI: Hide ACS quirk declarations inside PCI core

Move declarations for these functions:

  pci_dev_specific_acs_enabled()
  pci_dev_specific_enable_acs()

from include/linux/pci.h to drivers/pci/pci.h because nothing outside the
PCI core needs to use them.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 14 ++++++++++++++
 include/linux/pci.h | 11 -----------
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index c358e7a07f3f..a1224fef3409 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -352,6 +352,20 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
 }
 
 void pci_enable_acs(struct pci_dev *dev);
+#ifdef CONFIG_PCI_QUIRKS
+int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
+int pci_dev_specific_enable_acs(struct pci_dev *dev);
+#else
+static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
+					       u16 acs_flags)
+{
+	return -ENOTTY;
+}
+static inline int pci_dev_specific_enable_acs(struct pci_dev *dev)
+{
+	return -ENOTTY;
+}
+#endif
 
 /* PCI error reporting and recovery */
 void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..1e1ed049dd1c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1876,20 +1876,9 @@ enum pci_fixup_pass {
 
 #ifdef CONFIG_PCI_QUIRKS
 void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
-int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
-int pci_dev_specific_enable_acs(struct pci_dev *dev);
 #else
 static inline void pci_fixup_device(enum pci_fixup_pass pass,
 				    struct pci_dev *dev) { }
-static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
-					       u16 acs_flags)
-{
-	return -ENOTTY;
-}
-static inline int pci_dev_specific_enable_acs(struct pci_dev *dev)
-{
-	return -ENOTTY;
-}
 #endif
 
 void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
-- 
cgit v1.2.3


From b0768a86585d4d951a30ff565f19598dbbd67897 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 3 Aug 2018 16:58:09 +0900
Subject: net: Export skb_headers_offset_update

This is needed for veth XDP which does skb_copy_expand()-like operation.

v2:
- Drop skb_copy_header part because it has already been exported now.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skbuff.h | 1 +
 net/core/skbuff.c      | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ebdf158a795..e93b157f526c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1038,6 +1038,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 }
 
 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
+void skb_headers_offset_update(struct sk_buff *skb, int off);
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
 void skb_copy_header(struct sk_buff *new, const struct sk_buff *old);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8d574a88125d..c996c09d095f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1291,7 +1291,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(skb_clone);
 
-static void skb_headers_offset_update(struct sk_buff *skb, int off)
+void skb_headers_offset_update(struct sk_buff *skb, int off)
 {
 	/* Only adjust this if it actually is csum_start rather than csum */
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -1305,6 +1305,7 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off)
 	skb->inner_network_header += off;
 	skb->inner_mac_header += off;
 }
+EXPORT_SYMBOL(skb_headers_offset_update);
 
 void skb_copy_header(struct sk_buff *new, const struct sk_buff *old)
 {
-- 
cgit v1.2.3


From 0b19cc0a8694d4295383f88bc3441765875a57bc Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 3 Aug 2018 16:58:15 +0900
Subject: bpf: Make redirect_info accessible from modules

We are going to add kern_flags field in redirect_info for kernel
internal use.
In order to avoid function call to access the flags, make redirect_info
accessible from modules. Also as it is now non-static, add prefix bpf_
to redirect_info.

v6:
- Fix sparse warning around EXPORT_SYMBOL.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h | 10 ++++++++++
 net/core/filter.c      | 29 +++++++++++------------------
 2 files changed, 21 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index c73dd7396886..4717af8b95e6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -537,6 +537,16 @@ struct sk_msg_buff {
 	struct list_head list;
 };
 
+struct bpf_redirect_info {
+	u32 ifindex;
+	u32 flags;
+	struct bpf_map *map;
+	struct bpf_map *map_to_flush;
+	unsigned long   map_owner;
+};
+
+DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
+
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
  * lwt, ...). Subsystems allowing direct data access must (!)
diff --git a/net/core/filter.c b/net/core/filter.c
index 587bbfbd7db3..2de7dd9f2a57 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2082,19 +2082,12 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
-struct redirect_info {
-	u32 ifindex;
-	u32 flags;
-	struct bpf_map *map;
-	struct bpf_map *map_to_flush;
-	unsigned long   map_owner;
-};
-
-static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
+EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
 
 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return TC_ACT_SHOT;
@@ -2107,7 +2100,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 
 int skb_do_redirect(struct sk_buff *skb)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct net_device *dev;
 
 	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
@@ -3200,7 +3193,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 
 void xdp_do_flush_map(void)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct bpf_map *map = ri->map_to_flush;
 
 	ri->map_to_flush = NULL;
@@ -3245,7 +3238,7 @@ static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 			       struct bpf_prog *xdp_prog)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
@@ -3285,7 +3278,7 @@ err:
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		    struct bpf_prog *xdp_prog)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct net_device *fwd;
 	u32 index = ri->ifindex;
 	int err;
@@ -3317,7 +3310,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct xdp_buff *xdp,
 				       struct bpf_prog *xdp_prog)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
@@ -3368,7 +3361,7 @@ err:
 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	u32 index = ri->ifindex;
 	struct net_device *fwd;
 	int err = 0;
@@ -3399,7 +3392,7 @@ EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
 
 BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
 	if (unlikely(flags))
 		return XDP_ABORTED;
@@ -3423,7 +3416,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
 	   unsigned long, map_owner)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
 	if (unlikely(flags))
 		return XDP_ABORTED;
-- 
cgit v1.2.3


From 2539650fadbf63a431e76535a9de7bff6ea5e409 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 3 Aug 2018 16:58:16 +0900
Subject: xdp: Helpers for disabling napi_direct of xdp_return_frame

We need some mechanism to disable napi_direct on calling
xdp_return_frame_rx_napi() from some context.
When veth gets support of XDP_REDIRECT, it will redirects packets which
are redirected from other devices. On redirection veth will reuse
xdp_mem_info of the redirection source device to make return_frame work.
But in this case .ndo_xdp_xmit() called from veth redirection uses
xdp_mem_info which is not guarded by NAPI, because the .ndo_xdp_xmit()
is not called directly from the rxq which owns the xdp_mem_info.

This approach introduces a flag in bpf_redirect_info to indicate that
napi_direct should be disabled even when _rx_napi variant is used as
well as helper functions to use it.

A NAPI handler who wants to use this flag needs to call
xdp_set_return_frame_no_direct() before processing packets, and call
xdp_clear_return_frame_no_direct() after xdp_do_flush_map() before
exiting NAPI.

v4:
- Use bpf_redirect_info for storing the flag instead of xdp_mem_info to
  avoid per-frame copy cost.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h | 25 +++++++++++++++++++++++++
 net/core/xdp.c         |  6 ++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4717af8b95e6..2b072dab32c0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -543,10 +543,14 @@ struct bpf_redirect_info {
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
 	unsigned long   map_owner;
+	u32 kern_flags;
 };
 
 DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
 
+/* flags for bpf_redirect_info kern_flags */
+#define BPF_RI_F_RF_NO_DIRECT	BIT(0)	/* no napi_direct on return_frame */
+
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
  * lwt, ...). Subsystems allowing direct data access must (!)
@@ -775,6 +779,27 @@ static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 
+static inline bool xdp_return_frame_no_direct(void)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT;
+}
+
+static inline void xdp_set_return_frame_no_direct(void)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT;
+}
+
+static inline void xdp_clear_return_frame_no_direct(void)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT;
+}
+
 static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
 				 unsigned int pktlen)
 {
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 57285383ed00..3dd99e1c04f5 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -330,10 +330,12 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
 		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
 		page = virt_to_head_page(data);
-		if (xa)
+		if (xa) {
+			napi_direct &= !xdp_return_frame_no_direct();
 			page_pool_put_page(xa->page_pool, page, napi_direct);
-		else
+		} else {
 			put_page(page);
+		}
 		rcu_read_unlock();
 		break;
 	case MEM_TYPE_PAGE_SHARED:
-- 
cgit v1.2.3


From c9fbb2d25295a566b97d62e6904741e8e1702d83 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Fri, 10 Aug 2018 10:47:43 +0200
Subject: net: Provide stub for __netif_set_xps_queue if there is no CONFIG_XPS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Building virtio_net driver without CONFIG_XPS fails with:

    drivers/net/virtio_net.c: In function ‘virtnet_set_affinity’:
    drivers/net/virtio_net.c:1910:3: error: implicit declaration of function ‘__netif_set_xps_queue’ [-Werror=implicit-function-declaration]
       __netif_set_xps_queue(vi->dev, mask, i, false);
       ^
Fixes: 4d99f6602cb5 ("net: allow to call netif_reset_xps_queues() under cpus_read_lock")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 282e2e95ad5b..ca5ab98053c8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3412,6 +3412,13 @@ static inline int netif_set_xps_queue(struct net_device *dev,
 {
 	return 0;
 }
+
+static inline int __netif_set_xps_queue(struct net_device *dev,
+					const unsigned long *mask,
+					u16 index, bool is_rxqs_map)
+{
+	return 0;
+}
 #endif
 
 /**
-- 
cgit v1.2.3


From bff1b208a5d1dbb2355822ef859edcb9be0379e4 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 6 Aug 2018 15:50:58 -0400
Subject: tracing: Partial revert of "tracing: Centralize preemptirq
 tracepoints and unify their usage"

Joel Fernandes created a nice patch that cleaned up the duplicate hooks used
by lockdep and irqsoff latency tracer. It made both use tracepoints. But it
caused lockdep to trigger several false positives. We have not figured out
why yet, but removing lockdep from using the trace event hooks and just call
its helper functions directly (like it use to), makes the problem go away.

This is a partial revert of the clean up patch c3bc8fd637a9 ("tracing:
Centralize preemptirq tracepoints and unify their usage") that adds direct
calls for lockdep, but also keeps most of the clean up done to get rid of
the horrible preprocessor if statements.

Link: http://lkml.kernel.org/r/20180806155058.5ee875f4@gandalf.local.home

Cc: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Fixes: c3bc8fd637a9 ("tracing: Centralize preemptirq tracepoints and unify their usage")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/irqflags.h        |  8 ++++++--
 include/linux/lockdep.h         |  2 --
 init/main.c                     |  2 --
 kernel/locking/lockdep.c        | 14 ++------------
 kernel/trace/trace_preemptirq.c | 36 ++++++++++++++++++++----------------
 5 files changed, 28 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 50edb9cbbd26..21619c92c377 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -19,9 +19,13 @@
 #ifdef CONFIG_PROVE_LOCKING
   extern void trace_softirqs_on(unsigned long ip);
   extern void trace_softirqs_off(unsigned long ip);
+  extern void lockdep_hardirqs_on(unsigned long ip);
+  extern void lockdep_hardirqs_off(unsigned long ip);
 #else
-# define trace_softirqs_on(ip)	do { } while (0)
-# define trace_softirqs_off(ip)	do { } while (0)
+  static inline void trace_softirqs_on(unsigned long ip) { }
+  static inline void trace_softirqs_off(unsigned long ip) { }
+  static inline void lockdep_hardirqs_on(unsigned long ip) { }
+  static inline void lockdep_hardirqs_off(unsigned long ip) { }
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index a8113357ceeb..b0d0b51c4d85 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -267,7 +267,6 @@ struct held_lock {
  * Initialization, self-test and debugging-output methods:
  */
 extern void lockdep_init(void);
-extern void lockdep_init_early(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
 extern void lockdep_free_key_range(void *start, unsigned long size);
@@ -408,7 +407,6 @@ static inline void lockdep_on(void)
 # define lock_set_class(l, n, k, s, i)		do { } while (0)
 # define lock_set_subclass(l, s, i)		do { } while (0)
 # define lockdep_init()				do { } while (0)
-# define lockdep_init_early()			do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)
 # define lockdep_set_class(lock, key)		do { (void)(key); } while (0)
diff --git a/init/main.c b/init/main.c
index 44fe43be84c1..5d42e577643a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -649,8 +649,6 @@ asmlinkage __visible void __init start_kernel(void)
 	call_function_init();
 	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
 
-	lockdep_init_early();
-
 	early_boot_irqs_disabled = false;
 	local_irq_enable();
 
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 03bfaeb9f4e6..e406c5fdb41e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2840,8 +2840,7 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
 	debug_atomic_inc(hardirqs_on_events);
 }
 
-static void lockdep_hardirqs_on(void *none, unsigned long ignore,
-				unsigned long ip)
+void lockdep_hardirqs_on(unsigned long ip)
 {
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
@@ -2885,8 +2884,7 @@ static void lockdep_hardirqs_on(void *none, unsigned long ignore,
 /*
  * Hardirqs were disabled:
  */
-static void lockdep_hardirqs_off(void *none, unsigned long ignore,
-				 unsigned long ip)
+void lockdep_hardirqs_off(unsigned long ip)
 {
 	struct task_struct *curr = current;
 
@@ -4315,14 +4313,6 @@ out_restore:
 	raw_local_irq_restore(flags);
 }
 
-void __init lockdep_init_early(void)
-{
-#ifdef CONFIG_PROVE_LOCKING
-	register_trace_prio_irq_disable(lockdep_hardirqs_off, NULL, INT_MAX);
-	register_trace_prio_irq_enable(lockdep_hardirqs_on, NULL, INT_MIN);
-#endif
-}
-
 void __init lockdep_init(void)
 {
 	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index e76b78bf258e..fa656b25f427 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -19,41 +19,45 @@ static DEFINE_PER_CPU(int, tracing_irq_cpu);
 
 void trace_hardirqs_on(void)
 {
-	if (!this_cpu_read(tracing_irq_cpu))
-		return;
+	if (this_cpu_read(tracing_irq_cpu)) {
+		trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+		this_cpu_write(tracing_irq_cpu, 0);
+	}
 
-	trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
-	this_cpu_write(tracing_irq_cpu, 0);
+	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_on);
 
 void trace_hardirqs_off(void)
 {
-	if (this_cpu_read(tracing_irq_cpu))
-		return;
+	if (!this_cpu_read(tracing_irq_cpu)) {
+		this_cpu_write(tracing_irq_cpu, 1);
+		trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+	}
 
-	this_cpu_write(tracing_irq_cpu, 1);
-	trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+	lockdep_hardirqs_off(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_off);
 
 __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
 {
-	if (!this_cpu_read(tracing_irq_cpu))
-		return;
+	if (this_cpu_read(tracing_irq_cpu)) {
+		trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
+		this_cpu_write(tracing_irq_cpu, 0);
+	}
 
-	trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
-	this_cpu_write(tracing_irq_cpu, 0);
+	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);
 
 __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
 {
-	if (this_cpu_read(tracing_irq_cpu))
-		return;
+	if (!this_cpu_read(tracing_irq_cpu)) {
+		this_cpu_write(tracing_irq_cpu, 1);
+		trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
+	}
 
-	this_cpu_write(tracing_irq_cpu, 1);
-	trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
+	lockdep_hardirqs_off(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_off_caller);
 #endif /* CONFIG_TRACE_IRQFLAGS */
-- 
cgit v1.2.3


From d799a4de0a250f1bdd99765bb8e55a5e2f469a1f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 3 Aug 2018 00:52:18 +0200
Subject: gpio: mmio: Fix up inverted direction registers

The bgpio_init() takes one of two arguments to specify a register
to set the direction of the GPIO line: either dirout that
indicates that a 1 in the bit in that register sets the
corresponding line to output, or dirin which indicates that
a 1 in the bit in that register sets the corresponding line to
input. Conversely setting the bit to 0 on these will turn the
line into input and output respectively. One of these can
be defined but not both.

This means that a platform that sets a bit to 1 for output
only defines dirout and a platform that sets a bit to 0 for
output only defines dirin. In short this defines the polarity
of the direction register.

Both can also be left as NULL meaning the GPIO chip is either
input only or output only.

Tomer Maimon discovered that for get/set chips (those where the
get and set registers are defined but no separate clear register,
and specifying BGPIOF_READ_OUTPUT_REG_SET so that we say we
want to read the output value from the SET register)
we are unconditionally reading the value from the SET register
when the direction bit is 1 and from the DAT register when the
direction bit is 0, not taking the direction bit polarity into
account.

It would be expected that when the direction bit is inverted
(dirin is defined but not dirout) we read the current value from
the DAT register when the bit is 1 and from the SET register
when the bit is 0.

Currently only some versions of ATH79, brcmstb, some versions of
CLP711x, GE, IOP and Loongson use the dirin mode (a 1 in the
register means input). They are unaffected because
BGPIOF_READ_OUTPUT_REG_SET is not set on any of them. (They
do not read back the SET register to figure out the output
value.) So this is no regression with current drivers.

However the behaviour is wrong and does not work with Tomer's
new driver where he needs to use the BGIOF_READ_OUTPUT_REG_SET.
This fixes the above issue by:

- Instead of defining separate functions for the inverted case,
  set up a flag in the gpio_chip that indicates that the
  direction is inverted.
- Remove the special inverted functions for setting
  input/output and getting the direction, rely on the flag
  instead.
- Respect this flag in bgpio_get_set() and
  bgpio_get_set_multiple()

Reported-by: Tomer Maimon <tmaimon77@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-mmio.c    | 108 ++++++++++++++++++++++++++------------------
 include/linux/gpio/driver.h |   3 ++
 2 files changed, 66 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
index 7b14d6280e44..935292a30c99 100644
--- a/drivers/gpio/gpio-mmio.c
+++ b/drivers/gpio/gpio-mmio.c
@@ -136,8 +136,20 @@ static unsigned long bgpio_line2mask(struct gpio_chip *gc, unsigned int line)
 static int bgpio_get_set(struct gpio_chip *gc, unsigned int gpio)
 {
 	unsigned long pinmask = bgpio_line2mask(gc, gpio);
+	bool dir = !!(gc->bgpio_dir & pinmask);
 
-	if (gc->bgpio_dir & pinmask)
+	/*
+	 * If the direction is OUT we read the value from the SET
+	 * register, and if the direction is IN we read the value
+	 * from the DAT register.
+	 *
+	 * If the direction bits are inverted, naturally this gets
+	 * inverted too.
+	 */
+	if (gc->bgpio_dir_inverted)
+		dir = !dir;
+
+	if (dir)
 		return !!(gc->read_reg(gc->reg_set) & pinmask);
 	else
 		return !!(gc->read_reg(gc->reg_dat) & pinmask);
@@ -157,8 +169,13 @@ static int bgpio_get_set_multiple(struct gpio_chip *gc, unsigned long *mask,
 	*bits &= ~*mask;
 
 	/* Exploit the fact that we know which directions are set */
-	set_mask = *mask & gc->bgpio_dir;
-	get_mask = *mask & ~gc->bgpio_dir;
+	if (gc->bgpio_dir_inverted) {
+		set_mask = *mask & ~gc->bgpio_dir;
+		get_mask = *mask & gc->bgpio_dir;
+	} else {
+		set_mask = *mask & gc->bgpio_dir;
+		get_mask = *mask & ~gc->bgpio_dir;
+	}
 
 	if (set_mask)
 		*bits |= gc->read_reg(gc->reg_set) & set_mask;
@@ -359,7 +376,10 @@ static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
 
 	spin_lock_irqsave(&gc->bgpio_lock, flags);
 
-	gc->bgpio_dir &= ~bgpio_line2mask(gc, gpio);
+	if (gc->bgpio_dir_inverted)
+		gc->bgpio_dir |= bgpio_line2mask(gc, gpio);
+	else
+		gc->bgpio_dir &= ~bgpio_line2mask(gc, gpio);
 	gc->write_reg(gc->reg_dir, gc->bgpio_dir);
 
 	spin_unlock_irqrestore(&gc->bgpio_lock, flags);
@@ -370,7 +390,10 @@ static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
 static int bgpio_get_dir(struct gpio_chip *gc, unsigned int gpio)
 {
 	/* Return 0 if output, 1 of input */
-	return !(gc->read_reg(gc->reg_dir) & bgpio_line2mask(gc, gpio));
+	if (gc->bgpio_dir_inverted)
+		return !!(gc->read_reg(gc->reg_dir) & bgpio_line2mask(gc, gpio));
+	else
+		return !(gc->read_reg(gc->reg_dir) & bgpio_line2mask(gc, gpio));
 }
 
 static int bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
@@ -381,37 +404,10 @@ static int bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 
 	spin_lock_irqsave(&gc->bgpio_lock, flags);
 
-	gc->bgpio_dir |= bgpio_line2mask(gc, gpio);
-	gc->write_reg(gc->reg_dir, gc->bgpio_dir);
-
-	spin_unlock_irqrestore(&gc->bgpio_lock, flags);
-
-	return 0;
-}
-
-static int bgpio_dir_in_inv(struct gpio_chip *gc, unsigned int gpio)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&gc->bgpio_lock, flags);
-
-	gc->bgpio_dir |= bgpio_line2mask(gc, gpio);
-	gc->write_reg(gc->reg_dir, gc->bgpio_dir);
-
-	spin_unlock_irqrestore(&gc->bgpio_lock, flags);
-
-	return 0;
-}
-
-static int bgpio_dir_out_inv(struct gpio_chip *gc, unsigned int gpio, int val)
-{
-	unsigned long flags;
-
-	gc->set(gc, gpio, val);
-
-	spin_lock_irqsave(&gc->bgpio_lock, flags);
-
-	gc->bgpio_dir &= ~bgpio_line2mask(gc, gpio);
+	if (gc->bgpio_dir_inverted)
+		gc->bgpio_dir &= ~bgpio_line2mask(gc, gpio);
+	else
+		gc->bgpio_dir |= bgpio_line2mask(gc, gpio);
 	gc->write_reg(gc->reg_dir, gc->bgpio_dir);
 
 	spin_unlock_irqrestore(&gc->bgpio_lock, flags);
@@ -419,12 +415,6 @@ static int bgpio_dir_out_inv(struct gpio_chip *gc, unsigned int gpio, int val)
 	return 0;
 }
 
-static int bgpio_get_dir_inv(struct gpio_chip *gc, unsigned int gpio)
-{
-	/* Return 0 if output, 1 if input */
-	return !!(gc->read_reg(gc->reg_dir) & bgpio_line2mask(gc, gpio));
-}
-
 static int bgpio_setup_accessors(struct device *dev,
 				 struct gpio_chip *gc,
 				 bool byte_be)
@@ -560,9 +550,10 @@ static int bgpio_setup_direction(struct gpio_chip *gc,
 		gc->get_direction = bgpio_get_dir;
 	} else if (dirin) {
 		gc->reg_dir = dirin;
-		gc->direction_output = bgpio_dir_out_inv;
-		gc->direction_input = bgpio_dir_in_inv;
-		gc->get_direction = bgpio_get_dir_inv;
+		gc->direction_output = bgpio_dir_out;
+		gc->direction_input = bgpio_dir_in;
+		gc->get_direction = bgpio_get_dir;
+		gc->bgpio_dir_inverted = true;
 	} else {
 		if (flags & BGPIOF_NO_OUTPUT)
 			gc->direction_output = bgpio_dir_out_err;
@@ -582,6 +573,33 @@ static int bgpio_request(struct gpio_chip *chip, unsigned gpio_pin)
 	return -EINVAL;
 }
 
+/**
+ * bgpio_init() - Initialize generic GPIO accessor functions
+ * @gc: the GPIO chip to set up
+ * @dev: the parent device of the new GPIO chip (compulsory)
+ * @sz: the size (width) of the MMIO registers in bytes, typically 1, 2 or 4
+ * @dat: MMIO address for the register to READ the value of the GPIO lines, it
+ *	is expected that a 1 in the corresponding bit in this register means the
+ *	line is asserted
+ * @set: MMIO address for the register to SET the value of the GPIO lines, it is
+ *	expected that we write the line with 1 in this register to drive the GPIO line
+ *	high.
+ * @clr: MMIO address for the register to CLEAR the value of the GPIO lines, it is
+ *	expected that we write the line with 1 in this register to drive the GPIO line
+ *	low. It is allowed to leave this address as NULL, in that case the SET register
+ *	will be assumed to also clear the GPIO lines, by actively writing the line
+ *	with 0.
+ * @dirout: MMIO address for the register to set the line as OUTPUT. It is assumed
+ *	that setting a line to 1 in this register will turn that line into an
+ *	output line. Conversely, setting the line to 0 will turn that line into
+ *	an input. Either this or @dirin can be defined, but never both.
+ * @dirin: MMIO address for the register to set this line as INPUT. It is assumed
+ *	that setting a line to 1 in this register will turn that line into an
+ *	input line. Conversely, setting the line to 0 will turn that line into
+ *	an output. Either this or @dirout can be defined, but never both.
+ * @flags: Different flags that will affect the behaviour of the device, such as
+ *	endianness etc.
+ */
 int bgpio_init(struct gpio_chip *gc, struct device *dev,
 	       unsigned long sz, void __iomem *dat, void __iomem *set,
 	       void __iomem *clr, void __iomem *dirout, void __iomem *dirin,
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 5382b5183b7e..0ea328e71ec9 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -201,6 +201,8 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip)
  * @reg_set: output set register (out=high) for generic GPIO
  * @reg_clr: output clear register (out=low) for generic GPIO
  * @reg_dir: direction setting register for generic GPIO
+ * @bgpio_dir_inverted: indicates that the direction register is inverted
+ *	(gpiolib private state variable)
  * @bgpio_bits: number of register bits used for a generic GPIO i.e.
  *	<register width> * 8
  * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep
@@ -267,6 +269,7 @@ struct gpio_chip {
 	void __iomem *reg_set;
 	void __iomem *reg_clr;
 	void __iomem *reg_dir;
+	bool bgpio_dir_inverted;
 	int bgpio_bits;
 	spinlock_t bgpio_lock;
 	unsigned long bgpio_data;
-- 
cgit v1.2.3


From 5dc4c4b7d4e8115e7cde96a030f98cb3ab2e458c Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:24 -0700
Subject: bpf: Introduce BPF_MAP_TYPE_REUSEPORT_SOCKARRAY

This patch introduces a new map type BPF_MAP_TYPE_REUSEPORT_SOCKARRAY.

To unleash the full potential of a bpf prog, it is essential for the
userspace to be capable of directly setting up a bpf map which can then
be consumed by the bpf prog to make decision.  In this case, decide which
SO_REUSEPORT sk to serve the incoming request.

By adding BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, the userspace has total control
and visibility on where a SO_REUSEPORT sk should be located in a bpf map.
The later patch will introduce BPF_PROG_TYPE_SK_REUSEPORT such that
the bpf prog can directly select a sk from the bpf map.  That will
raise the programmability of the bpf prog attached to a reuseport
group (a group of sk serving the same IP:PORT).

For example, in UDP, the bpf prog can peek into the payload (e.g.
through the "data" pointer introduced in the later patch) to learn
the application level's connection information and then decide which sk
to pick from a bpf map.  The userspace can tightly couple the sk's location
in a bpf map with the application logic in generating the UDP payload's
connection information.  This connection info contact/API stays within the
userspace.

Also, when used with map-in-map, the userspace can switch the
old-server-process's inner map to a new-server-process's inner map
in one call "bpf_map_update_elem(outer_map, &index, &new_reuseport_array)".
The bpf prog will then direct incoming requests to the new process instead
of the old process.  The old process can finish draining the pending
requests (e.g. by "accept()") before closing the old-fds.  [Note that
deleting a fd from a bpf map does not necessary mean the fd is closed]

During map_update_elem(),
Only SO_REUSEPORT sk (i.e. which has already been added
to a reuse->socks[]) can be used.  That means a SO_REUSEPORT sk that is
"bind()" for UDP or "bind()+listen()" for TCP.  These conditions are
ensured in "reuseport_array_update_check()".

A SO_REUSEPORT sk can only be added once to a map (i.e. the
same sk cannot be added twice even to the same map).  SO_REUSEPORT
already allows another sk to be created for the same IP:PORT.
There is no need to re-create a similar usage in the BPF side.

When a SO_REUSEPORT is deleted from the "reuse->socks[]" (e.g. "close()"),
it will notify the bpf map to remove it from the map also.  It is
done through "bpf_sk_reuseport_detach()" and it will only be called
if >=1 of the "reuse->sock[]" has ever been added to a bpf map.

The map_update()/map_delete() has to be in-sync with the
"reuse->socks[]".  Hence, the same "reuseport_lock" used
by "reuse->socks[]" has to be used here also. Care has
been taken to ensure the lock is only acquired when the
adding sk passes some strict tests. and
freeing the map does not require the reuseport_lock.

The reuseport_array will also support lookup from the syscall
side.  It will return a sock_gen_cookie().  The sock_gen_cookie()
is on-demand (i.e. a sk's cookie is not generated until the very
first map_lookup_elem()).

The lookup cookie is 64bits but it goes against the logical userspace
expectation on 32bits sizeof(fd) (and as other fd based bpf maps do also).
It may catch user in surprise if we enforce value_size=8 while
userspace still pass a 32bits fd during update.  Supporting different
value_size between lookup and update seems unintuitive also.

We also need to consider what if other existing fd based maps want
to return 64bits value from syscall's lookup in the future.
Hence, reuseport_array supports both value_size 4 and 8, and
assuming user will usually use value_size=4.  The syscall's lookup
will return ENOSPC on value_size=4.  It will will only
return 64bits value from sock_gen_cookie() when user consciously
choose value_size=8 (as a signal that lookup is desired) which then
requires a 64bits value in both lookup and update.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h          |  28 ++++
 include/linux/bpf_types.h    |   3 +
 include/uapi/linux/bpf.h     |   1 +
 kernel/bpf/Makefile          |   3 +
 kernel/bpf/arraymap.c        |   2 +-
 kernel/bpf/reuseport_array.c | 363 +++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c         |   6 +
 net/core/sock_reuseport.c    |   8 +
 8 files changed, 413 insertions(+), 1 deletion(-)
 create mode 100644 kernel/bpf/reuseport_array.c

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..db11662faea6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -524,6 +524,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
 }
 
 struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
+int array_map_alloc_check(union bpf_attr *attr);
 
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
@@ -769,6 +770,33 @@ static inline void __xsk_map_flush(struct bpf_map *map)
 }
 #endif
 
+#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
+void bpf_sk_reuseport_detach(struct sock *sk);
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+				       void *value);
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 map_flags);
+#else
+static inline void bpf_sk_reuseport_detach(struct sock *sk)
+{
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
+						     void *key, void *value)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
+						     void *key, void *value,
+						     u64 map_flags)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_BPF_SYSCALL */
+#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */
+
 /* verifier prototypes for helper functions called from eBPF programs */
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index add08be53b6f..14fd6c02d258 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -60,4 +60,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 #endif
+#ifdef CONFIG_INET
+BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
+#endif
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..40f584bc7da0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -126,6 +126,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_XSKMAP,
 	BPF_MAP_TYPE_SOCKHASH,
 	BPF_MAP_TYPE_CGROUP_STORAGE,
+	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
 };
 
 enum bpf_prog_type {
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e8906cbad81f..0488b8258321 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -23,3 +23,6 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
 obj-$(CONFIG_CGROUP_BPF) += cgroup.o
+ifeq ($(CONFIG_INET),y)
+obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
+endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2aa55d030c77..f6ca3e712831 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -54,7 +54,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
 }
 
 /* Called from syscall */
-static int array_map_alloc_check(union bpf_attr *attr)
+int array_map_alloc_check(union bpf_attr *attr)
 {
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	int numa_node = bpf_map_attr_numa_node(attr);
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
new file mode 100644
index 000000000000..18e225de80ff
--- /dev/null
+++ b/kernel/bpf/reuseport_array.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Facebook
+ */
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/sock_diag.h>
+#include <net/sock_reuseport.h>
+
+struct reuseport_array {
+	struct bpf_map map;
+	struct sock __rcu *ptrs[];
+};
+
+static struct reuseport_array *reuseport_array(struct bpf_map *map)
+{
+	return (struct reuseport_array *)map;
+}
+
+/* The caller must hold the reuseport_lock */
+void bpf_sk_reuseport_detach(struct sock *sk)
+{
+	struct sock __rcu **socks;
+
+	write_lock_bh(&sk->sk_callback_lock);
+	socks = sk->sk_user_data;
+	if (socks) {
+		WRITE_ONCE(sk->sk_user_data, NULL);
+		/*
+		 * Do not move this NULL assignment outside of
+		 * sk->sk_callback_lock because there is
+		 * a race with reuseport_array_free()
+		 * which does not hold the reuseport_lock.
+		 */
+		RCU_INIT_POINTER(*socks, NULL);
+	}
+	write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int reuseport_array_alloc_check(union bpf_attr *attr)
+{
+	if (attr->value_size != sizeof(u32) &&
+	    attr->value_size != sizeof(u64))
+		return -EINVAL;
+
+	return array_map_alloc_check(attr);
+}
+
+static void *reuseport_array_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	u32 index = *(u32 *)key;
+
+	if (unlikely(index >= array->map.max_entries))
+		return NULL;
+
+	return rcu_dereference(array->ptrs[index]);
+}
+
+/* Called from syscall only */
+static int reuseport_array_delete_elem(struct bpf_map *map, void *key)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	u32 index = *(u32 *)key;
+	struct sock *sk;
+	int err;
+
+	if (index >= map->max_entries)
+		return -E2BIG;
+
+	if (!rcu_access_pointer(array->ptrs[index]))
+		return -ENOENT;
+
+	spin_lock_bh(&reuseport_lock);
+
+	sk = rcu_dereference_protected(array->ptrs[index],
+				       lockdep_is_held(&reuseport_lock));
+	if (sk) {
+		write_lock_bh(&sk->sk_callback_lock);
+		WRITE_ONCE(sk->sk_user_data, NULL);
+		RCU_INIT_POINTER(array->ptrs[index], NULL);
+		write_unlock_bh(&sk->sk_callback_lock);
+		err = 0;
+	} else {
+		err = -ENOENT;
+	}
+
+	spin_unlock_bh(&reuseport_lock);
+
+	return err;
+}
+
+static void reuseport_array_free(struct bpf_map *map)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	struct sock *sk;
+	u32 i;
+
+	synchronize_rcu();
+
+	/*
+	 * ops->map_*_elem() will not be able to access this
+	 * array now. Hence, this function only races with
+	 * bpf_sk_reuseport_detach() which was triggerred by
+	 * close() or disconnect().
+	 *
+	 * This function and bpf_sk_reuseport_detach() are
+	 * both removing sk from "array".  Who removes it
+	 * first does not matter.
+	 *
+	 * The only concern here is bpf_sk_reuseport_detach()
+	 * may access "array" which is being freed here.
+	 * bpf_sk_reuseport_detach() access this "array"
+	 * through sk->sk_user_data _and_ with sk->sk_callback_lock
+	 * held which is enough because this "array" is not freed
+	 * until all sk->sk_user_data has stopped referencing this "array".
+	 *
+	 * Hence, due to the above, taking "reuseport_lock" is not
+	 * needed here.
+	 */
+
+	/*
+	 * Since reuseport_lock is not taken, sk is accessed under
+	 * rcu_read_lock()
+	 */
+	rcu_read_lock();
+	for (i = 0; i < map->max_entries; i++) {
+		sk = rcu_dereference(array->ptrs[i]);
+		if (sk) {
+			write_lock_bh(&sk->sk_callback_lock);
+			/*
+			 * No need for WRITE_ONCE(). At this point,
+			 * no one is reading it without taking the
+			 * sk->sk_callback_lock.
+			 */
+			sk->sk_user_data = NULL;
+			write_unlock_bh(&sk->sk_callback_lock);
+			RCU_INIT_POINTER(array->ptrs[i], NULL);
+		}
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Once reaching here, all sk->sk_user_data is not
+	 * referenceing this "array".  "array" can be freed now.
+	 */
+	bpf_map_area_free(array);
+}
+
+static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
+{
+	int err, numa_node = bpf_map_attr_numa_node(attr);
+	struct reuseport_array *array;
+	u64 cost, array_size;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	array_size = sizeof(*array);
+	array_size += (u64)attr->max_entries * sizeof(struct sock *);
+
+	/* make sure there is no u32 overflow later in round_up() */
+	cost = array_size;
+	if (cost >= U32_MAX - PAGE_SIZE)
+		return ERR_PTR(-ENOMEM);
+	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	err = bpf_map_precharge_memlock(cost);
+	if (err)
+		return ERR_PTR(err);
+
+	/* allocate all map elements and zero-initialize them */
+	array = bpf_map_area_alloc(array_size, numa_node);
+	if (!array)
+		return ERR_PTR(-ENOMEM);
+
+	/* copy mandatory map attributes */
+	bpf_map_init_from_attr(&array->map, attr);
+	array->map.pages = cost;
+
+	return &array->map;
+}
+
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+				       void *value)
+{
+	struct sock *sk;
+	int err;
+
+	if (map->value_size != sizeof(u64))
+		return -ENOSPC;
+
+	rcu_read_lock();
+	sk = reuseport_array_lookup_elem(map, key);
+	if (sk) {
+		*(u64 *)value = sock_gen_cookie(sk);
+		err = 0;
+	} else {
+		err = -ENOENT;
+	}
+	rcu_read_unlock();
+
+	return err;
+}
+
+static int
+reuseport_array_update_check(const struct reuseport_array *array,
+			     const struct sock *nsk,
+			     const struct sock *osk,
+			     const struct sock_reuseport *nsk_reuse,
+			     u32 map_flags)
+{
+	if (osk && map_flags == BPF_NOEXIST)
+		return -EEXIST;
+
+	if (!osk && map_flags == BPF_EXIST)
+		return -ENOENT;
+
+	if (nsk->sk_protocol != IPPROTO_UDP && nsk->sk_protocol != IPPROTO_TCP)
+		return -ENOTSUPP;
+
+	if (nsk->sk_family != AF_INET && nsk->sk_family != AF_INET6)
+		return -ENOTSUPP;
+
+	if (nsk->sk_type != SOCK_STREAM && nsk->sk_type != SOCK_DGRAM)
+		return -ENOTSUPP;
+
+	/*
+	 * sk must be hashed (i.e. listening in the TCP case or binded
+	 * in the UDP case) and
+	 * it must also be a SO_REUSEPORT sk (i.e. reuse cannot be NULL).
+	 *
+	 * Also, sk will be used in bpf helper that is protected by
+	 * rcu_read_lock().
+	 */
+	if (!sock_flag(nsk, SOCK_RCU_FREE) || !sk_hashed(nsk) || !nsk_reuse)
+		return -EINVAL;
+
+	/* READ_ONCE because the sk->sk_callback_lock may not be held here */
+	if (READ_ONCE(nsk->sk_user_data))
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * Called from syscall only.
+ * The "nsk" in the fd refcnt.
+ * The "osk" and "reuse" are protected by reuseport_lock.
+ */
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 map_flags)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	struct sock *free_osk = NULL, *osk, *nsk;
+	struct sock_reuseport *reuse;
+	u32 index = *(u32 *)key;
+	struct socket *socket;
+	int err, fd;
+
+	if (map_flags > BPF_EXIST)
+		return -EINVAL;
+
+	if (index >= map->max_entries)
+		return -E2BIG;
+
+	if (map->value_size == sizeof(u64)) {
+		u64 fd64 = *(u64 *)value;
+
+		if (fd64 > S32_MAX)
+			return -EINVAL;
+		fd = fd64;
+	} else {
+		fd = *(int *)value;
+	}
+
+	socket = sockfd_lookup(fd, &err);
+	if (!socket)
+		return err;
+
+	nsk = socket->sk;
+	if (!nsk) {
+		err = -EINVAL;
+		goto put_file;
+	}
+
+	/* Quick checks before taking reuseport_lock */
+	err = reuseport_array_update_check(array, nsk,
+					   rcu_access_pointer(array->ptrs[index]),
+					   rcu_access_pointer(nsk->sk_reuseport_cb),
+					   map_flags);
+	if (err)
+		goto put_file;
+
+	spin_lock_bh(&reuseport_lock);
+	/*
+	 * Some of the checks only need reuseport_lock
+	 * but it is done under sk_callback_lock also
+	 * for simplicity reason.
+	 */
+	write_lock_bh(&nsk->sk_callback_lock);
+
+	osk = rcu_dereference_protected(array->ptrs[index],
+					lockdep_is_held(&reuseport_lock));
+	reuse = rcu_dereference_protected(nsk->sk_reuseport_cb,
+					  lockdep_is_held(&reuseport_lock));
+	err = reuseport_array_update_check(array, nsk, osk, reuse, map_flags);
+	if (err)
+		goto put_file_unlock;
+
+	/* Ensure reuse->reuseport_id is set */
+	err = reuseport_get_id(reuse);
+	if (err < 0)
+		goto put_file_unlock;
+
+	WRITE_ONCE(nsk->sk_user_data, &array->ptrs[index]);
+	rcu_assign_pointer(array->ptrs[index], nsk);
+	free_osk = osk;
+	err = 0;
+
+put_file_unlock:
+	write_unlock_bh(&nsk->sk_callback_lock);
+
+	if (free_osk) {
+		write_lock_bh(&free_osk->sk_callback_lock);
+		WRITE_ONCE(free_osk->sk_user_data, NULL);
+		write_unlock_bh(&free_osk->sk_callback_lock);
+	}
+
+	spin_unlock_bh(&reuseport_lock);
+put_file:
+	fput(socket->file);
+	return err;
+}
+
+/* Called from syscall */
+static int reuseport_array_get_next_key(struct bpf_map *map, void *key,
+					void *next_key)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	u32 index = key ? *(u32 *)key : U32_MAX;
+	u32 *next = (u32 *)next_key;
+
+	if (index >= array->map.max_entries) {
+		*next = 0;
+		return 0;
+	}
+
+	if (index == array->map.max_entries - 1)
+		return -ENOENT;
+
+	*next = index + 1;
+	return 0;
+}
+
+const struct bpf_map_ops reuseport_array_ops = {
+	.map_alloc_check = reuseport_array_alloc_check,
+	.map_alloc = reuseport_array_alloc,
+	.map_free = reuseport_array_free,
+	.map_lookup_elem = reuseport_array_lookup_elem,
+	.map_get_next_key = reuseport_array_get_next_key,
+	.map_delete_elem = reuseport_array_delete_elem,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5af4e9e2722d..57f4d076141b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -684,6 +684,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_fd_array_map_lookup_elem(map, key, value);
 	} else if (IS_FD_HASH(map)) {
 		err = bpf_fd_htab_map_lookup_elem(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
 	} else {
 		rcu_read_lock();
 		ptr = map->ops->map_lookup_elem(map, key);
@@ -790,6 +792,10 @@ static int map_update_elem(union bpf_attr *attr)
 		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
 						  attr->flags);
 		rcu_read_unlock();
+	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+		/* rcu_read_lock() is not needed */
+		err = bpf_fd_reuseport_array_update_elem(map, key, value,
+							 attr->flags);
 	} else {
 		rcu_read_lock();
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index cf2e4d305af9..8235f2439816 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -186,6 +186,14 @@ void reuseport_detach_sock(struct sock *sk)
 	spin_lock_bh(&reuseport_lock);
 	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock));
+
+	/* At least one of the sk in this reuseport group is added to
+	 * a bpf map.  Notify the bpf side.  The bpf map logic will
+	 * remove the sk if it is indeed added to a bpf map.
+	 */
+	if (reuse->reuseport_id)
+		bpf_sk_reuseport_detach(sk);
+
 	rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
 
 	for (i = 0; i < reuse->num_socks; i++) {
-- 
cgit v1.2.3


From 2dbb9b9e6df67d444fbe425c7f6014858d337adf Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:25 -0700
Subject: bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT

This patch adds a BPF_PROG_TYPE_SK_REUSEPORT which can select
a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY.  Like other
non SK_FILTER/CGROUP_SKB program, it requires CAP_SYS_ADMIN.

BPF_PROG_TYPE_SK_REUSEPORT introduces "struct sk_reuseport_kern"
to store the bpf context instead of using the skb->cb[48].

At the SO_REUSEPORT sk lookup time, it is in the middle of transiting
from a lower layer (ipv4/ipv6) to a upper layer (udp/tcp).  At this
point,  it is not always clear where the bpf context can be appended
in the skb->cb[48] to avoid saving-and-restoring cb[].  Even putting
aside the difference between ipv4-vs-ipv6 and udp-vs-tcp.  It is not
clear if the lower layer is only ipv4 and ipv6 in the future and
will it not touch the cb[] again before transiting to the upper
layer.

For example, in udp_gro_receive(), it uses the 48 byte NAPI_GRO_CB
instead of IP[6]CB and it may still modify the cb[] after calling
the udp[46]_lib_lookup_skb().  Because of the above reason, if
sk->cb is used for the bpf ctx, saving-and-restoring is needed
and likely the whole 48 bytes cb[] has to be saved and restored.

Instead of saving, setting and restoring the cb[], this patch opts
to create a new "struct sk_reuseport_kern" and setting the needed
values in there.

The new BPF_PROG_TYPE_SK_REUSEPORT and "struct sk_reuseport_(kern|md)"
will serve all ipv4/ipv6 + udp/tcp combinations.  There is no protocol
specific usage at this point and it is also inline with the current
sock_reuseport.c implementation (i.e. no protocol specific requirement).

In "struct sk_reuseport_md", this patch exposes data/data_end/len
with semantic similar to other existing usages.  Together
with "bpf_skb_load_bytes()" and "bpf_skb_load_bytes_relative()",
the bpf prog can peek anywhere in the skb.  The "bind_inany" tells
the bpf prog that the reuseport group is bind-ed to a local
INANY address which cannot be learned from skb.

The new "bind_inany" is added to "struct sock_reuseport" which will be
used when running the new "BPF_PROG_TYPE_SK_REUSEPORT" bpf prog in order
to avoid repeating the "bind INANY" test on
"sk_v6_rcv_saddr/sk->sk_rcv_saddr" every time a bpf prog is run.  It can
only be properly initialized when a "sk->sk_reuseport" enabled sk is
adding to a hashtable (i.e. during "reuseport_alloc()" and
"reuseport_add_sock()").

The new "sk_select_reuseport()" is the main helper that the
bpf prog will use to select a SO_REUSEPORT sk.  It is the only function
that can use the new BPF_MAP_TYPE_REUSEPORT_ARRAY.  As mentioned in
the earlier patch, the validity of a selected sk is checked in
run time in "sk_select_reuseport()".  Doing the check in
verification time is difficult and inflexible (consider the map-in-map
use case).  The runtime check is to compare the selected sk's reuseport_id
with the reuseport_id that we want.  This helper will return -EXXX if the
selected sk cannot serve the incoming request (e.g. reuseport_id
not match).  The bpf prog can decide if it wants to do SK_DROP as its
discretion.

When the bpf prog returns SK_PASS, the kernel will check if a
valid sk has been selected (i.e. "reuse_kern->selected_sk != NULL").
If it does , it will use the selected sk.  If not, the kernel
will select one from "reuse->socks[]" (as before this patch).

The SK_DROP and SK_PASS handling logic will be in the next patch.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_types.h       |   3 +
 include/linux/filter.h          |  15 +++
 include/net/addrconf.h          |   1 +
 include/net/sock_reuseport.h    |   6 +-
 include/uapi/linux/bpf.h        |  36 +++++-
 kernel/bpf/verifier.c           |   9 ++
 net/core/filter.c               | 269 +++++++++++++++++++++++++++++++++++++++-
 net/core/sock_reuseport.c       |  20 ++-
 net/ipv4/inet_connection_sock.c |   9 ++
 net/ipv4/inet_hashtables.c      |   5 +-
 net/ipv4/udp.c                  |   5 +-
 11 files changed, 365 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 14fd6c02d258..cd26c090e7c0 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -29,6 +29,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
 #ifdef CONFIG_BPF_LIRC_MODE2
 BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #endif
+#ifdef CONFIG_INET
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
+#endif
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2b072dab32c0..70e9d57677fe 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -32,6 +32,7 @@ struct seccomp_data;
 struct bpf_prog_aux;
 struct xdp_rxq_info;
 struct xdp_buff;
+struct sock_reuseport;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -833,6 +834,20 @@ void bpf_warn_invalid_xdp_action(u32 act);
 struct sock *do_sk_redirect_map(struct sk_buff *skb);
 struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
 
+#ifdef CONFIG_INET
+struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+				  struct bpf_prog *prog, struct sk_buff *skb,
+				  u32 hash);
+#else
+static inline struct sock *
+bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+		     struct bpf_prog *prog, struct sk_buff *skb,
+		     u32 hash)
+{
+	return NULL;
+}
+#endif
+
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
 extern int bpf_jit_harden;
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 5f43f7a70fe6..6def0351bcc3 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -108,6 +108,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 		    u32 banned_flags);
 bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
 			  bool match_wildcard);
+bool inet_rcv_saddr_any(const struct sock *sk);
 void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
 void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
 
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index e1a7681856f7..73b569556be6 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -21,12 +21,14 @@ struct sock_reuseport {
 	unsigned int		synq_overflow_ts;
 	/* ID stays the same even after the size of socks[] grows. */
 	unsigned int		reuseport_id;
+	bool			bind_inany;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
 
-extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
+extern int reuseport_alloc(struct sock *sk, bool bind_inany);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
+			      bool bind_inany);
 extern void reuseport_detach_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 40f584bc7da0..3102a2a23c31 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -151,6 +151,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 	BPF_PROG_TYPE_LIRC_MODE2,
+	BPF_PROG_TYPE_SK_REUSEPORT,
 };
 
 enum bpf_attach_type {
@@ -2114,6 +2115,14 @@ union bpf_attr {
  *		the shared data.
  *	Return
  *		Pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		Select a SO_REUSEPORT sk from a	BPF_MAP_TYPE_REUSEPORT_ARRAY map
+ *		It checks the selected sk is matching the incoming
+ *		request in the skb.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2197,7 +2206,8 @@ union bpf_attr {
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
-	FN(get_local_storage),
+	FN(get_local_storage),		\
+	FN(sk_select_reuseport),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2414,6 +2424,30 @@ struct sk_msg_md {
 	__u32 local_port;	/* stored in host byte order */
 };
 
+struct sk_reuseport_md {
+	/*
+	 * Start of directly accessible data. It begins from
+	 * the tcp/udp header.
+	 */
+	void *data;
+	void *data_end;		/* End of directly accessible data */
+	/*
+	 * Total length of packet (starting from the tcp/udp header).
+	 * Note that the directly accessible bytes (data_end - data)
+	 * could be less than this "len".  Those bytes could be
+	 * indirectly read by a helper "bpf_skb_load_bytes()".
+	 */
+	__u32 len;
+	/*
+	 * Eth protocol in the mac header (network byte order). e.g.
+	 * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+	 */
+	__u32 eth_protocol;
+	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+	__u32 bind_inany;	/* Is sock bound to an INANY address? */
+	__u32 hash;		/* A hash of the packet 4 tuples */
+};
+
 #define BPF_TAG_SIZE	8
 
 struct bpf_prog_info {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 587468a9c37d..ca90679a7fe5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1310,6 +1310,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	case BPF_PROG_TYPE_LWT_IN:
 	case BPF_PROG_TYPE_LWT_OUT:
 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+	case BPF_PROG_TYPE_SK_REUSEPORT:
 		/* dst_input() and dst_output() can't write for now */
 		if (t == BPF_WRITE)
 			return false;
@@ -2166,6 +2167,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_msg_redirect_hash)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+		if (func_id != BPF_FUNC_sk_select_reuseport)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -2217,6 +2222,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_sk_select_reuseport:
+		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/net/core/filter.c b/net/core/filter.c
index 2de7dd9f2a57..142595b4e0d1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1462,7 +1462,7 @@ static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
 		return -ENOMEM;
 
 	if (sk_unhashed(sk) && sk->sk_reuseport) {
-		err = reuseport_alloc(sk);
+		err = reuseport_alloc(sk, false);
 		if (err)
 			return err;
 	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
@@ -7013,3 +7013,270 @@ out:
 	release_sock(sk);
 	return ret;
 }
+
+#ifdef CONFIG_INET
+struct sk_reuseport_kern {
+	struct sk_buff *skb;
+	struct sock *sk;
+	struct sock *selected_sk;
+	void *data_end;
+	u32 hash;
+	u32 reuseport_id;
+	bool bind_inany;
+};
+
+static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
+				    struct sock_reuseport *reuse,
+				    struct sock *sk, struct sk_buff *skb,
+				    u32 hash)
+{
+	reuse_kern->skb = skb;
+	reuse_kern->sk = sk;
+	reuse_kern->selected_sk = NULL;
+	reuse_kern->data_end = skb->data + skb_headlen(skb);
+	reuse_kern->hash = hash;
+	reuse_kern->reuseport_id = reuse->reuseport_id;
+	reuse_kern->bind_inany = reuse->bind_inany;
+}
+
+struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+				  struct bpf_prog *prog, struct sk_buff *skb,
+				  u32 hash)
+{
+	struct sk_reuseport_kern reuse_kern;
+	enum sk_action action;
+
+	bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
+	action = BPF_PROG_RUN(prog, &reuse_kern);
+
+	if (action == SK_PASS)
+		return reuse_kern.selected_sk;
+	else
+		return ERR_PTR(-ECONNREFUSED);
+}
+
+BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
+	   struct bpf_map *, map, void *, key, u32, flags)
+{
+	struct sock_reuseport *reuse;
+	struct sock *selected_sk;
+
+	selected_sk = map->ops->map_lookup_elem(map, key);
+	if (!selected_sk)
+		return -ENOENT;
+
+	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
+	if (!reuse)
+		/* selected_sk is unhashed (e.g. by close()) after the
+		 * above map_lookup_elem().  Treat selected_sk has already
+		 * been removed from the map.
+		 */
+		return -ENOENT;
+
+	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
+		struct sock *sk;
+
+		if (unlikely(!reuse_kern->reuseport_id))
+			/* There is a small race between adding the
+			 * sk to the map and setting the
+			 * reuse_kern->reuseport_id.
+			 * Treat it as the sk has not been added to
+			 * the bpf map yet.
+			 */
+			return -ENOENT;
+
+		sk = reuse_kern->sk;
+		if (sk->sk_protocol != selected_sk->sk_protocol)
+			return -EPROTOTYPE;
+		else if (sk->sk_family != selected_sk->sk_family)
+			return -EAFNOSUPPORT;
+
+		/* Catch all. Likely bound to a different sockaddr. */
+		return -EBADFD;
+	}
+
+	reuse_kern->selected_sk = selected_sk;
+
+	return 0;
+}
+
+static const struct bpf_func_proto sk_select_reuseport_proto = {
+	.func           = sk_select_reuseport,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_PTR_TO_MAP_KEY,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_4(sk_reuseport_load_bytes,
+	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
+	   void *, to, u32, len)
+{
+	return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
+}
+
+static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
+	.func		= sk_reuseport_load_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(sk_reuseport_load_bytes_relative,
+	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
+	   void *, to, u32, len, u32, start_header)
+{
+	return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
+					       len, start_header);
+}
+
+static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
+	.func		= sk_reuseport_load_bytes_relative,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *
+sk_reuseport_func_proto(enum bpf_func_id func_id,
+			const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_sk_select_reuseport:
+		return &sk_select_reuseport_proto;
+	case BPF_FUNC_skb_load_bytes:
+		return &sk_reuseport_load_bytes_proto;
+	case BPF_FUNC_skb_load_bytes_relative:
+		return &sk_reuseport_load_bytes_relative_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
+static bool
+sk_reuseport_is_valid_access(int off, int size,
+			     enum bpf_access_type type,
+			     const struct bpf_prog *prog,
+			     struct bpf_insn_access_aux *info)
+{
+	const u32 size_default = sizeof(__u32);
+
+	if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
+	    off % size || type != BPF_READ)
+		return false;
+
+	switch (off) {
+	case offsetof(struct sk_reuseport_md, data):
+		info->reg_type = PTR_TO_PACKET;
+		return size == sizeof(__u64);
+
+	case offsetof(struct sk_reuseport_md, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		return size == sizeof(__u64);
+
+	case offsetof(struct sk_reuseport_md, hash):
+		return size == size_default;
+
+	/* Fields that allow narrowing */
+	case offsetof(struct sk_reuseport_md, eth_protocol):
+		if (size < FIELD_SIZEOF(struct sk_buff, protocol))
+			return false;
+	case offsetof(struct sk_reuseport_md, ip_protocol):
+	case offsetof(struct sk_reuseport_md, bind_inany):
+	case offsetof(struct sk_reuseport_md, len):
+		bpf_ctx_record_field_size(info, size_default);
+		return bpf_ctx_narrow_access_ok(off, size, size_default);
+
+	default:
+		return false;
+	}
+}
+
+#define SK_REUSEPORT_LOAD_FIELD(F) ({					\
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+			      si->dst_reg, si->src_reg,			\
+			      bpf_target_off(struct sk_reuseport_kern, F, \
+					     FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+					     target_size));		\
+	})
+
+#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD)				\
+	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
+				    struct sk_buff,			\
+				    skb,				\
+				    SKB_FIELD)
+
+#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
+	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern,	\
+					     struct sock,		\
+					     sk,			\
+					     SK_FIELD, BPF_SIZE, EXTRA_OFF)
+
+static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
+					   const struct bpf_insn *si,
+					   struct bpf_insn *insn_buf,
+					   struct bpf_prog *prog,
+					   u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct sk_reuseport_md, data):
+		SK_REUSEPORT_LOAD_SKB_FIELD(data);
+		break;
+
+	case offsetof(struct sk_reuseport_md, len):
+		SK_REUSEPORT_LOAD_SKB_FIELD(len);
+		break;
+
+	case offsetof(struct sk_reuseport_md, eth_protocol):
+		SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
+		break;
+
+	case offsetof(struct sk_reuseport_md, ip_protocol):
+		BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
+		SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
+						    BPF_W, 0);
+		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
+					SK_FL_PROTO_SHIFT);
+		/* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
+		 * aware.  No further narrowing or masking is needed.
+		 */
+		*target_size = 1;
+		break;
+
+	case offsetof(struct sk_reuseport_md, data_end):
+		SK_REUSEPORT_LOAD_FIELD(data_end);
+		break;
+
+	case offsetof(struct sk_reuseport_md, hash):
+		SK_REUSEPORT_LOAD_FIELD(hash);
+		break;
+
+	case offsetof(struct sk_reuseport_md, bind_inany):
+		SK_REUSEPORT_LOAD_FIELD(bind_inany);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
+	.get_func_proto		= sk_reuseport_func_proto,
+	.is_valid_access	= sk_reuseport_is_valid_access,
+	.convert_ctx_access	= sk_reuseport_convert_ctx_access,
+};
+
+const struct bpf_prog_ops sk_reuseport_prog_ops = {
+};
+#endif /* CONFIG_INET */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 8235f2439816..d260167f5f77 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -51,7 +51,7 @@ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 	return reuse;
 }
 
-int reuseport_alloc(struct sock *sk)
+int reuseport_alloc(struct sock *sk, bool bind_inany)
 {
 	struct sock_reuseport *reuse;
 
@@ -63,9 +63,17 @@ int reuseport_alloc(struct sock *sk)
 	/* Allocation attempts can occur concurrently via the setsockopt path
 	 * and the bind/hash path.  Nothing to do when we lose the race.
 	 */
-	if (rcu_dereference_protected(sk->sk_reuseport_cb,
-				      lockdep_is_held(&reuseport_lock)))
+	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+					  lockdep_is_held(&reuseport_lock));
+	if (reuse) {
+		/* Only set reuse->bind_inany if the bind_inany is true.
+		 * Otherwise, it will overwrite the reuse->bind_inany
+		 * which was set by the bind/hash path.
+		 */
+		if (bind_inany)
+			reuse->bind_inany = bind_inany;
 		goto out;
+	}
 
 	reuse = __reuseport_alloc(INIT_SOCKS);
 	if (!reuse) {
@@ -75,6 +83,7 @@ int reuseport_alloc(struct sock *sk)
 
 	reuse->socks[0] = sk;
 	reuse->num_socks = 1;
+	reuse->bind_inany = bind_inany;
 	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
 out:
@@ -101,6 +110,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 	more_reuse->num_socks = reuse->num_socks;
 	more_reuse->prog = reuse->prog;
 	more_reuse->reuseport_id = reuse->reuseport_id;
+	more_reuse->bind_inany = reuse->bind_inany;
 
 	memcpy(more_reuse->socks, reuse->socks,
 	       reuse->num_socks * sizeof(struct sock *));
@@ -136,12 +146,12 @@ static void reuseport_free_rcu(struct rcu_head *head)
  *  @sk2: Socket belonging to the existing reuseport group.
  *  May return ENOMEM and not add socket to group under memory pressure.
  */
-int reuseport_add_sock(struct sock *sk, struct sock *sk2)
+int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 {
 	struct sock_reuseport *old_reuse, *reuse;
 
 	if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
-		int err = reuseport_alloc(sk2);
+		int err = reuseport_alloc(sk2, bind_inany);
 
 		if (err)
 			return err;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 33a88e045efd..dfd5009f96ef 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -107,6 +107,15 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
 }
 EXPORT_SYMBOL(inet_rcv_saddr_equal);
 
+bool inet_rcv_saddr_any(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == AF_INET6)
+		return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
+#endif
+	return !sk->sk_rcv_saddr;
+}
+
 void inet_get_local_port_range(struct net *net, int *low, int *high)
 {
 	unsigned int seq;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3647167c8fa3..370e24463fb7 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -567,10 +567,11 @@ static int inet_reuseport_add_sock(struct sock *sk,
 		    inet_csk(sk2)->icsk_bind_hash == tb &&
 		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
 		    inet_rcv_saddr_equal(sk, sk2, false))
-			return reuseport_add_sock(sk, sk2);
+			return reuseport_add_sock(sk, sk2,
+						  inet_rcv_saddr_any(sk));
 	}
 
-	return reuseport_alloc(sk);
+	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
 }
 
 int __inet_hash(struct sock *sk, struct sock *osk)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 060e841dde40..038dd7909051 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -221,11 +221,12 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
 		    (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
 		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
 		    inet_rcv_saddr_equal(sk, sk2, false)) {
-			return reuseport_add_sock(sk, sk2);
+			return reuseport_add_sock(sk, sk2,
+						  inet_rcv_saddr_any(sk));
 		}
 	}
 
-	return reuseport_alloc(sk);
+	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
 }
 
 /**
-- 
cgit v1.2.3


From 8217ca653ec601246832d562207bc24bdf652d2f Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:26 -0700
Subject: bpf: Enable BPF_PROG_TYPE_SK_REUSEPORT bpf prog in reuseport
 selection

This patch allows a BPF_PROG_TYPE_SK_REUSEPORT bpf prog to select a
SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY introduced in
the earlier patch.  "bpf_run_sk_reuseport()" will return -ECONNREFUSED
when the BPF_PROG_TYPE_SK_REUSEPORT prog returns SK_DROP.
The callers, in inet[6]_hashtable.c and ipv[46]/udp.c, are modified to
handle this case and return NULL immediately instead of continuing the
sk search from its hashtable.

It re-uses the existing SO_ATTACH_REUSEPORT_EBPF setsockopt to attach
BPF_PROG_TYPE_SK_REUSEPORT.  The "sk_reuseport_attach_bpf()" will check
if the attaching bpf prog is in the new SK_REUSEPORT or the existing
SOCKET_FILTER type and then check different things accordingly.

One level of "__reuseport_attach_prog()" call is removed.  The
"sk_unhashed() && ..." and "sk->sk_reuseport_cb" tests are pushed
back to "reuseport_attach_prog()" in sock_reuseport.c.  sock_reuseport.c
seems to have more knowledge on those test requirements than filter.c.
In "reuseport_attach_prog()", after new_prog is attached to reuse->prog,
the old_prog (if any) is also directly freed instead of returning the
old_prog to the caller and asking the caller to free.

The sysctl_optmem_max check is moved back to the
"sk_reuseport_attach_filter()" and "sk_reuseport_attach_bpf()".
As of other bpf prog types, the new BPF_PROG_TYPE_SK_REUSEPORT is only
bounded by the usual "bpf_prog_charge_memlock()" during load time
instead of bounded by both bpf_prog_charge_memlock and sysctl_optmem_max.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h       |  1 +
 include/net/sock_reuseport.h |  3 +-
 net/core/filter.c            | 87 ++++++++++++++++++++++++++------------------
 net/core/sock_reuseport.c    | 36 +++++++++++++-----
 net/ipv4/inet_hashtables.c   | 14 ++++---
 net/ipv4/udp.c               |  4 ++
 net/ipv6/inet6_hashtables.c  | 14 ++++---
 net/ipv6/udp.c               |  4 ++
 8 files changed, 106 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 70e9d57677fe..5d565c50bcb2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -753,6 +753,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_attach_bpf(u32 ufd, struct sock *sk);
 int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
+void sk_reuseport_prog_free(struct bpf_prog *prog);
 int sk_detach_filter(struct sock *sk);
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 73b569556be6..8a5f70c7cdf2 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -34,8 +34,7 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
 					  struct sk_buff *skb,
 					  int hdr_len);
-extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
-					      struct bpf_prog *prog);
+extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
diff --git a/net/core/filter.c b/net/core/filter.c
index 142595b4e0d1..22906b31d43f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1453,30 +1453,6 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
 	return 0;
 }
 
-static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
-{
-	struct bpf_prog *old_prog;
-	int err;
-
-	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
-		return -ENOMEM;
-
-	if (sk_unhashed(sk) && sk->sk_reuseport) {
-		err = reuseport_alloc(sk, false);
-		if (err)
-			return err;
-	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
-		/* The socket wasn't bound with SO_REUSEPORT */
-		return -EINVAL;
-	}
-
-	old_prog = reuseport_attach_prog(sk, prog);
-	if (old_prog)
-		bpf_prog_destroy(old_prog);
-
-	return 0;
-}
-
 static
 struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
 {
@@ -1550,13 +1526,15 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	err = __reuseport_attach_prog(prog, sk);
-	if (err < 0) {
+	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+		err = -ENOMEM;
+	else
+		err = reuseport_attach_prog(sk, prog);
+
+	if (err)
 		__bpf_prog_release(prog);
-		return err;
-	}
 
-	return 0;
+	return err;
 }
 
 static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
@@ -1586,19 +1564,58 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 
 int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
 {
-	struct bpf_prog *prog = __get_bpf(ufd, sk);
+	struct bpf_prog *prog;
 	int err;
 
+	if (sock_flag(sk, SOCK_FILTER_LOCKED))
+		return -EPERM;
+
+	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
+	if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
+		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	err = __reuseport_attach_prog(prog, sk);
-	if (err < 0) {
-		bpf_prog_put(prog);
-		return err;
+	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
+		/* Like other non BPF_PROG_TYPE_SOCKET_FILTER
+		 * bpf prog (e.g. sockmap).  It depends on the
+		 * limitation imposed by bpf_prog_load().
+		 * Hence, sysctl_optmem_max is not checked.
+		 */
+		if ((sk->sk_type != SOCK_STREAM &&
+		     sk->sk_type != SOCK_DGRAM) ||
+		    (sk->sk_protocol != IPPROTO_UDP &&
+		     sk->sk_protocol != IPPROTO_TCP) ||
+		    (sk->sk_family != AF_INET &&
+		     sk->sk_family != AF_INET6)) {
+			err = -ENOTSUPP;
+			goto err_prog_put;
+		}
+	} else {
+		/* BPF_PROG_TYPE_SOCKET_FILTER */
+		if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
+			err = -ENOMEM;
+			goto err_prog_put;
+		}
 	}
 
-	return 0;
+	err = reuseport_attach_prog(sk, prog);
+err_prog_put:
+	if (err)
+		bpf_prog_put(prog);
+
+	return err;
+}
+
+void sk_reuseport_prog_free(struct bpf_prog *prog)
+{
+	if (!prog)
+		return;
+
+	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
+		bpf_prog_put(prog);
+	else
+		bpf_prog_destroy(prog);
 }
 
 struct bpf_scratchpad {
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index d260167f5f77..ba5cba56f574 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -9,6 +9,7 @@
 #include <net/sock_reuseport.h>
 #include <linux/bpf.h>
 #include <linux/idr.h>
+#include <linux/filter.h>
 #include <linux/rcupdate.h>
 
 #define INIT_SOCKS 128
@@ -133,8 +134,7 @@ static void reuseport_free_rcu(struct rcu_head *head)
 	struct sock_reuseport *reuse;
 
 	reuse = container_of(head, struct sock_reuseport, rcu);
-	if (reuse->prog)
-		bpf_prog_destroy(reuse->prog);
+	sk_reuseport_prog_free(rcu_dereference_protected(reuse->prog, 1));
 	if (reuse->reuseport_id)
 		ida_simple_remove(&reuseport_ida, reuse->reuseport_id);
 	kfree(reuse);
@@ -219,9 +219,9 @@ void reuseport_detach_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(reuseport_detach_sock);
 
-static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
-			    struct bpf_prog *prog, struct sk_buff *skb,
-			    int hdr_len)
+static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
+				   struct bpf_prog *prog, struct sk_buff *skb,
+				   int hdr_len)
 {
 	struct sk_buff *nskb = NULL;
 	u32 index;
@@ -282,9 +282,15 @@ struct sock *reuseport_select_sock(struct sock *sk,
 		/* paired with smp_wmb() in reuseport_add_sock() */
 		smp_rmb();
 
-		if (prog && skb)
-			sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
+		if (!prog || !skb)
+			goto select_by_hash;
+
+		if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
+			sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
+		else
+			sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);
 
+select_by_hash:
 		/* no bpf or invalid bpf result: fall back to hash usage */
 		if (!sk2)
 			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
@@ -296,12 +302,21 @@ out:
 }
 EXPORT_SYMBOL(reuseport_select_sock);
 
-struct bpf_prog *
-reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
+int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
 {
 	struct sock_reuseport *reuse;
 	struct bpf_prog *old_prog;
 
+	if (sk_unhashed(sk) && sk->sk_reuseport) {
+		int err = reuseport_alloc(sk, false);
+
+		if (err)
+			return err;
+	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
+		/* The socket wasn't bound with SO_REUSEPORT */
+		return -EINVAL;
+	}
+
 	spin_lock_bh(&reuseport_lock);
 	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock));
@@ -310,6 +325,7 @@ reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
 	rcu_assign_pointer(reuse->prog, prog);
 	spin_unlock_bh(&reuseport_lock);
 
-	return old_prog;
+	sk_reuseport_prog_free(old_prog);
+	return 0;
 }
 EXPORT_SYMBOL(reuseport_attach_prog);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 370e24463fb7..f5c9ef2586de 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -328,7 +328,7 @@ struct sock *__inet_lookup_listener(struct net *net,
 				    saddr, sport, daddr, hnum,
 				    dif, sdif);
 	if (result)
-		return result;
+		goto done;
 
 	/* Lookup lhash2 with INADDR_ANY */
 
@@ -337,9 +337,10 @@ struct sock *__inet_lookup_listener(struct net *net,
 	if (ilb2->count > ilb->count)
 		goto port_lookup;
 
-	return inet_lhash2_lookup(net, ilb2, skb, doff,
-				  saddr, sport, daddr, hnum,
-				  dif, sdif);
+	result = inet_lhash2_lookup(net, ilb2, skb, doff,
+				    saddr, sport, daddr, hnum,
+				    dif, sdif);
+	goto done;
 
 port_lookup:
 	sk_for_each_rcu(sk, &ilb->head) {
@@ -352,12 +353,15 @@ port_lookup:
 				result = reuseport_select_sock(sk, phash,
 							       skb, doff);
 				if (result)
-					return result;
+					goto done;
 			}
 			result = sk;
 			hiscore = score;
 		}
 	}
+done:
+	if (unlikely(IS_ERR(result)))
+		return NULL;
 	return result;
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 038dd7909051..f4e35b2ff8b8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -499,6 +499,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 						  daddr, hnum, dif, sdif,
 						  exact_dif, hslot2, skb);
 		}
+		if (unlikely(IS_ERR(result)))
+			return NULL;
 		return result;
 	}
 begin:
@@ -513,6 +515,8 @@ begin:
 						   saddr, sport);
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
+				if (unlikely(IS_ERR(result)))
+					return NULL;
 				if (result)
 					return result;
 			}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 595ad408dba0..3d7c7460a0c5 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -191,7 +191,7 @@ struct sock *inet6_lookup_listener(struct net *net,
 				     saddr, sport, daddr, hnum,
 				     dif, sdif);
 	if (result)
-		return result;
+		goto done;
 
 	/* Lookup lhash2 with in6addr_any */
 
@@ -200,9 +200,10 @@ struct sock *inet6_lookup_listener(struct net *net,
 	if (ilb2->count > ilb->count)
 		goto port_lookup;
 
-	return inet6_lhash2_lookup(net, ilb2, skb, doff,
-				   saddr, sport, daddr, hnum,
-				   dif, sdif);
+	result = inet6_lhash2_lookup(net, ilb2, skb, doff,
+				     saddr, sport, daddr, hnum,
+				     dif, sdif);
+	goto done;
 
 port_lookup:
 	sk_for_each(sk, &ilb->head) {
@@ -214,12 +215,15 @@ port_lookup:
 				result = reuseport_select_sock(sk, phash,
 							       skb, doff);
 				if (result)
-					return result;
+					goto done;
 			}
 			result = sk;
 			hiscore = score;
 		}
 	}
+done:
+	if (unlikely(IS_ERR(result)))
+		return NULL;
 	return result;
 }
 EXPORT_SYMBOL_GPL(inet6_lookup_listener);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f6b96956a8ed..83f4c77c79d8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -235,6 +235,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
 						  exact_dif, hslot2,
 						  skb);
 		}
+		if (unlikely(IS_ERR(result)))
+			return NULL;
 		return result;
 	}
 begin:
@@ -249,6 +251,8 @@ begin:
 						    saddr, sport);
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
+				if (unlikely(IS_ERR(result)))
+					return NULL;
 				if (result)
 					return result;
 			}
-- 
cgit v1.2.3


From 19e226e8cc5da02f17ed119f9137036c0f0f5d80 Mon Sep 17 00:00:00 2001
From: Caleb Raitto <caraitto@google.com>
Date: Thu, 9 Aug 2018 18:18:28 -0700
Subject: virtio: Make vp_set_vq_affinity() take a mask.

Make vp_set_vq_affinity() take a cpumask instead of taking a single CPU.

If there are fewer queues than cores, queue affinity should be able to
map to multiple cores.

Link: https://patchwork.ozlabs.org/patch/948149/
Suggested-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Caleb Raitto <caraitto@google.com>
Acked-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/virtio/virtio_crypto_core.c | 4 ++--
 drivers/net/virtio_net.c                   | 8 ++++----
 drivers/virtio/virtio_pci_common.c         | 7 +++----
 drivers/virtio/virtio_pci_common.h         | 2 +-
 include/linux/virtio_config.h              | 7 ++++---
 5 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index 83326986c113..7c7198553699 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -146,7 +146,7 @@ static void virtcrypto_clean_affinity(struct virtio_crypto *vi, long hcpu)
 
 	if (vi->affinity_hint_set) {
 		for (i = 0; i < vi->max_data_queues; i++)
-			virtqueue_set_affinity(vi->data_vq[i].vq, -1);
+			virtqueue_set_affinity(vi->data_vq[i].vq, NULL);
 
 		vi->affinity_hint_set = false;
 	}
@@ -173,7 +173,7 @@ static void virtcrypto_set_affinity(struct virtio_crypto *vcrypto)
 	 *
 	 */
 	for_each_online_cpu(cpu) {
-		virtqueue_set_affinity(vcrypto->data_vq[i].vq, cpu);
+		virtqueue_set_affinity(vcrypto->data_vq[i].vq, cpumask_of(cpu));
 		if (++i >= vcrypto->max_data_queues)
 			break;
 	}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 39a7f4452587..43fabc0eb4d2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1878,8 +1878,8 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 
 	if (vi->affinity_hint_set) {
 		for (i = 0; i < vi->max_queue_pairs; i++) {
-			virtqueue_set_affinity(vi->rq[i].vq, -1);
-			virtqueue_set_affinity(vi->sq[i].vq, -1);
+			virtqueue_set_affinity(vi->rq[i].vq, NULL);
+			virtqueue_set_affinity(vi->sq[i].vq, NULL);
 		}
 
 		vi->affinity_hint_set = false;
@@ -1905,8 +1905,8 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 	for_each_online_cpu(cpu) {
 		const unsigned long *mask = cpumask_bits(cpumask_of(cpu));
 
-		virtqueue_set_affinity(vi->rq[i].vq, cpu);
-		virtqueue_set_affinity(vi->sq[i].vq, cpu);
+		virtqueue_set_affinity(vi->rq[i].vq, cpumask_of(cpu));
+		virtqueue_set_affinity(vi->sq[i].vq, cpumask_of(cpu));
 		__netif_set_xps_queue(vi->dev, mask, i, false);
 		i++;
 	}
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 705aebd74e56..465a6f5142cc 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -421,7 +421,7 @@ const char *vp_bus_name(struct virtio_device *vdev)
  * - OR over all affinities for shared MSI
  * - ignore the affinity request if we're using INTX
  */
-int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
+int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
 {
 	struct virtio_device *vdev = vq->vdev;
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -435,11 +435,10 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
 	if (vp_dev->msix_enabled) {
 		mask = vp_dev->msix_affinity_masks[info->msix_vector];
 		irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
-		if (cpu == -1)
+		if (!cpu_mask)
 			irq_set_affinity_hint(irq, NULL);
 		else {
-			cpumask_clear(mask);
-			cpumask_set_cpu(cpu, mask);
+			cpumask_copy(mask, cpu_mask);
 			irq_set_affinity_hint(irq, mask);
 		}
 	}
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index 135ee3cf7175..02271002c2f3 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -141,7 +141,7 @@ const char *vp_bus_name(struct virtio_device *vdev);
  * - OR over all affinities for shared MSI
  * - ignore the affinity request if we're using INTX
  */
-int vp_set_vq_affinity(struct virtqueue *vq, int cpu);
+int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask);
 
 const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index);
 
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 5559a2d31c46..32baf8e26735 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -79,7 +79,8 @@ struct virtio_config_ops {
 	u64 (*get_features)(struct virtio_device *vdev);
 	int (*finalize_features)(struct virtio_device *vdev);
 	const char *(*bus_name)(struct virtio_device *vdev);
-	int (*set_vq_affinity)(struct virtqueue *vq, int cpu);
+	int (*set_vq_affinity)(struct virtqueue *vq,
+			       const struct cpumask *cpu_mask);
 	const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev,
 			int index);
 };
@@ -236,11 +237,11 @@ const char *virtio_bus_name(struct virtio_device *vdev)
  *
  */
 static inline
-int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
+int virtqueue_set_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
 {
 	struct virtio_device *vdev = vq->vdev;
 	if (vdev->config->set_vq_affinity)
-		return vdev->config->set_vq_affinity(vq, cpu);
+		return vdev->config->set_vq_affinity(vq, cpu_mask);
 	return 0;
 }
 
-- 
cgit v1.2.3


From b86d865cb1cae1e61527ea0b8977078bbf694328 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 10 Aug 2018 13:28:07 -0700
Subject: blkcg: Make blkg_root_lookup() work for queues in bypass mode

For legacy queues the only call of blkg_root_lookup() happens after
bypass mode has been enabled. Since blkg_lookup() returns NULL for
queues in bypass mode, modify the blkg_root_lookup() such that it
no longer depends on bypass mode. Rename the function into
blk_queue_root_blkg() as suggested by Tejun.

Suggested-by: Tejun Heo <tj@kernel.org>
Fixes: 6bad9b210a22 ("blkcg: Introduce blkg_root_lookup()")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c          |  2 +-
 include/linux/blk-cgroup.h | 15 +++++----------
 2 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 380bc284ced1..bb109bb0a055 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -813,7 +813,7 @@ static void __blk_release_queue(struct work_struct *work)
 		blk_exit_queue(q);
 	}
 
-	WARN(blkg_root_lookup(q),
+	WARN(blk_queue_root_blkg(q),
 	     "request queue %p is being released but it has not yet been removed from the blkcg controller\n",
 	     q);
 
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1361cfc9b878..34aec30e06c7 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -342,20 +342,14 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 }
 
 /**
- * blkg_lookup - look up blkg for the specified request queue
+ * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
  * @q: request_queue of interest
  *
  * Lookup blkg for @q at the root level. See also blkg_lookup().
  */
-static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q)
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
 {
-	struct blkcg_gq *blkg;
-
-	rcu_read_lock();
-	blkg = blkg_lookup(&blkcg_root, q);
-	rcu_read_unlock();
-
-	return blkg;
+	return q->root_blkg;
 }
 
 /**
@@ -881,7 +875,8 @@ static inline bool blk_cgroup_congested(void) { return false; }
 static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
 
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
-static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q) { return NULL; }
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{ return NULL; }
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
 static inline void blkcg_exit_queue(struct request_queue *q) { }
-- 
cgit v1.2.3


From b5b1404d0815894de0690de8a1ab58269e56eae6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 12 Aug 2018 12:19:42 -0700
Subject: init: rename and re-order boot_cpu_state_init()

This is purely a preparatory patch for upcoming changes during the 4.19
merge window.

We have a function called "boot_cpu_state_init()" that isn't really
about the bootup cpu state: that is done much earlier by the similarly
named "boot_cpu_init()" (note lack of "state" in name).

This function initializes some hotplug CPU state, and needs to run after
the percpu data has been properly initialized.  It even has a comment to
that effect.

Except it _doesn't_ actually run after the percpu data has been properly
initialized.  On x86 it happens to do that, but on at least arm and
arm64, the percpu base pointers are initialized by the arch-specific
'smp_prepare_boot_cpu()' hook, which ran _after_ boot_cpu_state_init().

This had some unexpected results, and in particular we have a patch
pending for the merge window that did the obvious cleanup of using
'this_cpu_write()' in the cpu hotplug init code:

  -       per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
  +       this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);

which is obviously the right thing to do.  Except because of the
ordering issue, it actually failed miserably and unexpectedly on arm64.

So this just fixes the ordering, and changes the name of the function to
be 'boot_cpu_hotplug_init()' to make it obvious that it's about cpu
hotplug state, because the core CPU state was supposed to have already
been done earlier.

Marked for stable, since the (not yet merged) patch that will show this
problem is marked for stable.

Reported-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Mian Yousaf Kaukab <yousaf.kaukab@suse.com>
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h | 2 +-
 init/main.c         | 2 +-
 kernel/cpu.c        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a97a63eef59f..3233fbe23594 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -30,7 +30,7 @@ struct cpu {
 };
 
 extern void boot_cpu_init(void);
-extern void boot_cpu_state_init(void);
+extern void boot_cpu_hotplug_init(void);
 extern void cpu_init(void);
 extern void trap_init(void);
 
diff --git a/init/main.c b/init/main.c
index 3b4ada11ed52..5e13c544bbf4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -561,8 +561,8 @@ asmlinkage __visible void __init start_kernel(void)
 	setup_command_line(command_line);
 	setup_nr_cpu_ids();
 	setup_per_cpu_areas();
-	boot_cpu_state_init();
 	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
+	boot_cpu_hotplug_init();
 
 	build_all_zonelists(NULL);
 	page_alloc_init();
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0db8938fbb23..2f8f338e77cf 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2010,7 +2010,7 @@ void __init boot_cpu_init(void)
 /*
  * Must be called _AFTER_ setting up the per_cpu areas
  */
-void __init boot_cpu_state_init(void)
+void __init boot_cpu_hotplug_init(void)
 {
 	per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
 }
-- 
cgit v1.2.3


From e8d2bec0457962e8f348a9a3627b398f7fe5c5fc Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 12 Aug 2018 01:59:17 +0200
Subject: bpf: decouple btf from seq bpf fs dump and enable more maps

Commit a26ca7c982cb ("bpf: btf: Add pretty print support to
the basic arraymap") and 699c86d6ec21 ("bpf: btf: add pretty
print for hash/lru_hash maps") enabled support for BTF and
dumping via BPF fs for array and hash/lru map. However, both
can be decoupled from each other such that regular BPF maps
can be supported for attaching BTF key/value information,
while not all maps necessarily need to dump via map_seq_show_elem()
callback.

The basic sanity check which is a prerequisite for all maps
is that key/value size has to match in any case, and some maps
can have extra checks via map_check_btf() callback, e.g.
probing certain types or indicating no support in general. With
that we can also enable retrieving BTF info for per-cpu map
types and lpm.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h        | 13 +++++++++----
 kernel/bpf/arraymap.c      | 26 ++++++++++++--------------
 kernel/bpf/cpumap.c        |  1 +
 kernel/bpf/devmap.c        |  1 +
 kernel/bpf/hashtab.c       | 20 +-------------------
 kernel/bpf/inode.c         |  3 ++-
 kernel/bpf/local_storage.c |  1 +
 kernel/bpf/lpm_trie.c      | 12 ++++++++++++
 kernel/bpf/sockmap.c       |  2 ++
 kernel/bpf/stackmap.c      |  1 +
 kernel/bpf/syscall.c       | 36 ++++++++++++++++++++++++++++++++----
 kernel/bpf/xskmap.c        |  3 +--
 12 files changed, 75 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index db11662faea6..523481a3471b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -23,7 +23,7 @@ struct bpf_prog;
 struct bpf_map;
 struct sock;
 struct seq_file;
-struct btf;
+struct btf_type;
 
 /* map is generic key/value storage optionally accesible by eBPF programs */
 struct bpf_map_ops {
@@ -48,8 +48,9 @@ struct bpf_map_ops {
 	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
 				  struct seq_file *m);
-	int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
-			     u32 key_type_id, u32 value_type_id);
+	int (*map_check_btf)(const struct bpf_map *map,
+			     const struct btf_type *key_type,
+			     const struct btf_type *value_type);
 };
 
 struct bpf_map {
@@ -118,9 +119,13 @@ static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
 
 static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
 {
-	return map->ops->map_seq_show_elem && map->ops->map_check_btf;
+	return map->btf && map->ops->map_seq_show_elem;
 }
 
+int map_check_no_btf(const struct bpf_map *map,
+		     const struct btf_type *key_type,
+		     const struct btf_type *value_type);
+
 extern const struct bpf_map_ops bpf_map_offload_ops;
 
 /* function argument constraints */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index f6ca3e712831..0c17aab3ce5f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -358,27 +358,20 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
 	rcu_read_unlock();
 }
 
-static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
-			       u32 btf_key_id, u32 btf_value_id)
+static int array_map_check_btf(const struct bpf_map *map,
+			       const struct btf_type *key_type,
+			       const struct btf_type *value_type)
 {
-	const struct btf_type *key_type, *value_type;
-	u32 key_size, value_size;
 	u32 int_data;
 
-	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
-	if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
 		return -EINVAL;
 
 	int_data = *(u32 *)(key_type + 1);
-	/* bpf array can only take a u32 key.  This check makes
-	 * sure that the btf matches the attr used during map_create.
+	/* bpf array can only take a u32 key. This check makes sure
+	 * that the btf matches the attr used during map_create.
 	 */
-	if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
-	    BTF_INT_OFFSET(int_data))
-		return -EINVAL;
-
-	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
-	if (!value_type || value_size != map->value_size)
+	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
 		return -EINVAL;
 
 	return 0;
@@ -405,6 +398,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_lookup_elem = percpu_array_map_lookup_elem,
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,
+	.map_check_btf = array_map_check_btf,
 };
 
 static int fd_array_map_alloc_check(union bpf_attr *attr)
@@ -546,6 +540,7 @@ const struct bpf_map_ops prog_array_map_ops = {
 	.map_fd_put_ptr = prog_fd_array_put_ptr,
 	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
 	.map_release_uref = bpf_fd_array_map_clear,
+	.map_check_btf = map_check_no_btf,
 };
 
 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
@@ -634,6 +629,7 @@ const struct bpf_map_ops perf_event_array_map_ops = {
 	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
 	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
 	.map_release = perf_event_fd_array_release,
+	.map_check_btf = map_check_no_btf,
 };
 
 #ifdef CONFIG_CGROUPS
@@ -665,6 +661,7 @@ const struct bpf_map_ops cgroup_array_map_ops = {
 	.map_delete_elem = fd_array_map_delete_elem,
 	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
 	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
+	.map_check_btf = map_check_no_btf,
 };
 #endif
 
@@ -749,4 +746,5 @@ const struct bpf_map_ops array_of_maps_map_ops = {
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
 	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 	.map_gen_lookup = array_of_map_gen_lookup,
+	.map_check_btf = map_check_no_btf,
 };
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e0918d180f08..3b494941a44a 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -555,6 +555,7 @@ const struct bpf_map_ops cpu_map_ops = {
 	.map_update_elem	= cpu_map_update_elem,
 	.map_lookup_elem	= cpu_map_lookup_elem,
 	.map_get_next_key	= cpu_map_get_next_key,
+	.map_check_btf		= map_check_no_btf,
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d361fc1e3bf3..a7c6620d8389 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -484,6 +484,7 @@ const struct bpf_map_ops dev_map_ops = {
 	.map_lookup_elem = dev_map_lookup_elem,
 	.map_update_elem = dev_map_update_elem,
 	.map_delete_elem = dev_map_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
 
 static int dev_map_notification(struct notifier_block *notifier,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d6110042e0d9..04b8eda94e7d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1185,23 +1185,6 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
 	rcu_read_unlock();
 }
 
-static int htab_map_check_btf(const struct bpf_map *map, const struct btf *btf,
-			      u32 btf_key_id, u32 btf_value_id)
-{
-	const struct btf_type *key_type, *value_type;
-	u32 key_size, value_size;
-
-	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
-	if (!key_type || key_size != map->key_size)
-		return -EINVAL;
-
-	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
-	if (!value_type || value_size != map->value_size)
-		return -EINVAL;
-
-	return 0;
-}
-
 const struct bpf_map_ops htab_map_ops = {
 	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
@@ -1212,7 +1195,6 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
-	.map_check_btf = htab_map_check_btf,
 };
 
 const struct bpf_map_ops htab_lru_map_ops = {
@@ -1225,7 +1207,6 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_gen_lookup = htab_lru_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
-	.map_check_btf = htab_map_check_btf,
 };
 
 /* Called from eBPF program */
@@ -1452,4 +1433,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
 	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 	.map_gen_lookup = htab_of_map_gen_lookup,
+	.map_check_btf = map_check_no_btf,
 };
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index fc5b103512e7..2ada5e21dfa6 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -334,7 +334,8 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
 	struct bpf_map *map = arg;
 
 	return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
-			     map->btf ? &bpffs_map_fops : &bpffs_obj_fops);
+			     bpf_map_support_seq_show(map) ?
+			     &bpffs_map_fops : &bpffs_obj_fops);
 }
 
 static struct dentry *
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index fc4e37f68f2a..22ad967d1e5f 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -246,6 +246,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
 	.map_lookup_elem = cgroup_storage_lookup_elem,
 	.map_update_elem = cgroup_storage_update_elem,
 	.map_delete_elem = cgroup_storage_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
 
 int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 1603492c9cc7..9058317ba9de 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -10,11 +10,13 @@
  */
 
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <net/ipv6.h>
+#include <uapi/linux/btf.h>
 
 /* Intermediate node */
 #define LPM_TREE_NODE_FLAG_IM BIT(0)
@@ -686,6 +688,15 @@ free_stack:
 	return err;
 }
 
+static int trie_check_btf(const struct bpf_map *map,
+			  const struct btf_type *key_type,
+			  const struct btf_type *value_type)
+{
+	/* Keys must have struct bpf_lpm_trie_key embedded. */
+	return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ?
+	       -EINVAL : 0;
+}
+
 const struct bpf_map_ops trie_map_ops = {
 	.map_alloc = trie_alloc,
 	.map_free = trie_free,
@@ -693,4 +704,5 @@ const struct bpf_map_ops trie_map_ops = {
 	.map_lookup_elem = trie_lookup_elem,
 	.map_update_elem = trie_update_elem,
 	.map_delete_elem = trie_delete_elem,
+	.map_check_btf = trie_check_btf,
 };
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 0b38be5a955c..e376ffffebce 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -2495,6 +2495,7 @@ const struct bpf_map_ops sock_map_ops = {
 	.map_update_elem = sock_map_update_elem,
 	.map_delete_elem = sock_map_delete_elem,
 	.map_release_uref = sock_map_release,
+	.map_check_btf = map_check_no_btf,
 };
 
 const struct bpf_map_ops sock_hash_ops = {
@@ -2505,6 +2506,7 @@ const struct bpf_map_ops sock_hash_ops = {
 	.map_update_elem = sock_hash_update_elem,
 	.map_delete_elem = sock_hash_delete_elem,
 	.map_release_uref = sock_map_release,
+	.map_check_btf = map_check_no_btf,
 };
 
 BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b675a3f3d141..8061a439ef18 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -607,6 +607,7 @@ const struct bpf_map_ops stack_map_ops = {
 	.map_lookup_elem = stack_map_lookup_elem,
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
 
 static int __init stack_map_init(void)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 57f4d076141b..43727ed0d94a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -103,6 +103,7 @@ int bpf_check_uarg_tail_zero(void __user *uaddr,
 const struct bpf_map_ops bpf_map_offload_ops = {
 	.map_alloc = bpf_map_offload_map_alloc,
 	.map_free = bpf_map_offload_map_free,
+	.map_check_btf = map_check_no_btf,
 };
 
 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
@@ -455,6 +456,34 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
 	return 0;
 }
 
+int map_check_no_btf(const struct bpf_map *map,
+		     const struct btf_type *key_type,
+		     const struct btf_type *value_type)
+{
+	return -ENOTSUPP;
+}
+
+static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
+			 u32 btf_key_id, u32 btf_value_id)
+{
+	const struct btf_type *key_type, *value_type;
+	u32 key_size, value_size;
+	int ret = 0;
+
+	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+	if (!key_type || key_size != map->key_size)
+		return -EINVAL;
+
+	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
+	if (!value_type || value_size != map->value_size)
+		return -EINVAL;
+
+	if (map->ops->map_check_btf)
+		ret = map->ops->map_check_btf(map, key_type, value_type);
+
+	return ret;
+}
+
 #define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
@@ -489,8 +518,7 @@ static int map_create(union bpf_attr *attr)
 	atomic_set(&map->refcnt, 1);
 	atomic_set(&map->usercnt, 1);
 
-	if (bpf_map_support_seq_show(map) &&
-	    (attr->btf_key_type_id || attr->btf_value_type_id)) {
+	if (attr->btf_key_type_id || attr->btf_value_type_id) {
 		struct btf *btf;
 
 		if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
@@ -504,8 +532,8 @@ static int map_create(union bpf_attr *attr)
 			goto free_map_nouncharge;
 		}
 
-		err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
-					      attr->btf_value_type_id);
+		err = map_check_btf(map, btf, attr->btf_key_type_id,
+				    attr->btf_value_type_id);
 		if (err) {
 			btf_put(btf);
 			goto free_map_nouncharge;
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index b3c557476a8d..4ddf61e158f6 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -227,6 +227,5 @@ const struct bpf_map_ops xsk_map_ops = {
 	.map_lookup_elem = xsk_map_lookup_elem,
 	.map_update_elem = xsk_map_update_elem,
 	.map_delete_elem = xsk_map_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
-
-
-- 
cgit v1.2.3


From 7723628101aaeb1d723786747529b4ea65c5b5c5 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Sun, 12 Aug 2018 10:49:27 -0700
Subject: bpf: Introduce bpf_skb_ancestor_cgroup_id helper

== Problem description ==

It's useful to be able to identify cgroup associated with skb in TC so
that a policy can be applied to this skb, and existing bpf_skb_cgroup_id
helper can help with this.

Though in real life cgroup hierarchy and hierarchy to apply a policy to
don't map 1:1.

It's often the case that there is a container and corresponding cgroup,
but there are many more sub-cgroups inside container, e.g. because it's
delegated to containerized application to control resources for its
subsystems, or to separate application inside container from infra that
belongs to containerization system (e.g. sshd).

At the same time it may be useful to apply a policy to container as a
whole.

If multiple containers like this are run on a host (what is often the
case) and many of them have sub-cgroups, it may not be possible to apply
per-container policy in TC with existing helpers such as
bpf_skb_under_cgroup or bpf_skb_cgroup_id:

* bpf_skb_cgroup_id will return id of immediate cgroup associated with
  skb, i.e. if it's a sub-cgroup inside container, it can't be used to
  identify container's cgroup;

* bpf_skb_under_cgroup can work only with one cgroup and doesn't scale,
  i.e. if there are N containers on a host and a policy has to be
  applied to M of them (0 <= M <= N), it'd require M calls to
  bpf_skb_under_cgroup, and, if M changes, it'd require to rebuild &
  load new BPF program.

== Solution ==

The patch introduces new helper bpf_skb_ancestor_cgroup_id that can be
used to get id of cgroup v2 that is an ancestor of cgroup associated
with skb at specified level of cgroup hierarchy.

That way admin can place all containers on one level of cgroup hierarchy
(what is a good practice in general and already used in many
configurations) and identify specific cgroup on this level no matter
what sub-cgroup skb is associated with.

E.g. if there is a cgroup hierarchy:
  root/
  root/container1/
  root/container1/app11/
  root/container1/app11/sub-app-a/
  root/container1/app12/
  root/container2/
  root/container2/app21/
  root/container2/app22/
  root/container2/app22/sub-app-b/

, then having skb associated with root/container1/app11/sub-app-a/ it's
possible to get ancestor at level 1, what is container1 and apply policy
for this container, or apply another policy if it's container2.

Policies can be kept e.g. in a hash map where key is a container cgroup
id and value is an action.

Levels where container cgroups are created are usually known in advance
whether cgroup hierarchy inside container may be hard to predict
especially in case when its creation is delegated to containerized
application.

== Implementation details ==

The helper gets ancestor by walking parents up to specified level.

Another option would be to get different kind of "id" from
cgroup->ancestor_ids[level] and use it with idr_find() to get struct
cgroup for ancestor. But that would require radix lookup what doesn't
seem to be better (at least it's not obviously better).

Format of return value of the new helper is same as that of
bpf_skb_cgroup_id.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/cgroup.h   | 30 ++++++++++++++++++++++++++++++
 include/uapi/linux/bpf.h | 21 ++++++++++++++++++++-
 net/core/filter.c        | 28 ++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c9fdf6f57913..32c553556bbd 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -553,6 +553,36 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
 	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 }
 
+/**
+ * cgroup_ancestor - find ancestor of cgroup
+ * @cgrp: cgroup to find ancestor of
+ * @ancestor_level: level of ancestor to find starting from root
+ *
+ * Find ancestor of cgroup at specified level starting from root if it exists
+ * and return pointer to it. Return NULL if @cgrp doesn't have ancestor at
+ * @ancestor_level.
+ *
+ * This function is safe to call as long as @cgrp is accessible.
+ */
+static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
+					     int ancestor_level)
+{
+	struct cgroup *ptr;
+
+	if (cgrp->level < ancestor_level)
+		return NULL;
+
+	for (ptr = cgrp;
+	     ptr && ptr->level > ancestor_level;
+	     ptr = cgroup_parent(ptr))
+		;
+
+	if (ptr && ptr->level == ancestor_level)
+		return ptr;
+
+	return NULL;
+}
+
 /**
  * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
  * @task: the task to be tested
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3102a2a23c31..66917a4eba27 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2093,6 +2093,24 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ *	Description
+ *		Return id of cgroup v2 that is ancestor of cgroup associated
+ *		with the *skb* at the *ancestor_level*.  The root cgroup is at
+ *		*ancestor_level* zero and each step down the hierarchy
+ *		increments the level. If *ancestor_level* == level of cgroup
+ *		associated with *skb*, then return value will be same as that
+ *		of **bpf_skb_cgroup_id**\ ().
+ *
+ *		The helper is useful to implement policies based on cgroups
+ *		that are upper in hierarchy than immediate cgroup associated
+ *		with *skb*.
+ *
+ *		The format of returned id and helper limitations are same as in
+ *		**bpf_skb_cgroup_id**\ ().
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
+ *
  * u64 bpf_get_current_cgroup_id(void)
  * 	Return
  * 		A 64-bit integer containing the current cgroup id based
@@ -2207,7 +2225,8 @@ union bpf_attr {
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
 	FN(get_local_storage),		\
-	FN(sk_select_reuseport),
+	FN(sk_select_reuseport),	\
+	FN(skb_ancestor_cgroup_id),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 22906b31d43f..15b9d2df92ca 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3778,6 +3778,32 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
 	.ret_type       = RET_INTEGER,
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
+
+BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
+	   ancestor_level)
+{
+	struct sock *sk = skb_to_full_sk(skb);
+	struct cgroup *ancestor;
+	struct cgroup *cgrp;
+
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	ancestor = cgroup_ancestor(cgrp, ancestor_level);
+	if (!ancestor)
+		return 0;
+
+	return ancestor->kn->id.id;
+}
+
+static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
+	.func           = bpf_skb_ancestor_cgroup_id,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+};
 #endif
 
 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
@@ -4966,6 +4992,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #ifdef CONFIG_SOCK_CGROUP_DATA
 	case BPF_FUNC_skb_cgroup_id:
 		return &bpf_skb_cgroup_id_proto;
+	case BPF_FUNC_skb_ancestor_cgroup_id:
+		return &bpf_skb_ancestor_cgroup_id_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
-- 
cgit v1.2.3


From 62b0194368147def8c5a77ce604a125d620fc582 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@dabbelt.com>
Date: Sat, 4 Aug 2018 10:23:19 +0200
Subject: clocksource: new RISC-V SBI timer driver

The RISC-V ISA defines a per-hart real-time clock and timer, which is
present on all systems.  The clock is accessed via the 'rdtime'
pseudo-instruction (which reads a CSR), and the timer is set via an SBI
call.

Contains various improvements from Atish Patra <atish.patra@wdc.com>.

Signed-off-by: Dmitriy Cherkasov <dmitriy@oss-tech.org>
Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
[hch: remove dead code, add SPDX tags, used riscv_of_processor_hart(),
 minor cleanups, merged  hotplug cpu support and other improvements
 from Atish]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Atish Patra <atish.patra@wdc.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/smp.h      |   3 --
 arch/riscv/kernel/irq.c           |   3 ++
 arch/riscv/kernel/smpboot.c       |   1 -
 arch/riscv/kernel/time.c          |   9 +---
 drivers/clocksource/Kconfig       |  11 ++++
 drivers/clocksource/Makefile      |   1 +
 drivers/clocksource/riscv_timer.c | 105 ++++++++++++++++++++++++++++++++++++++
 include/linux/cpuhotplug.h        |   1 +
 8 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100644 drivers/clocksource/riscv_timer.c

(limited to 'include/linux')

diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index c9395fff246f..36016845461d 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -24,9 +24,6 @@
 
 #ifdef CONFIG_SMP
 
-/* SMP initialization hook for setup_arch */
-void __init init_clockevent(void);
-
 /* SMP initialization hook for setup_arch */
 void __init setup_smp(void);
 
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index ab5f3e22c7cc..0cfac48a1272 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -30,6 +30,9 @@ asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long cause)
 
 	irq_enter();
 	switch (cause & ~INTERRUPT_CAUSE_FLAG) {
+	case INTERRUPT_CAUSE_TIMER:
+		riscv_timer_interrupt();
+		break;
 #ifdef CONFIG_SMP
 	case INTERRUPT_CAUSE_SOFTWARE:
 		/*
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index f741458c5a3f..56abab6a9812 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -104,7 +104,6 @@ asmlinkage void __init smp_callin(void)
 	current->active_mm = mm;
 
 	trap_init();
-	init_clockevent();
 	notify_cpu_starting(smp_processor_id());
 	set_cpu_online(smp_processor_id(), 1);
 	local_flush_tlb_all();
diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
index 0df9b2cbd645..1911c8f6b8a6 100644
--- a/arch/riscv/kernel/time.c
+++ b/arch/riscv/kernel/time.c
@@ -18,12 +18,6 @@
 
 unsigned long riscv_timebase;
 
-void __init init_clockevent(void)
-{
-	timer_probe();
-	csr_set(sie, SIE_STIE);
-}
-
 void __init time_init(void)
 {
 	struct device_node *cpu;
@@ -35,6 +29,5 @@ void __init time_init(void)
 	riscv_timebase = prop;
 
 	lpj_fine = riscv_timebase / HZ;
-
-	init_clockevent();
+	timer_probe();
 }
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index dec0dd88ec15..a11f4ba98b05 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -609,4 +609,15 @@ config ATCPIT100_TIMER
 	help
 	  This option enables support for the Andestech ATCPIT100 timers.
 
+config RISCV_TIMER
+	bool "Timer for the RISC-V platform"
+	depends on RISCV
+	default y
+	select TIMER_PROBE
+	select TIMER_OF
+	help
+	  This enables the per-hart timer built into all RISC-V systems, which
+	  is accessed via both the SBI and the rdcycle instruction.  This is
+	  required for all RISC-V systems.
+
 endmenu
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 00caf37e52f9..ded31f720bd9 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -78,3 +78,4 @@ obj-$(CONFIG_H8300_TPU)			+= h8300_tpu.o
 obj-$(CONFIG_CLKSRC_ST_LPC)		+= clksrc_st_lpc.o
 obj-$(CONFIG_X86_NUMACHIP)		+= numachip.o
 obj-$(CONFIG_ATCPIT100_TIMER)		+= timer-atcpit100.o
+obj-$(CONFIG_RISCV_TIMER)		+= riscv_timer.o
diff --git a/drivers/clocksource/riscv_timer.c b/drivers/clocksource/riscv_timer.c
new file mode 100644
index 000000000000..4e8b347e43e2
--- /dev/null
+++ b/drivers/clocksource/riscv_timer.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ */
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <asm/sbi.h>
+
+/*
+ * All RISC-V systems have a timer attached to every hart.  These timers can be
+ * read by the 'rdcycle' pseudo instruction, and can use the SBI to setup
+ * events.  In order to abstract the architecture-specific timer reading and
+ * setting functions away from the clock event insertion code, we provide
+ * function pointers to the clockevent subsystem that perform two basic
+ * operations: rdtime() reads the timer on the current CPU, and
+ * next_event(delta) sets the next timer event to 'delta' cycles in the future.
+ * As the timers are inherently a per-cpu resource, these callbacks perform
+ * operations on the current hart.  There is guaranteed to be exactly one timer
+ * per hart on all RISC-V systems.
+ */
+
+static int riscv_clock_next_event(unsigned long delta,
+		struct clock_event_device *ce)
+{
+	csr_set(sie, SIE_STIE);
+	sbi_set_timer(get_cycles64() + delta);
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = {
+	.name			= "riscv_timer_clockevent",
+	.features		= CLOCK_EVT_FEAT_ONESHOT,
+	.rating			= 100,
+	.set_next_event		= riscv_clock_next_event,
+};
+
+/*
+ * It is guaranteed that all the timers across all the harts are synchronized
+ * within one tick of each other, so while this could technically go
+ * backwards when hopping between CPUs, practically it won't happen.
+ */
+static unsigned long long riscv_clocksource_rdtime(struct clocksource *cs)
+{
+	return get_cycles64();
+}
+
+static DEFINE_PER_CPU(struct clocksource, riscv_clocksource) = {
+	.name		= "riscv_clocksource",
+	.rating		= 300,
+	.mask		= CLOCKSOURCE_MASK(BITS_PER_LONG),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+	.read		= riscv_clocksource_rdtime,
+};
+
+static int riscv_timer_starting_cpu(unsigned int cpu)
+{
+	struct clock_event_device *ce = per_cpu_ptr(&riscv_clock_event, cpu);
+
+	ce->cpumask = cpumask_of(cpu);
+	clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff);
+
+	csr_set(sie, SIE_STIE);
+	return 0;
+}
+
+static int riscv_timer_dying_cpu(unsigned int cpu)
+{
+	csr_clear(sie, SIE_STIE);
+	return 0;
+}
+
+/* called directly from the low-level interrupt handler */
+void riscv_timer_interrupt(void)
+{
+	struct clock_event_device *evdev = this_cpu_ptr(&riscv_clock_event);
+
+	csr_clear(sie, SIE_STIE);
+	evdev->event_handler(evdev);
+}
+
+static int __init riscv_timer_init_dt(struct device_node *n)
+{
+	int cpu_id = riscv_of_processor_hart(n), error;
+	struct clocksource *cs;
+
+	if (cpu_id != smp_processor_id())
+		return 0;
+
+	cs = per_cpu_ptr(&riscv_clocksource, cpu_id);
+	clocksource_register_hz(cs, riscv_timebase);
+
+	error = cpuhp_setup_state(CPUHP_AP_RISCV_TIMER_STARTING,
+			 "clockevents/riscv/timer:starting",
+			 riscv_timer_starting_cpu, riscv_timer_dying_cpu);
+	if (error)
+		pr_err("RISCV timer register failed [%d] for cpu = [%d]\n",
+		       error, cpu_id);
+	return error;
+}
+
+TIMER_OF_DECLARE(riscv_timer, "riscv", riscv_timer_init_dt);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8796ba387152..554c27f6cfbd 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -125,6 +125,7 @@ enum cpuhp_state {
 	CPUHP_AP_MARCO_TIMER_STARTING,
 	CPUHP_AP_MIPS_GIC_TIMER_STARTING,
 	CPUHP_AP_ARC_TIMER_STARTING,
+	CPUHP_AP_RISCV_TIMER_STARTING,
 	CPUHP_AP_KVM_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_STARTING,
-- 
cgit v1.2.3


From 9af18e56d43ca4864ce65c3542c513827c2697de Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sun, 12 Aug 2018 09:14:03 -0400
Subject: cpumask: make cpumask_next_wrap available without smp

The kbuild robot shows build failure on machines without CONFIG_SMP:

  drivers/net/virtio_net.c:1916:10: error:
    implicit declaration of function 'cpumask_next_wrap'

cpumask_next_wrap is exported from lib/cpumask.o, which has

    lib-$(CONFIG_SMP) += cpumask.o

same as other functions, also define it as static inline in the
NR_CPUS==1 branch in include/linux/cpumask.h.

If wrap is true and next == start, return nr_cpumask_bits, or 1.
Else wrap across the range of valid cpus, here [0].

Fixes: 2ca653d607ce ("virtio_net: Stripe queue affinities across cores.")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Tested-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cpumask.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 57f20a0a7794..147bdec42215 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -159,6 +159,13 @@ static inline unsigned int cpumask_next_and(int n,
 	return n+1;
 }
 
+static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask,
+					     int start, bool wrap)
+{
+	/* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */
+	return (wrap && n == 0);
+}
+
 /* cpu must be a valid cpu, ie 0, so there's no other choice. */
 static inline unsigned int cpumask_any_but(const struct cpumask *mask,
 					   unsigned int cpu)
-- 
cgit v1.2.3


From cf916ffbe0c628bb072ea829f300cff1874162ce Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Sun, 29 Apr 2018 09:17:34 -0500
Subject: net/mlx5: Improve argument name for add flow API

The last argument to mlx5_add_flow_rules passes the number of
destinations in the struct pointed to by the dest arg. Change the name
to better reflect this fact.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 8 ++++----
 include/linux/mlx5/fs.h                           | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index a21df24b695e..261cb6aacf12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1876,7 +1876,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		    struct mlx5_flow_spec *spec,
 		    struct mlx5_flow_act *flow_act,
 		    struct mlx5_flow_destination *dest,
-		    int dest_num)
+		    int num_dest)
 {
 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
 	struct mlx5_flow_destination gen_dest = {};
@@ -1889,7 +1889,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 	if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 		if (!fwd_next_prio_supported(ft))
 			return ERR_PTR(-EOPNOTSUPP);
-		if (dest_num)
+		if (num_dest)
 			return ERR_PTR(-EINVAL);
 		mutex_lock(&root->chain_lock);
 		next_ft = find_next_chained_ft(prio);
@@ -1897,7 +1897,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 			gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 			gen_dest.ft = next_ft;
 			dest = &gen_dest;
-			dest_num = 1;
+			num_dest = 1;
 			flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 		} else {
 			mutex_unlock(&root->chain_lock);
@@ -1905,7 +1905,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		}
 	}
 
-	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, dest_num);
+	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
 
 	if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 		if (!IS_ERR_OR_NULL(handle) &&
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index c40f2fc68655..71fb503b2b52 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -177,7 +177,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		    struct mlx5_flow_spec *spec,
 		    struct mlx5_flow_act *flow_act,
 		    struct mlx5_flow_destination *dest,
-		    int dest_num);
+		    int num_dest);
 void mlx5_del_flow_rules(struct mlx5_flow_handle *fr);
 
 int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
-- 
cgit v1.2.3


From e4648aa4f98a87cf0a83f73a5864cede073053a0 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 13 Aug 2018 15:33:01 -0400
Subject: NFS recover from destination server reboot for copies

Mark the destination state to indicate a server-side copy is
happening. On detecting a reboot and recovering open state check
if any state is engaged in a server-side copy, if so, find the
copy and mark it and then signal the waiting thread. Upon wakeup,
if copy was marked then propage EAGAIN to the nfsd_copy_file_range
and restart the copy from scratch.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs42proc.c     | 16 +++++++++++++---
 fs/nfs/nfs4_fs.h       |  3 +++
 fs/nfs/nfs4file.c      |  9 +++++++--
 fs/nfs/nfs4state.c     | 16 ++++++++++++++++
 include/linux/nfs_fs.h |  2 ++
 5 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index d158526d1044..ac5b784a1de0 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -140,6 +140,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 	struct nfs4_copy_state *copy;
 	int status = NFS4_OK;
 	bool found_pending = false;
+	struct nfs_open_context *ctx = nfs_file_open_context(dst);
 
 	spin_lock(&server->nfs_client->cl_lock);
 	list_for_each_entry(copy, &server->nfs_client->pending_cb_stateids,
@@ -163,6 +164,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 	}
 	memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
 	init_completion(&copy->completion);
+	copy->parent_state = ctx->state;
 
 	list_add_tail(&copy->copies, &server->ss_copies);
 	spin_unlock(&server->nfs_client->cl_lock);
@@ -172,15 +174,20 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 	list_del_init(&copy->copies);
 	spin_unlock(&server->nfs_client->cl_lock);
 	if (status == -ERESTARTSYS) {
-		nfs42_do_offload_cancel_async(dst, &copy->stateid);
-		kfree(copy);
-		return status;
+		goto out_cancel;
+	} else if (copy->flags) {
+		status = -EAGAIN;
+		goto out_cancel;
 	}
 out:
 	res->write_res.count = copy->count;
 	memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
 	status = -copy->error;
 
+	kfree(copy);
+	return status;
+out_cancel:
+	nfs42_do_offload_cancel_async(dst, &copy->stateid);
 	kfree(copy);
 	return status;
 }
@@ -254,6 +261,9 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 		if (!res->commit_res.verf)
 			return -ENOMEM;
 	}
+	set_bit(NFS_CLNT_DST_SSC_COPY_STATE,
+		&dst_lock->open_context->state->flags);
+
 	status = nfs4_call_sync(server->client, server, &msg,
 				&args->seq_args, &res->seq_res, 0);
 	if (status == -ENOTSUPP)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e87d414c40d0..542b2fce0447 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -163,6 +163,9 @@ enum {
 	NFS_STATE_RECOVERY_FAILED,	/* OPEN stateid state recovery failed */
 	NFS_STATE_MAY_NOTIFY_LOCK,	/* server may CB_NOTIFY_LOCK */
 	NFS_STATE_CHANGE_WAIT,		/* A state changing operation is outstanding */
+#ifdef CONFIG_NFS_V4_2
+	NFS_CLNT_DST_SSC_COPY_STATE,    /* dst server open state on client*/
+#endif /* CONFIG_NFS_V4_2 */
 };
 
 struct nfs4_state {
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 6c03d6b570b2..4288a6ecaf75 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,10 +133,15 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
 				    struct file *file_out, loff_t pos_out,
 				    size_t count, unsigned int flags)
 {
+	ssize_t ret;
+
 	if (file_inode(file_in) == file_inode(file_out))
 		return -EINVAL;
-
-	return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+retry:
+	ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+	if (ret == -EAGAIN)
+		goto retry;
+	return ret;
 }
 
 static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f1b27e895a94..b6882e09d0f4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1589,6 +1589,22 @@ restart:
 				}
 				clear_bit(NFS_STATE_RECLAIM_NOGRACE,
 					&state->flags);
+#ifdef CONFIG_NFS_V4_2
+				if (test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) {
+					struct nfs4_copy_state *copy;
+
+					spin_lock(&sp->so_server->nfs_client->cl_lock);
+					list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
+						if (memcmp(&state->stateid.other, &copy->parent_state->stateid.other, NFS4_STATEID_SIZE))
+							continue;
+						copy->flags = 1;
+						complete(&copy->completion);
+						printk("AGLO: server rebooted waking up the copy\n");
+						break;
+					}
+					spin_unlock(&sp->so_server->nfs_client->cl_lock);
+				}
+#endif /* CONFIG_NFS_V4_2 */
 				nfs4_put_open_state(state);
 				spin_lock(&sp->so_lock);
 				goto restart;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 645ad8e342f6..a0831e9d19c9 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -192,6 +192,8 @@ struct nfs4_copy_state {
 	uint64_t		count;
 	struct nfs_writeverf	verf;
 	int			error;
+	int			flags;
+	struct nfs4_state	*parent_state;
 };
 
 /*
-- 
cgit v1.2.3


From 6d43743e9079ac0531b60cde7eadd0f042873344 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Date: Thu, 9 Aug 2018 09:48:52 +0530
Subject: Uprobe: Additional argument arch_uprobe to uprobe_write_opcode()

Add addition argument 'arch_uprobe' to uprobe_write_opcode().
We need this in later set of patches.

Link: http://lkml.kernel.org/r/20180809041856.1547-3-ravi.bangoria@linux.ibm.com

Reviewed-by: Song Liu <songliubraving@fb.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/arm/probes/uprobes/core.c | 2 +-
 arch/mips/kernel/uprobes.c     | 2 +-
 include/linux/uprobes.h        | 2 +-
 kernel/events/uprobes.c        | 9 +++++----
 4 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/probes/uprobes/core.c b/arch/arm/probes/uprobes/core.c
index d1329f1ba4e4..bf992264060e 100644
--- a/arch/arm/probes/uprobes/core.c
+++ b/arch/arm/probes/uprobes/core.c
@@ -32,7 +32,7 @@ bool is_swbp_insn(uprobe_opcode_t *insn)
 int set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	     unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr,
+	return uprobe_write_opcode(auprobe, mm, vaddr,
 		   __opcode_to_mem_arm(auprobe->bpinsn));
 }
 
diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c
index f7a0645ccb82..4aaff3b3175c 100644
--- a/arch/mips/kernel/uprobes.c
+++ b/arch/mips/kernel/uprobes.c
@@ -224,7 +224,7 @@ unsigned long arch_uretprobe_hijack_return_addr(
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+	return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
 }
 
 void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 0a294e950df8..bb9d2084af03 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -121,7 +121,7 @@ extern bool is_swbp_insn(uprobe_opcode_t *insn);
 extern bool is_trap_insn(uprobe_opcode_t *insn);
 extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
 extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
-extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
+extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 471eac896635..c0418ba52ba8 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -299,8 +299,8 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * Called with mm->mmap_sem held for write.
  * Return 0 (success) or a negative errno.
  */
-int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
-			uprobe_opcode_t opcode)
+int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
+			unsigned long vaddr, uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
 	struct vm_area_struct *vma;
@@ -351,7 +351,7 @@ put_old:
  */
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+	return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
 }
 
 /**
@@ -366,7 +366,8 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
 int __weak
 set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn);
+	return uprobe_write_opcode(auprobe, mm, vaddr,
+			*(uprobe_opcode_t *)&auprobe->insn);
 }
 
 static struct uprobe *get_uprobe(struct uprobe *uprobe)
-- 
cgit v1.2.3


From b72ae8cac0caff86fe414fceb940655c2d1371c9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Sun, 29 Jul 2018 16:16:56 +0300
Subject: PCI: Add PCI_DEVICE_DATA() macro to fully describe device ID entry

There are a lot of examples in the kernel where PCI_VDEVICE() is used and
still looks not so convenient due to additional driver_data field attached.

Introduce PCI_DEVICE_DATA() macro to fully describe device ID entry in
shortest possible form. For example,

  before:

    { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD),
      (kernel_ulong_t) &dwc3_pci_mrfld_properties, },

  after:

    { PCI_DEVICE_DATA(INTEL, MRFLD, &dwc3_pci_mrfld_properties) },

Drivers can be converted later on in independent way.

While here, remove the unused macro with the same name from Ralink wireless
driver.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Kalle Valo <kvalo@codeaurora.org>	# for rt2x00
---
 drivers/net/wireless/ralink/rt2x00/rt2x00pci.h |  6 ------
 include/linux/pci.h                            | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00pci.h b/drivers/net/wireless/ralink/rt2x00/rt2x00pci.h
index bc0ca5f58f38..283e2e607bba 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00pci.h
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00pci.h
@@ -27,12 +27,6 @@
 #include <linux/io.h>
 #include <linux/pci.h>
 
-/*
- * This variable should be used with the
- * pci_driver structure initialization.
- */
-#define PCI_DEVICE_DATA(__ops)	.driver_data = (kernel_ulong_t)(__ops)
-
 /*
  * PCI driver handlers.
  */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..bf3665c534c2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -819,6 +819,21 @@ struct pci_driver {
 	.vendor = PCI_VENDOR_ID_##vend, .device = (dev), \
 	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0
 
+/**
+ * PCI_DEVICE_DATA - macro used to describe a specific PCI device in very short form
+ * @vend: the vendor name (without PCI_VENDOR_ID_ prefix)
+ * @dev: the device name (without PCI_DEVICE_ID_<vend>_ prefix)
+ * @data: the driver data to be filled
+ *
+ * This macro is used to create a struct pci_device_id that matches a
+ * specific PCI device.  The subvendor, and subdevice fields will be set
+ * to PCI_ANY_ID.
+ */
+#define PCI_DEVICE_DATA(vend, dev, data) \
+	.vendor = PCI_VENDOR_ID_##vend, .device = PCI_DEVICE_ID_##vend##_##dev, \
+	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0, \
+	.driver_data = (kernel_ulong_t)(data)
+
 enum {
 	PCI_REASSIGN_ALL_RSRC	= 0x00000001,	/* Ignore firmware setup */
 	PCI_REASSIGN_ALL_BUS	= 0x00000002,	/* Reassign all bus numbers */
-- 
cgit v1.2.3


From 6855dc41b24619c3d1de3dbd27dd0546b0e45272 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 6 Jun 2018 15:54:11 +0300
Subject: x86: Add entry trampolines to kcore

Without program headers for PTI entry trampoline pages, the trampoline
virtual addresses do not map to anything.

Example before:

 sudo gdb --quiet vmlinux /proc/kcore
 Reading symbols from vmlinux...done.
 [New process 1]
 Core was generated by `BOOT_IMAGE=/boot/vmlinuz-4.16.0 root=UUID=a6096b83-b763-4101-807e-f33daff63233'.
 #0  0x0000000000000000 in irq_stack_union ()
 (gdb) x /21ib 0xfffffe0000006000
    0xfffffe0000006000:  Cannot access memory at address 0xfffffe0000006000
 (gdb) quit

After:

 sudo gdb --quiet vmlinux /proc/kcore
 [sudo] password for ahunter:
 Reading symbols from vmlinux...done.
 [New process 1]
 Core was generated by `BOOT_IMAGE=/boot/vmlinuz-4.16.0-fix-4-00005-gd6e65a8b4072 root=UUID=a6096b83-b7'.
 #0  0x0000000000000000 in irq_stack_union ()
 (gdb) x /21ib 0xfffffe0000006000
    0xfffffe0000006000:  swapgs
    0xfffffe0000006003:  mov    %rsp,-0x3e12(%rip)        # 0xfffffe00000021f8
    0xfffffe000000600a:  xchg   %ax,%ax
    0xfffffe000000600c:  mov    %cr3,%rsp
    0xfffffe000000600f:  bts    $0x3f,%rsp
    0xfffffe0000006014:  and    $0xffffffffffffe7ff,%rsp
    0xfffffe000000601b:  mov    %rsp,%cr3
    0xfffffe000000601e:  mov    -0x3019(%rip),%rsp        # 0xfffffe000000300c
    0xfffffe0000006025:  pushq  $0x2b
    0xfffffe0000006027:  pushq  -0x3e35(%rip)        # 0xfffffe00000021f8
    0xfffffe000000602d:  push   %r11
    0xfffffe000000602f:  pushq  $0x33
    0xfffffe0000006031:  push   %rcx
    0xfffffe0000006032:  push   %rdi
    0xfffffe0000006033:  mov    $0xffffffff91a00010,%rdi
    0xfffffe000000603a:  callq  0xfffffe0000006046
    0xfffffe000000603f:  pause
    0xfffffe0000006041:  lfence
    0xfffffe0000006044:  jmp    0xfffffe000000603f
    0xfffffe0000006046:  mov    %rdi,(%rsp)
    0xfffffe000000604a:  retq
 (gdb) quit

In addition, entry trampolines all map to the same page.  Represent that
by giving the corresponding program headers in kcore the same offset.

This has the benefit that, when perf tools uses /proc/kcore as a source
for kernel object code, samples from different CPU trampolines are
aggregated together.  Note, such aggregation is normal for profiling
i.e. people want to profile the object code, not every different virtual
address the object code might be mapped to (across different processes
for example).

Notes by PeterZ:

This also adds the KCORE_REMAP functionality.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Link: http://lkml.kernel.org/r/1528289651-4113-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/mm/cpu_entry_area.c | 10 ++++++++++
 fs/proc/kcore.c              |  7 +++++--
 include/linux/kcore.h        | 13 +++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index fab49fd5190f..076ebdce9bd4 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -3,6 +3,7 @@
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
 #include <linux/kallsyms.h>
+#include <linux/kcore.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/pgtable.h>
@@ -14,6 +15,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+static DEFINE_PER_CPU(struct kcore_list, kcore_entry_trampoline);
 #endif
 
 struct cpu_entry_area *get_cpu_entry_area(int cpu)
@@ -147,6 +149,14 @@ static void __init setup_cpu_entry_area(int cpu)
 
 	cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
 		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+	/*
+	 * The cpu_entry_area alias addresses are not in the kernel binary
+	 * so they do not show up in /proc/kcore normally.  This adds entries
+	 * for them manually.
+	 */
+	kclist_add_remap(&per_cpu(kcore_entry_trampoline, cpu),
+			 _entry_trampoline,
+			 &get_cpu_entry_area(cpu)->entry_trampoline, PAGE_SIZE);
 #endif
 	percpu_setup_debug_store(cpu);
 }
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e64ecb9f2720..00282f134336 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -383,8 +383,11 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 		phdr->p_type	= PT_LOAD;
 		phdr->p_flags	= PF_R|PF_W|PF_X;
 		phdr->p_offset	= kc_vaddr_to_offset(m->addr) + dataoff;
-		phdr->p_vaddr	= (size_t)m->addr;
-		if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+		if (m->type == KCORE_REMAP)
+			phdr->p_vaddr	= (size_t)m->vaddr;
+		else
+			phdr->p_vaddr	= (size_t)m->addr;
+		if (m->type == KCORE_RAM || m->type == KCORE_TEXT || m->type == KCORE_REMAP)
 			phdr->p_paddr	= __pa(m->addr);
 		else
 			phdr->p_paddr	= (elf_addr_t)-1;
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8de55e4b5ee9..bc088ef96358 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -12,11 +12,13 @@ enum kcore_type {
 	KCORE_VMEMMAP,
 	KCORE_USER,
 	KCORE_OTHER,
+	KCORE_REMAP,
 };
 
 struct kcore_list {
 	struct list_head list;
 	unsigned long addr;
+	unsigned long vaddr;
 	size_t size;
 	int type;
 };
@@ -36,11 +38,22 @@ struct vmcoredd_node {
 
 #ifdef CONFIG_PROC_KCORE
 extern void kclist_add(struct kcore_list *, void *, size_t, int type);
+static inline
+void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
+{
+	m->vaddr = (unsigned long)vaddr;
+	kclist_add(m, addr, sz, KCORE_REMAP);
+}
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 {
 }
+
+static inline
+void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
+{
+}
 #endif
 
 #endif /* _LINUX_KCORE_H */
-- 
cgit v1.2.3


From 2538fb89b8f4ee9e1c1759cfb3c7989d9ef1f6e7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 14 Aug 2018 16:48:50 -0700
Subject: PCI: Limit config space size for Netronome NFP5000

Like the NFP4000 and NFP6000, the NFP5000 as an erratum where reading/
writing to PCI config space addresses above 0x600 can cause the NFP to
generate PCIe completion timeouts.

Limit the NFP5000's PF's config space size to 0x600 bytes as is already
done for the NFP4000 and NFP6000.

The NFP5000's VF is 0x6003 (PCI_DEVICE_ID_NETRONOME_NFP6000_VF), the same
device ID as the NFP6000's VF.  Thus, its config space is already limited
by the existing use of quirk_nfp6000().

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Tony Egan <tony.egan@netronome.com>
---
 drivers/pci/quirks.c    | 1 +
 include/linux/pci_ids.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 502275c092ae..7743cd56b89a 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -460,6 +460,7 @@ static void quirk_nfp6000(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP4000,	quirk_nfp6000);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP6000,	quirk_nfp6000);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP5000,	quirk_nfp6000);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP6000_VF,	quirk_nfp6000);
 
 /*  On IBM Crocodile ipr SAS adapters, expand BAR to system page size */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 29502238e510..381fc009551d 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2541,6 +2541,7 @@
 #define PCI_DEVICE_ID_NETRONOME_NFP3200	0x3200
 #define PCI_DEVICE_ID_NETRONOME_NFP3240	0x3240
 #define PCI_DEVICE_ID_NETRONOME_NFP4000	0x4000
+#define PCI_DEVICE_ID_NETRONOME_NFP5000	0x5000
 #define PCI_DEVICE_ID_NETRONOME_NFP6000	0x6000
 #define PCI_DEVICE_ID_NETRONOME_NFP6000_VF	0x6003
 
-- 
cgit v1.2.3


From 30107fa6908b6c2747ee9100b40af813f99483c3 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Fri, 3 Aug 2018 14:08:14 +0300
Subject: mfd: bd71837: Core driver for ROHM BD71837 PMIC

ROHM BD71837 PMIC MFD driver providing interrupts and support
for three subsystems:
- clk
- Regulators
- input/power-key

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig              |  13 ++
 drivers/mfd/Makefile             |   1 +
 drivers/mfd/rohm-bd718x7.c       | 211 +++++++++++++++++++++++++
 include/linux/mfd/rohm-bd718x7.h | 332 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 557 insertions(+)
 create mode 100644 drivers/mfd/rohm-bd718x7.c
 create mode 100644 include/linux/mfd/rohm-bd718x7.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f3fa516011ec..11841f4b7b2b 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1817,6 +1817,19 @@ config MFD_STW481X
 	  in various ST Microelectronics and ST-Ericsson embedded
 	  Nomadik series.
 
+config MFD_ROHM_BD718XX
+	tristate "ROHM BD71837 Power Management IC"
+	depends on I2C=y
+	depends on OF
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	select MFD_CORE
+	help
+	  Select this option to get support for the ROHM BD71837
+	  Power Management ICs. BD71837 is designed to power processors like
+	  NXP i.MX8. It contains 8 BUCK outputs and 7 LDOs, voltage monitoring
+	  and emergency shut down as well as 32,768KHz clock output.
+
 config MFD_STM32_LPTIMER
 	tristate "Support for STM32 Low-Power Timer"
 	depends on (ARCH_STM32 && OF) || COMPILE_TEST
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 2852a6042ecf..5856a9489cbd 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -239,4 +239,5 @@ obj-$(CONFIG_MFD_STM32_TIMERS) 	+= stm32-timers.o
 obj-$(CONFIG_MFD_MXS_LRADC)     += mxs-lradc.o
 obj-$(CONFIG_MFD_SC27XX_PMIC)	+= sprd-sc27xx-spi.o
 obj-$(CONFIG_RAVE_SP_CORE)	+= rave-sp.o
+obj-$(CONFIG_MFD_ROHM_BD718XX)	+= rohm-bd718x7.o
 
diff --git a/drivers/mfd/rohm-bd718x7.c b/drivers/mfd/rohm-bd718x7.c
new file mode 100644
index 000000000000..75c8ec659547
--- /dev/null
+++ b/drivers/mfd/rohm-bd718x7.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+//
+// Copyright (C) 2018 ROHM Semiconductors
+//
+// ROHM BD71837MWV PMIC driver
+//
+// Datasheet available from
+// https://www.rohm.com/datasheet/BD71837MWV/bd71837mwv-e
+
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/rohm-bd718x7.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+
+/*
+ * gpio_keys.h requires definiton of bool. It is brought in
+ * by above includes. Keep this as last until gpio_keys.h gets fixed.
+ */
+#include <linux/gpio_keys.h>
+
+static const u8 supported_revisions[] = { 0xA2 /* BD71837 */ };
+
+static struct gpio_keys_button button = {
+	.code = KEY_POWER,
+	.gpio = -1,
+	.type = EV_KEY,
+};
+
+static struct gpio_keys_platform_data bd718xx_powerkey_data = {
+	.buttons = &button,
+	.nbuttons = 1,
+	.name = "bd718xx-pwrkey",
+};
+
+static struct mfd_cell bd71837_mfd_cells[] = {
+	{
+		.name = "gpio-keys",
+		.platform_data = &bd718xx_powerkey_data,
+		.pdata_size = sizeof(bd718xx_powerkey_data),
+	},
+	{ .name = "bd71837-clk", },
+	{ .name = "bd71837-pmic", },
+};
+
+static const struct regmap_irq bd71837_irqs[] = {
+	REGMAP_IRQ_REG(BD71837_INT_SWRST, 0, BD71837_INT_SWRST_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_PWRBTN_S, 0, BD71837_INT_PWRBTN_S_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_PWRBTN_L, 0, BD71837_INT_PWRBTN_L_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_PWRBTN, 0, BD71837_INT_PWRBTN_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_WDOG, 0, BD71837_INT_WDOG_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_ON_REQ, 0, BD71837_INT_ON_REQ_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_STBY_REQ, 0, BD71837_INT_STBY_REQ_MASK),
+};
+
+static struct regmap_irq_chip bd71837_irq_chip = {
+	.name = "bd71837-irq",
+	.irqs = bd71837_irqs,
+	.num_irqs = ARRAY_SIZE(bd71837_irqs),
+	.num_regs = 1,
+	.irq_reg_stride = 1,
+	.status_base = BD71837_REG_IRQ,
+	.mask_base = BD71837_REG_MIRQ,
+	.ack_base = BD71837_REG_IRQ,
+	.init_ack_masked = true,
+	.mask_invert = false,
+};
+
+static const struct regmap_range pmic_status_range = {
+	.range_min = BD71837_REG_IRQ,
+	.range_max = BD71837_REG_POW_STATE,
+};
+
+static const struct regmap_access_table volatile_regs = {
+	.yes_ranges = &pmic_status_range,
+	.n_yes_ranges = 1,
+};
+
+static const struct regmap_config bd71837_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.volatile_table = &volatile_regs,
+	.max_register = BD71837_MAX_REGISTER - 1,
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static int bd71837_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct bd71837 *bd71837;
+	int ret, i;
+	unsigned int val;
+
+	bd71837 = devm_kzalloc(&i2c->dev, sizeof(struct bd71837), GFP_KERNEL);
+
+	if (!bd71837)
+		return -ENOMEM;
+
+	bd71837->chip_irq = i2c->irq;
+
+	if (!bd71837->chip_irq) {
+		dev_err(&i2c->dev, "No IRQ configured\n");
+		return -EINVAL;
+	}
+
+	bd71837->dev = &i2c->dev;
+	dev_set_drvdata(&i2c->dev, bd71837);
+
+	bd71837->regmap = devm_regmap_init_i2c(i2c, &bd71837_regmap_config);
+	if (IS_ERR(bd71837->regmap)) {
+		dev_err(&i2c->dev, "regmap initialization failed\n");
+		return PTR_ERR(bd71837->regmap);
+	}
+
+	ret = regmap_read(bd71837->regmap, BD71837_REG_REV, &val);
+	if (ret) {
+		dev_err(&i2c->dev, "Read BD71837_REG_DEVICE failed\n");
+		return ret;
+	}
+	for (i = 0; i < ARRAY_SIZE(supported_revisions); i++)
+		if (supported_revisions[i] == val)
+			break;
+
+	if (i == ARRAY_SIZE(supported_revisions)) {
+		dev_err(&i2c->dev, "Unsupported chip revision\n");
+		return -ENODEV;
+	}
+
+	ret = devm_regmap_add_irq_chip(&i2c->dev, bd71837->regmap,
+				       bd71837->chip_irq, IRQF_ONESHOT, 0,
+				       &bd71837_irq_chip, &bd71837->irq_data);
+	if (ret) {
+		dev_err(&i2c->dev, "Failed to add irq_chip\n");
+		return ret;
+	}
+
+	/* Configure short press to 10 milliseconds */
+	ret = regmap_update_bits(bd71837->regmap,
+				 BD71837_REG_PWRONCONFIG0,
+				 BD718XX_PWRBTN_PRESS_DURATION_MASK,
+				 BD718XX_PWRBTN_SHORT_PRESS_10MS);
+	if (ret) {
+		dev_err(&i2c->dev,
+			"Failed to configure button short press timeout\n");
+		return ret;
+	}
+
+	/* Configure long press to 10 seconds */
+	ret = regmap_update_bits(bd71837->regmap,
+				 BD71837_REG_PWRONCONFIG1,
+				 BD718XX_PWRBTN_PRESS_DURATION_MASK,
+				 BD718XX_PWRBTN_LONG_PRESS_10S);
+
+	if (ret) {
+		dev_err(&i2c->dev,
+			"Failed to configure button long press timeout\n");
+		return ret;
+	}
+
+	ret = regmap_irq_get_virq(bd71837->irq_data, BD71837_INT_PWRBTN_S);
+
+	if (ret < 0) {
+		dev_err(&i2c->dev, "Failed to get the IRQ\n");
+		return ret;
+	}
+
+	button.irq = ret;
+
+	ret = devm_mfd_add_devices(bd71837->dev, PLATFORM_DEVID_AUTO,
+				   bd71837_mfd_cells,
+				   ARRAY_SIZE(bd71837_mfd_cells), NULL, 0,
+				   regmap_irq_get_domain(bd71837->irq_data));
+	if (ret)
+		dev_err(&i2c->dev, "Failed to create subdevices\n");
+
+	return ret;
+}
+
+static const struct of_device_id bd71837_of_match[] = {
+	{ .compatible = "rohm,bd71837", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, bd71837_of_match);
+
+static struct i2c_driver bd71837_i2c_driver = {
+	.driver = {
+		.name = "rohm-bd718x7",
+		.of_match_table = bd71837_of_match,
+	},
+	.probe = bd71837_i2c_probe,
+};
+
+static int __init bd71837_i2c_init(void)
+{
+	return i2c_add_driver(&bd71837_i2c_driver);
+}
+
+/* Initialise early so consumer devices can complete system boot */
+subsys_initcall(bd71837_i2c_init);
+
+static void __exit bd71837_i2c_exit(void)
+{
+	i2c_del_driver(&bd71837_i2c_driver);
+}
+module_exit(bd71837_i2c_exit);
+
+MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
+MODULE_DESCRIPTION("ROHM BD71837 Power Management IC driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h
new file mode 100644
index 000000000000..a528747f8aed
--- /dev/null
+++ b/include/linux/mfd/rohm-bd718x7.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (C) 2018 ROHM Semiconductors */
+
+#ifndef __LINUX_MFD_BD71837_H__
+#define __LINUX_MFD_BD71837_H__
+
+#include <linux/regmap.h>
+
+enum {
+	BD71837_BUCK1	=	0,
+	BD71837_BUCK2,
+	BD71837_BUCK3,
+	BD71837_BUCK4,
+	BD71837_BUCK5,
+	BD71837_BUCK6,
+	BD71837_BUCK7,
+	BD71837_BUCK8,
+	BD71837_LDO1,
+	BD71837_LDO2,
+	BD71837_LDO3,
+	BD71837_LDO4,
+	BD71837_LDO5,
+	BD71837_LDO6,
+	BD71837_LDO7,
+	BD71837_REGULATOR_CNT,
+};
+
+#define BD71837_BUCK1_VOLTAGE_NUM	0x40
+#define BD71837_BUCK2_VOLTAGE_NUM	0x40
+#define BD71837_BUCK3_VOLTAGE_NUM	0x40
+#define BD71837_BUCK4_VOLTAGE_NUM	0x40
+
+#define BD71837_BUCK5_VOLTAGE_NUM	0x08
+#define BD71837_BUCK6_VOLTAGE_NUM	0x04
+#define BD71837_BUCK7_VOLTAGE_NUM	0x08
+#define BD71837_BUCK8_VOLTAGE_NUM	0x40
+
+#define BD71837_LDO1_VOLTAGE_NUM	0x04
+#define BD71837_LDO2_VOLTAGE_NUM	0x02
+#define BD71837_LDO3_VOLTAGE_NUM	0x10
+#define BD71837_LDO4_VOLTAGE_NUM	0x10
+#define BD71837_LDO5_VOLTAGE_NUM	0x10
+#define BD71837_LDO6_VOLTAGE_NUM	0x10
+#define BD71837_LDO7_VOLTAGE_NUM	0x10
+
+enum {
+	BD71837_REG_REV                = 0x00,
+	BD71837_REG_SWRESET            = 0x01,
+	BD71837_REG_I2C_DEV            = 0x02,
+	BD71837_REG_PWRCTRL0           = 0x03,
+	BD71837_REG_PWRCTRL1           = 0x04,
+	BD71837_REG_BUCK1_CTRL         = 0x05,
+	BD71837_REG_BUCK2_CTRL         = 0x06,
+	BD71837_REG_BUCK3_CTRL         = 0x07,
+	BD71837_REG_BUCK4_CTRL         = 0x08,
+	BD71837_REG_BUCK5_CTRL         = 0x09,
+	BD71837_REG_BUCK6_CTRL         = 0x0A,
+	BD71837_REG_BUCK7_CTRL         = 0x0B,
+	BD71837_REG_BUCK8_CTRL         = 0x0C,
+	BD71837_REG_BUCK1_VOLT_RUN     = 0x0D,
+	BD71837_REG_BUCK1_VOLT_IDLE    = 0x0E,
+	BD71837_REG_BUCK1_VOLT_SUSP    = 0x0F,
+	BD71837_REG_BUCK2_VOLT_RUN     = 0x10,
+	BD71837_REG_BUCK2_VOLT_IDLE    = 0x11,
+	BD71837_REG_BUCK3_VOLT_RUN     = 0x12,
+	BD71837_REG_BUCK4_VOLT_RUN     = 0x13,
+	BD71837_REG_BUCK5_VOLT         = 0x14,
+	BD71837_REG_BUCK6_VOLT         = 0x15,
+	BD71837_REG_BUCK7_VOLT         = 0x16,
+	BD71837_REG_BUCK8_VOLT         = 0x17,
+	BD71837_REG_LDO1_VOLT          = 0x18,
+	BD71837_REG_LDO2_VOLT          = 0x19,
+	BD71837_REG_LDO3_VOLT          = 0x1A,
+	BD71837_REG_LDO4_VOLT          = 0x1B,
+	BD71837_REG_LDO5_VOLT          = 0x1C,
+	BD71837_REG_LDO6_VOLT          = 0x1D,
+	BD71837_REG_LDO7_VOLT          = 0x1E,
+	BD71837_REG_TRANS_COND0        = 0x1F,
+	BD71837_REG_TRANS_COND1        = 0x20,
+	BD71837_REG_VRFAULTEN          = 0x21,
+	BD71837_REG_MVRFLTMASK0        = 0x22,
+	BD71837_REG_MVRFLTMASK1        = 0x23,
+	BD71837_REG_MVRFLTMASK2        = 0x24,
+	BD71837_REG_RCVCFG             = 0x25,
+	BD71837_REG_RCVNUM             = 0x26,
+	BD71837_REG_PWRONCONFIG0       = 0x27,
+	BD71837_REG_PWRONCONFIG1       = 0x28,
+	BD71837_REG_RESETSRC           = 0x29,
+	BD71837_REG_MIRQ               = 0x2A,
+	BD71837_REG_IRQ                = 0x2B,
+	BD71837_REG_IN_MON             = 0x2C,
+	BD71837_REG_POW_STATE          = 0x2D,
+	BD71837_REG_OUT32K             = 0x2E,
+	BD71837_REG_REGLOCK            = 0x2F,
+	BD71837_REG_OTPVER             = 0xFF,
+	BD71837_MAX_REGISTER           = 0x100,
+};
+
+#define REGLOCK_PWRSEQ	0x1
+#define REGLOCK_VREG	0x10
+
+/* Generic BUCK control masks */
+#define BD71837_BUCK_SEL	0x02
+#define BD71837_BUCK_EN		0x01
+#define BD71837_BUCK_RUN_ON	0x04
+
+/* Generic LDO masks */
+#define BD71837_LDO_SEL		0x80
+#define BD71837_LDO_EN		0x40
+
+/* BD71837 BUCK ramp rate CTRL reg bits */
+#define BUCK_RAMPRATE_MASK	0xC0
+#define BUCK_RAMPRATE_10P00MV	0x0
+#define BUCK_RAMPRATE_5P00MV	0x1
+#define BUCK_RAMPRATE_2P50MV	0x2
+#define BUCK_RAMPRATE_1P25MV	0x3
+
+/* BD71837_REG_BUCK1_VOLT_RUN bits */
+#define BUCK1_RUN_MASK		0x3F
+#define BUCK1_RUN_DEFAULT	0x14
+
+/* BD71837_REG_BUCK1_VOLT_SUSP bits */
+#define BUCK1_SUSP_MASK		0x3F
+#define BUCK1_SUSP_DEFAULT	0x14
+
+/* BD71837_REG_BUCK1_VOLT_IDLE bits */
+#define BUCK1_IDLE_MASK		0x3F
+#define BUCK1_IDLE_DEFAULT	0x14
+
+/* BD71837_REG_BUCK2_VOLT_RUN bits */
+#define BUCK2_RUN_MASK		0x3F
+#define BUCK2_RUN_DEFAULT	0x1E
+
+/* BD71837_REG_BUCK2_VOLT_IDLE bits */
+#define BUCK2_IDLE_MASK		0x3F
+#define BUCK2_IDLE_DEFAULT	0x14
+
+/* BD71837_REG_BUCK3_VOLT_RUN bits */
+#define BUCK3_RUN_MASK		0x3F
+#define BUCK3_RUN_DEFAULT	0x1E
+
+/* BD71837_REG_BUCK4_VOLT_RUN bits */
+#define BUCK4_RUN_MASK		0x3F
+#define BUCK4_RUN_DEFAULT	0x1E
+
+/* BD71837_REG_BUCK5_VOLT bits */
+#define BUCK5_MASK		0x07
+#define BUCK5_DEFAULT		0x02
+
+/* BD71837_REG_BUCK6_VOLT bits */
+#define BUCK6_MASK		0x03
+#define BUCK6_DEFAULT		0x03
+
+/* BD71837_REG_BUCK7_VOLT bits */
+#define BUCK7_MASK		0x07
+#define BUCK7_DEFAULT		0x03
+
+/* BD71837_REG_BUCK8_VOLT bits */
+#define BUCK8_MASK		0x3F
+#define BUCK8_DEFAULT		0x1E
+
+/* BD71837_REG_IRQ bits */
+#define IRQ_SWRST		0x40
+#define IRQ_PWRON_S		0x20
+#define IRQ_PWRON_L		0x10
+#define IRQ_PWRON		0x08
+#define IRQ_WDOG		0x04
+#define IRQ_ON_REQ		0x02
+#define IRQ_STBY_REQ		0x01
+
+/* BD71837_REG_OUT32K bits */
+#define BD71837_OUT32K_EN	0x01
+
+/* BD71837 gated clock rate */
+#define BD71837_CLK_RATE 32768
+
+/* ROHM BD71837 irqs */
+enum {
+	BD71837_INT_STBY_REQ,
+	BD71837_INT_ON_REQ,
+	BD71837_INT_WDOG,
+	BD71837_INT_PWRBTN,
+	BD71837_INT_PWRBTN_L,
+	BD71837_INT_PWRBTN_S,
+	BD71837_INT_SWRST
+};
+
+/* ROHM BD71837 interrupt masks */
+#define BD71837_INT_SWRST_MASK		0x40
+#define BD71837_INT_PWRBTN_S_MASK	0x20
+#define BD71837_INT_PWRBTN_L_MASK	0x10
+#define BD71837_INT_PWRBTN_MASK		0x8
+#define BD71837_INT_WDOG_MASK		0x4
+#define BD71837_INT_ON_REQ_MASK		0x2
+#define BD71837_INT_STBY_REQ_MASK	0x1
+
+/* BD71837_REG_LDO1_VOLT bits */
+#define LDO1_MASK		0x03
+
+/* BD71837_REG_LDO1_VOLT bits */
+#define LDO2_MASK		0x20
+
+/* BD71837_REG_LDO3_VOLT bits */
+#define LDO3_MASK		0x0F
+
+/* BD71837_REG_LDO4_VOLT bits */
+#define LDO4_MASK		0x0F
+
+/* BD71837_REG_LDO5_VOLT bits */
+#define LDO5_MASK		0x0F
+
+/* BD71837_REG_LDO6_VOLT bits */
+#define LDO6_MASK		0x0F
+
+/* BD71837_REG_LDO7_VOLT bits */
+#define LDO7_MASK		0x0F
+
+/* Register write induced reset settings */
+
+/*
+ * Even though the bit zero is not SWRESET type we still want to write zero
+ * to it when changing type. Bit zero is 'SWRESET' trigger bit and if we
+ * write 1 to it we will trigger the action. So always write 0 to it when
+ * changning SWRESET action - no matter what we read from it.
+ */
+#define BD71837_SWRESET_TYPE_MASK	7
+#define BD71837_SWRESET_TYPE_DISABLED	0
+#define BD71837_SWRESET_TYPE_COLD	4
+#define BD71837_SWRESET_TYPE_WARM	6
+
+#define BD71837_SWRESET_RESET_MASK	1
+#define BD71837_SWRESET_RESET		1
+
+/* Poweroff state transition conditions */
+
+#define BD718XX_ON_REQ_POWEROFF_MASK	1
+#define BD718XX_SWRESET_POWEROFF_MASK	2
+#define BD718XX_WDOG_POWEROFF_MASK	4
+#define BD718XX_KEY_L_POWEROFF_MASK	8
+
+#define BD718XX_POWOFF_TO_SNVS	0
+#define BD718XX_POWOFF_TO_RDY	0xF
+
+#define BD718XX_POWOFF_TIME_MASK 0xF0
+enum {
+	BD718XX_POWOFF_TIME_5MS = 0,
+	BD718XX_POWOFF_TIME_10MS,
+	BD718XX_POWOFF_TIME_15MS,
+	BD718XX_POWOFF_TIME_20MS,
+	BD718XX_POWOFF_TIME_25MS,
+	BD718XX_POWOFF_TIME_30MS,
+	BD718XX_POWOFF_TIME_35MS,
+	BD718XX_POWOFF_TIME_40MS,
+	BD718XX_POWOFF_TIME_45MS,
+	BD718XX_POWOFF_TIME_50MS,
+	BD718XX_POWOFF_TIME_75MS,
+	BD718XX_POWOFF_TIME_100MS,
+	BD718XX_POWOFF_TIME_250MS,
+	BD718XX_POWOFF_TIME_500MS,
+	BD718XX_POWOFF_TIME_750MS,
+	BD718XX_POWOFF_TIME_1500MS
+};
+
+/* Poweron sequence state transition conditions */
+#define BD718XX_RDY_TO_SNVS_MASK 0xF
+#define BD718XX_SNVS_TO_RUN_MASK 0xF0
+
+#define BD718XX_PWR_TRIG_KEY_L		1
+#define BD718XX_PWR_TRIG_KEY_S		2
+#define BD718XX_PWR_TRIG_PMIC_ON	4
+#define BD718XX_PWR_TRIG_VSYS_UVLO	8
+#define BD718XX_RDY_TO_SNVS_SIFT	0
+#define BD718XX_SNVS_TO_RUN_SIFT	4
+
+#define BD718XX_PWRBTN_PRESS_DURATION_MASK 0xF
+
+/* Timeout value for detecting short press */
+enum {
+	BD718XX_PWRBTN_SHORT_PRESS_10MS = 0,
+	BD718XX_PWRBTN_SHORT_PRESS_500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_1000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_1500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_2000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_2500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_3000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_3500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_4000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_4500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_5000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_5500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_6000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_6500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_7000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_7500MS
+};
+
+/* Timeout value for detecting LONG press */
+enum {
+	BD718XX_PWRBTN_LONG_PRESS_10MS = 0,
+	BD718XX_PWRBTN_LONG_PRESS_1S,
+	BD718XX_PWRBTN_LONG_PRESS_2S,
+	BD718XX_PWRBTN_LONG_PRESS_3S,
+	BD718XX_PWRBTN_LONG_PRESS_4S,
+	BD718XX_PWRBTN_LONG_PRESS_5S,
+	BD718XX_PWRBTN_LONG_PRESS_6S,
+	BD718XX_PWRBTN_LONG_PRESS_7S,
+	BD718XX_PWRBTN_LONG_PRESS_8S,
+	BD718XX_PWRBTN_LONG_PRESS_9S,
+	BD718XX_PWRBTN_LONG_PRESS_10S,
+	BD718XX_PWRBTN_LONG_PRESS_11S,
+	BD718XX_PWRBTN_LONG_PRESS_12S,
+	BD718XX_PWRBTN_LONG_PRESS_13S,
+	BD718XX_PWRBTN_LONG_PRESS_14S,
+	BD718XX_PWRBTN_LONG_PRESS_15S
+};
+
+struct bd71837_pmic;
+struct bd71837_clk;
+
+struct bd71837 {
+	struct device *dev;
+	struct regmap *regmap;
+	unsigned long int id;
+
+	int chip_irq;
+	struct regmap_irq_chip_data *irq_data;
+
+	struct bd71837_pmic *pmic;
+	struct bd71837_clk *clk;
+};
+
+#endif /* __LINUX_MFD_BD71837_H__ */
-- 
cgit v1.2.3


From d2c9281c184bf2b768ac141a7a10586e0643695d Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Tue, 7 Aug 2018 10:08:12 +0100
Subject: mfd: madera: Add register definitions for accessory detect

Add some register definitions for accessory detection, used
by the extcon driver.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/madera/registers.h | 51 ++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/madera/registers.h b/include/linux/mfd/madera/registers.h
index a6b7c4222c5e..977e06101711 100644
--- a/include/linux/mfd/madera/registers.h
+++ b/include/linux/mfd/madera/registers.h
@@ -1219,9 +1219,11 @@
 #define MADERA_IRQ1_STATUS_33				0x1820
 #define MADERA_IRQ1_MASK_1				0x1840
 #define MADERA_IRQ1_MASK_2				0x1841
+#define MADERA_IRQ1_MASK_6				0x1845
 #define MADERA_IRQ1_MASK_33				0x1860
 #define MADERA_IRQ1_RAW_STATUS_1			0x1880
 #define MADERA_IRQ1_RAW_STATUS_2			0x1881
+#define MADERA_IRQ1_RAW_STATUS_7			0x1886
 #define MADERA_IRQ1_RAW_STATUS_15			0x188E
 #define MADERA_IRQ1_RAW_STATUS_33			0x18A0
 #define MADERA_INTERRUPT_DEBOUNCE_7			0x1A06
@@ -1664,6 +1666,42 @@
 #define MADERA_MICB2A_ENA_SHIFT				     0
 #define MADERA_MICB2A_ENA_WIDTH				     1
 
+/* (0x0225) - HP Ctrl 1L */
+#define MADERA_RMV_SHRT_HP1L				0x4000
+#define MADERA_RMV_SHRT_HP1L_MASK			0x4000
+#define MADERA_RMV_SHRT_HP1L_SHIFT			    14
+#define MADERA_RMV_SHRT_HP1L_WIDTH			     1
+#define MADERA_HP1L_FLWR				0x0004
+#define MADERA_HP1L_FLWR_MASK				0x0004
+#define MADERA_HP1L_FLWR_SHIFT				     2
+#define MADERA_HP1L_FLWR_WIDTH				     1
+#define MADERA_HP1L_SHRTI				0x0002
+#define MADERA_HP1L_SHRTI_MASK				0x0002
+#define MADERA_HP1L_SHRTI_SHIFT				     1
+#define MADERA_HP1L_SHRTI_WIDTH				     1
+#define MADERA_HP1L_SHRTO				0x0001
+#define MADERA_HP1L_SHRTO_MASK				0x0001
+#define MADERA_HP1L_SHRTO_SHIFT				     0
+#define MADERA_HP1L_SHRTO_WIDTH				     1
+
+/* (0x0226) - HP Ctrl 1R */
+#define MADERA_RMV_SHRT_HP1R				0x4000
+#define MADERA_RMV_SHRT_HP1R_MASK			0x4000
+#define MADERA_RMV_SHRT_HP1R_SHIFT			    14
+#define MADERA_RMV_SHRT_HP1R_WIDTH			     1
+#define MADERA_HP1R_FLWR				0x0004
+#define MADERA_HP1R_FLWR_MASK				0x0004
+#define MADERA_HP1R_FLWR_SHIFT				     2
+#define MADERA_HP1R_FLWR_WIDTH				     1
+#define MADERA_HP1R_SHRTI				0x0002
+#define MADERA_HP1R_SHRTI_MASK				0x0002
+#define MADERA_HP1R_SHRTI_SHIFT				     1
+#define MADERA_HP1R_SHRTI_WIDTH				     1
+#define MADERA_HP1R_SHRTO				0x0001
+#define MADERA_HP1R_SHRTO_MASK				0x0001
+#define MADERA_HP1R_SHRTO_SHIFT				     0
+#define MADERA_HP1R_SHRTO_WIDTH				     1
+
 /* (0x0293)  Accessory_Detect_Mode_1 */
 #define MADERA_ACCDET_SRC				0x2000
 #define MADERA_ACCDET_SRC_MASK				0x2000
@@ -3766,6 +3804,19 @@
 #define MADERA_DSP1_BUS_ERR_EINT1_SHIFT			     0
 #define MADERA_DSP1_BUS_ERR_EINT1_WIDTH			     1
 
+/* (0x1845)  IRQ1_Mask_6 */
+#define MADERA_IM_MICDET2_EINT1				0x0200
+#define MADERA_IM_MICDET2_EINT1_MASK			0x0200
+#define MADERA_IM_MICDET2_EINT1_SHIFT			     9
+#define MADERA_IM_MICDET2_EINT1_WIDTH			     1
+#define MADERA_IM_MICDET1_EINT1				0x0100
+#define MADERA_IM_MICDET1_EINT1_MASK			0x0100
+#define MADERA_IM_MICDET1_EINT1_SHIFT			     8
+#define MADERA_IM_MICDET1_EINT1_WIDTH			     1
+#define MADERA_IM_HPDET_EINT1				0x0001
+#define MADERA_IM_HPDET_EINT1_MASK			0x0001
+#define MADERA_IM_HPDET_EINT1_SHIFT			     0
+#define MADERA_IM_HPDET_EINT1_WIDTH			     1
 /* (0x184E)  IRQ1_Mask_15 */
 #define MADERA_IM_SPK_OVERHEAT_WARN_EINT1		0x0004
 #define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_MASK		0x0004
-- 
cgit v1.2.3


From 817aef260037f33ee0f44c17fe341323d3aebd6d Mon Sep 17 00:00:00 2001
From: Yannik Sembritzki <yannik@sembritzki.me>
Date: Thu, 16 Aug 2018 14:05:10 +0100
Subject: Replace magic for trusting the secondary keyring with #define

Replace the use of a magic number that indicates that verify_*_signature()
should use the secondary keyring with a symbol.

Signed-off-by: Yannik Sembritzki <yannik@sembritzki.me>
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: keyrings@vger.kernel.org
Cc: linux-security-module@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 certs/system_keyring.c                  | 3 ++-
 crypto/asymmetric_keys/pkcs7_key_type.c | 2 +-
 include/linux/verification.h            | 6 ++++++
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 6251d1b27f0c..81728717523d 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -15,6 +15,7 @@
 #include <linux/cred.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/verification.h>
 #include <keys/asymmetric-type.h>
 #include <keys/system_keyring.h>
 #include <crypto/pkcs7.h>
@@ -230,7 +231,7 @@ int verify_pkcs7_signature(const void *data, size_t len,
 
 	if (!trusted_keys) {
 		trusted_keys = builtin_trusted_keys;
-	} else if (trusted_keys == (void *)1UL) {
+	} else if (trusted_keys == VERIFY_USE_SECONDARY_KEYRING) {
 #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
 		trusted_keys = secondary_trusted_keys;
 #else
diff --git a/crypto/asymmetric_keys/pkcs7_key_type.c b/crypto/asymmetric_keys/pkcs7_key_type.c
index e284d9cb9237..5b2f6a2b5585 100644
--- a/crypto/asymmetric_keys/pkcs7_key_type.c
+++ b/crypto/asymmetric_keys/pkcs7_key_type.c
@@ -63,7 +63,7 @@ static int pkcs7_preparse(struct key_preparsed_payload *prep)
 
 	return verify_pkcs7_signature(NULL, 0,
 				      prep->data, prep->datalen,
-				      (void *)1UL, usage,
+				      VERIFY_USE_SECONDARY_KEYRING, usage,
 				      pkcs7_view_content, prep);
 }
 
diff --git a/include/linux/verification.h b/include/linux/verification.h
index a10549a6c7cd..cfa4730d607a 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h
@@ -12,6 +12,12 @@
 #ifndef _LINUX_VERIFICATION_H
 #define _LINUX_VERIFICATION_H
 
+/*
+ * Indicate that both builtin trusted keys and secondary trusted keys
+ * should be used.
+ */
+#define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL)
+
 /*
  * The use to which an asymmetric key is being put.
  */
-- 
cgit v1.2.3


From 14d32b2525dd3a21c29b0d9edf1748f5dabc1bbc Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Tue, 31 Jul 2018 18:03:32 +0200
Subject: jiffies: add utility function to calculate delta in ms

add jiffies_delta_to_msecs() helper func to calculate the delta between
two times and eventually 0 if negative.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Matteo Croce <mcroce@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/jiffies.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index a27cf6652327..fa928242567d 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -447,6 +447,11 @@ static inline clock_t jiffies_delta_to_clock_t(long delta)
 	return jiffies_to_clock_t(max(0L, delta));
 }
 
+static inline unsigned int jiffies_delta_to_msecs(long delta)
+{
+	return jiffies_to_msecs(max(0L, delta));
+}
+
 extern unsigned long clock_t_to_jiffies(unsigned long x);
 extern u64 jiffies_64_to_clock_t(u64 x);
 extern u64 nsec_to_clock_t(u64 x);
-- 
cgit v1.2.3


From ff93bca769925a2d8fd7f910cdf543d992e17f07 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 14 Aug 2018 15:21:31 -0700
Subject: ila: make lockdep happy again

Previously, alloc_ila_locks() and bucket_table_alloc() call
spin_lock_init() separately, therefore they have two different
lock names and lock class keys. However, after commit b893281715ab
("ila: Call library function alloc_bucket_locks") they both call
helper alloc_bucket_spinlocks() which now only has one lock
name and lock class key. This causes a few bogus lockdep warnings
as reported by syzbot.

Fix this by making alloc_bucket_locks() a macro and pass declaration
name as lock name and a static lock class key inside the macro.

Fixes: b893281715ab ("ila: Call library function alloc_bucket_locks")
Reported-by: <syzbot+b66a5a554991a8ed027c@syzkaller.appspotmail.com>
Cc: Tom Herbert <tom@quantonium.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/spinlock.h | 17 ++++++++++++++---
 lib/bucket_locks.c       | 11 +++++++----
 2 files changed, 21 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 3190997df9ca..e089157dcf97 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -451,9 +451,20 @@ extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
 #define atomic_dec_and_lock_irqsave(atomic, lock, flags) \
 		__cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)))
 
-int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
-			   size_t max_size, unsigned int cpu_mult,
-			   gfp_t gfp);
+int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
+			     size_t max_size, unsigned int cpu_mult,
+			     gfp_t gfp, const char *name,
+			     struct lock_class_key *key);
+
+#define alloc_bucket_spinlocks(locks, lock_mask, max_size, cpu_mult, gfp)    \
+	({								     \
+		static struct lock_class_key key;			     \
+		int ret;						     \
+									     \
+		ret = __alloc_bucket_spinlocks(locks, lock_mask, max_size,   \
+					       cpu_mult, gfp, #locks, &key); \
+		ret;							     \
+	})
 
 void free_bucket_spinlocks(spinlock_t *locks);
 
diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c
index ade3ce6c4af6..64b92e1dbace 100644
--- a/lib/bucket_locks.c
+++ b/lib/bucket_locks.c
@@ -11,8 +11,9 @@
  * to a power of 2 to be suitable as a hash table.
  */
 
-int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
-			   size_t max_size, unsigned int cpu_mult, gfp_t gfp)
+int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
+			     size_t max_size, unsigned int cpu_mult, gfp_t gfp,
+			     const char *name, struct lock_class_key *key)
 {
 	spinlock_t *tlocks = NULL;
 	unsigned int i, size;
@@ -33,8 +34,10 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
 		tlocks = kvmalloc_array(size, sizeof(spinlock_t), gfp);
 		if (!tlocks)
 			return -ENOMEM;
-		for (i = 0; i < size; i++)
+		for (i = 0; i < size; i++) {
 			spin_lock_init(&tlocks[i]);
+			lockdep_init_map(&tlocks[i].dep_map, name, key, 0);
+		}
 	}
 
 	*locks = tlocks;
@@ -42,7 +45,7 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
 
 	return 0;
 }
-EXPORT_SYMBOL(alloc_bucket_spinlocks);
+EXPORT_SYMBOL(__alloc_bucket_spinlocks);
 
 void free_bucket_spinlocks(spinlock_t *locks)
 {
-- 
cgit v1.2.3


From f6069b9aa9934ede26f41ac0781fce241279ad43 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 17 Aug 2018 23:26:14 +0200
Subject: bpf: fix redirect to map under tail calls

Commits 109980b894e9 ("bpf: don't select potentially stale ri->map
from buggy xdp progs") and 7c3001313396 ("bpf: fix ri->map_owner
pointer on bpf_prog_realloc") tried to mitigate that buggy programs
using bpf_redirect_map() helper call do not leave stale maps behind.
Idea was to add a map_owner cookie into the per CPU struct redirect_info
which was set to prog->aux by the prog making the helper call as a
proof that the map is not stale since the prog is implicitly holding
a reference to it. This owner cookie could later on get compared with
the program calling into BPF whether they match and therefore the
redirect could proceed with processing the map safely.

In (obvious) hindsight, this approach breaks down when tail calls are
involved since the original caller's prog->aux pointer does not have
to match the one from one of the progs out of the tail call chain,
and therefore the xdp buffer will be dropped instead of redirected.
A way around that would be to fix the issue differently (which also
allows to remove related work in fast path at the same time): once
the life-time of a redirect map has come to its end we use it's map
free callback where we need to wait on synchronize_rcu() for current
outstanding xdp buffers and remove such a map pointer from the
redirect info if found to be present. At that time no program is
using this map anymore so we simply invalidate the map pointers to
NULL iff they previously pointed to that instance while making sure
that the redirect path only reads out the map once.

Fixes: 97f91a7cf04f ("bpf: add bpf_redirect_map helper routine")
Fixes: 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs")
Reported-by: Sebastiano Miano <sebastiano.miano@polito.it>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h     |  3 +-
 include/trace/events/xdp.h |  5 ++--
 kernel/bpf/cpumap.c        |  2 ++
 kernel/bpf/devmap.c        |  1 +
 kernel/bpf/verifier.c      | 21 --------------
 kernel/bpf/xskmap.c        |  1 +
 net/core/filter.c          | 68 ++++++++++++++++++++--------------------------
 7 files changed, 38 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5d565c50bcb2..6791a0ac0139 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -543,7 +543,6 @@ struct bpf_redirect_info {
 	u32 flags;
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
-	unsigned long   map_owner;
 	u32 kern_flags;
 };
 
@@ -781,6 +780,8 @@ static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 
+void bpf_clear_redirect_map(struct bpf_map *map);
+
 static inline bool xdp_return_frame_no_direct(void)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 1ecf4c67fcf7..e95cb86b65cf 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -147,9 +147,8 @@ struct _bpf_dtab_netdev {
 
 #define devmap_ifindex(fwd, map)				\
 	(!fwd ? 0 :						\
-	 (!map ? 0 :						\
-	  ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
-	   ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
+	 ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
+	  ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))
 
 #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)		\
 	 trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map),	\
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 620bc5024d7d..24aac0d0f412 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -479,6 +479,8 @@ static void cpu_map_free(struct bpf_map *map)
 	 * It does __not__ ensure pending flush operations (if any) are
 	 * complete.
 	 */
+
+	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* To ensure all pending flush operations have completed wait for flush
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index ac1df79f3788..141710b82a6c 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -161,6 +161,7 @@ static void dev_map_free(struct bpf_map *map)
 	list_del_rcu(&dtab->list);
 	spin_unlock(&dev_map_lock);
 
+	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* To ensure all pending flush operations have completed wait for flush
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ca90679a7fe5..92246117d2b0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5844,27 +5844,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			goto patch_call_imm;
 		}
 
-		if (insn->imm == BPF_FUNC_redirect_map) {
-			/* Note, we cannot use prog directly as imm as subsequent
-			 * rewrites would still change the prog pointer. The only
-			 * stable address we can use is aux, which also works with
-			 * prog clones during blinding.
-			 */
-			u64 addr = (unsigned long)prog->aux;
-			struct bpf_insn r4_ld[] = {
-				BPF_LD_IMM64(BPF_REG_4, addr),
-				*insn,
-			};
-			cnt = ARRAY_SIZE(r4_ld);
-
-			new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-		}
 patch_call_imm:
 		fn = env->ops->get_func_proto(insn->imm, env->prog);
 		/* all functions that have prototype and verifier allowed
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 4ddf61e158f6..9f8463afda9c 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -75,6 +75,7 @@ static void xsk_map_free(struct bpf_map *map)
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
 	int i;
 
+	bpf_clear_redirect_map(map);
 	synchronize_net();
 
 	for (i = 0; i < map->max_entries; i++) {
diff --git a/net/core/filter.c b/net/core/filter.c
index fd423ce3da34..c25eb36f1320 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3246,31 +3246,33 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
 	}
 }
 
-static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
-				   unsigned long aux)
+void bpf_clear_redirect_map(struct bpf_map *map)
 {
-	return (unsigned long)xdp_prog->aux != aux;
+	struct bpf_redirect_info *ri;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		ri = per_cpu_ptr(&bpf_redirect_info, cpu);
+		/* Avoid polluting remote cacheline due to writes if
+		 * not needed. Once we pass this test, we need the
+		 * cmpxchg() to make sure it hasn't been changed in
+		 * the meantime by remote CPU.
+		 */
+		if (unlikely(READ_ONCE(ri->map) == map))
+			cmpxchg(&ri->map, map, NULL);
+	}
 }
 
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
-			       struct bpf_prog *xdp_prog)
+			       struct bpf_prog *xdp_prog, struct bpf_map *map)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	unsigned long map_owner = ri->map_owner;
-	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
 	void *fwd = NULL;
 	int err;
 
 	ri->ifindex = 0;
-	ri->map = NULL;
-	ri->map_owner = 0;
-
-	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
-		err = -EFAULT;
-		map = NULL;
-		goto err;
-	}
+	WRITE_ONCE(ri->map, NULL);
 
 	fwd = __xdp_map_lookup_elem(map, index);
 	if (!fwd) {
@@ -3296,12 +3298,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		    struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_map *map = READ_ONCE(ri->map);
 	struct net_device *fwd;
 	u32 index = ri->ifindex;
 	int err;
 
-	if (ri->map)
-		return xdp_do_redirect_map(dev, xdp, xdp_prog);
+	if (map)
+		return xdp_do_redirect_map(dev, xdp, xdp_prog, map);
 
 	fwd = dev_get_by_index_rcu(dev_net(dev), index);
 	ri->ifindex = 0;
@@ -3325,24 +3328,17 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect);
 static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct xdp_buff *xdp,
-				       struct bpf_prog *xdp_prog)
+				       struct bpf_prog *xdp_prog,
+				       struct bpf_map *map)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	unsigned long map_owner = ri->map_owner;
-	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
 	void *fwd = NULL;
 	int err = 0;
 
 	ri->ifindex = 0;
-	ri->map = NULL;
-	ri->map_owner = 0;
+	WRITE_ONCE(ri->map, NULL);
 
-	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
-		err = -EFAULT;
-		map = NULL;
-		goto err;
-	}
 	fwd = __xdp_map_lookup_elem(map, index);
 	if (unlikely(!fwd)) {
 		err = -EINVAL;
@@ -3379,13 +3375,14 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_map *map = READ_ONCE(ri->map);
 	u32 index = ri->ifindex;
 	struct net_device *fwd;
 	int err = 0;
 
-	if (ri->map)
-		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog);
-
+	if (map)
+		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
+						   map);
 	ri->ifindex = 0;
 	fwd = dev_get_by_index_rcu(dev_net(dev), index);
 	if (unlikely(!fwd)) {
@@ -3416,8 +3413,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
-	ri->map = NULL;
-	ri->map_owner = 0;
+	WRITE_ONCE(ri->map, NULL);
 
 	return XDP_REDIRECT;
 }
@@ -3430,8 +3426,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
-	   unsigned long, map_owner)
+BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
+	   u64, flags)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
@@ -3440,15 +3436,11 @@ BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
-	ri->map = map;
-	ri->map_owner = map_owner;
+	WRITE_ONCE(ri->map, map);
 
 	return XDP_REDIRECT;
 }
 
-/* Note, arg4 is hidden from users and populated by the verifier
- * with the right pointer.
- */
 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.func           = bpf_xdp_redirect_map,
 	.gpl_only       = false,
-- 
cgit v1.2.3


From e36488c83b6d871b35dd555afb2d434bd61687cf Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 17 Aug 2018 15:43:36 -0700
Subject: bitfield: avoid gcc-8 -Wint-in-bool-context warning

Passing an enum into FIELD_GET() produces a long but harmless warning on
newer compilers:

                   from include/linux/linkage.h:7,
                   from include/linux/kernel.h:7,
                   from include/linux/skbuff.h:17,
                   from include/linux/if_ether.h:23,
                   from include/linux/etherdevice.h:25,
                   from drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:63:
  drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c: In function 'iwl_mvm_rx_mpdu_mq':
  include/linux/bitfield.h:56:20: error: enum constant in boolean context [-Werror=int-in-bool-context]
     BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero"); \
                      ^
  ...
  include/linux/bitfield.h:103:3: note: in expansion of macro '__BF_FIELD_CHECK'
     __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \
     ^~~~~~~~~~~~~~~~
  drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:1025:21: note: in expansion of macro 'FIELD_GET'
      le16_encode_bits(FIELD_GET(IWL_RX_HE_PHY_SIBG_SYM_OR_USER_NUM_MASK,

The problem here is that the caller has no idea how the macro gets
expanding, leading to a false-positive.  It can be trivially avoided by
doing a comparison against zero.

This only recently started appearing as the iwlwifi driver was patched
to use FIELD_GET.

Link: http://lkml.kernel.org/r/20180813220950.194841-1-arnd@arndb.de
Fixes: 514c30696fbc ("iwlwifi: add support for IEEE802.11ax")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitfield.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index 65a6981eef7b..3f1ef4450a7c 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -53,7 +53,7 @@
 	({								\
 		BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask),		\
 				 _pfx "mask is not constant");		\
-		BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero");	\
+		BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero");	\
 		BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ?		\
 				 ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
 				 _pfx "value too large for the field"); \
-- 
cgit v1.2.3


From 4cdfffc8722e99be8d400d8fa1fcd615d078ad43 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 17 Aug 2018 15:44:37 -0700
Subject: vfs: discard ATTR_ATTR_FLAG

This flag was introduce in 2.1.37pre1 and the only place it was tested
was removed in 2.1.43pre1.  The flag was never set.

Let's discard it properly.

Link: http://lkml.kernel.org/r/877en0hewz.fsf@notabene.neil.brown.name
Signed-off-by: NeilBrown <neilb@suse.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hostfs/hostfs.h | 2 +-
 include/linux/fs.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index cb8374af08a6..33b8423ef0c9 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -19,7 +19,7 @@
 #define HOSTFS_ATTR_ATIME_SET	128
 #define HOSTFS_ATTR_MTIME_SET	256
 
-/* These two are unused by hostfs. */
+/* This one is unused by hostfs. */
 #define HOSTFS_ATTR_FORCE	512	/* Not a change, but a change it */
 #define HOSTFS_ATTR_ATTR_FLAG	1024
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ec33fd0423f..9d319f1f66f6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -179,7 +179,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define ATTR_ATIME_SET	(1 << 7)
 #define ATTR_MTIME_SET	(1 << 8)
 #define ATTR_FORCE	(1 << 9) /* Not a change, but a change it */
-#define ATTR_ATTR_FLAG	(1 << 10)
 #define ATTR_KILL_SUID	(1 << 11)
 #define ATTR_KILL_SGID	(1 << 12)
 #define ATTR_FILE	(1 << 13)
-- 
cgit v1.2.3


From b3a2369692fedcc439dfaa4f215cf5323a886c88 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 17 Aug 2018 15:45:12 -0700
Subject: include/linux/page_ext.h: drop definition of unused
 PAGE_EXT_DEBUG_POISON

After commit bd33ef368135 ("mm: enable page poisoning early at boot")
PAGE_EXT_DEBUG_POISON is not longer used.  Remove it.

Link: http://lkml.kernel.org/r/20180531135457.20167-2-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_ext.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index ca5461efae2f..bbec618a614b 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -16,18 +16,7 @@ struct page_ext_operations {
 
 #ifdef CONFIG_PAGE_EXTENSION
 
-/*
- * page_ext->flags bits:
- *
- * PAGE_EXT_DEBUG_POISON is set for poisoned pages. This is used to
- * implement generic debug pagealloc feature. The pages are filled with
- * poison patterns and set this flag after free_pages(). The poisoned
- * pages are verified whether the patterns are not corrupted and clear
- * the flag before alloc_pages().
- */
-
 enum page_ext_flags {
-	PAGE_EXT_DEBUG_POISON,		/* Page is poisoned */
 	PAGE_EXT_DEBUG_GUARD,
 	PAGE_EXT_OWNER,
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
-- 
cgit v1.2.3


From 10ed63415223b16d7e80ba528556500231814232 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 17 Aug 2018 15:45:15 -0700
Subject: mm/page_ext.c: constify lookup_page_ext() argument

lookup_page_ext() finds 'struct page_ext' for a given page.  It requires
only read access to the given struct page.

Current implemnentation takes 'struct page *' as an argument.  It makes
compiler complain when 'const struct page *' passed.

Change the argument to 'const struct page *'.

Link: http://lkml.kernel.org/r/20180531135457.20167-3-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_ext.h | 4 ++--
 mm/page_ext.c            | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index bbec618a614b..f84f167ec04c 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -50,7 +50,7 @@ static inline void page_ext_init(void)
 }
 #endif
 
-struct page_ext *lookup_page_ext(struct page *page);
+struct page_ext *lookup_page_ext(const struct page *page);
 
 #else /* !CONFIG_PAGE_EXTENSION */
 struct page_ext;
@@ -59,7 +59,7 @@ static inline void pgdat_page_ext_init(struct pglist_data *pgdat)
 {
 }
 
-static inline struct page_ext *lookup_page_ext(struct page *page)
+static inline struct page_ext *lookup_page_ext(const struct page *page)
 {
 	return NULL;
 }
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 5295ef331165..a9826da84ccb 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -120,7 +120,7 @@ void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
 	pgdat->node_page_ext = NULL;
 }
 
-struct page_ext *lookup_page_ext(struct page *page)
+struct page_ext *lookup_page_ext(const struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page);
 	unsigned long index;
@@ -195,7 +195,7 @@ fail:
 
 #else /* CONFIG_FLAT_NODE_MEM_MAP */
 
-struct page_ext *lookup_page_ext(struct page *page)
+struct page_ext *lookup_page_ext(const struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page);
 	struct mem_section *section = __pfn_to_section(pfn);
-- 
cgit v1.2.3


From 74c8164e1cdb1eb22f1d49d54e515e81821a8ad0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 17 Aug 2018 15:45:36 -0700
Subject: mpage: mpage_readpages() should submit IO as read-ahead

a_ops->readpages() is only ever used for read-ahead, yet we don't flag
the IO being submitted as such.  Fix that up.  Any file system that uses
mpage_readpages() as its ->readpages() implementation will now get this
right.

Since we're passing in whether the IO is read-ahead or not, we don't
need to pass in the 'gfp' separately, as it is dependent on the IO being
read-ahead.  Kill off that member.

Add some documentation notes on ->readpages() being purely for
read-ahead.

Link: http://lkml.kernel.org/r/20180621010725.17813-3-axboe@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Chris Mason <clm@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/f2fs/data.c     |  5 +++++
 fs/mpage.c         | 29 +++++++++++++++++++----------
 include/linux/fs.h |  4 ++++
 3 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8f931d699287..b7c9b58acf3e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1421,6 +1421,11 @@ out:
 /*
  * This function was originally taken from fs/mpage.c, and customized for f2fs.
  * Major change was from block_size == page_size in f2fs by default.
+ *
+ * Note that the aops->readpages() function is ONLY used for read-ahead. If
+ * this function ever deviates from doing just read-ahead, it should either
+ * use ->readpage() or do the necessary surgery to decouple ->readpages()
+ * readom read-ahead.
  */
 static int f2fs_mpage_readpages(struct address_space *mapping,
 			struct list_head *pages, struct page *page,
diff --git a/fs/mpage.c b/fs/mpage.c
index 6dc90e456abf..c820dc9bebab 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -137,11 +137,11 @@ struct mpage_readpage_args {
 	struct bio *bio;
 	struct page *page;
 	unsigned int nr_pages;
+	bool is_readahead;
 	sector_t last_block_in_bio;
 	struct buffer_head map_bh;
 	unsigned long first_logical_block;
 	get_block_t *get_block;
-	gfp_t gfp;
 };
 
 /*
@@ -170,8 +170,18 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	struct block_device *bdev = NULL;
 	int length;
 	int fully_mapped = 1;
+	int op_flags;
 	unsigned nblocks;
 	unsigned relative_block;
+	gfp_t gfp;
+
+	if (args->is_readahead) {
+		op_flags = REQ_RAHEAD;
+		gfp = readahead_gfp_mask(page->mapping);
+	} else {
+		op_flags = 0;
+		gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+	}
 
 	if (page_has_buffers(page))
 		goto confused;
@@ -284,7 +294,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	 * This page will go to BIO.  Do we need to send this BIO off first?
 	 */
 	if (args->bio && (args->last_block_in_bio != blocks[0] - 1))
-		args->bio = mpage_bio_submit(REQ_OP_READ, 0, args->bio);
+		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
 
 alloc_new:
 	if (args->bio == NULL) {
@@ -296,14 +306,14 @@ alloc_new:
 		args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
 					min_t(int, args->nr_pages,
 					      BIO_MAX_PAGES),
-					args->gfp);
+					gfp);
 		if (args->bio == NULL)
 			goto confused;
 	}
 
 	length = first_hole << blkbits;
 	if (bio_add_page(args->bio, page, length, 0) < length) {
-		args->bio = mpage_bio_submit(REQ_OP_READ, 0, args->bio);
+		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
 		goto alloc_new;
 	}
 
@@ -311,7 +321,7 @@ alloc_new:
 	nblocks = map_bh->b_size >> blkbits;
 	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
 	    (first_hole != blocks_per_page))
-		args->bio = mpage_bio_submit(REQ_OP_READ, 0, args->bio);
+		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
 	else
 		args->last_block_in_bio = blocks[blocks_per_page - 1];
 out:
@@ -319,7 +329,7 @@ out:
 
 confused:
 	if (args->bio)
-		args->bio = mpage_bio_submit(REQ_OP_READ, 0, args->bio);
+		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
 	if (!PageUptodate(page))
 		block_read_full_page(page, args->get_block);
 	else
@@ -377,7 +387,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 {
 	struct mpage_readpage_args args = {
 		.get_block = get_block,
-		.gfp = readahead_gfp_mask(mapping),
+		.is_readahead = true,
 	};
 	unsigned page_idx;
 
@@ -388,7 +398,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 		list_del(&page->lru);
 		if (!add_to_page_cache_lru(page, mapping,
 					page->index,
-					args.gfp)) {
+					readahead_gfp_mask(mapping))) {
 			args.page = page;
 			args.nr_pages = nr_pages - page_idx;
 			args.bio = do_mpage_readpage(&args);
@@ -397,7 +407,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	}
 	BUG_ON(!list_empty(pages));
 	if (args.bio)
-		mpage_bio_submit(REQ_OP_READ, 0, args.bio);
+		mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio);
 	return 0;
 }
 EXPORT_SYMBOL(mpage_readpages);
@@ -411,7 +421,6 @@ int mpage_readpage(struct page *page, get_block_t get_block)
 		.page = page,
 		.nr_pages = 1,
 		.get_block = get_block,
-		.gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL),
 	};
 
 	args.bio = do_mpage_readpage(&args);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9d319f1f66f6..a9242f336f02 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -344,6 +344,10 @@ struct address_space_operations {
 	/* Set a page dirty.  Return true if this dirtied it */
 	int (*set_page_dirty)(struct page *page);
 
+	/*
+	 * Reads in the requested pages. Unlike ->readpage(), this is
+	 * PURELY used for read-ahead!.
+	 */
 	int (*readpages)(struct file *filp, struct address_space *mapping,
 			struct list_head *pages, unsigned nr_pages);
 
-- 
cgit v1.2.3


From c9f4cd71383576a916e7fca99c490fc92a289f5a Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 17 Aug 2018 15:45:49 -0700
Subject: mm, huge page: copy target sub-page last when copy huge page

Huge page helps to reduce TLB miss rate, but it has higher cache
footprint, sometimes this may cause some issue.  For example, when
copying huge page on x86_64 platform, the cache footprint is 4M.  But on
a Xeon E5 v3 2699 CPU, there are 18 cores, 36 threads, and only 45M LLC
(last level cache).  That is, in average, there are 2.5M LLC for each
core and 1.25M LLC for each thread.

If the cache contention is heavy when copying the huge page, and we copy
the huge page from the begin to the end, it is possible that the begin
of huge page is evicted from the cache after we finishing copying the
end of the huge page.  And it is possible for the application to access
the begin of the huge page after copying the huge page.

In c79b57e462b5d ("mm: hugetlb: clear target sub-page last when clearing
huge page"), to keep the cache lines of the target subpage hot, the
order to clear the subpages in the huge page in clear_huge_page() is
changed to clearing the subpage which is furthest from the target
subpage firstly, and the target subpage last.  The similar order
changing helps huge page copying too.  That is implemented in this
patch.  Because we have put the order algorithm into a separate
function, the implementation is quite simple.

The patch is a generic optimization which should benefit quite some
workloads, not for a specific use case.  To demonstrate the performance
benefit of the patch, we tested it with vm-scalability run on
transparent huge page.

With this patch, the throughput increases ~16.6% in vm-scalability
anon-cow-seq test case with 36 processes on a 2 socket Xeon E5 v3 2699
system (36 cores, 72 threads).  The test case set
/sys/kernel/mm/transparent_hugepage/enabled to be always, mmap() a big
anonymous memory area and populate it, then forked 36 child processes,
each writes to the anonymous memory area from the begin to the end, so
cause copy on write.  For each child process, other child processes
could be seen as other workloads which generate heavy cache pressure.
At the same time, the IPC (instruction per cycle) increased from 0.63 to
0.78, and the time spent in user space is reduced ~7.2%.

Link: http://lkml.kernel.org/r/20180524005851.4079-3-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Shaohua Li <shli@fb.com>
Cc: Christopher Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  3 ++-
 mm/huge_memory.c   |  3 ++-
 mm/memory.c        | 30 +++++++++++++++++++++++-------
 3 files changed, 27 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 68a5121694ef..2fb32d1561eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2752,7 +2752,8 @@ extern void clear_huge_page(struct page *page,
 			    unsigned long addr_hint,
 			    unsigned int pages_per_huge_page);
 extern void copy_user_huge_page(struct page *dst, struct page *src,
-				unsigned long addr, struct vm_area_struct *vma,
+				unsigned long addr_hint,
+				struct vm_area_struct *vma,
 				unsigned int pages_per_huge_page);
 extern long copy_huge_page_from_user(struct page *dst_page,
 				const void __user *usr_src,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 064a9d78879d..78427af91de9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1328,7 +1328,8 @@ alloc:
 	if (!page)
 		clear_huge_page(new_page, vmf->address, HPAGE_PMD_NR);
 	else
-		copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
+		copy_user_huge_page(new_page, page, vmf->address,
+				    vma, HPAGE_PMD_NR);
 	__SetPageUptodate(new_page);
 
 	mmun_start = haddr;
diff --git a/mm/memory.c b/mm/memory.c
index 65bb59e031c9..175f344e1523 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4705,11 +4705,31 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src,
 	}
 }
 
+struct copy_subpage_arg {
+	struct page *dst;
+	struct page *src;
+	struct vm_area_struct *vma;
+};
+
+static void copy_subpage(unsigned long addr, int idx, void *arg)
+{
+	struct copy_subpage_arg *copy_arg = arg;
+
+	copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx,
+			   addr, copy_arg->vma);
+}
+
 void copy_user_huge_page(struct page *dst, struct page *src,
-			 unsigned long addr, struct vm_area_struct *vma,
+			 unsigned long addr_hint, struct vm_area_struct *vma,
 			 unsigned int pages_per_huge_page)
 {
-	int i;
+	unsigned long addr = addr_hint &
+		~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
+	struct copy_subpage_arg arg = {
+		.dst = dst,
+		.src = src,
+		.vma = vma,
+	};
 
 	if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
 		copy_user_gigantic_page(dst, src, addr, vma,
@@ -4717,11 +4737,7 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 		return;
 	}
 
-	might_sleep();
-	for (i = 0; i < pages_per_huge_page; i++) {
-		cond_resched();
-		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
-	}
+	process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
 }
 
 long copy_huge_page_from_user(struct page *dst_page,
-- 
cgit v1.2.3


From 4fbce633910ed80b135b84160a22b219080c8082 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Fri, 17 Aug 2018 15:46:22 -0700
Subject: mm/memory_hotplug.c: make register_mem_sect_under_node() a callback
 of walk_memory_range()

link_mem_sections() and walk_memory_range() share most of the code, so
we can use convert link_mem_sections() into a dummy function that calls
walk_memory_range() with a callback to register_mem_sect_under_node().

This patch converts register_mem_sect_under_node() in order to match a
walk_memory_range's callback, getting rid of the check_nid argument and
checking instead if the system is still boothing, since we only have to
check for the nid if the system is in such state.

Link: http://lkml.kernel.org/r/20180622111839.10071-4-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Suggested-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c  | 44 ++++++--------------------------------------
 include/linux/node.h | 12 +++++++-----
 mm/memory_hotplug.c  |  5 +----
 3 files changed, 14 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index a5e821d09656..845d5523812b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -399,10 +399,9 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
 }
 
 /* register memory section under specified node if it spans that node */
-int register_mem_sect_under_node(struct memory_block *mem_blk, int nid,
-				 bool check_nid)
+int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg)
 {
-	int ret;
+	int ret, nid = *(int *)arg;
 	unsigned long pfn, sect_start_pfn, sect_end_pfn;
 
 	if (!mem_blk)
@@ -433,7 +432,7 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid,
 		 * case, during hotplug we know that all pages in the memory
 		 * block belong to the same node.
 		 */
-		if (check_nid) {
+		if (system_state == SYSTEM_BOOTING) {
 			page_nid = get_nid_for_pfn(pfn);
 			if (page_nid < 0)
 				continue;
@@ -490,41 +489,10 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
 	return 0;
 }
 
-int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages,
-		      bool check_nid)
+int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
 {
-	unsigned long end_pfn = start_pfn + nr_pages;
-	unsigned long pfn;
-	struct memory_block *mem_blk = NULL;
-	int err = 0;
-
-	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-		unsigned long section_nr = pfn_to_section_nr(pfn);
-		struct mem_section *mem_sect;
-		int ret;
-
-		if (!present_section_nr(section_nr))
-			continue;
-		mem_sect = __nr_to_section(section_nr);
-
-		/* same memblock ? */
-		if (mem_blk)
-			if ((section_nr >= mem_blk->start_section_nr) &&
-			    (section_nr <= mem_blk->end_section_nr))
-				continue;
-
-		mem_blk = find_memory_block_hinted(mem_sect, mem_blk);
-
-		ret = register_mem_sect_under_node(mem_blk, nid, check_nid);
-		if (!err)
-			err = ret;
-
-		/* discard ref obtained in find_memory_block() */
-	}
-
-	if (mem_blk)
-		kobject_put(&mem_blk->dev.kobj);
-	return err;
+	return walk_memory_range(start_pfn, end_pfn, (void *)&nid,
+					register_mem_sect_under_node);
 }
 
 #ifdef CONFIG_HUGETLBFS
diff --git a/include/linux/node.h b/include/linux/node.h
index 6d336e38d155..257bb3d6d014 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -33,10 +33,10 @@ typedef  void (*node_registration_func_t)(struct node *);
 
 #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
 extern int link_mem_sections(int nid, unsigned long start_pfn,
-			     unsigned long nr_pages, bool check_nid);
+			     unsigned long end_pfn);
 #else
 static inline int link_mem_sections(int nid, unsigned long start_pfn,
-				    unsigned long nr_pages, bool check_nid)
+				    unsigned long end_pfn)
 {
 	return 0;
 }
@@ -54,12 +54,14 @@ static inline int register_one_node(int nid)
 
 	if (node_online(nid)) {
 		struct pglist_data *pgdat = NODE_DATA(nid);
+		unsigned long start_pfn = pgdat->node_start_pfn;
+		unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
 
 		error = __register_one_node(nid);
 		if (error)
 			return error;
 		/* link memory sections under this node */
-		error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages, true);
+		error = link_mem_sections(nid, start_pfn, end_pfn);
 	}
 
 	return error;
@@ -69,7 +71,7 @@ extern void unregister_one_node(int nid);
 extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int register_mem_sect_under_node(struct memory_block *mem_blk,
-						int nid, bool check_nid);
+						void *arg);
 extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
 					   unsigned long phys_index);
 
@@ -99,7 +101,7 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 	return 0;
 }
 static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
-							int nid, bool check_nid)
+							void *arg)
 {
 	return 0;
 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e2ed64b994e5..4eb6e824a80c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1123,7 +1123,6 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	u64 start, size;
 	bool new_node = false;
 	int ret;
-	unsigned long start_pfn, nr_pages;
 
 	start = res->start;
 	size = resource_size(res);
@@ -1164,9 +1163,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	}
 
 	/* link memory sections under this node.*/
-	start_pfn = start >> PAGE_SHIFT;
-	nr_pages = size >> PAGE_SHIFT;
-	ret = link_mem_sections(nid, start_pfn, nr_pages, false);
+	ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1));
 	BUG_ON(ret);
 
 	/* create new memmap entry */
-- 
cgit v1.2.3


From dc0b58643aff8b378086f25cce6789ccba68cbcb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 17 Aug 2018 15:46:36 -0700
Subject: mm: introduce mem_cgroup_put() helper

Introduce the mem_cgroup_put() helper, which helps to eliminate guarding
memcg css release with "#ifdef CONFIG_MEMCG" in multiple places.

Link: http://lkml.kernel.org/r/20180623000600.5818-2-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 680d3395fc83..42f4719def32 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -378,6 +378,11 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
 }
 
+static inline void mem_cgroup_put(struct mem_cgroup *memcg)
+{
+	css_put(&memcg->css);
+}
+
 #define mem_cgroup_from_counter(counter, member)	\
 	container_of(counter, struct mem_cgroup, member)
 
@@ -850,6 +855,10 @@ static inline bool task_in_mem_cgroup(struct task_struct *task,
 	return true;
 }
 
+static inline void mem_cgroup_put(struct mem_cgroup *memcg)
+{
+}
+
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
 		struct mem_cgroup *prev,
-- 
cgit v1.2.3


From d46eb14b735b11927d4bdc2d1854c311af19de6d Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Fri, 17 Aug 2018 15:46:39 -0700
Subject: fs: fsnotify: account fsnotify metadata to kmemcg

Patch series "Directed kmem charging", v8.

The Linux kernel's memory cgroup allows limiting the memory usage of the
jobs running on the system to provide isolation between the jobs.  All
the kernel memory allocated in the context of the job and marked with
__GFP_ACCOUNT will also be included in the memory usage and be limited
by the job's limit.

The kernel memory can only be charged to the memcg of the process in
whose context kernel memory was allocated.  However there are cases
where the allocated kernel memory should be charged to the memcg
different from the current processes's memcg.  This patch series
contains two such concrete use-cases i.e.  fsnotify and buffer_head.

The fsnotify event objects can consume a lot of system memory for large
or unlimited queues if there is either no or slow listener.  The events
are allocated in the context of the event producer.  However they should
be charged to the event consumer.  Similarly the buffer_head objects can
be allocated in a memcg different from the memcg of the page for which
buffer_head objects are being allocated.

To solve this issue, this patch series introduces mechanism to charge
kernel memory to a given memcg.  In case of fsnotify events, the memcg
of the consumer can be used for charging and for buffer_head, the memcg
of the page can be charged.  For directed charging, the caller can use
the scope API memalloc_[un]use_memcg() to specify the memcg to charge
for all the __GFP_ACCOUNT allocations within the scope.

This patch (of 2):

A lot of memory can be consumed by the events generated for the huge or
unlimited queues if there is either no or slow listener.  This can cause
system level memory pressure or OOMs.  So, it's better to account the
fsnotify kmem caches to the memcg of the listener.

However the listener can be in a different memcg than the memcg of the
producer and these allocations happen in the context of the event
producer.  This patch introduces remote memcg charging API which the
producer can use to charge the allocations to the memcg of the listener.

There are seven fsnotify kmem caches and among them allocations from
dnotify_struct_cache, dnotify_mark_cache, fanotify_mark_cache and
inotify_inode_mark_cachep happens in the context of syscall from the
listener.  So, SLAB_ACCOUNT is enough for these caches.

The objects from fsnotify_mark_connector_cachep are not accounted as
they are small compared to the notification mark or events and it is
unclear whom to account connector to since it is shared by all events
attached to the inode.

The allocations from the event caches happen in the context of the event
producer.  For such caches we will need to remote charge the allocations
to the listener's memcg.  Thus we save the memcg reference in the
fsnotify_group structure of the listener.

This patch has also moved the members of fsnotify_group to keep the size
same, at least for 64 bit build, even with additional member by filling
the holes.

[shakeelb@google.com: use GFP_KERNEL_ACCOUNT rather than open-coding it]
  Link: http://lkml.kernel.org/r/20180702215439.211597-1-shakeelb@google.com
Link: http://lkml.kernel.org/r/20180627191250.209150-2-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/notify/dnotify/dnotify.c          |  5 +++--
 fs/notify/fanotify/fanotify.c        | 14 ++++++++++----
 fs/notify/fanotify/fanotify_user.c   |  5 ++++-
 fs/notify/group.c                    |  3 +++
 fs/notify/inotify/inotify_fsnotify.c |  7 ++++++-
 fs/notify/inotify/inotify_user.c     |  5 ++++-
 include/linux/fsnotify_backend.h     | 12 ++++++++----
 include/linux/memcontrol.h           | 10 +++++++++-
 include/linux/sched.h                |  3 +++
 include/linux/sched/mm.h             | 37 ++++++++++++++++++++++++++++++++++++
 kernel/fork.c                        |  3 +++
 mm/memcontrol.c                      | 37 ++++++++++++++++++++++++++++++++----
 12 files changed, 123 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index e2bea2ac5dfb..a6365e6bc047 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -384,8 +384,9 @@ out_err:
 
 static int __init dnotify_init(void)
 {
-	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
-	dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
+	dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
+					  SLAB_PANIC|SLAB_ACCOUNT);
+	dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
 
 	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
 	if (IS_ERR(dnotify_group))
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f90842efea13..eb4e75175cfb 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <linux/wait.h>
 #include <linux/audit.h>
+#include <linux/sched/mm.h>
 
 #include "fanotify.h"
 
@@ -140,8 +141,8 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
 						 struct inode *inode, u32 mask,
 						 const struct path *path)
 {
-	struct fanotify_event_info *event;
-	gfp_t gfp = GFP_KERNEL;
+	struct fanotify_event_info *event = NULL;
+	gfp_t gfp = GFP_KERNEL_ACCOUNT;
 
 	/*
 	 * For queues with unlimited length lost events are not expected and
@@ -151,19 +152,22 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
 	if (group->max_events == UINT_MAX)
 		gfp |= __GFP_NOFAIL;
 
+	/* Whoever is interested in the event, pays for the allocation. */
+	memalloc_use_memcg(group->memcg);
+
 	if (fanotify_is_perm_event(mask)) {
 		struct fanotify_perm_event_info *pevent;
 
 		pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
 		if (!pevent)
-			return NULL;
+			goto out;
 		event = &pevent->fae;
 		pevent->response = 0;
 		goto init;
 	}
 	event = kmem_cache_alloc(fanotify_event_cachep, gfp);
 	if (!event)
-		return NULL;
+		goto out;
 init: __maybe_unused
 	fsnotify_init_event(&event->fse, inode, mask);
 	event->tgid = get_pid(task_tgid(current));
@@ -174,6 +178,8 @@ init: __maybe_unused
 		event->path.mnt = NULL;
 		event->path.dentry = NULL;
 	}
+out:
+	memalloc_unuse_memcg();
 	return event;
 }
 
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index ec4d8c59d0e3..0cf45041dc32 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,7 @@
 #include <linux/uaccess.h>
 #include <linux/compat.h>
 #include <linux/sched/signal.h>
+#include <linux/memcontrol.h>
 
 #include <asm/ioctls.h>
 
@@ -756,6 +757,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 
 	group->fanotify_data.user = user;
 	atomic_inc(&user->fanotify_listeners);
+	group->memcg = get_mem_cgroup_from_mm(current->mm);
 
 	oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
 	if (unlikely(!oevent)) {
@@ -957,7 +959,8 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
  */
 static int __init fanotify_user_setup(void)
 {
-	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
+	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
+					 SLAB_PANIC|SLAB_ACCOUNT);
 	fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
 	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
 		fanotify_perm_event_cachep =
diff --git a/fs/notify/group.c b/fs/notify/group.c
index aa5468f23e45..c03b83662876 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -22,6 +22,7 @@
 #include <linux/srcu.h>
 #include <linux/rculist.h>
 #include <linux/wait.h>
+#include <linux/memcontrol.h>
 
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
@@ -36,6 +37,8 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
 	if (group->ops->free_group_priv)
 		group->ops->free_group_priv(group);
 
+	mem_cgroup_put(group->memcg);
+
 	kfree(group);
 }
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 9ab6dde38a14..f4184b4f3815 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -31,6 +31,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/sched/user.h>
+#include <linux/sched/mm.h>
 
 #include "inotify.h"
 
@@ -98,7 +99,11 @@ int inotify_handle_event(struct fsnotify_group *group,
 	i_mark = container_of(inode_mark, struct inotify_inode_mark,
 			      fsn_mark);
 
-	event = kmalloc(alloc_len, GFP_KERNEL);
+	/* Whoever is interested in the event, pays for the allocation. */
+	memalloc_use_memcg(group->memcg);
+	event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT);
+	memalloc_unuse_memcg();
+
 	if (unlikely(!event)) {
 		/*
 		 * Treat lost event due to ENOMEM the same way as queue
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1cf5b779d862..749c46ababa0 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -38,6 +38,7 @@
 #include <linux/uaccess.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
+#include <linux/memcontrol.h>
 
 #include "inotify.h"
 #include "../fdinfo.h"
@@ -636,6 +637,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 	oevent->name_len = 0;
 
 	group->max_events = max_events;
+	group->memcg = get_mem_cgroup_from_mm(current->mm);
 
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
@@ -808,7 +810,8 @@ static int __init inotify_user_setup(void)
 
 	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
 
-	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
+	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
+					       SLAB_PANIC|SLAB_ACCOUNT);
 
 	inotify_max_queued_events = 16384;
 	init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b38964a7a521..a0c4790c5302 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -84,6 +84,8 @@ struct fsnotify_event_private_data;
 struct fsnotify_fname;
 struct fsnotify_iter_info;
 
+struct mem_cgroup;
+
 /*
  * Each group much define these ops.  The fsnotify infrastructure will call
  * these operations for each relevant group.
@@ -127,6 +129,8 @@ struct fsnotify_event {
  * everything will be cleaned up.
  */
 struct fsnotify_group {
+	const struct fsnotify_ops *ops;	/* how this group handles things */
+
 	/*
 	 * How the refcnt is used is up to each group.  When the refcnt hits 0
 	 * fsnotify will clean up all of the resources associated with this group.
@@ -137,8 +141,6 @@ struct fsnotify_group {
 	 */
 	refcount_t refcnt;		/* things with interest in this group */
 
-	const struct fsnotify_ops *ops;	/* how this group handles things */
-
 	/* needed to send notification to userspace */
 	spinlock_t notification_lock;		/* protect the notification_list */
 	struct list_head notification_list;	/* list of event_holder this group needs to send to userspace */
@@ -160,6 +162,8 @@ struct fsnotify_group {
 	atomic_t num_marks;		/* 1 for each mark and 1 for not being
 					 * past the point of no return when freeing
 					 * a group */
+	atomic_t user_waits;		/* Number of tasks waiting for user
+					 * response */
 	struct list_head marks_list;	/* all inode marks for this group */
 
 	struct fasync_struct *fsn_fa;    /* async notification */
@@ -167,8 +171,8 @@ struct fsnotify_group {
 	struct fsnotify_event *overflow_event;	/* Event we queue when the
 						 * notification list is too
 						 * full */
-	atomic_t user_waits;		/* Number of tasks waiting for user
-					 * response */
+
+	struct mem_cgroup *memcg;	/* memcg to charge allocations */
 
 	/* groups can define private fields here or use the void *private */
 	union {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 42f4719def32..121e218d2a21 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -373,6 +373,8 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
+struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
+
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -380,7 +382,8 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
-	css_put(&memcg->css);
+	if (memcg)
+		css_put(&memcg->css);
 }
 
 #define mem_cgroup_from_counter(counter, member)	\
@@ -855,6 +858,11 @@ static inline bool task_in_mem_cgroup(struct task_struct *task,
 	return true;
 }
 
+static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
+{
+	return NULL;
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 95a5018c338e..1827f4a7a6de 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1152,6 +1152,9 @@ struct task_struct {
 
 	/* Number of pages to reclaim on returning to userland: */
 	unsigned int			memcg_nr_pages_over_high;
+
+	/* Used by memcontrol for targeted memcg charge: */
+	struct mem_cgroup		*active_memcg;
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44d356f5e47c..aebb370a0006 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -248,6 +248,43 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
 }
 
+#ifdef CONFIG_MEMCG
+/**
+ * memalloc_use_memcg - Starts the remote memcg charging scope.
+ * @memcg: memcg to charge.
+ *
+ * This function marks the beginning of the remote memcg charging scope. All the
+ * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
+ * given memcg.
+ *
+ * NOTE: This function is not nesting safe.
+ */
+static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+{
+	WARN_ON_ONCE(current->active_memcg);
+	current->active_memcg = memcg;
+}
+
+/**
+ * memalloc_unuse_memcg - Ends the remote memcg charging scope.
+ *
+ * This function marks the end of the remote memcg charging scope started by
+ * memalloc_use_memcg().
+ */
+static inline void memalloc_unuse_memcg(void)
+{
+	current->active_memcg = NULL;
+}
+#else
+static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+{
+}
+
+static inline void memalloc_unuse_memcg(void)
+{
+}
+#endif
+
 #ifdef CONFIG_MEMBARRIER
 enum {
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
diff --git a/kernel/fork.c b/kernel/fork.c
index 33112315b5c0..5ee74c113381 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -871,6 +871,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->use_memdelay = 0;
 #endif
 
+#ifdef CONFIG_MEMCG
+	tsk->active_memcg = NULL;
+#endif
 	return tsk;
 
 free_stack:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b836e7f00309..bf9cf738c836 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -678,9 +678,20 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 }
 EXPORT_SYMBOL(mem_cgroup_from_task);
 
-static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
+/**
+ * get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg.
+ * @mm: mm from which memcg should be extracted. It can be NULL.
+ *
+ * Obtain a reference on mm->memcg and returns it if successful. Otherwise
+ * root_mem_cgroup is returned. However if mem_cgroup is disabled, NULL is
+ * returned.
+ */
+struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 {
-	struct mem_cgroup *memcg = NULL;
+	struct mem_cgroup *memcg;
+
+	if (mem_cgroup_disabled())
+		return NULL;
 
 	rcu_read_lock();
 	do {
@@ -700,6 +711,24 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 	rcu_read_unlock();
 	return memcg;
 }
+EXPORT_SYMBOL(get_mem_cgroup_from_mm);
+
+/**
+ * If current->active_memcg is non-NULL, do not fallback to current->mm->memcg.
+ */
+static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
+{
+	if (unlikely(current->active_memcg)) {
+		struct mem_cgroup *memcg = root_mem_cgroup;
+
+		rcu_read_lock();
+		if (css_tryget_online(&current->active_memcg->css))
+			memcg = current->active_memcg;
+		rcu_read_unlock();
+		return memcg;
+	}
+	return get_mem_cgroup_from_mm(current->mm);
+}
 
 /**
  * mem_cgroup_iter - iterate over memory cgroup hierarchy
@@ -2261,7 +2290,7 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
 	if (current->memcg_kmem_skip_account)
 		return cachep;
 
-	memcg = get_mem_cgroup_from_mm(current->mm);
+	memcg = get_mem_cgroup_from_current();
 	kmemcg_id = READ_ONCE(memcg->kmemcg_id);
 	if (kmemcg_id < 0)
 		goto out;
@@ -2345,7 +2374,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
 	if (memcg_kmem_bypass())
 		return 0;
 
-	memcg = get_mem_cgroup_from_mm(current->mm);
+	memcg = get_mem_cgroup_from_current();
 	if (!mem_cgroup_is_root(memcg)) {
 		ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
 		if (!ret)
-- 
cgit v1.2.3


From f745c6f5fe75734f3b35d9d4e6ebe2a7d010ddda Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Fri, 17 Aug 2018 15:46:44 -0700
Subject: fs, mm: account buffer_head to kmemcg

The buffer_head can consume a significant amount of system memory and is
directly related to the amount of page cache.  In our production
environment we have observed that a lot of machines are spending a
significant amount of memory as buffer_head and can not be left as
system memory overhead.

Charging buffer_head is not as simple as adding __GFP_ACCOUNT to the
allocation.  The buffer_heads can be allocated in a memcg different from
the memcg of the page for which buffer_heads are being allocated.  One
concrete example is memory reclaim.  The reclaim can trigger I/O of
pages of any memcg on the system.  So, the right way to charge
buffer_head is to extract the memcg from the page for which buffer_heads
are being allocated and then use targeted memcg charging API.

[shakeelb@google.com: use __GFP_ACCOUNT for directed memcg charging]
  Link: http://lkml.kernel.org/r/20180702220208.213380-1-shakeelb@google.com
Link: http://lkml.kernel.org/r/20180627191250.209150-3-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c                | 12 ++++++++++--
 include/linux/memcontrol.h |  7 +++++++
 mm/memcontrol.c            | 22 ++++++++++++++++++++++
 3 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index c8c2b7d8b8d6..4cc679d5bf58 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -45,6 +45,7 @@
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
 #include <linux/pagevec.h>
+#include <linux/sched/mm.h>
 #include <trace/events/block.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
@@ -813,12 +814,16 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
 		bool retry)
 {
 	struct buffer_head *bh, *head;
-	gfp_t gfp = GFP_NOFS;
+	gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
 	long offset;
+	struct mem_cgroup *memcg;
 
 	if (retry)
 		gfp |= __GFP_NOFAIL;
 
+	memcg = get_mem_cgroup_from_page(page);
+	memalloc_use_memcg(memcg);
+
 	head = NULL;
 	offset = PAGE_SIZE;
 	while ((offset -= size) >= 0) {
@@ -835,6 +840,9 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
 		/* Link the buffer to its page */
 		set_bh_page(bh, page, offset);
 	}
+out:
+	memalloc_unuse_memcg();
+	mem_cgroup_put(memcg);
 	return head;
 /*
  * In case anything failed, we just free everything we got.
@@ -848,7 +856,7 @@ no_grow:
 		} while (head);
 	}
 
-	return NULL;
+	goto out;
 }
 EXPORT_SYMBOL_GPL(alloc_page_buffers);
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 121e218d2a21..50e3e807b427 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -375,6 +375,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
 
+struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
+
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -863,6 +865,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return NULL;
 }
 
+static inline struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
+{
+	return NULL;
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bf9cf738c836..c071af193986 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -713,6 +713,28 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(get_mem_cgroup_from_mm);
 
+/**
+ * get_mem_cgroup_from_page: Obtain a reference on given page's memcg.
+ * @page: page from which memcg should be extracted.
+ *
+ * Obtain a reference on page->memcg and returns it if successful. Otherwise
+ * root_mem_cgroup is returned.
+ */
+struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
+{
+	struct mem_cgroup *memcg = page->mem_cgroup;
+
+	if (mem_cgroup_disabled())
+		return NULL;
+
+	rcu_read_lock();
+	if (!memcg || !css_tryget_online(&memcg->css))
+		memcg = root_mem_cgroup;
+	rcu_read_unlock();
+	return memcg;
+}
+EXPORT_SYMBOL(get_mem_cgroup_from_page);
+
 /**
  * If current->active_memcg is non-NULL, do not fallback to current->mm->memcg.
  */
-- 
cgit v1.2.3


From 0207df4fa1a869281ddbf72db6203dbf036b3e1a Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:04 -0700
Subject: kernel/memremap, kasan: make ZONE_DEVICE with work with KASAN

KASAN learns about hotadded memory via the memory hotplug notifier.
devm_memremap_pages() intentionally skips calling memory hotplug
notifiers.  So KASAN doesn't know anything about new memory added by
devm_memremap_pages().  This causes a crash when KASAN tries to access
non-existent shadow memory:

 BUG: unable to handle kernel paging request at ffffed0078000000
 RIP: 0010:check_memory_region+0x82/0x1e0
 Call Trace:
  memcpy+0x1f/0x50
  pmem_do_bvec+0x163/0x720
  pmem_make_request+0x305/0xac0
  generic_make_request+0x54f/0xcf0
  submit_bio+0x9c/0x370
  submit_bh_wbc+0x4c7/0x700
  block_read_full_page+0x5ef/0x870
  do_read_cache_page+0x2b8/0xb30
  read_dev_sector+0xbd/0x3f0
  read_lba.isra.0+0x277/0x670
  efi_partition+0x41a/0x18f0
  check_partition+0x30d/0x5e9
  rescan_partitions+0x18c/0x840
  __blkdev_get+0x859/0x1060
  blkdev_get+0x23f/0x810
  __device_add_disk+0x9c8/0xde0
  pmem_attach_disk+0x9a8/0xf50
  nvdimm_bus_probe+0xf3/0x3c0
  driver_probe_device+0x493/0xbd0
  bus_for_each_drv+0x118/0x1b0
  __device_attach+0x1cd/0x2b0
  bus_probe_device+0x1ac/0x260
  device_add+0x90d/0x1380
  nd_async_device_register+0xe/0x50
  async_run_entry_fn+0xc3/0x5d0
  process_one_work+0xa0a/0x1810
  worker_thread+0x87/0xe80
  kthread+0x2d7/0x390
  ret_from_fork+0x3a/0x50

Add kasan_add_zero_shadow()/kasan_remove_zero_shadow() - post mm_init()
interface to map/unmap kasan_zero_page at requested virtual addresses.
And use it to add/remove the shadow memory for hotplugged/unplugged
device memory.

Link: http://lkml.kernel.org/r/20180629164932.740-1-aryabinin@virtuozzo.com
Fixes: 41e94a851304 ("add devm_memremap_pages")
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reported-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Alexander Potapenko <glider@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h |  13 ++-
 kernel/memremap.c     |  10 ++
 mm/kasan/kasan_init.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 325 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index de784fd11d12..46aae129917c 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -20,7 +20,7 @@ extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
 extern pud_t kasan_zero_pud[PTRS_PER_PUD];
 extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D];
 
-void kasan_populate_zero_shadow(const void *shadow_start,
+int kasan_populate_zero_shadow(const void *shadow_start,
 				const void *shadow_end);
 
 static inline void *kasan_mem_to_shadow(const void *addr)
@@ -71,6 +71,9 @@ struct kasan_cache {
 int kasan_module_alloc(void *addr, size_t size);
 void kasan_free_shadow(const struct vm_struct *vm);
 
+int kasan_add_zero_shadow(void *start, unsigned long size);
+void kasan_remove_zero_shadow(void *start, unsigned long size);
+
 size_t ksize(const void *);
 static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
 size_t kasan_metadata_size(struct kmem_cache *cache);
@@ -124,6 +127,14 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
 static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
 static inline void kasan_free_shadow(const struct vm_struct *vm) {}
 
+static inline int kasan_add_zero_shadow(void *start, unsigned long size)
+{
+	return 0;
+}
+static inline void kasan_remove_zero_shadow(void *start,
+					unsigned long size)
+{}
+
 static inline void kasan_unpoison_slab(const void *ptr) { }
 static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
 
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 38283363da06..1f87ea6b6545 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -5,6 +5,7 @@
 #include <linux/types.h>
 #include <linux/pfn_t.h>
 #include <linux/io.h>
+#include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/memory_hotplug.h>
 #include <linux/swap.h>
@@ -137,6 +138,7 @@ static void devm_memremap_pages_release(void *data)
 	mem_hotplug_begin();
 	arch_remove_memory(align_start, align_size, pgmap->altmap_valid ?
 			&pgmap->altmap : NULL);
+	kasan_remove_zero_shadow(__va(align_start), align_size);
 	mem_hotplug_done();
 
 	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
@@ -239,6 +241,12 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 		goto err_pfn_remap;
 
 	mem_hotplug_begin();
+	error = kasan_add_zero_shadow(__va(align_start), align_size);
+	if (error) {
+		mem_hotplug_done();
+		goto err_kasan;
+	}
+
 	error = arch_add_memory(nid, align_start, align_size, altmap, false);
 	if (!error)
 		move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
@@ -267,6 +275,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	return __va(res->start);
 
  err_add_memory:
+	kasan_remove_zero_shadow(__va(align_start), align_size);
+ err_kasan:
 	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
  err_pfn_remap:
  err_radix:
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index f436246ccc79..7a2a2f13f86f 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -17,10 +17,13 @@
 #include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/pfn.h>
+#include <linux/slab.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 
+#include "kasan.h"
+
 /*
  * This page serves two purposes:
  *   - It used as early shadow memory. The entire shadow region populated
@@ -32,22 +35,59 @@ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
 
 #if CONFIG_PGTABLE_LEVELS > 4
 p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
+static inline bool kasan_p4d_table(pgd_t pgd)
+{
+	return pgd_page(pgd) == virt_to_page(lm_alias(kasan_zero_p4d));
+}
+#else
+static inline bool kasan_p4d_table(pgd_t pgd)
+{
+	return 0;
+}
 #endif
 #if CONFIG_PGTABLE_LEVELS > 3
 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
+static inline bool kasan_pud_table(p4d_t p4d)
+{
+	return p4d_page(p4d) == virt_to_page(lm_alias(kasan_zero_pud));
+}
+#else
+static inline bool kasan_pud_table(p4d_t p4d)
+{
+	return 0;
+}
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
 pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static inline bool kasan_pmd_table(pud_t pud)
+{
+	return pud_page(pud) == virt_to_page(lm_alias(kasan_zero_pmd));
+}
+#else
+static inline bool kasan_pmd_table(pud_t pud)
+{
+	return 0;
+}
 #endif
 pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss;
 
+static inline bool kasan_pte_table(pmd_t pmd)
+{
+	return pmd_page(pmd) == virt_to_page(lm_alias(kasan_zero_pte));
+}
+
+static inline bool kasan_zero_page_entry(pte_t pte)
+{
+	return pte_page(pte) == virt_to_page(lm_alias(kasan_zero_page));
+}
+
 static __init void *early_alloc(size_t size, int node)
 {
 	return memblock_virt_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
 					BOOTMEM_ALLOC_ACCESSIBLE, node);
 }
 
-static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __ref zero_pte_populate(pmd_t *pmd, unsigned long addr,
 				unsigned long end)
 {
 	pte_t *pte = pte_offset_kernel(pmd, addr);
@@ -63,7 +103,7 @@ static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
 	}
 }
 
-static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
+static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 				unsigned long end)
 {
 	pmd_t *pmd = pmd_offset(pud, addr);
@@ -78,14 +118,24 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
 		}
 
 		if (pmd_none(*pmd)) {
-			pmd_populate_kernel(&init_mm, pmd,
-					early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			pte_t *p;
+
+			if (slab_is_available())
+				p = pte_alloc_one_kernel(&init_mm, addr);
+			else
+				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+			if (!p)
+				return -ENOMEM;
+
+			pmd_populate_kernel(&init_mm, pmd, p);
 		}
 		zero_pte_populate(pmd, addr, next);
 	} while (pmd++, addr = next, addr != end);
+
+	return 0;
 }
 
-static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
+static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
 				unsigned long end)
 {
 	pud_t *pud = pud_offset(p4d, addr);
@@ -103,14 +153,24 @@ static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
 		}
 
 		if (pud_none(*pud)) {
-			pud_populate(&init_mm, pud,
-				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			pmd_t *p;
+
+			if (slab_is_available()) {
+				p = pmd_alloc(&init_mm, pud, addr);
+				if (!p)
+					return -ENOMEM;
+			} else {
+				pud_populate(&init_mm, pud,
+					early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			}
 		}
 		zero_pmd_populate(pud, addr, next);
 	} while (pud++, addr = next, addr != end);
+
+	return 0;
 }
 
-static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
+static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
 				unsigned long end)
 {
 	p4d_t *p4d = p4d_offset(pgd, addr);
@@ -132,11 +192,21 @@ static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
 		}
 
 		if (p4d_none(*p4d)) {
-			p4d_populate(&init_mm, p4d,
-				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			pud_t *p;
+
+			if (slab_is_available()) {
+				p = pud_alloc(&init_mm, p4d, addr);
+				if (!p)
+					return -ENOMEM;
+			} else {
+				p4d_populate(&init_mm, p4d,
+					early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			}
 		}
 		zero_pud_populate(p4d, addr, next);
 	} while (p4d++, addr = next, addr != end);
+
+	return 0;
 }
 
 /**
@@ -145,7 +215,7 @@ static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
  * @shadow_start - start of the memory range to populate
  * @shadow_end   - end of the memory range to populate
  */
-void __init kasan_populate_zero_shadow(const void *shadow_start,
+int __ref kasan_populate_zero_shadow(const void *shadow_start,
 				const void *shadow_end)
 {
 	unsigned long addr = (unsigned long)shadow_start;
@@ -191,9 +261,229 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
 		}
 
 		if (pgd_none(*pgd)) {
-			pgd_populate(&init_mm, pgd,
-				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			p4d_t *p;
+
+			if (slab_is_available()) {
+				p = p4d_alloc(&init_mm, pgd, addr);
+				if (!p)
+					return -ENOMEM;
+			} else {
+				pgd_populate(&init_mm, pgd,
+					early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			}
 		}
 		zero_p4d_populate(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
+
+	return 0;
+}
+
+static void kasan_free_pte(pte_t *pte_start, pmd_t *pmd)
+{
+	pte_t *pte;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte = pte_start + i;
+		if (!pte_none(*pte))
+			return;
+	}
+
+	pte_free_kernel(&init_mm, (pte_t *)page_to_virt(pmd_page(*pmd)));
+	pmd_clear(pmd);
+}
+
+static void kasan_free_pmd(pmd_t *pmd_start, pud_t *pud)
+{
+	pmd_t *pmd;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd = pmd_start + i;
+		if (!pmd_none(*pmd))
+			return;
+	}
+
+	pmd_free(&init_mm, (pmd_t *)page_to_virt(pud_page(*pud)));
+	pud_clear(pud);
+}
+
+static void kasan_free_pud(pud_t *pud_start, p4d_t *p4d)
+{
+	pud_t *pud;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		pud = pud_start + i;
+		if (!pud_none(*pud))
+			return;
+	}
+
+	pud_free(&init_mm, (pud_t *)page_to_virt(p4d_page(*p4d)));
+	p4d_clear(p4d);
+}
+
+static void kasan_free_p4d(p4d_t *p4d_start, pgd_t *pgd)
+{
+	p4d_t *p4d;
+	int i;
+
+	for (i = 0; i < PTRS_PER_P4D; i++) {
+		p4d = p4d_start + i;
+		if (!p4d_none(*p4d))
+			return;
+	}
+
+	p4d_free(&init_mm, (p4d_t *)page_to_virt(pgd_page(*pgd)));
+	pgd_clear(pgd);
+}
+
+static void kasan_remove_pte_table(pte_t *pte, unsigned long addr,
+				unsigned long end)
+{
+	unsigned long next;
+
+	for (; addr < end; addr = next, pte++) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		if (next > end)
+			next = end;
+
+		if (!pte_present(*pte))
+			continue;
+
+		if (WARN_ON(!kasan_zero_page_entry(*pte)))
+			continue;
+		pte_clear(&init_mm, addr, pte);
+	}
+}
+
+static void kasan_remove_pmd_table(pmd_t *pmd, unsigned long addr,
+				unsigned long end)
+{
+	unsigned long next;
+
+	for (; addr < end; addr = next, pmd++) {
+		pte_t *pte;
+
+		next = pmd_addr_end(addr, end);
+
+		if (!pmd_present(*pmd))
+			continue;
+
+		if (kasan_pte_table(*pmd)) {
+			if (IS_ALIGNED(addr, PMD_SIZE) &&
+			    IS_ALIGNED(next, PMD_SIZE))
+				pmd_clear(pmd);
+			continue;
+		}
+		pte = pte_offset_kernel(pmd, addr);
+		kasan_remove_pte_table(pte, addr, next);
+		kasan_free_pte(pte_offset_kernel(pmd, 0), pmd);
+	}
+}
+
+static void kasan_remove_pud_table(pud_t *pud, unsigned long addr,
+				unsigned long end)
+{
+	unsigned long next;
+
+	for (; addr < end; addr = next, pud++) {
+		pmd_t *pmd, *pmd_base;
+
+		next = pud_addr_end(addr, end);
+
+		if (!pud_present(*pud))
+			continue;
+
+		if (kasan_pmd_table(*pud)) {
+			if (IS_ALIGNED(addr, PUD_SIZE) &&
+			    IS_ALIGNED(next, PUD_SIZE))
+				pud_clear(pud);
+			continue;
+		}
+		pmd = pmd_offset(pud, addr);
+		pmd_base = pmd_offset(pud, 0);
+		kasan_remove_pmd_table(pmd, addr, next);
+		kasan_free_pmd(pmd_base, pud);
+	}
+}
+
+static void kasan_remove_p4d_table(p4d_t *p4d, unsigned long addr,
+				unsigned long end)
+{
+	unsigned long next;
+
+	for (; addr < end; addr = next, p4d++) {
+		pud_t *pud;
+
+		next = p4d_addr_end(addr, end);
+
+		if (!p4d_present(*p4d))
+			continue;
+
+		if (kasan_pud_table(*p4d)) {
+			if (IS_ALIGNED(addr, P4D_SIZE) &&
+			    IS_ALIGNED(next, P4D_SIZE))
+				p4d_clear(p4d);
+			continue;
+		}
+		pud = pud_offset(p4d, addr);
+		kasan_remove_pud_table(pud, addr, next);
+		kasan_free_pud(pud_offset(p4d, 0), p4d);
+	}
+}
+
+void kasan_remove_zero_shadow(void *start, unsigned long size)
+{
+	unsigned long addr, end, next;
+	pgd_t *pgd;
+
+	addr = (unsigned long)kasan_mem_to_shadow(start);
+	end = addr + (size >> KASAN_SHADOW_SCALE_SHIFT);
+
+	if (WARN_ON((unsigned long)start %
+			(KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) ||
+	    WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)))
+		return;
+
+	for (; addr < end; addr = next) {
+		p4d_t *p4d;
+
+		next = pgd_addr_end(addr, end);
+
+		pgd = pgd_offset_k(addr);
+		if (!pgd_present(*pgd))
+			continue;
+
+		if (kasan_p4d_table(*pgd)) {
+			if (IS_ALIGNED(addr, PGDIR_SIZE) &&
+			    IS_ALIGNED(next, PGDIR_SIZE))
+				pgd_clear(pgd);
+			continue;
+		}
+
+		p4d = p4d_offset(pgd, addr);
+		kasan_remove_p4d_table(p4d, addr, next);
+		kasan_free_p4d(p4d_offset(pgd, 0), pgd);
+	}
+}
+
+int kasan_add_zero_shadow(void *start, unsigned long size)
+{
+	int ret;
+	void *shadow_start, *shadow_end;
+
+	shadow_start = kasan_mem_to_shadow(start);
+	shadow_end = shadow_start + (size >> KASAN_SHADOW_SCALE_SHIFT);
+
+	if (WARN_ON((unsigned long)start %
+			(KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) ||
+	    WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)))
+		return -EINVAL;
+
+	ret = kasan_populate_zero_shadow(shadow_start, shadow_end);
+	if (ret)
+		kasan_remove_zero_shadow(shadow_start,
+					size >> KASAN_SHADOW_SCALE_SHIFT);
+	return ret;
 }
-- 
cgit v1.2.3


From 29ef680ae7c21110af8e6416d84d8a72fc147b14 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 17 Aug 2018 15:47:11 -0700
Subject: memcg, oom: move out_of_memory back to the charge path

Commit 3812c8c8f395 ("mm: memcg: do not trap chargers with full
callstack on OOM") has changed the ENOMEM semantic of memcg charges.
Rather than invoking the oom killer from the charging context it delays
the oom killer to the page fault path (pagefault_out_of_memory).  This
in turn means that many users (e.g.  slab or g-u-p) will get ENOMEM when
the corresponding memcg hits the hard limit and the memcg is is OOM.
This is behavior is inconsistent with !memcg case where the oom killer
is invoked from the allocation context and the allocator keeps retrying
until it succeeds.

The difference in the behavior is user visible.  mmap(MAP_POPULATE)
might result in not fully populated ranges while the mmap return code
doesn't tell that to the userspace.  Random syscalls might fail with
ENOMEM etc.

The primary motivation of the different memcg oom semantic was the
deadlock avoidance.  Things have changed since then, though.  We have an
async oom teardown by the oom reaper now and so we do not have to rely
on the victim to tear down its memory anymore.  Therefore we can return
to the original semantic as long as the memcg oom killer is not handed
over to the users space.

There is still one thing to be careful about here though.  If the oom
killer is not able to make any forward progress - e.g.  because there is
no eligible task to kill - then we have to bail out of the charge path
to prevent from same class of deadlocks.  We have basically two options
here.  Either we fail the charge with ENOMEM or force the charge and
allow overcharge.  The first option has been considered more harmful
than useful because rare inconsistencies in the ENOMEM behavior is hard
to test for and error prone.  Basically the same reason why the page
allocator doesn't fail allocations under such conditions.  The later
might allow runaways but those should be really unlikely unless somebody
misconfigures the system.  E.g.  allowing to migrate tasks away from the
memcg to a different unlimited memcg with move_charge_at_immigrate
disabled.

Link: http://lkml.kernel.org/r/20180628151101.25307-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 16 +++++-----
 include/linux/sched.h      |  2 +-
 mm/memcontrol.c            | 75 ++++++++++++++++++++++++++++++++++++----------
 mm/memory.c                |  4 +--
 4 files changed, 71 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 50e3e807b427..57a202f31683 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -507,16 +507,16 @@ unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
 void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 				struct task_struct *p);
 
-static inline void mem_cgroup_oom_enable(void)
+static inline void mem_cgroup_enter_user_fault(void)
 {
-	WARN_ON(current->memcg_may_oom);
-	current->memcg_may_oom = 1;
+	WARN_ON(current->in_user_fault);
+	current->in_user_fault = 1;
 }
 
-static inline void mem_cgroup_oom_disable(void)
+static inline void mem_cgroup_exit_user_fault(void)
 {
-	WARN_ON(!current->memcg_may_oom);
-	current->memcg_may_oom = 0;
+	WARN_ON(!current->in_user_fault);
+	current->in_user_fault = 0;
 }
 
 static inline bool task_in_memcg_oom(struct task_struct *p)
@@ -961,11 +961,11 @@ static inline void mem_cgroup_handle_over_high(void)
 {
 }
 
-static inline void mem_cgroup_oom_enable(void)
+static inline void mem_cgroup_enter_user_fault(void)
 {
 }
 
-static inline void mem_cgroup_oom_disable(void)
+static inline void mem_cgroup_exit_user_fault(void)
 {
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1827f4a7a6de..066a2c328653 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -722,7 +722,7 @@ struct task_struct {
 	unsigned			restore_sigmask:1;
 #endif
 #ifdef CONFIG_MEMCG
-	unsigned			memcg_may_oom:1;
+	unsigned			in_user_fault:1;
 #ifndef CONFIG_SLOB
 	unsigned			memcg_kmem_skip_account:1;
 #endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c071af193986..d6724bed57d8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1534,28 +1534,53 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 		__wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
 }
 
-static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
+enum oom_status {
+	OOM_SUCCESS,
+	OOM_FAILED,
+	OOM_ASYNC,
+	OOM_SKIPPED
+};
+
+static enum oom_status mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-	if (!current->memcg_may_oom || order > PAGE_ALLOC_COSTLY_ORDER)
-		return;
+	if (order > PAGE_ALLOC_COSTLY_ORDER)
+		return OOM_SKIPPED;
+
 	/*
 	 * We are in the middle of the charge context here, so we
 	 * don't want to block when potentially sitting on a callstack
 	 * that holds all kinds of filesystem and mm locks.
 	 *
-	 * Also, the caller may handle a failed allocation gracefully
-	 * (like optional page cache readahead) and so an OOM killer
-	 * invocation might not even be necessary.
+	 * cgroup1 allows disabling the OOM killer and waiting for outside
+	 * handling until the charge can succeed; remember the context and put
+	 * the task to sleep at the end of the page fault when all locks are
+	 * released.
+	 *
+	 * On the other hand, in-kernel OOM killer allows for an async victim
+	 * memory reclaim (oom_reaper) and that means that we are not solely
+	 * relying on the oom victim to make a forward progress and we can
+	 * invoke the oom killer here.
 	 *
-	 * That's why we don't do anything here except remember the
-	 * OOM context and then deal with it at the end of the page
-	 * fault when the stack is unwound, the locks are released,
-	 * and when we know whether the fault was overall successful.
+	 * Please note that mem_cgroup_out_of_memory might fail to find a
+	 * victim and then we have to bail out from the charge path.
 	 */
-	css_get(&memcg->css);
-	current->memcg_in_oom = memcg;
-	current->memcg_oom_gfp_mask = mask;
-	current->memcg_oom_order = order;
+	if (memcg->oom_kill_disable) {
+		if (!current->in_user_fault)
+			return OOM_SKIPPED;
+		css_get(&memcg->css);
+		current->memcg_in_oom = memcg;
+		current->memcg_oom_gfp_mask = mask;
+		current->memcg_oom_order = order;
+
+		return OOM_ASYNC;
+	}
+
+	if (mem_cgroup_out_of_memory(memcg, mask, order))
+		return OOM_SUCCESS;
+
+	WARN(1,"Memory cgroup charge failed because of no reclaimable memory! "
+		"This looks like a misconfiguration or a kernel bug.");
+	return OOM_FAILED;
 }
 
 /**
@@ -1950,6 +1975,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	unsigned long nr_reclaimed;
 	bool may_swap = true;
 	bool drained = false;
+	bool oomed = false;
+	enum oom_status oom_status;
 
 	if (mem_cgroup_is_root(memcg))
 		return 0;
@@ -2037,6 +2064,9 @@ retry:
 	if (nr_retries--)
 		goto retry;
 
+	if (gfp_mask & __GFP_RETRY_MAYFAIL && oomed)
+		goto nomem;
+
 	if (gfp_mask & __GFP_NOFAIL)
 		goto force;
 
@@ -2045,8 +2075,23 @@ retry:
 
 	memcg_memory_event(mem_over_limit, MEMCG_OOM);
 
-	mem_cgroup_oom(mem_over_limit, gfp_mask,
+	/*
+	 * keep retrying as long as the memcg oom killer is able to make
+	 * a forward progress or bypass the charge if the oom killer
+	 * couldn't make any progress.
+	 */
+	oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask,
 		       get_order(nr_pages * PAGE_SIZE));
+	switch (oom_status) {
+	case OOM_SUCCESS:
+		nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+		oomed = true;
+		goto retry;
+	case OOM_FAILED:
+		goto force;
+	default:
+		goto nomem;
+	}
 nomem:
 	if (!(gfp_mask & __GFP_NOFAIL))
 		return -ENOMEM;
diff --git a/mm/memory.c b/mm/memory.c
index 175f344e1523..ae2ec887508b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4153,7 +4153,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	 * space.  Kernel faults are handled more gracefully.
 	 */
 	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_oom_enable();
+		mem_cgroup_enter_user_fault();
 
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
@@ -4161,7 +4161,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		ret = __handle_mm_fault(vma, address, flags);
 
 	if (flags & FAULT_FLAG_USER) {
-		mem_cgroup_oom_disable();
+		mem_cgroup_exit_user_fault();
 		/*
 		 * The task may have entered a memcg OOM situation but
 		 * if the allocation error was handled gracefully (no
-- 
cgit v1.2.3


From 84c07d11aa619c6d24c682f469b10f344f0c02aa Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:25 -0700
Subject: mm: introduce CONFIG_MEMCG_KMEM as combination of CONFIG_MEMCG &&
 !CONFIG_SLOB

Introduce new config option, which is used to replace repeating
CONFIG_MEMCG && !CONFIG_SLOB pattern.  Next patches add a little more
memcg+kmem related code, so let's keep the defines more clearly.

Link: http://lkml.kernel.org/r/153063053670.1818.15013136946600481138.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list_lru.h   |  4 ++--
 include/linux/memcontrol.h |  6 +++---
 include/linux/sched.h      |  2 +-
 include/linux/slab.h       |  2 +-
 init/Kconfig               |  5 +++++
 mm/list_lru.c              |  8 ++++----
 mm/memcontrol.c            | 16 ++++++++--------
 mm/slab.h                  |  6 +++---
 mm/slab_common.c           |  8 ++++----
 9 files changed, 31 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 96def9d15b1b..2d23b5b745be 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -42,7 +42,7 @@ struct list_lru_node {
 	spinlock_t		lock;
 	/* global list, used for the root cgroup in cgroup aware lrus */
 	struct list_lru_one	lru;
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 	/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
 	struct list_lru_memcg	__rcu *memcg_lrus;
 #endif
@@ -51,7 +51,7 @@ struct list_lru_node {
 
 struct list_lru {
 	struct list_lru_node	*node;
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 	struct list_head	list;
 #endif
 };
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 57a202f31683..f3c026df7443 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -271,7 +271,7 @@ struct mem_cgroup {
 	bool			tcpmem_active;
 	int			tcpmem_pressure;
 
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
         /* Index in the kmem_cache->memcg_params.memcg_caches array */
 	int kmemcg_id;
 	enum memcg_kmem_state kmem_state;
@@ -1231,7 +1231,7 @@ int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 int memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
 void memcg_kmem_uncharge(struct page *page, int order);
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 extern struct static_key_false memcg_kmem_enabled_key;
 extern struct workqueue_struct *memcg_kmem_cache_wq;
 
@@ -1284,6 +1284,6 @@ static inline void memcg_put_cache_ids(void)
 {
 }
 
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 066a2c328653..789923fbee3a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -723,7 +723,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_MEMCG
 	unsigned			in_user_fault:1;
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 	unsigned			memcg_kmem_skip_account:1;
 #endif
 #endif
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 14e3fe4bd6a1..ed9cbddeb4a6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -97,7 +97,7 @@
 # define SLAB_FAILSLAB		0
 #endif
 /* Account to memcg */
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 # define SLAB_ACCOUNT		((slab_flags_t __force)0x04000000U)
 #else
 # define SLAB_ACCOUNT		0
diff --git a/init/Kconfig b/init/Kconfig
index 4dc783023e43..9bd50ba8253f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -708,6 +708,11 @@ config MEMCG_SWAP_ENABLED
 	  select this option (if, for some reason, they need to disable it
 	  then swapaccount=0 does the trick).
 
+config MEMCG_KMEM
+	bool
+	depends on MEMCG && !SLOB
+	default y
+
 config BLK_CGROUP
 	bool "IO controller"
 	depends on BLOCK
diff --git a/mm/list_lru.c b/mm/list_lru.c
index b65e0b9b0646..c5217d84c6e1 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -12,7 +12,7 @@
 #include <linux/mutex.h>
 #include <linux/memcontrol.h>
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 static LIST_HEAD(list_lrus);
 static DEFINE_MUTEX(list_lrus_mutex);
 
@@ -103,7 +103,7 @@ list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
 {
 	return &nlru->lru;
 }
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 bool list_lru_add(struct list_lru *lru, struct list_head *item)
 {
@@ -284,7 +284,7 @@ static void init_one_lru(struct list_lru_one *l)
 	l->nr_items = 0;
 }
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
 					  int begin, int end)
 {
@@ -543,7 +543,7 @@ static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
 static void memcg_destroy_list_lru(struct list_lru *lru)
 {
 }
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
 		    struct lock_class_key *key)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d6724bed57d8..2f00b455080f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -251,7 +251,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 	return (memcg == root_mem_cgroup);
 }
 
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
  * The main reason for not using cgroup id for this:
@@ -305,7 +305,7 @@ EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 struct workqueue_struct *memcg_kmem_cache_wq;
 
-#endif /* !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 /**
  * mem_cgroup_css_from_page - css of the memcg associated with a page
@@ -2215,7 +2215,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
 		unlock_page_lru(page, isolated);
 }
 
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 static int memcg_alloc_cache_id(void)
 {
 	int id, size;
@@ -2480,7 +2480,7 @@ void memcg_kmem_uncharge(struct page *page, int order)
 
 	css_put_many(&memcg->css, nr_pages);
 }
-#endif /* !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
@@ -2875,7 +2875,7 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	}
 }
 
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 static int memcg_online_kmem(struct mem_cgroup *memcg)
 {
 	int memcg_id;
@@ -2975,7 +2975,7 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 static void memcg_free_kmem(struct mem_cgroup *memcg)
 {
 }
-#endif /* !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 static int memcg_update_kmem_max(struct mem_cgroup *memcg,
 				 unsigned long max)
@@ -4279,7 +4279,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	INIT_LIST_HEAD(&memcg->event_list);
 	spin_lock_init(&memcg->event_list_lock);
 	memcg->socket_pressure = jiffies;
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 	memcg->kmemcg_id = -1;
 #endif
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -6119,7 +6119,7 @@ static int __init mem_cgroup_init(void)
 {
 	int cpu, node;
 
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 	/*
 	 * Kmem cache creation is mostly done with the slab_mutex held,
 	 * so use a workqueue with limited concurrency to avoid stalling
diff --git a/mm/slab.h b/mm/slab.h
index 68bdf498da3b..58c6c1c2a78e 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -203,7 +203,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
 void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
 int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 
 /* List of all root caches. */
 extern struct list_head		slab_root_caches;
@@ -296,7 +296,7 @@ extern void memcg_link_cache(struct kmem_cache *s);
 extern void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
 				void (*deact_fn)(struct kmem_cache *));
 
-#else /* CONFIG_MEMCG && !CONFIG_SLOB */
+#else /* CONFIG_MEMCG_KMEM */
 
 /* If !memcg, all caches are root. */
 #define slab_root_caches	slab_caches
@@ -351,7 +351,7 @@ static inline void memcg_link_cache(struct kmem_cache *s)
 {
 }
 
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
 {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2296caf87bfb..fea3376f9816 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -127,7 +127,7 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
 	return i;
 }
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 
 LIST_HEAD(slab_root_caches);
 
@@ -256,7 +256,7 @@ static inline void destroy_memcg_params(struct kmem_cache *s)
 static inline void memcg_unlink_cache(struct kmem_cache *s)
 {
 }
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 /*
  * Figure out what the alignment of the objects will be given a set of
@@ -584,7 +584,7 @@ static int shutdown_cache(struct kmem_cache *s)
 	return 0;
 }
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 /*
  * memcg_create_kmem_cache - Create a cache for a memory cgroup.
  * @memcg: The memory cgroup the new cache is for.
@@ -861,7 +861,7 @@ static inline int shutdown_memcg_caches(struct kmem_cache *s)
 static inline void flush_memcg_workqueue(struct kmem_cache *s)
 {
 }
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 void slab_kmem_cache_release(struct kmem_cache *s)
 {
-- 
cgit v1.2.3


From b4c2b231c3ba155623591fb6301ed97b95e1c039 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:29 -0700
Subject: mm: assign id to every memcg-aware shrinker

Introduce shrinker::id number, which is used to enumerate memcg-aware
shrinkers.  The number start from 0, and the code tries to maintain it
as small as possible.

This will be used to represent a memcg-aware shrinkers in memcg
shrinkers map.

Since all memcg-aware shrinkers are based on list_lru, which is
per-memcg in case of !CONFIG_MEMCG_KMEM only, the new functionality will
be under this config option.

[ktkhai@virtuozzo.com: v9]
  Link: http://lkml.kernel.org/r/153112546435.4097.10607140323811756557.stgit@localhost.localdomain
Link: http://lkml.kernel.org/r/153063054586.1818.6041047871606697364.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shrinker.h |  4 +++
 mm/vmscan.c              | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 6794490f25b2..7ca9c18cf130 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -66,6 +66,10 @@ struct shrinker {
 
 	/* These are for internal use */
 	struct list_head list;
+#ifdef CONFIG_MEMCG_KMEM
+	/* ID in shrinker_idr */
+	int id;
+#endif
 	/* objs pending delete, per node */
 	atomic_long_t *nr_deferred;
 };
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a00d94530e57..5cb4f779ea4a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -169,6 +169,50 @@ unsigned long vm_total_pages;
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
+#ifdef CONFIG_MEMCG_KMEM
+static DEFINE_IDR(shrinker_idr);
+static int shrinker_nr_max;
+
+static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+{
+	int id, ret = -ENOMEM;
+
+	down_write(&shrinker_rwsem);
+	/* This may call shrinker, so it must use down_read_trylock() */
+	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
+	if (id < 0)
+		goto unlock;
+
+	if (id >= shrinker_nr_max)
+		shrinker_nr_max = id + 1;
+	shrinker->id = id;
+	ret = 0;
+unlock:
+	up_write(&shrinker_rwsem);
+	return ret;
+}
+
+static void unregister_memcg_shrinker(struct shrinker *shrinker)
+{
+	int id = shrinker->id;
+
+	BUG_ON(id < 0);
+
+	down_write(&shrinker_rwsem);
+	idr_remove(&shrinker_idr, id);
+	up_write(&shrinker_rwsem);
+}
+#else /* CONFIG_MEMCG_KMEM */
+static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+{
+	return 0;
+}
+
+static void unregister_memcg_shrinker(struct shrinker *shrinker)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 #ifdef CONFIG_MEMCG
 static bool global_reclaim(struct scan_control *sc)
 {
@@ -313,11 +357,28 @@ int prealloc_shrinker(struct shrinker *shrinker)
 	shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
 	if (!shrinker->nr_deferred)
 		return -ENOMEM;
+
+	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
+		if (prealloc_memcg_shrinker(shrinker))
+			goto free_deferred;
+	}
+
 	return 0;
+
+free_deferred:
+	kfree(shrinker->nr_deferred);
+	shrinker->nr_deferred = NULL;
+	return -ENOMEM;
 }
 
 void free_prealloced_shrinker(struct shrinker *shrinker)
 {
+	if (!shrinker->nr_deferred)
+		return;
+
+	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
+		unregister_memcg_shrinker(shrinker);
+
 	kfree(shrinker->nr_deferred);
 	shrinker->nr_deferred = NULL;
 }
@@ -347,6 +408,8 @@ void unregister_shrinker(struct shrinker *shrinker)
 {
 	if (!shrinker->nr_deferred)
 		return;
+	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
+		unregister_memcg_shrinker(shrinker);
 	down_write(&shrinker_rwsem);
 	list_del(&shrinker->list);
 	up_write(&shrinker_rwsem);
-- 
cgit v1.2.3


From 0a4465d340282f92719f4e3a56545a848e638d15 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:37 -0700
Subject: mm, memcg: assign memcg-aware shrinkers bitmap to memcg

Imagine a big node with many cpus, memory cgroups and containers.  Let
we have 200 containers, every container has 10 mounts, and 10 cgroups.
All container tasks don't touch foreign containers mounts.  If there is
intensive pages write, and global reclaim happens, a writing task has to
iterate over all memcgs to shrink slab, before it's able to go to
shrink_page_list().

Iteration over all the memcg slabs is very expensive: the task has to
visit 200 * 10 = 2000 shrinkers for every memcg, and since there are
2000 memcgs, the total calls are 2000 * 2000 = 4000000.

So, the shrinker makes 4 million do_shrink_slab() calls just to try to
isolate SWAP_CLUSTER_MAX pages in one of the actively writing memcg via
shrink_page_list().  I've observed a node spending almost 100% in
kernel, making useless iteration over already shrinked slab.

This patch adds bitmap of memcg-aware shrinkers to memcg.  The size of
the bitmap depends on bitmap_nr_ids, and during memcg life it's
maintained to be enough to fit bitmap_nr_ids shrinkers.  Every bit in
the map is related to corresponding shrinker id.

Next patches will maintain set bit only for really charged memcg.  This
will allow shrink_slab() to increase its performance in significant way.
See the last patch for the numbers.

[ktkhai@virtuozzo.com: v9]
  Link: http://lkml.kernel.org/r/153112549031.4097.3576147070498769979.stgit@localhost.localdomain
[ktkhai@virtuozzo.com: add comment to mem_cgroup_css_online()]
  Link: http://lkml.kernel.org/r/521f9e5f-c436-b388-fe83-4dc870bfb489@virtuozzo.com
Link: http://lkml.kernel.org/r/153063056619.1818.12550500883688681076.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  14 +++++
 mm/memcontrol.c            | 124 +++++++++++++++++++++++++++++++++++++++++++++
 mm/vmscan.c                |   8 ++-
 3 files changed, 145 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f3c026df7443..2cccbb9e1b3e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -111,6 +111,15 @@ struct lruvec_stat {
 	long count[NR_VM_NODE_STAT_ITEMS];
 };
 
+/*
+ * Bitmap of shrinker::id corresponding to memcg-aware shrinkers,
+ * which have elements charged to this memcg.
+ */
+struct memcg_shrinker_map {
+	struct rcu_head rcu;
+	unsigned long map[0];
+};
+
 /*
  * per-zone information in memory controller.
  */
@@ -124,6 +133,9 @@ struct mem_cgroup_per_node {
 
 	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
 
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_shrinker_map __rcu	*shrinker_map;
+#endif
 	struct rb_node		tree_node;	/* RB tree node */
 	unsigned long		usage_in_excess;/* Set to the value by which */
 						/* the soft limit is exceeded*/
@@ -1262,6 +1274,8 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
+extern int memcg_expand_shrinker_maps(int new_id);
+
 #else
 #define for_each_memcg_cache_index(_idx)	\
 	for (; NULL; )
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 313355dddf66..827c9e87ca08 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -320,6 +320,119 @@ EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 struct workqueue_struct *memcg_kmem_cache_wq;
 
+static int memcg_shrinker_map_size;
+static DEFINE_MUTEX(memcg_shrinker_map_mutex);
+
+static void memcg_free_shrinker_map_rcu(struct rcu_head *head)
+{
+	kvfree(container_of(head, struct memcg_shrinker_map, rcu));
+}
+
+static int memcg_expand_one_shrinker_map(struct mem_cgroup *memcg,
+					 int size, int old_size)
+{
+	struct memcg_shrinker_map *new, *old;
+	int nid;
+
+	lockdep_assert_held(&memcg_shrinker_map_mutex);
+
+	for_each_node(nid) {
+		old = rcu_dereference_protected(
+			mem_cgroup_nodeinfo(memcg, nid)->shrinker_map, true);
+		/* Not yet online memcg */
+		if (!old)
+			return 0;
+
+		new = kvmalloc(sizeof(*new) + size, GFP_KERNEL);
+		if (!new)
+			return -ENOMEM;
+
+		/* Set all old bits, clear all new bits */
+		memset(new->map, (int)0xff, old_size);
+		memset((void *)new->map + old_size, 0, size - old_size);
+
+		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_map, new);
+		call_rcu(&old->rcu, memcg_free_shrinker_map_rcu);
+	}
+
+	return 0;
+}
+
+static void memcg_free_shrinker_maps(struct mem_cgroup *memcg)
+{
+	struct mem_cgroup_per_node *pn;
+	struct memcg_shrinker_map *map;
+	int nid;
+
+	if (mem_cgroup_is_root(memcg))
+		return;
+
+	for_each_node(nid) {
+		pn = mem_cgroup_nodeinfo(memcg, nid);
+		map = rcu_dereference_protected(pn->shrinker_map, true);
+		if (map)
+			kvfree(map);
+		rcu_assign_pointer(pn->shrinker_map, NULL);
+	}
+}
+
+static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
+{
+	struct memcg_shrinker_map *map;
+	int nid, size, ret = 0;
+
+	if (mem_cgroup_is_root(memcg))
+		return 0;
+
+	mutex_lock(&memcg_shrinker_map_mutex);
+	size = memcg_shrinker_map_size;
+	for_each_node(nid) {
+		map = kvzalloc(sizeof(*map) + size, GFP_KERNEL);
+		if (!map) {
+			memcg_free_shrinker_maps(memcg);
+			ret = -ENOMEM;
+			break;
+		}
+		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_map, map);
+	}
+	mutex_unlock(&memcg_shrinker_map_mutex);
+
+	return ret;
+}
+
+int memcg_expand_shrinker_maps(int new_id)
+{
+	int size, old_size, ret = 0;
+	struct mem_cgroup *memcg;
+
+	size = DIV_ROUND_UP(new_id + 1, BITS_PER_LONG) * sizeof(unsigned long);
+	old_size = memcg_shrinker_map_size;
+	if (size <= old_size)
+		return 0;
+
+	mutex_lock(&memcg_shrinker_map_mutex);
+	if (!root_mem_cgroup)
+		goto unlock;
+
+	for_each_mem_cgroup(memcg) {
+		if (mem_cgroup_is_root(memcg))
+			continue;
+		ret = memcg_expand_one_shrinker_map(memcg, size, old_size);
+		if (ret)
+			goto unlock;
+	}
+unlock:
+	if (!ret)
+		memcg_shrinker_map_size = size;
+	mutex_unlock(&memcg_shrinker_map_mutex);
+	return ret;
+}
+#else /* CONFIG_MEMCG_KMEM */
+static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
+{
+	return 0;
+}
+static void memcg_free_shrinker_maps(struct mem_cgroup *memcg) { }
 #endif /* CONFIG_MEMCG_KMEM */
 
 /**
@@ -4356,6 +4469,16 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
+	/*
+	 * A memcg must be visible for memcg_expand_shrinker_maps()
+	 * by the time the maps are allocated. So, we allocate maps
+	 * here, when for_each_mem_cgroup() can't skip it.
+	 */
+	if (memcg_alloc_shrinker_maps(memcg)) {
+		mem_cgroup_id_remove(memcg);
+		return -ENOMEM;
+	}
+
 	/* Online state pins memcg ID, memcg ID pins CSS */
 	atomic_set(&memcg->id.ref, 1);
 	css_get(css);
@@ -4408,6 +4531,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	vmpressure_cleanup(&memcg->vmpressure);
 	cancel_work_sync(&memcg->high_work);
 	mem_cgroup_remove_from_trees(memcg);
+	memcg_free_shrinker_maps(memcg);
 	memcg_free_kmem(memcg);
 	mem_cgroup_free(memcg);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5cb4f779ea4a..db0970ba340d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -183,8 +183,14 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
 	if (id < 0)
 		goto unlock;
 
-	if (id >= shrinker_nr_max)
+	if (id >= shrinker_nr_max) {
+		if (memcg_expand_shrinker_maps(id)) {
+			idr_remove(&shrinker_idr, id);
+			goto unlock;
+		}
+
 		shrinker_nr_max = id + 1;
+	}
 	shrinker->id = id;
 	ret = 0;
 unlock:
-- 
cgit v1.2.3


From c92e8e10cafeaaedc84f23fed1bfcf9cf07399c2 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:50 -0700
Subject: fs: propagate shrinker::id to list_lru

Add list_lru::shrinker_id field and populate it by registered shrinker
id.

This will be used to set correct bit in memcg shrinkers map by lru code
in next patches, after there appeared the first related to memcg element
in list_lru.

Link: http://lkml.kernel.org/r/153063059758.1818.14866596416857717800.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/super.c               |  4 ++--
 include/linux/list_lru.h | 14 +++++++++-----
 mm/list_lru.c            | 11 ++++++++++-
 mm/workingset.c          |  3 ++-
 4 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/super.c b/fs/super.c
index 78227c4ddb21..f5f96e52e0cd 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -261,9 +261,9 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 	s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
 	if (prealloc_shrinker(&s->s_shrink))
 		goto fail;
-	if (list_lru_init_memcg(&s->s_dentry_lru))
+	if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink))
 		goto fail;
-	if (list_lru_init_memcg(&s->s_inode_lru))
+	if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink))
 		goto fail;
 	return s;
 
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 2d23b5b745be..9e75bb33766b 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -53,16 +53,20 @@ struct list_lru {
 	struct list_lru_node	*node;
 #ifdef CONFIG_MEMCG_KMEM
 	struct list_head	list;
+	int			shrinker_id;
 #endif
 };
 
 void list_lru_destroy(struct list_lru *lru);
 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
-		    struct lock_class_key *key);
-
-#define list_lru_init(lru)		__list_lru_init((lru), false, NULL)
-#define list_lru_init_key(lru, key)	__list_lru_init((lru), false, (key))
-#define list_lru_init_memcg(lru)	__list_lru_init((lru), true, NULL)
+		    struct lock_class_key *key, struct shrinker *shrinker);
+
+#define list_lru_init(lru)				\
+	__list_lru_init((lru), false, NULL, NULL)
+#define list_lru_init_key(lru, key)			\
+	__list_lru_init((lru), false, (key), NULL)
+#define list_lru_init_memcg(lru, shrinker)		\
+	__list_lru_init((lru), true, NULL, shrinker)
 
 int memcg_update_all_list_lrus(int num_memcgs);
 void memcg_drain_all_list_lrus(int src_idx, int dst_idx);
diff --git a/mm/list_lru.c b/mm/list_lru.c
index c5217d84c6e1..5aebbb9b2f5b 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -546,12 +546,18 @@ static void memcg_destroy_list_lru(struct list_lru *lru)
 #endif /* CONFIG_MEMCG_KMEM */
 
 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
-		    struct lock_class_key *key)
+		    struct lock_class_key *key, struct shrinker *shrinker)
 {
 	int i;
 	size_t size = sizeof(*lru->node) * nr_node_ids;
 	int err = -ENOMEM;
 
+#ifdef CONFIG_MEMCG_KMEM
+	if (shrinker)
+		lru->shrinker_id = shrinker->id;
+	else
+		lru->shrinker_id = -1;
+#endif
 	memcg_get_cache_ids();
 
 	lru->node = kzalloc(size, GFP_KERNEL);
@@ -594,6 +600,9 @@ void list_lru_destroy(struct list_lru *lru)
 	kfree(lru->node);
 	lru->node = NULL;
 
+#ifdef CONFIG_MEMCG_KMEM
+	lru->shrinker_id = -1;
+#endif
 	memcg_put_cache_ids();
 }
 EXPORT_SYMBOL_GPL(list_lru_destroy);
diff --git a/mm/workingset.c b/mm/workingset.c
index 4e0b2523aae2..cd0b2ae615e4 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -526,7 +526,8 @@ static int __init workingset_init(void)
 	ret = prealloc_shrinker(&workingset_shadow_shrinker);
 	if (ret)
 		goto err;
-	ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key);
+	ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key,
+			      &workingset_shadow_shrinker);
 	if (ret)
 		goto err_list_lru;
 	register_shrinker_prepared(&workingset_shadow_shrinker);
-- 
cgit v1.2.3


From 9bec5c35bfa3d41b046594b5890f772ed737f1fd Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:58 -0700
Subject: mm/list_lru: pass dst_memcg argument to memcg_drain_list_lru_node()

This is just refactoring to allow the next patches to have dst_memcg
pointer in memcg_drain_list_lru_node().

Link: http://lkml.kernel.org/r/153063062118.1818.2761273817739499749.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list_lru.h |  2 +-
 mm/list_lru.c            | 11 ++++++-----
 mm/memcontrol.c          |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 9e75bb33766b..d9c16f2f2f00 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -69,7 +69,7 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
 	__list_lru_init((lru), true, NULL, shrinker)
 
 int memcg_update_all_list_lrus(int num_memcgs);
-void memcg_drain_all_list_lrus(int src_idx, int dst_idx);
+void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg);
 
 /**
  * list_lru_add: add an element to the lru list's tail
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 1fc5be746e69..5384cda08984 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -502,8 +502,9 @@ fail:
 }
 
 static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
-				      int src_idx, int dst_idx)
+				      int src_idx, struct mem_cgroup *dst_memcg)
 {
+	int dst_idx = dst_memcg->kmemcg_id;
 	struct list_lru_one *src, *dst;
 
 	/*
@@ -523,7 +524,7 @@ static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
 }
 
 static void memcg_drain_list_lru(struct list_lru *lru,
-				 int src_idx, int dst_idx)
+				 int src_idx, struct mem_cgroup *dst_memcg)
 {
 	int i;
 
@@ -531,16 +532,16 @@ static void memcg_drain_list_lru(struct list_lru *lru,
 		return;
 
 	for_each_node(i)
-		memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
+		memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_memcg);
 }
 
-void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
+void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
 {
 	struct list_lru *lru;
 
 	mutex_lock(&list_lrus_mutex);
 	list_for_each_entry(lru, &list_lrus, list)
-		memcg_drain_list_lru(lru, src_idx, dst_idx);
+		memcg_drain_list_lru(lru, src_idx, dst_memcg);
 	mutex_unlock(&list_lrus_mutex);
 }
 #else
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 827c9e87ca08..a35dc901424d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3060,7 +3060,7 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 	}
 	rcu_read_unlock();
 
-	memcg_drain_all_list_lrus(kmemcg_id, parent->kmemcg_id);
+	memcg_drain_all_list_lrus(kmemcg_id, parent);
 
 	memcg_free_cache_id(kmemcg_id);
 }
-- 
cgit v1.2.3


From dfd2f10ccfd7e6bd2a096eaf42e76a7229776322 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:48:06 -0700
Subject: mm/memcontrol.c: export mem_cgroup_is_root()

This will be used in next patch.

Link: http://lkml.kernel.org/r/153063064347.1818.1987011484100392706.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 10 ++++++++++
 mm/memcontrol.c            |  5 -----
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2cccbb9e1b3e..258c8a46959a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -318,6 +318,11 @@ struct mem_cgroup {
 
 extern struct mem_cgroup *root_mem_cgroup;
 
+static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
+{
+	return (memcg == root_mem_cgroup);
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
@@ -784,6 +789,11 @@ void mem_cgroup_split_huge_fixup(struct page *head);
 
 struct mem_cgroup;
 
+static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
+{
+	return true;
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
 	return true;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a35dc901424d..d8cd6c39eca5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -261,11 +261,6 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
 	return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
 }
 
-static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
-{
-	return (memcg == root_mem_cgroup);
-}
-
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
-- 
cgit v1.2.3


From fae91d6d8be5e20c47e459dbeb3d43bd5f9486f4 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:48:10 -0700
Subject: mm/list_lru.c: set bit in memcg shrinker bitmap on first list_lru
 item appearance

Introduce set_shrinker_bit() function to set shrinker-related bit in
memcg shrinker bitmap, and set the bit after the first item is added and
in case of reparenting destroyed memcg's items.

This will allow next patch to make shrinkers be called only, in case of
they have charged objects at the moment, and to improve shrink_slab()
performance.

[ktkhai@virtuozzo.com: v9]
  Link: http://lkml.kernel.org/r/153112557572.4097.17315791419810749985.stgit@localhost.localdomain
Link: http://lkml.kernel.org/r/153063065671.1818.15914674956134687268.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  4 ++++
 mm/list_lru.c              | 22 ++++++++++++++++++++--
 mm/memcontrol.c            | 13 +++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 258c8a46959a..0e6c515fb698 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1286,6 +1286,8 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 
 extern int memcg_expand_shrinker_maps(int new_id);
 
+extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+				   int nid, int shrinker_id);
 #else
 #define for_each_memcg_cache_index(_idx)	\
 	for (; NULL; )
@@ -1308,6 +1310,8 @@ static inline void memcg_put_cache_ids(void)
 {
 }
 
+static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+					  int nid, int shrinker_id) { }
 #endif /* CONFIG_MEMCG_KMEM */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/list_lru.c b/mm/list_lru.c
index c6131925ec76..c9bdde9c03d1 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -30,6 +30,11 @@ static void list_lru_unregister(struct list_lru *lru)
 	mutex_unlock(&list_lrus_mutex);
 }
 
+static int lru_shrinker_id(struct list_lru *lru)
+{
+	return lru->shrinker_id;
+}
+
 static inline bool list_lru_memcg_aware(struct list_lru *lru)
 {
 	/*
@@ -93,6 +98,11 @@ static void list_lru_unregister(struct list_lru *lru)
 {
 }
 
+static int lru_shrinker_id(struct list_lru *lru)
+{
+	return -1;
+}
+
 static inline bool list_lru_memcg_aware(struct list_lru *lru)
 {
 	return false;
@@ -118,13 +128,17 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
 {
 	int nid = page_to_nid(virt_to_page(item));
 	struct list_lru_node *nlru = &lru->node[nid];
+	struct mem_cgroup *memcg;
 	struct list_lru_one *l;
 
 	spin_lock(&nlru->lock);
 	if (list_empty(item)) {
-		l = list_lru_from_kmem(nlru, item, NULL);
+		l = list_lru_from_kmem(nlru, item, &memcg);
 		list_add_tail(item, &l->list);
-		l->nr_items++;
+		/* Set shrinker bit if the first element was added */
+		if (!l->nr_items++)
+			memcg_set_shrinker_bit(memcg, nid,
+					       lru_shrinker_id(lru));
 		nlru->nr_items++;
 		spin_unlock(&nlru->lock);
 		return true;
@@ -507,6 +521,7 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
 	struct list_lru_node *nlru = &lru->node[nid];
 	int dst_idx = dst_memcg->kmemcg_id;
 	struct list_lru_one *src, *dst;
+	bool set;
 
 	/*
 	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
@@ -518,7 +533,10 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
 	dst = list_lru_from_memcg_idx(nlru, dst_idx);
 
 	list_splice_init(&src->list, &dst->list);
+	set = (!dst->nr_items && src->nr_items);
 	dst->nr_items += src->nr_items;
+	if (set)
+		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
 	src->nr_items = 0;
 
 	spin_unlock_irq(&nlru->lock);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d8cd6c39eca5..55c010a58535 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -422,6 +422,19 @@ unlock:
 	mutex_unlock(&memcg_shrinker_map_mutex);
 	return ret;
 }
+
+void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
+{
+	if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
+		struct memcg_shrinker_map *map;
+
+		rcu_read_lock();
+		map = rcu_dereference(memcg->nodeinfo[nid]->shrinker_map);
+		set_bit(shrinker_id, map->map);
+		rcu_read_unlock();
+	}
+}
+
 #else /* CONFIG_MEMCG_KMEM */
 static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
 {
-- 
cgit v1.2.3


From 9b996468cfdba09f688f52dba4287de596194613 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:48:21 -0700
Subject: mm: add SHRINK_EMPTY shrinker methods return value

We need to distinguish the situations when shrinker has very small
amount of objects (see vfs_pressure_ratio() called from
super_cache_count()), and when it has no objects at all.  Currently, in
the both of these cases, shrinker::count_objects() returns 0.

The patch introduces new SHRINK_EMPTY return value, which will be used
for "no objects at all" case.  It's is a refactoring mostly, as
SHRINK_EMPTY is replaced by 0 by all callers of do_shrink_slab() in this
patch, and all the magic will happen in further.

Link: http://lkml.kernel.org/r/153063069574.1818.11037751256699341813.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/super.c               |  3 +++
 include/linux/shrinker.h |  7 +++++--
 mm/vmscan.c              | 12 +++++++++---
 mm/workingset.c          |  3 +++
 4 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/super.c b/fs/super.c
index f5f96e52e0cd..7429588d6b49 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -144,6 +144,9 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 	total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
 	total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);
 
+	if (!total_objects)
+		return SHRINK_EMPTY;
+
 	total_objects = vfs_pressure_ratio(total_objects);
 	return total_objects;
 }
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 7ca9c18cf130..b154fd2b084c 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -34,12 +34,15 @@ struct shrink_control {
 };
 
 #define SHRINK_STOP (~0UL)
+#define SHRINK_EMPTY (~0UL - 1)
 /*
  * A callback you can register to apply pressure to ageable caches.
  *
  * @count_objects should return the number of freeable items in the cache. If
- * there are no objects to free or the number of freeable items cannot be
- * determined, it should return 0. No deadlock checks should be done during the
+ * there are no objects to free, it should return SHRINK_EMPTY, while 0 is
+ * returned in cases of the number of freeable items cannot be determined
+ * or shrinker should skip this cache for this time (e.g., their number
+ * is below shrinkable limit). No deadlock checks should be done during the
  * count callback - the shrinker relies on aggregating scan counts that couldn't
  * be executed due to potential deadlocks to be run at a later call when the
  * deadlock condition is no longer pending.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2aa3cb760189..8199e1b9a204 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -456,8 +456,8 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 	long scanned = 0, next_deferred;
 
 	freeable = shrinker->count_objects(shrinker, shrinkctl);
-	if (freeable == 0)
-		return 0;
+	if (freeable == 0 || freeable == SHRINK_EMPTY)
+		return freeable;
 
 	/*
 	 * copy the current shrinker scan count into a local variable
@@ -596,6 +596,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			continue;
 
 		ret = do_shrink_slab(&sc, shrinker, priority);
+		if (ret == SHRINK_EMPTY)
+			ret = 0;
 		freed += ret;
 
 		if (rwsem_is_contended(&shrinker_rwsem)) {
@@ -641,6 +643,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 {
 	struct shrinker *shrinker;
 	unsigned long freed = 0;
+	int ret;
 
 	if (!mem_cgroup_is_root(memcg))
 		return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
@@ -658,7 +661,10 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 		if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
 			sc.nid = 0;
 
-		freed += do_shrink_slab(&sc, shrinker, priority);
+		ret = do_shrink_slab(&sc, shrinker, priority);
+		if (ret == SHRINK_EMPTY)
+			ret = 0;
+		freed += ret;
 		/*
 		 * Bail out if someone want to register a new shrinker to
 		 * prevent the regsitration from being stalled for long periods
diff --git a/mm/workingset.c b/mm/workingset.c
index cd0b2ae615e4..bc72ad029b3e 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -399,6 +399,9 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 	}
 	max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3);
 
+	if (!nodes)
+		return SHRINK_EMPTY;
+
 	if (nodes <= max_nodes)
 		return 0;
 	return nodes - max_nodes;
-- 
cgit v1.2.3


From 6518202970c1052148daaef9a8096711775e43a2 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 17 Aug 2018 15:48:57 -0700
Subject: mm/cma: remove unsupported gfp_mask parameter from cma_alloc()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cma_alloc() doesn't really support gfp flags other than __GFP_NOWARN, so
convert gfp_mask parameter to boolean no_warn parameter.

This will help to avoid giving false feeling that this function supports
standard gfp flags and callers can pass __GFP_ZERO to get zeroed buffer,
what has already been an issue: see commit dd65a941f6ba ("arm64:
dma-mapping: clear buffers allocated with FORCE_CONTIGUOUS flag").

Link: http://lkml.kernel.org/r/20180709122019eucas1p2340da484acfcc932537e6014f4fd2c29~-sqTPJKij2939229392eucas1p2j@eucas1p2.samsung.com
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Michał Nazarewicz <mina86@mina86.com>
Acked-by: Laura Abbott <labbott@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kvm/book3s_hv_builtin.c       | 2 +-
 drivers/s390/char/vmcp.c                   | 2 +-
 drivers/staging/android/ion/ion_cma_heap.c | 2 +-
 include/linux/cma.h                        | 2 +-
 kernel/dma/contiguous.c                    | 3 ++-
 mm/cma.c                                   | 8 ++++----
 mm/cma_debug.c                             | 2 +-
 7 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index d4a3f4da409b..fc6bb9630a9c 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -77,7 +77,7 @@ struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
 	VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
 	return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES),
-			 GFP_KERNEL);
+			 false);
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
 
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 948ce82a7725..0fa1b6b1491a 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -68,7 +68,7 @@ static void vmcp_response_alloc(struct vmcp_session *session)
 	 * anymore the system won't work anyway.
 	 */
 	if (order > 2)
-		page = cma_alloc(vmcp_cma, nr_pages, 0, GFP_KERNEL);
+		page = cma_alloc(vmcp_cma, nr_pages, 0, false);
 	if (page) {
 		session->response = (char *)page_to_phys(page);
 		session->cma_alloc = 1;
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
index 49718c96bf9e..3fafd013d80a 100644
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -39,7 +39,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
 	if (align > CONFIG_CMA_ALIGNMENT)
 		align = CONFIG_CMA_ALIGNMENT;
 
-	pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL);
+	pages = cma_alloc(cma_heap->cma, nr_pages, align, false);
 	if (!pages)
 		return -ENOMEM;
 
diff --git a/include/linux/cma.h b/include/linux/cma.h
index bf90f0bb42bd..190184b5ff32 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -33,7 +33,7 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					const char *name,
 					struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
-			      gfp_t gfp_mask);
+			      bool no_warn);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count);
 
 extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index d987dcd1bd56..19ea5d70150c 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -191,7 +191,8 @@ struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
 	if (align > CONFIG_CMA_ALIGNMENT)
 		align = CONFIG_CMA_ALIGNMENT;
 
-	return cma_alloc(dev_get_cma_area(dev), count, align, gfp_mask);
+	return cma_alloc(dev_get_cma_area(dev), count, align,
+			 gfp_mask & __GFP_NOWARN);
 }
 
 /**
diff --git a/mm/cma.c b/mm/cma.c
index 5809bbe360d7..4cb76121a3ab 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -395,13 +395,13 @@ static inline void cma_debug_show_areas(struct cma *cma) { }
  * @cma:   Contiguous memory region for which the allocation is performed.
  * @count: Requested number of pages.
  * @align: Requested alignment of pages (in PAGE_SIZE order).
- * @gfp_mask:  GFP mask to use during compaction
+ * @no_warn: Avoid printing message about failed allocation
  *
  * This function allocates part of contiguous memory on specific
  * contiguous memory area.
  */
 struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
-		       gfp_t gfp_mask)
+		       bool no_warn)
 {
 	unsigned long mask, offset;
 	unsigned long pfn = -1;
@@ -447,7 +447,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
 		pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
 		mutex_lock(&cma_mutex);
 		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA,
-					 gfp_mask);
+				     GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
 		mutex_unlock(&cma_mutex);
 		if (ret == 0) {
 			page = pfn_to_page(pfn);
@@ -466,7 +466,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
 
 	trace_cma_alloc(pfn, page, count, align);
 
-	if (ret && !(gfp_mask & __GFP_NOWARN)) {
+	if (ret && !no_warn) {
 		pr_err("%s: alloc failed, req-size: %zu pages, ret: %d\n",
 			__func__, count, ret);
 		cma_debug_show_areas(cma);
diff --git a/mm/cma_debug.c b/mm/cma_debug.c
index f23467291cfb..ad6723e9d110 100644
--- a/mm/cma_debug.c
+++ b/mm/cma_debug.c
@@ -139,7 +139,7 @@ static int cma_alloc_mem(struct cma *cma, int count)
 	if (!mem)
 		return -ENOMEM;
 
-	p = cma_alloc(cma, count, 0, GFP_KERNEL);
+	p = cma_alloc(cma, count, 0, false);
 	if (!p) {
 		kfree(mem);
 		return -ENOMEM;
-- 
cgit v1.2.3


From d834c5ab83febf9624ad3b16c3c348aa1e02014c Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 17 Aug 2018 15:49:00 -0700
Subject: kernel/dma: remove unsupported gfp_mask parameter from
 dma_alloc_from_contiguous()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CMA memory allocator doesn't support standard gfp flags for memory
allocation, so there is no point having it as a parameter for
dma_alloc_from_contiguous() function.  Replace it by a boolean no_warn
argument, which covers all the underlaying cma_alloc() function
supports.

This will help to avoid giving false feeling that this function supports
standard gfp flags and callers can pass __GFP_ZERO to get zeroed buffer,
what has already been an issue: see commit dd65a941f6ba ("arm64:
dma-mapping: clear buffers allocated with FORCE_CONTIGUOUS flag").

Link: http://lkml.kernel.org/r/20180709122020eucas1p21a71b092975cb4a3b9954ffc63f699d1~-sqUFoa-h2939329393eucas1p2Y@eucas1p2.samsung.com
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Michał Nazarewicz <mina86@mina86.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/dma-mapping.c      | 5 +++--
 arch/arm64/mm/dma-mapping.c    | 4 ++--
 arch/xtensa/kernel/pci-dma.c   | 2 +-
 drivers/iommu/amd_iommu.c      | 2 +-
 drivers/iommu/intel-iommu.c    | 3 ++-
 include/linux/dma-contiguous.h | 4 ++--
 kernel/dma/contiguous.c        | 7 +++----
 kernel/dma/direct.c            | 3 ++-
 8 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ba0e786c952e..66566472c153 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -594,7 +594,7 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
 	struct page *page;
 	void *ptr = NULL;
 
-	page = dma_alloc_from_contiguous(dev, count, order, gfp);
+	page = dma_alloc_from_contiguous(dev, count, order, gfp & __GFP_NOWARN);
 	if (!page)
 		return NULL;
 
@@ -1299,7 +1299,8 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 		unsigned long order = get_order(size);
 		struct page *page;
 
-		page = dma_alloc_from_contiguous(dev, count, order, gfp);
+		page = dma_alloc_from_contiguous(dev, count, order,
+						 gfp & __GFP_NOWARN);
 		if (!page)
 			goto error;
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 61e93f0b5482..072c51fb07d7 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -355,7 +355,7 @@ static int __init atomic_pool_init(void)
 
 	if (dev_get_cma_area(NULL))
 		page = dma_alloc_from_contiguous(NULL, nr_pages,
-						 pool_size_order, GFP_KERNEL);
+						 pool_size_order, false);
 	else
 		page = alloc_pages(GFP_DMA32, pool_size_order);
 
@@ -573,7 +573,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 		struct page *page;
 
 		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
-						 get_order(size), gfp);
+					get_order(size), gfp & __GFP_NOWARN);
 		if (!page)
 			return NULL;
 
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 392b4a80ebc2..a02dc563d290 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -137,7 +137,7 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
 
 	if (gfpflags_allow_blocking(flag))
 		page = dma_alloc_from_contiguous(dev, count, get_order(size),
-						 flag);
+						 flag & __GFP_NOWARN);
 
 	if (!page)
 		page = alloc_pages(flag, get_order(size));
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 596b95c50051..60b2eab29cd8 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2620,7 +2620,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 			return NULL;
 
 		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
-						 get_order(size), flag);
+					get_order(size), flag & __GFP_NOWARN);
 		if (!page)
 			return NULL;
 	}
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 115ff26e9ced..6a237d18fabf 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3758,7 +3758,8 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
 	if (gfpflags_allow_blocking(flags)) {
 		unsigned int count = size >> PAGE_SHIFT;
 
-		page = dma_alloc_from_contiguous(dev, count, order, flags);
+		page = dma_alloc_from_contiguous(dev, count, order,
+						 flags & __GFP_NOWARN);
 		if (page && iommu_no_mapping(dev) &&
 		    page_to_phys(page) + size > dev->coherent_dma_mask) {
 			dma_release_from_contiguous(dev, page, count);
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 3c5a4cb3eb95..f247e8aa5e3d 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -112,7 +112,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 }
 
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-				       unsigned int order, gfp_t gfp_mask);
+				       unsigned int order, bool no_warn);
 bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 				 int count);
 
@@ -145,7 +145,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 
 static inline
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-				       unsigned int order, gfp_t gfp_mask)
+				       unsigned int order, bool no_warn)
 {
 	return NULL;
 }
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 19ea5d70150c..286d82329eb0 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -178,7 +178,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
  * @dev:   Pointer to device for which the allocation is performed.
  * @count: Requested number of pages.
  * @align: Requested alignment of pages (in PAGE_SIZE order).
- * @gfp_mask: GFP flags to use for this allocation.
+ * @no_warn: Avoid printing message about failed allocation.
  *
  * This function allocates memory buffer for specified device. It uses
  * device specific contiguous memory area if available or the default
@@ -186,13 +186,12 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
  * function.
  */
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-				       unsigned int align, gfp_t gfp_mask)
+				       unsigned int align, bool no_warn)
 {
 	if (align > CONFIG_CMA_ALIGNMENT)
 		align = CONFIG_CMA_ALIGNMENT;
 
-	return cma_alloc(dev_get_cma_area(dev), count, align,
-			 gfp_mask & __GFP_NOWARN);
+	return cma_alloc(dev_get_cma_area(dev), count, align, no_warn);
 }
 
 /**
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index c2860c5a9e96..1c35b7b945d0 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -78,7 +78,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 again:
 	/* CMA can be used only in the context which permits sleeping */
 	if (gfpflags_allow_blocking(gfp)) {
-		page = dma_alloc_from_contiguous(dev, count, page_order, gfp);
+		page = dma_alloc_from_contiguous(dev, count, page_order,
+						 gfp & __GFP_NOWARN);
 		if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
 			dma_release_from_contiguous(dev, page, count);
 			page = NULL;
-- 
cgit v1.2.3


From 40d18ebffb3974272a920c41f2d74431152cae98 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Fri, 17 Aug 2018 15:49:07 -0700
Subject: mm/hugetlb: remove gigantic page support for HIGHMEM

This reverts ee8f248d266e ("hugetlb: add phys addr to struct
huge_bootmem_page").

At one time powerpc used this field and supporting code.  However that
was removed with commit 79cc38ded1e1 ("powerpc/mm/hugetlb: Add support
for reserving gigantic huge pages via kernel command line").

There are no users of this field and supporting code, so remove it.

Link: http://lkml.kernel.org/r/20180711195913.1294-1-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Cannon Matthews <cannonmatthews@google.com>
Cc: Becky Bruce <beckyb@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 3 ---
 mm/hugetlb.c            | 9 +--------
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 36fa6a2a82e3..c39d9170a8a0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -348,9 +348,6 @@ struct hstate {
 struct huge_bootmem_page {
 	struct list_head list;
 	struct hstate *hstate;
-#ifdef CONFIG_HIGHMEM
-	phys_addr_t phys;
-#endif
 };
 
 struct page *alloc_huge_page(struct vm_area_struct *vma,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f1bcaae0d73a..4cea30ac5033 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2139,16 +2139,9 @@ static void __init gather_bootmem_prealloc(void)
 	struct huge_bootmem_page *m;
 
 	list_for_each_entry(m, &huge_boot_pages, list) {
+		struct page *page = virt_to_page(m);
 		struct hstate *h = m->hstate;
-		struct page *page;
 
-#ifdef CONFIG_HIGHMEM
-		page = pfn_to_page(m->phys >> PAGE_SHIFT);
-		memblock_free_late(__pa(m),
-				   sizeof(struct huge_bootmem_page));
-#else
-		page = virt_to_page(m);
-#endif
 		WARN_ON(page_count(page) != 1);
 		prep_compound_huge_page(page, h->order);
 		WARN_ON(PageReserved(page));
-- 
cgit v1.2.3


From 35fd1eb1e8212c02f6eae24335a9e5b80f9519b4 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Fri, 17 Aug 2018 15:49:21 -0700
Subject: mm/sparse: abstract sparse buffer allocations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "sparse_init rewrite", v6.

In sparse_init() we allocate two large buffers to temporary hold usemap
and memmap for the whole machine.  However, we can avoid doing that if
we changed sparse_init() to operated on per-node bases instead of doing
it on the whole machine beforehand.

As shown by Baoquan
  http://lkml.kernel.org/r/20180628062857.29658-1-bhe@redhat.com

The buffers are large enough to cause machine stop to boot on small
memory systems.

Another benefit of these changes is that they also obsolete
CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER.

This patch (of 5):

When struct pages are allocated for sparse-vmemmap VA layout, we first try
to allocate one large buffer, and than if that fails allocate struct pages
for each section as we go.

The code that allocates buffer is uses global variables and is spread
across several call sites.

Cleanup the code by introducing three functions to handle the global
buffer:

sparse_buffer_init()	initialize the buffer
sparse_buffer_fini()	free the remaining part of the buffer
sparse_buffer_alloc()	alloc from the buffer, and if buffer is empty
return NULL

Define these functions in sparse.c instead of sparse-vmemmap.c because
later we will use them for non-vmemmap sparse allocations as well.

[akpm@linux-foundation.org: use PTR_ALIGN()]
[akpm@linux-foundation.org: s/BUG_ON/WARN_ON/]
Link: http://lkml.kernel.org/r/20180712203730.8703-2-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>	[powerpc]
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Tested-by: Oscar Salvador <osalvador@suse.de>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  |  4 ++++
 mm/sparse-vmemmap.c | 40 ++++++----------------------------------
 mm/sparse.c         | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 54 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2fb32d1561eb..4ace5d50a892 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2671,6 +2671,10 @@ void sparse_mem_maps_populate_node(struct page **map_map,
 				   unsigned long map_count,
 				   int nodeid);
 
+unsigned long __init section_map_size(void);
+void sparse_buffer_init(unsigned long size, int nid);
+void sparse_buffer_fini(void);
+void *sparse_buffer_alloc(unsigned long size);
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
 		struct vmem_altmap *altmap);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 95e2c7638a5c..b05c7663c640 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -43,12 +43,9 @@ static void * __ref __earlyonly_bootmem_alloc(int node,
 				unsigned long goal)
 {
 	return memblock_virt_alloc_try_nid_raw(size, align, goal,
-					    BOOTMEM_ALLOC_ACCESSIBLE, node);
+					       BOOTMEM_ALLOC_ACCESSIBLE, node);
 }
 
-static void *vmemmap_buf;
-static void *vmemmap_buf_end;
-
 void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 {
 	/* If the main allocator is up use that, fallback to bootmem. */
@@ -76,18 +73,10 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 /* need to make sure size is all the same during early stage */
 void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
 {
-	void *ptr;
-
-	if (!vmemmap_buf)
-		return vmemmap_alloc_block(size, node);
-
-	/* take the from buf */
-	ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
-	if (ptr + size > vmemmap_buf_end)
-		return vmemmap_alloc_block(size, node);
-
-	vmemmap_buf = ptr + size;
+	void *ptr = sparse_buffer_alloc(size);
 
+	if (!ptr)
+		ptr = vmemmap_alloc_block(size, node);
 	return ptr;
 }
 
@@ -279,19 +268,9 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 					  unsigned long map_count, int nodeid)
 {
 	unsigned long pnum;
-	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
-	void *vmemmap_buf_start;
 	int nr_consumed_maps = 0;
 
-	size = ALIGN(size, PMD_SIZE);
-	vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
-			 PMD_SIZE, __pa(MAX_DMA_ADDRESS));
-
-	if (vmemmap_buf_start) {
-		vmemmap_buf = vmemmap_buf_start;
-		vmemmap_buf_end = vmemmap_buf_start + size * map_count;
-	}
-
+	sparse_buffer_init(section_map_size() * map_count, nodeid);
 	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
 		if (!present_section_nr(pnum))
 			continue;
@@ -303,12 +282,5 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 		pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
 		       __func__);
 	}
-
-	if (vmemmap_buf_start) {
-		/* need to free left buf */
-		memblock_free_early(__pa(vmemmap_buf),
-				    vmemmap_buf_end - vmemmap_buf);
-		vmemmap_buf = NULL;
-		vmemmap_buf_end = NULL;
-	}
+	sparse_buffer_fini();
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index 2ea8b3dbd0df..9a0a5f598469 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -400,7 +400,14 @@ static void __init sparse_early_usemaps_alloc_node(void *data,
 	}
 }
 
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+unsigned long __init section_map_size(void)
+
+{
+	return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
+}
+
+#else
 struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
 		struct vmem_altmap *altmap)
 {
@@ -457,6 +464,42 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 }
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
+static void *sparsemap_buf __meminitdata;
+static void *sparsemap_buf_end __meminitdata;
+
+void __init sparse_buffer_init(unsigned long size, int nid)
+{
+	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
+	sparsemap_buf =
+		memblock_virt_alloc_try_nid_raw(size, PAGE_SIZE,
+						__pa(MAX_DMA_ADDRESS),
+						BOOTMEM_ALLOC_ACCESSIBLE, nid);
+	sparsemap_buf_end = sparsemap_buf + size;
+}
+
+void __init sparse_buffer_fini(void)
+{
+	unsigned long size = sparsemap_buf_end - sparsemap_buf;
+
+	if (sparsemap_buf && size > 0)
+		memblock_free_early(__pa(sparsemap_buf), size);
+	sparsemap_buf = NULL;
+}
+
+void * __meminit sparse_buffer_alloc(unsigned long size)
+{
+	void *ptr = NULL;
+
+	if (sparsemap_buf) {
+		ptr = PTR_ALIGN(sparsemap_buf, size);
+		if (ptr + size > sparsemap_buf_end)
+			ptr = NULL;
+		else
+			sparsemap_buf = ptr + size;
+	}
+	return ptr;
+}
+
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
 static void __init sparse_early_mem_maps_alloc_node(void *data,
 				 unsigned long pnum_begin,
-- 
cgit v1.2.3


From afda57bc13410459fc957e93341ade7bebca36e2 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Fri, 17 Aug 2018 15:49:30 -0700
Subject: mm/sparse: move buffer init/fini to the common place
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that both variants of sparse memory use the same buffers to populate
memory map, we can move sparse_buffer_init()/sparse_buffer_fini() to the
common place.

Link: http://lkml.kernel.org/r/20180712203730.8703-4-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>	[powerpc]
Tested-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  |  3 ---
 mm/sparse-vmemmap.c |  2 --
 mm/sparse.c         | 14 +++++++-------
 3 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4ace5d50a892..48040510df05 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2671,9 +2671,6 @@ void sparse_mem_maps_populate_node(struct page **map_map,
 				   unsigned long map_count,
 				   int nodeid);
 
-unsigned long __init section_map_size(void);
-void sparse_buffer_init(unsigned long size, int nid);
-void sparse_buffer_fini(void);
 void *sparse_buffer_alloc(unsigned long size);
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
 		struct vmem_altmap *altmap);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index b05c7663c640..cd15f3d252c3 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -270,7 +270,6 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 	unsigned long pnum;
 	int nr_consumed_maps = 0;
 
-	sparse_buffer_init(section_map_size() * map_count, nodeid);
 	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
 		if (!present_section_nr(pnum))
 			continue;
@@ -282,5 +281,4 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 		pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
 		       __func__);
 	}
-	sparse_buffer_fini();
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index db4867b62fff..20ca292d8f11 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -401,14 +401,14 @@ static void __init sparse_early_usemaps_alloc_node(void *data,
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-unsigned long __init section_map_size(void)
+static unsigned long __init section_map_size(void)
 
 {
 	return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
 }
 
 #else
-unsigned long __init section_map_size(void)
+static unsigned long __init section_map_size(void)
 {
 	return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
 }
@@ -433,10 +433,8 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 					  unsigned long map_count, int nodeid)
 {
 	unsigned long pnum;
-	unsigned long size = section_map_size();
 	int nr_consumed_maps = 0;
 
-	sparse_buffer_init(size * map_count, nodeid);
 	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
 		if (!present_section_nr(pnum))
 			continue;
@@ -447,14 +445,13 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 		pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
 		       __func__);
 	}
-	sparse_buffer_fini();
 }
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 static void *sparsemap_buf __meminitdata;
 static void *sparsemap_buf_end __meminitdata;
 
-void __init sparse_buffer_init(unsigned long size, int nid)
+static void __init sparse_buffer_init(unsigned long size, int nid)
 {
 	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
 	sparsemap_buf =
@@ -464,7 +461,7 @@ void __init sparse_buffer_init(unsigned long size, int nid)
 	sparsemap_buf_end = sparsemap_buf + size;
 }
 
-void __init sparse_buffer_fini(void)
+static void __init sparse_buffer_fini(void)
 {
 	unsigned long size = sparsemap_buf_end - sparsemap_buf;
 
@@ -494,8 +491,11 @@ static void __init sparse_early_mem_maps_alloc_node(void *data,
 				 unsigned long map_count, int nodeid)
 {
 	struct page **map_map = (struct page **)data;
+
+	sparse_buffer_init(section_map_size() * map_count, nodeid);
 	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
 					 map_count, nodeid);
+	sparse_buffer_fini();
 }
 #else
 static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
-- 
cgit v1.2.3


From 2a3cb8baef71e4dad4a6ec17f5f0db9e05f46a01 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Fri, 17 Aug 2018 15:49:37 -0700
Subject: mm/sparse: delete old sparse_init and enable new one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename new_sparse_init() to sparse_init() which enables it.  Delete old
sparse_init() and all the code that became obsolete with.

[pasha.tatashin@oracle.com: remove unused sparse_mem_maps_populate_node()]
  Link: http://lkml.kernel.org/r/20180716174447.14529-6-pasha.tatashin@oracle.com
Link: http://lkml.kernel.org/r/20180712203730.8703-6-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>	[powerpc]
Tested-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  |   6 --
 mm/Kconfig          |   4 -
 mm/sparse-vmemmap.c |  21 -----
 mm/sparse.c         | 237 +---------------------------------------------------
 4 files changed, 1 insertion(+), 267 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 48040510df05..a3cae495f9ce 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2665,12 +2665,6 @@ extern int randomize_va_space;
 const char * arch_vma_name(struct vm_area_struct *vma);
 void print_vma_addr(char *prefix, unsigned long rip);
 
-void sparse_mem_maps_populate_node(struct page **map_map,
-				   unsigned long pnum_begin,
-				   unsigned long pnum_end,
-				   unsigned long map_count,
-				   int nodeid);
-
 void *sparse_buffer_alloc(unsigned long size);
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
 		struct vmem_altmap *altmap);
diff --git a/mm/Kconfig b/mm/Kconfig
index 08d8399bb93b..adfeae4decb4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -118,10 +118,6 @@ config SPARSEMEM_EXTREME
 config SPARSEMEM_VMEMMAP_ENABLE
 	bool
 
-config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-	def_bool y
-	depends on SPARSEMEM && X86_64
-
 config SPARSEMEM_VMEMMAP
 	bool "Sparse Memory virtual memmap"
 	depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index cd15f3d252c3..8301293331a2 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -261,24 +261,3 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid,
 
 	return map;
 }
-
-void __init sparse_mem_maps_populate_node(struct page **map_map,
-					  unsigned long pnum_begin,
-					  unsigned long pnum_end,
-					  unsigned long map_count, int nodeid)
-{
-	unsigned long pnum;
-	int nr_consumed_maps = 0;
-
-	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-		if (!present_section_nr(pnum))
-			continue;
-
-		map_map[nr_consumed_maps] =
-				sparse_mem_map_populate(pnum, nodeid, NULL);
-		if (map_map[nr_consumed_maps++])
-			continue;
-		pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
-		       __func__);
-	}
-}
diff --git a/mm/sparse.c b/mm/sparse.c
index 248d5d7bbf55..10b07eea9a6e 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -205,12 +205,6 @@ static inline unsigned long first_present_section_nr(void)
 	return next_present_section_nr(-1);
 }
 
-/*
- * Record how many memory sections are marked as present
- * during system bootup.
- */
-static int __initdata nr_present_sections;
-
 /* Record a memory area against a node. */
 void __init memory_present(int nid, unsigned long start, unsigned long end)
 {
@@ -240,7 +234,6 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
 			ms->section_mem_map = sparse_encode_early_nid(nid) |
 							SECTION_IS_ONLINE;
 			section_mark_present(ms);
-			nr_present_sections++;
 		}
 	}
 }
@@ -377,37 +370,8 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
-static void __init sparse_early_usemaps_alloc_node(void *data,
-				 unsigned long pnum_begin,
-				 unsigned long pnum_end,
-				 unsigned long usemap_count, int nodeid)
-{
-	void *usemap;
-	unsigned long pnum;
-	unsigned long **usemap_map = (unsigned long **)data;
-	int size = usemap_size();
-	int nr_consumed_maps = 0;
-
-	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
-							  size * usemap_count);
-	if (!usemap) {
-		pr_warn("%s: allocation failed\n", __func__);
-		return;
-	}
-
-	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-		if (!present_section_nr(pnum))
-			continue;
-		usemap_map[nr_consumed_maps] = usemap;
-		usemap += size;
-		check_usemap_section_nr(nodeid, usemap_map[nr_consumed_maps]);
-		nr_consumed_maps++;
-	}
-}
-
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 static unsigned long __init section_map_size(void)
-
 {
 	return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
 }
@@ -432,25 +396,6 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
 					  BOOTMEM_ALLOC_ACCESSIBLE, nid);
 	return map;
 }
-void __init sparse_mem_maps_populate_node(struct page **map_map,
-					  unsigned long pnum_begin,
-					  unsigned long pnum_end,
-					  unsigned long map_count, int nodeid)
-{
-	unsigned long pnum;
-	int nr_consumed_maps = 0;
-
-	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-		if (!present_section_nr(pnum))
-			continue;
-		map_map[nr_consumed_maps] =
-				sparse_mem_map_populate(pnum, nodeid, NULL);
-		if (map_map[nr_consumed_maps++])
-			continue;
-		pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
-		       __func__);
-	}
-}
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 static void *sparsemap_buf __meminitdata;
@@ -489,190 +434,10 @@ void * __meminit sparse_buffer_alloc(unsigned long size)
 	return ptr;
 }
 
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-static void __init sparse_early_mem_maps_alloc_node(void *data,
-				 unsigned long pnum_begin,
-				 unsigned long pnum_end,
-				 unsigned long map_count, int nodeid)
-{
-	struct page **map_map = (struct page **)data;
-
-	sparse_buffer_init(section_map_size() * map_count, nodeid);
-	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
-					 map_count, nodeid);
-	sparse_buffer_fini();
-}
-#else
-static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
-{
-	struct page *map;
-	struct mem_section *ms = __nr_to_section(pnum);
-	int nid = sparse_early_nid(ms);
-
-	map = sparse_mem_map_populate(pnum, nid, NULL);
-	if (map)
-		return map;
-
-	pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
-	       __func__);
-	return NULL;
-}
-#endif
-
 void __weak __meminit vmemmap_populate_print_last(void)
 {
 }
 
-/**
- *  alloc_usemap_and_memmap - memory alloction for pageblock flags and vmemmap
- *  @map: usemap_map for pageblock flags or mmap_map for vmemmap
- *  @unit_size: size of map unit
- */
-static void __init alloc_usemap_and_memmap(void (*alloc_func)
-					(void *, unsigned long, unsigned long,
-					unsigned long, int), void *data,
-					int data_unit_size)
-{
-	unsigned long pnum;
-	unsigned long map_count;
-	int nodeid_begin = 0;
-	unsigned long pnum_begin = 0;
-
-	for_each_present_section_nr(0, pnum) {
-		struct mem_section *ms;
-
-		ms = __nr_to_section(pnum);
-		nodeid_begin = sparse_early_nid(ms);
-		pnum_begin = pnum;
-		break;
-	}
-	map_count = 1;
-	for_each_present_section_nr(pnum_begin + 1, pnum) {
-		struct mem_section *ms;
-		int nodeid;
-
-		ms = __nr_to_section(pnum);
-		nodeid = sparse_early_nid(ms);
-		if (nodeid == nodeid_begin) {
-			map_count++;
-			continue;
-		}
-		/* ok, we need to take cake of from pnum_begin to pnum - 1*/
-		alloc_func(data, pnum_begin, pnum,
-						map_count, nodeid_begin);
-		/* new start, update count etc*/
-		nodeid_begin = nodeid;
-		pnum_begin = pnum;
-		data += map_count * data_unit_size;
-		map_count = 1;
-	}
-	/* ok, last chunk */
-	alloc_func(data, pnum_begin, __highest_present_section_nr+1,
-						map_count, nodeid_begin);
-}
-
-/*
- * Allocate the accumulated non-linear sections, allocate a mem_map
- * for each and record the physical to section mapping.
- */
-void __init sparse_init(void)
-{
-	unsigned long pnum;
-	struct page *map;
-	unsigned long *usemap;
-	unsigned long **usemap_map;
-	int size;
-	int nr_consumed_maps = 0;
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-	int size2;
-	struct page **map_map;
-#endif
-
-	/* see include/linux/mmzone.h 'struct mem_section' definition */
-	BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));
-
-	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
-	set_pageblock_order();
-
-	/*
-	 * map is using big page (aka 2M in x86 64 bit)
-	 * usemap is less one page (aka 24 bytes)
-	 * so alloc 2M (with 2M align) and 24 bytes in turn will
-	 * make next 2M slip to one more 2M later.
-	 * then in big system, the memory will have a lot of holes...
-	 * here try to allocate 2M pages continuously.
-	 *
-	 * powerpc need to call sparse_init_one_section right after each
-	 * sparse_early_mem_map_alloc, so allocate usemap_map at first.
-	 */
-	size = sizeof(unsigned long *) * nr_present_sections;
-	usemap_map = memblock_virt_alloc(size, 0);
-	if (!usemap_map)
-		panic("can not allocate usemap_map\n");
-	alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
-				(void *)usemap_map,
-				sizeof(usemap_map[0]));
-
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-	size2 = sizeof(struct page *) * nr_present_sections;
-	map_map = memblock_virt_alloc(size2, 0);
-	if (!map_map)
-		panic("can not allocate map_map\n");
-	alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node,
-				(void *)map_map,
-				sizeof(map_map[0]));
-#endif
-
-	/*
-	 * The number of present sections stored in nr_present_sections
-	 * are kept the same since mem sections are marked as present in
-	 * memory_present(). In this for loop, we need check which sections
-	 * failed to allocate memmap or usemap, then clear its
-	 * ->section_mem_map accordingly. During this process, we need
-	 * increase 'nr_consumed_maps' whether its allocation of memmap
-	 * or usemap failed or not, so that after we handle the i-th
-	 * memory section, can get memmap and usemap of (i+1)-th section
-	 * correctly.
-	 */
-	for_each_present_section_nr(0, pnum) {
-		struct mem_section *ms;
-
-		if (nr_consumed_maps >= nr_present_sections) {
-			pr_err("nr_consumed_maps goes beyond nr_present_sections\n");
-			break;
-		}
-		ms = __nr_to_section(pnum);
-		usemap = usemap_map[nr_consumed_maps];
-		if (!usemap) {
-			ms->section_mem_map = 0;
-			nr_consumed_maps++;
-			continue;
-		}
-
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-		map = map_map[nr_consumed_maps];
-#else
-		map = sparse_early_mem_map_alloc(pnum);
-#endif
-		if (!map) {
-			ms->section_mem_map = 0;
-			nr_consumed_maps++;
-			continue;
-		}
-
-		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
-								usemap);
-		nr_consumed_maps++;
-	}
-
-	vmemmap_populate_print_last();
-
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-	memblock_free_early(__pa(map_map), size2);
-#endif
-	memblock_free_early(__pa(usemap_map), size);
-}
-
 /*
  * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end)
  * And number of present sections in this node is map_count.
@@ -726,7 +491,7 @@ failed:
  * Allocate the accumulated non-linear sections, allocate a mem_map
  * for each and record the physical to section mapping.
  */
-void __init new_sparse_init(void)
+void __init sparse_init(void)
 {
 	unsigned long pnum_begin = first_present_section_nr();
 	int nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));
-- 
cgit v1.2.3


From 6b51e88199ca4f75ff647eff28efd30bfcb08dc4 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 17 Aug 2018 15:49:55 -0700
Subject: mm/list_lru: introduce list_lru_shrink_walk_irq()

Provide list_lru_shrink_walk_irq() and let it behave like
list_lru_walk_one() except that it locks the spinlock with
spin_lock_irq().  This is used by scan_shadow_nodes() because its lock
nests within the i_pages lock which is acquired with IRQ.  This change
allows to use proper locking promitives instead hand crafted
lock_irq_disable() plus spin_lock().

There is no EXPORT_SYMBOL provided because the current user is in-kernel
only.

Add list_lru_shrink_walk_irq() which acquires the spinlock with the
proper locking primitives.

Link: http://lkml.kernel.org/r/20180716111921.5365-5-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list_lru.h | 25 +++++++++++++++++++++++++
 mm/list_lru.c            | 15 +++++++++++++++
 mm/workingset.c          |  8 ++------
 3 files changed, 42 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index d9c16f2f2f00..aa5efd9351eb 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -166,6 +166,23 @@ unsigned long list_lru_walk_one(struct list_lru *lru,
 				int nid, struct mem_cgroup *memcg,
 				list_lru_walk_cb isolate, void *cb_arg,
 				unsigned long *nr_to_walk);
+/**
+ * list_lru_walk_one_irq: walk a list_lru, isolating and disposing freeable items.
+ * @lru: the lru pointer.
+ * @nid: the node id to scan from.
+ * @memcg: the cgroup to scan from.
+ * @isolate: callback function that is resposible for deciding what to do with
+ *  the item currently being scanned
+ * @cb_arg: opaque type that will be passed to @isolate
+ * @nr_to_walk: how many items to scan.
+ *
+ * Same as @list_lru_walk_one except that the spinlock is acquired with
+ * spin_lock_irq().
+ */
+unsigned long list_lru_walk_one_irq(struct list_lru *lru,
+				    int nid, struct mem_cgroup *memcg,
+				    list_lru_walk_cb isolate, void *cb_arg,
+				    unsigned long *nr_to_walk);
 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
 				 list_lru_walk_cb isolate, void *cb_arg,
 				 unsigned long *nr_to_walk);
@@ -178,6 +195,14 @@ list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
 				 &sc->nr_to_scan);
 }
 
+static inline unsigned long
+list_lru_shrink_walk_irq(struct list_lru *lru, struct shrink_control *sc,
+			 list_lru_walk_cb isolate, void *cb_arg)
+{
+	return list_lru_walk_one_irq(lru, sc->nid, sc->memcg, isolate, cb_arg,
+				     &sc->nr_to_scan);
+}
+
 static inline unsigned long
 list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
 	      void *cb_arg, unsigned long nr_to_walk)
diff --git a/mm/list_lru.c b/mm/list_lru.c
index f5c6a2d1ea66..5b30625fd365 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -282,6 +282,21 @@ list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
 }
 EXPORT_SYMBOL_GPL(list_lru_walk_one);
 
+unsigned long
+list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
+		      list_lru_walk_cb isolate, void *cb_arg,
+		      unsigned long *nr_to_walk)
+{
+	struct list_lru_node *nlru = &lru->node[nid];
+	unsigned long ret;
+
+	spin_lock_irq(&nlru->lock);
+	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
+				  nr_to_walk);
+	spin_unlock_irq(&nlru->lock);
+	return ret;
+}
+
 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
 				 list_lru_walk_cb isolate, void *cb_arg,
 				 unsigned long *nr_to_walk)
diff --git a/mm/workingset.c b/mm/workingset.c
index bc72ad029b3e..4516dd790129 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -483,13 +483,9 @@ out:
 static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
 				       struct shrink_control *sc)
 {
-	unsigned long ret;
-
 	/* list_lru lock nests inside the IRQ-safe i_pages lock */
-	local_irq_disable();
-	ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
-	local_irq_enable();
-	return ret;
+	return list_lru_shrink_walk_irq(&shadow_nodes, sc, shadow_lru_isolate,
+					NULL);
 }
 
 static struct shrinker workingset_shadow_shrinker = {
-- 
cgit v1.2.3


From ddbf369c0a33924f76d092985bd20d9310f43d7f Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 17 Aug 2018 15:49:58 -0700
Subject: mm, vmacache: hash addresses based on pmd

When perf profiling a wide variety of different workloads, it was found
that vmacache_find() had higher than expected cost: up to 0.08% of cpu
utilization in some cases.  This was found to rival other core VM
functions such as alloc_pages_vma() with thp enabled and default
mempolicy, and the conditionals in __get_vma_policy().

VMACACHE_HASH() determines which of the four per-task_struct slots a vma
is cached for a particular address.  This currently depends on the pfn,
so pfn 5212 occupies a different vmacache slot than its neighboring pfn
5213.

vmacache_find() iterates through all four of current's vmacache slots
when looking up an address.  Hashing based on pfn, an address has
~1/VMACACHE_SIZE chance of being cached in the first vmacache slot, or
about 25%, *if* the vma is cached.

This patch hashes an address by its pmd instead of pte to optimize for
workloads with good spatial locality.  This results in a higher
probability of vmas being cached in the first slot that is checked:
normally ~70% on the same workloads instead of 25%.

[rientjes@google.com: various updates]
  Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1807231532290.109445@chino.kir.corp.google.com
Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1807091749150.114630@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmacache.h |  6 ------
 mm/vmacache.c            | 38 +++++++++++++++++++++++++++++---------
 2 files changed, 29 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h
index a5b3aa8d281f..3e9a963edd6a 100644
--- a/include/linux/vmacache.h
+++ b/include/linux/vmacache.h
@@ -5,12 +5,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 
-/*
- * Hash based on the page number. Provides a good hit rate for
- * workloads with good locality and those with random accesses as well.
- */
-#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK)
-
 static inline void vmacache_flush(struct task_struct *tsk)
 {
 	memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas));
diff --git a/mm/vmacache.c b/mm/vmacache.c
index db7596eb6132..ea517bef7dc5 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -6,6 +6,18 @@
 #include <linux/sched/task.h>
 #include <linux/mm.h>
 #include <linux/vmacache.h>
+#include <asm/pgtable.h>
+
+/*
+ * Hash based on the pmd of addr if configured with MMU, which provides a good
+ * hit rate for workloads with spatial locality.  Otherwise, use pages.
+ */
+#ifdef CONFIG_MMU
+#define VMACACHE_SHIFT	PMD_SHIFT
+#else
+#define VMACACHE_SHIFT	PAGE_SHIFT
+#endif
+#define VMACACHE_HASH(addr) ((addr >> VMACACHE_SHIFT) & VMACACHE_MASK)
 
 /*
  * Flush vma caches for threads that share a given mm.
@@ -87,6 +99,7 @@ static bool vmacache_valid(struct mm_struct *mm)
 
 struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 {
+	int idx = VMACACHE_HASH(addr);
 	int i;
 
 	count_vm_vmacache_event(VMACACHE_FIND_CALLS);
@@ -95,16 +108,20 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 		return NULL;
 
 	for (i = 0; i < VMACACHE_SIZE; i++) {
-		struct vm_area_struct *vma = current->vmacache.vmas[i];
+		struct vm_area_struct *vma = current->vmacache.vmas[idx];
 
-		if (!vma)
-			continue;
-		if (WARN_ON_ONCE(vma->vm_mm != mm))
-			break;
-		if (vma->vm_start <= addr && vma->vm_end > addr) {
-			count_vm_vmacache_event(VMACACHE_FIND_HITS);
-			return vma;
+		if (vma) {
+#ifdef CONFIG_DEBUG_VM_VMACACHE
+			if (WARN_ON_ONCE(vma->vm_mm != mm))
+				break;
+#endif
+			if (vma->vm_start <= addr && vma->vm_end > addr) {
+				count_vm_vmacache_event(VMACACHE_FIND_HITS);
+				return vma;
+			}
 		}
+		if (++idx == VMACACHE_SIZE)
+			idx = 0;
 	}
 
 	return NULL;
@@ -115,6 +132,7 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
 					   unsigned long start,
 					   unsigned long end)
 {
+	int idx = VMACACHE_HASH(start);
 	int i;
 
 	count_vm_vmacache_event(VMACACHE_FIND_CALLS);
@@ -123,12 +141,14 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
 		return NULL;
 
 	for (i = 0; i < VMACACHE_SIZE; i++) {
-		struct vm_area_struct *vma = current->vmacache.vmas[i];
+		struct vm_area_struct *vma = current->vmacache.vmas[idx];
 
 		if (vma && vma->vm_start == start && vma->vm_end == end) {
 			count_vm_vmacache_event(VMACACHE_FIND_HITS);
 			return vma;
 		}
+		if (++idx == VMACACHE_SIZE)
+			idx = 0;
 	}
 
 	return NULL;
-- 
cgit v1.2.3


From 771c035372a036f83353eef46dbb829780330234 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 18 Aug 2018 12:19:56 -0700
Subject: deprecate the '__deprecated' attribute warnings entirely and for good

We haven't had lots of deprecation warnings lately, but the rdma use of
it made them flare up again.

They are not useful.  They annoy everybody, and nobody ever does
anything about them, because it's always "somebody elses problem".  And
when people start thinking that warnings are normal, they stop looking
at them, and the real warnings that mean something go unnoticed.

If you want to get rid of a function, just get rid of it.  Convert every
user to the new world order.

And if you can't do that, then don't annoy everybody else with your
marking that says "I couldn't be bothered to fix this, so I'll just spam
everybody elses build logs with warnings about my laziness".

Make a kernelnewbies wiki page about things that could be cleaned up,
write a blog post about it, or talk to people on the mailing lists.  But
don't add warnings to the kernel build about cleanup that you think
should happen but you aren't doing yourself.

Don't.  Just don't.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h   |  1 -
 include/linux/compiler_types.h | 21 ++-------------------
 lib/Kconfig.debug              |  8 --------
 3 files changed, 2 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 573f5a7d42d4..3e70b7d4e9ed 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -103,7 +103,6 @@
 #define __always_inline	inline __attribute__((always_inline))
 #define  noinline	__attribute__((noinline))
 
-#define __deprecated	__attribute__((deprecated))
 #define __packed	__attribute__((packed))
 #define __weak		__attribute__((weak))
 #define __alias(symbol)	__attribute__((alias(#symbol)))
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index a8ba6b04152c..fbf337933fd8 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -111,21 +111,10 @@ struct ftrace_likely_data {
 #endif /* __ASSEMBLY__ */
 
 #ifdef __KERNEL__
-/*
- * Allow us to mark functions as 'deprecated' and have gcc emit a nice
- * warning for each use, in hopes of speeding the functions removal.
- * Usage is:
- * 		int __deprecated foo(void)
- */
-#ifndef __deprecated
-# define __deprecated		/* unimplemented */
-#endif
 
-#ifdef MODULE
-#define __deprecated_for_modules __deprecated
-#else
+/* Don't. Just don't. */
+#define __deprecated
 #define __deprecated_for_modules
-#endif
 
 #ifndef __must_check
 #define __must_check
@@ -135,12 +124,6 @@ struct ftrace_likely_data {
 #undef __must_check
 #define __must_check
 #endif
-#ifndef CONFIG_ENABLE_WARN_DEPRECATED
-#undef __deprecated
-#undef __deprecated_for_modules
-#define __deprecated
-#define __deprecated_for_modules
-#endif
 
 #ifndef __malloc
 #define __malloc
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c6e73904c5a5..ab1b599202bc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -211,14 +211,6 @@ config GDB_SCRIPTS
 	  instance. See Documentation/dev-tools/gdb-kernel-debugging.rst
 	  for further details.
 
-config ENABLE_WARN_DEPRECATED
-	bool "Enable __deprecated logic"
-	default y
-	help
-	  Enable the __deprecated logic in the kernel build.
-	  Disable this to suppress the "warning: 'foo' is deprecated
-	  (declared at kernel/power/somefile.c:1234)" messages.
-
 config ENABLE_MUST_CHECK
 	bool "Enable __must_check logic"
 	default y
-- 
cgit v1.2.3


From 284ce4011ba60d6c487b668eea729b6294930806 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:50:32 -0700
Subject: x86/memory_failure: Introduce {set, clear}_mce_nospec()

Currently memory_failure() returns zero if the error was handled. On
that result mce_unmap_kpfn() is called to zap the page out of the kernel
linear mapping to prevent speculative fetches of potentially poisoned
memory. However, in the case of dax mapped devmap pages the page may be
in active permanent use by the device driver, so it cannot be unmapped
from the kernel.

Instead of marking the page not present, marking the page UC should
be sufficient for preventing poison from being pre-fetched into the
cache. Convert mce_unmap_pfn() to set_mce_nospec() remapping the page as
UC, to hide it from speculative accesses.

Given that that persistent memory errors can be cleared by the driver,
include a facility to restore the page to cacheable operation,
clear_mce_nospec().

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: <linux-edac@vger.kernel.org>
Cc: <x86@kernel.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 arch/x86/include/asm/set_memory.h         | 42 +++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 -----------
 arch/x86/kernel/cpu/mcheck/mce.c          | 38 +++-------------------------
 include/linux/set_memory.h                | 14 +++++++++++
 4 files changed, 59 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index bd090367236c..cf5e9124b45e 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -88,4 +88,46 @@ extern int kernel_set_to_readonly;
 void set_kernel_text_rw(void);
 void set_kernel_text_ro(void);
 
+#ifdef CONFIG_X86_64
+static inline int set_mce_nospec(unsigned long pfn)
+{
+	unsigned long decoy_addr;
+	int rc;
+
+	/*
+	 * Mark the linear address as UC to make sure we don't log more
+	 * errors because of speculative access to the page.
+	 * We would like to just call:
+	 *      set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1);
+	 * but doing that would radically increase the odds of a
+	 * speculative access to the poison page because we'd have
+	 * the virtual address of the kernel 1:1 mapping sitting
+	 * around in registers.
+	 * Instead we get tricky.  We create a non-canonical address
+	 * that looks just like the one we want, but has bit 63 flipped.
+	 * This relies on set_memory_uc() properly sanitizing any __pa()
+	 * results with __PHYSICAL_MASK or PTE_PFN_MASK.
+	 */
+	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
+
+	rc = set_memory_uc(decoy_addr, 1);
+	if (rc)
+		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
+	return rc;
+}
+#define set_mce_nospec set_mce_nospec
+
+/* Restore full speculative operation to the pfn. */
+static inline int clear_mce_nospec(unsigned long pfn)
+{
+	return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
+}
+#define clear_mce_nospec clear_mce_nospec
+#else
+/*
+ * Few people would run a 32-bit kernel on a machine that supports
+ * recoverable errors because they have too much memory to boot 32-bit.
+ */
+#endif
+
 #endif /* _ASM_X86_SET_MEMORY_H */
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 374d1aa66952..ceb67cd5918f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -113,21 +113,6 @@ static inline void mce_register_injector_chain(struct notifier_block *nb)	{ }
 static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
 #endif
 
-#ifndef CONFIG_X86_64
-/*
- * On 32-bit systems it would be difficult to safely unmap a poison page
- * from the kernel 1:1 map because there are no non-canonical addresses that
- * we can use to refer to the address without risking a speculative access.
- * However, this isn't much of an issue because:
- * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
- *    are only mapped into the kernel as needed
- * 2) Few people would run a 32-bit kernel on a machine that supports
- *    recoverable errors because they have too much memory to boot 32-bit.
- */
-static inline void mce_unmap_kpfn(unsigned long pfn) {}
-#define mce_unmap_kpfn mce_unmap_kpfn
-#endif
-
 struct mca_config {
 	bool dont_log_ce;
 	bool cmci_disabled;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c102ad51025e..42a061ce1f5d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -42,6 +42,7 @@
 #include <linux/irq_work.h>
 #include <linux/export.h>
 #include <linux/jump_label.h>
+#include <linux/set_memory.h>
 
 #include <asm/intel-family.h>
 #include <asm/processor.h>
@@ -50,7 +51,6 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/reboot.h>
-#include <asm/set_memory.h>
 
 #include "mce-internal.h"
 
@@ -108,10 +108,6 @@ static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
 
-#ifndef mce_unmap_kpfn
-static void mce_unmap_kpfn(unsigned long pfn);
-#endif
-
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -602,7 +598,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
 	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
 		pfn = mce->addr >> PAGE_SHIFT;
 		if (!memory_failure(pfn, 0))
-			mce_unmap_kpfn(pfn);
+			set_mce_nospec(pfn);
 	}
 
 	return NOTIFY_OK;
@@ -1072,38 +1068,10 @@ static int do_memory_failure(struct mce *m)
 	if (ret)
 		pr_err("Memory error not recovered");
 	else
-		mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
+		set_mce_nospec(m->addr >> PAGE_SHIFT);
 	return ret;
 }
 
-#ifndef mce_unmap_kpfn
-static void mce_unmap_kpfn(unsigned long pfn)
-{
-	unsigned long decoy_addr;
-
-	/*
-	 * Unmap this page from the kernel 1:1 mappings to make sure
-	 * we don't log more errors because of speculative access to
-	 * the page.
-	 * We would like to just call:
-	 *	set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
-	 * but doing that would radically increase the odds of a
-	 * speculative access to the poison page because we'd have
-	 * the virtual address of the kernel 1:1 mapping sitting
-	 * around in registers.
-	 * Instead we get tricky.  We create a non-canonical address
-	 * that looks just like the one we want, but has bit 63 flipped.
-	 * This relies on set_memory_np() not checking whether we passed
-	 * a legal address.
-	 */
-
-	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
-
-	if (set_memory_np(decoy_addr, 1))
-		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-}
-#endif
-
 /*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index da5178216da5..2a986d282a97 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -17,6 +17,20 @@ static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+#ifndef set_mce_nospec
+static inline int set_mce_nospec(unsigned long pfn)
+{
+	return 0;
+}
+#endif
+
+#ifndef clear_mce_nospec
+static inline int clear_mce_nospec(unsigned long pfn)
+{
+	return 0;
+}
+#endif
+
 #ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
 static inline int set_memory_encrypted(unsigned long addr, int numpages)
 {
-- 
cgit v1.2.3


From cafa0010cd51fb711fdcb50fc55f394c5f167a0a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 20 Aug 2018 13:15:26 -0700
Subject: Raise the minimum required gcc version to 4.6

Various architectures fail to build properly with older versions of the
gcc compiler.

An example from Guenter Roeck in thread [1]:
>
>   In file included from ./include/linux/mm.h:17:0,
>                    from ./include/linux/pid_namespace.h:7,
>                    from ./include/linux/ptrace.h:10,
>                    from arch/openrisc/kernel/asm-offsets.c:32:
>   ./include/linux/mm_types.h:497:16: error: flexible array member in otherwise empty struct
>
> This is just an example with gcc 4.5.1 for or32. I have seen the problem
> with gcc 4.4 (for unicore32) as well.

So update the minimum required version of gcc to 4.6.

[1] https://lore.kernel.org/lkml/20180814170904.GA12768@roeck-us.net/

Miscellanea:

 - Update Documentation/process/changes.rst

 - Remove and consolidate version test blocks in compiler-gcc.h for
   versions lower than 4.6

Signed-off-by: Joe Perches <joe@perches.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/process/changes.rst |  2 +-
 include/linux/compiler-gcc.h      | 86 ++++++++-------------------------------
 2 files changed, 19 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 7a92a06f90de..61f918b10a0c 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -29,7 +29,7 @@ you probably needn't concern yourself with isdn4k-utils.
 ====================== ===============  ========================================
         Program        Minimal version       Command to check the version
 ====================== ===============  ========================================
-GNU C                  3.2              gcc --version
+GNU C                  4.6              gcc --version
 GNU make               3.81             make --version
 binutils               2.20             ld -v
 flex                   2.5.35           flex --version
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 3e70b7d4e9ed..250b9b7cfd60 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -10,6 +10,10 @@
 		     + __GNUC_MINOR__ * 100	\
 		     + __GNUC_PATCHLEVEL__)
 
+#if GCC_VERSION < 40600
+# error Sorry, your compiler is too old - please upgrade it.
+#endif
+
 /* Optimization barrier */
 
 /* The "volatile" is due to gcc bugs */
@@ -58,6 +62,12 @@
 #define OPTIMIZER_HIDE_VAR(var)						\
 	__asm__ ("" : "=r" (var) : "0" (var))
 
+/*
+ * A trick to suppress uninitialized variable warning without generating any
+ * code
+ */
+#define uninitialized_var(x) x = x
+
 #ifdef __CHECKER__
 #define __must_be_array(a)	0
 #else
@@ -91,7 +101,7 @@
  * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
-    !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
+    !defined(CONFIG_OPTIMIZE_INLINING)
 #define inline \
 	inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
@@ -147,47 +157,13 @@
 #define __always_unused		__attribute__((unused))
 #define __mode(x)               __attribute__((mode(x)))
 
-/* gcc version specific checks */
-
-#if GCC_VERSION < 30200
-# error Sorry, your compiler is too old - please upgrade it.
-#endif
-
-#if GCC_VERSION < 30300
-# define __used			__attribute__((__unused__))
-#else
-# define __used			__attribute__((__used__))
-#endif
-
-#ifdef CONFIG_GCOV_KERNEL
-# if GCC_VERSION < 30400
-#   error "GCOV profiling support for gcc versions below 3.4 not included"
-# endif /* __GNUC_MINOR__ */
-#endif /* CONFIG_GCOV_KERNEL */
-
-#if GCC_VERSION >= 30400
 #define __must_check		__attribute__((warn_unused_result))
 #define __malloc		__attribute__((__malloc__))
-#endif
-
-#if GCC_VERSION >= 40000
-
-/* GCC 4.1.[01] miscompiles __weak */
-#ifdef __KERNEL__
-# if GCC_VERSION >= 40100 &&  GCC_VERSION <= 40101
-#  error Your version of gcc miscompiles the __weak directive
-# endif
-#endif
 
 #define __used			__attribute__((__used__))
 #define __compiler_offsetof(a, b)					\
 	__builtin_offsetof(a, b)
 
-#if GCC_VERSION >= 40100
-# define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-#endif
-
-#if GCC_VERSION >= 40300
 /* Mark functions as cold. gcc will assume any path leading to a call
  * to them will be unlikely.  This means a lot of manual unlikely()s
  * are unnecessary now for any paths leading to the usual suspects
@@ -206,24 +182,19 @@
 
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-#ifndef __CHECKER__
-# define __compiletime_warning(message) __attribute__((warning(message)))
-# define __compiletime_error(message) __attribute__((error(message)))
-#endif /* __CHECKER__ */
-#endif /* GCC_VERSION >= 40300 */
-
-#if GCC_VERSION >= 40400
 #define __optimize(level)	__attribute__((__optimize__(level)))
 #define __nostackprotector	__optimize("no-stack-protector")
-#endif /* GCC_VERSION >= 40400 */
 
-#if GCC_VERSION >= 40500
+#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
 #ifndef __CHECKER__
+#define __compiletime_warning(message) __attribute__((warning(message)))
+#define __compiletime_error(message) __attribute__((error(message)))
+
 #ifdef LATENT_ENTROPY_PLUGIN
 #define __latent_entropy __attribute__((latent_entropy))
 #endif
-#endif
+#endif /* __CHECKER__ */
 
 /*
  * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
@@ -261,10 +232,6 @@
 #define randomized_struct_fields_end	} __randomize_layout;
 #endif
 
-#endif /* GCC_VERSION >= 40500 */
-
-#if GCC_VERSION >= 40600
-
 /*
  * When used with Link Time Optimization, gcc can optimize away C functions or
  * variables which are referenced only from assembly code.  __visible tells the
@@ -273,8 +240,7 @@
  */
 #define __visible	__attribute__((externally_visible))
 
-#endif /* GCC_VERSION >= 40600 */
-
+/* gcc version specific checks */
 
 #if GCC_VERSION >= 40900 && !defined(__CHECKER__)
 /*
@@ -308,10 +274,8 @@
  * folding in __builtin_bswap*() (yet), so don't set these for it.
  */
 #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__)
-#if GCC_VERSION >= 40400
 #define __HAVE_BUILTIN_BSWAP32__
 #define __HAVE_BUILTIN_BSWAP64__
-#endif
 #if GCC_VERSION >= 40800
 #define __HAVE_BUILTIN_BSWAP16__
 #endif
@@ -340,10 +304,9 @@
  * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html
  */
 #define __designated_init __attribute__((designated_init))
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
 
-#endif	/* gcc version >= 40000 specific checks */
-
 #if !defined(__noclone)
 #define __noclone	/* not needed */
 #endif
@@ -352,16 +315,6 @@
 #define __no_sanitize_address
 #endif
 
-/*
- * A trick to suppress uninitialized variable warning without generating any
- * code
- */
-#define uninitialized_var(x) x = x
-
-#if GCC_VERSION >= 50100
-#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
-#endif
-
 /*
  * Turn individual warnings and errors on and off locally, depending
  * on version.
@@ -374,12 +327,9 @@
 #define __diag_GCC_warn		warning
 #define __diag_GCC_error	error
 
-/* Compilers before gcc-4.6 do not understand "#pragma GCC diagnostic push" */
-#if GCC_VERSION >= 40600
 #define __diag_str1(s)		#s
 #define __diag_str(s)		__diag_str1(s)
 #define __diag(s)		_Pragma(__diag_str(GCC diagnostic s))
-#endif
 
 #if GCC_VERSION >= 80000
 #define __diag_GCC_8(s)		__diag(s)
-- 
cgit v1.2.3


From 04f264d3a8b0eb25d378127bd78c3c9a0261c828 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Mon, 20 Aug 2018 15:36:17 -0700
Subject: compiler.h: Allow arch-specific asm/compiler.h

We have a need to override the definition of
barrier_before_unreachable() for MIPS, which means we either need to add
architecture-specific code into linux/compiler-gcc.h or we need to allow
the architecture to provide a header that can define the macro before
the generic definition. The latter seems like the better approach.

A straightforward approach to the per-arch header is to make use of
asm-generic to provide a default empty header & adjust architectures
which don't need anything specific to make use of that by adding the
header to generic-y. Unfortunately this doesn't work so well due to
commit 28128c61e08e ("kconfig.h: Include compiler types to avoid missed
struct attributes") which caused linux/compiler_types.h to be included
in the compilation of every C file via the -include linux/kconfig.h flag
in c_flags.

Because the -include flag is present for all C files we compile, we need
the architecture-provided header to be present before any C files are
compiled. If any C files can be compiled prior to the asm-generic header
wrappers being generated then we hit a build failure due to missing
header. Such cases do exist - one pointed out by the kbuild test robot
is the compilation of arch/ia64/kernel/nr-irqs.c, which occurs as part
of the archprepare target [1].

This leaves us with a few options:

  1) Use generic-y & fix any build failures we find by enforcing
     ordering such that the asm-generic target occurs before any C
     compilation, such that linux/compiler_types.h can always include
     the generated asm-generic wrapper which in turn includes the empty
     asm-generic header. This would rely on us finding all the
     problematic cases - I don't know for sure that the ia64 issue is
     the only one.

  2) Add an actual empty header to each architecture, so that we don't
     need the generated asm-generic wrapper. This seems messy.

  3) Give up & add #ifdef CONFIG_MIPS or similar to
     linux/compiler_types.h. This seems messy too.

  4) Include the arch header only when it's actually needed, removing
     the need for the asm-generic wrapper for all other architectures.

This patch allows us to use approach 4, by including an asm/compiler.h
header from linux/compiler_types.h after the inclusion of the
compiler-specific linux/compiler-*.h header(s). We do this
conditionally, only when CONFIG_HAVE_ARCH_COMPILER_H is selected, in
order to avoid the need for asm-generic wrappers & the associated build
ordering issue described above. The asm/compiler.h header is included
after the generic linux/compiler-*.h header(s) for consistency with the
way linux/compiler-intel.h & linux/compiler-clang.h are included after
the linux/compiler-gcc.h header that they override.

[1] https://lists.01.org/pipermail/kbuild-all/2018-August/051175.html

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Patchwork: https://patchwork.linux-mips.org/patch/20269/
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: James Hogan <jhogan@kernel.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-arch@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: linux-mips@linux-mips.org
---
 arch/Kconfig                   |  8 ++++++++
 include/linux/compiler_types.h | 12 ++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 1aa59063f1fd..5c7c48e7b727 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -971,4 +971,12 @@ config REFCOUNT_FULL
 	  against various use-after-free conditions that can be used in
 	  security flaw exploits.
 
+config HAVE_ARCH_COMPILER_H
+	bool
+	help
+	  An architecture can select this if it provides an
+	  asm/compiler.h header that should be included after
+	  linux/compiler-*.h in order to override macro definitions that those
+	  headers generally provide.
+
 source "kernel/gcov/Kconfig"
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 6b79a9bba9a7..4be464a07612 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -78,6 +78,18 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 #include <linux/compiler-clang.h>
 #endif
 
+/*
+ * Some architectures need to provide custom definitions of macros provided
+ * by linux/compiler-*.h, and can do so using asm/compiler.h. We include that
+ * conditionally rather than using an asm-generic wrapper in order to avoid
+ * build failures if any C compilation, which will include this file via an
+ * -include argument in c_flags, occurs prior to the asm-generic wrappers being
+ * generated.
+ */
+#ifdef CONFIG_HAVE_ARCH_COMPILER_H
+#include <asm/compiler.h>
+#endif
+
 /*
  * Generic compiler-dependent macros required for kernel
  * build go below this comment. Actual compiler/compiler version
-- 
cgit v1.2.3


From 5ade60dda43c8906d4554374226c2eb11cc2ffba Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 20 Mar 2018 17:07:11 -0400
Subject: ida: Add new API

Add ida_alloc(), ida_alloc_min(), ida_alloc_max(), ida_alloc_range()
and ida_free().  The ida_alloc_max() and ida_alloc_range() functions
differ from ida_simple_get() in that they take an inclusive 'max'
parameter instead of an exclusive 'end' parameter.  Callers are about
evenly split whether they'd like inclusive or exclusive parameters and
'max' is easier to document than 'end'.

Change the IDA allocation to first attempt to allocate a bit using
existing memory, and only allocate memory afterwards.  Also change the
behaviour of 'min' > INT_MAX from being a BUG() to returning -ENOSPC.

Leave compatibility wrappers in place for ida_simple_get() and
ida_simple_remove() to avoid changing all callers.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/idr.h | 59 +++++++++++++++++++++++++++++++++++++++++---
 lib/idr.c           | 70 ++++++++++++++++++++++++-----------------------------
 2 files changed, 87 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index e856f4e0ab35..cd339da0b1aa 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -230,15 +230,68 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
 void ida_remove(struct ida *ida, int id);
 void ida_destroy(struct ida *ida);
 
-int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
-		   gfp_t gfp_mask);
-void ida_simple_remove(struct ida *ida, unsigned int id);
+int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t);
+void ida_free(struct ida *, unsigned int id);
+
+/**
+ * ida_alloc() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between 0 and %INT_MAX, inclusive.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+static inline int ida_alloc(struct ida *ida, gfp_t gfp)
+{
+	return ida_alloc_range(ida, 0, ~0, gfp);
+}
+
+/**
+ * ida_alloc_min() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @min: Lowest ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between @min and %INT_MAX, inclusive.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp)
+{
+	return ida_alloc_range(ida, min, ~0, gfp);
+}
+
+/**
+ * ida_alloc_max() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @max: Highest ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between 0 and @max, inclusive.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp)
+{
+	return ida_alloc_range(ida, 0, max, gfp);
+}
 
 static inline void ida_init(struct ida *ida)
 {
 	INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
 }
 
+#define ida_simple_get(ida, start, end, gfp)	\
+			ida_alloc_range(ida, start, (end) - 1, gfp)
+#define ida_simple_remove(ida, id)	ida_free(ida, id)
+
 /**
  * ida_get_new - allocate new ID
  * @ida:	idr handle
diff --git a/lib/idr.c b/lib/idr.c
index 352c6160e2e0..760fce92f1fb 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -317,7 +317,8 @@ EXPORT_SYMBOL(idr_replace);
  * bit per ID, and so is more space efficient than an IDR.  To use an IDA,
  * define it using DEFINE_IDA() (or embed a &struct ida in a data structure,
  * then initialise it using ida_init()).  To allocate a new ID, call
- * ida_simple_get().  To free an ID, call ida_simple_remove().
+ * ida_alloc(), ida_alloc_min(), ida_alloc_max() or ida_alloc_range().
+ * To free an ID, call ida_free().
  *
  * If you have more complex locking requirements, use a loop around
  * ida_pre_get() and ida_get_new() to allocate a new ID.  Then use
@@ -378,7 +379,7 @@ EXPORT_SYMBOL(idr_replace);
  * Allocate new ID above or equal to @start.  It should be called
  * with any required locks to ensure that concurrent calls to
  * ida_get_new_above() / ida_get_new() / ida_remove() are not allowed.
- * Consider using ida_simple_get() if you do not have complex locking
+ * Consider using ida_alloc_range() if you do not have complex locking
  * requirements.
  *
  * If memory is required, it will return %-EAGAIN, you should unlock
@@ -551,43 +552,34 @@ void ida_destroy(struct ida *ida)
 EXPORT_SYMBOL(ida_destroy);
 
 /**
- * ida_simple_get - get a new id.
- * @ida: the (initialized) ida.
- * @start: the minimum id (inclusive, < 0x8000000)
- * @end: the maximum id (exclusive, < 0x8000000 or 0)
- * @gfp_mask: memory allocation flags
- *
- * Allocates an id in the range start <= id < end, or returns -ENOSPC.
- * On memory allocation failure, returns -ENOMEM.
+ * ida_alloc_range() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @min: Lowest ID to allocate.
+ * @max: Highest ID to allocate.
+ * @gfp: Memory allocation flags.
  *
- * Compared to ida_get_new_above() this function does its own locking, and
- * should be used unless there are special requirements.
+ * Allocate an ID between @min and @max, inclusive.  The allocated ID will
+ * not exceed %INT_MAX, even if @max is larger.
  *
- * Use ida_simple_remove() to get rid of an id.
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
  */
-int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
-		   gfp_t gfp_mask)
+int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
+			gfp_t gfp)
 {
 	int ret, id;
-	unsigned int max;
 	unsigned long flags;
 
-	BUG_ON((int)start < 0);
-	BUG_ON((int)end < 0);
+	if ((int)min < 0)
+		return -ENOSPC;
 
-	if (end == 0)
-		max = 0x80000000;
-	else {
-		BUG_ON(end < start);
-		max = end - 1;
-	}
+	if ((int)max < 0)
+		max = INT_MAX;
 
 again:
-	if (!ida_pre_get(ida, gfp_mask))
-		return -ENOMEM;
-
 	xa_lock_irqsave(&ida->ida_rt, flags);
-	ret = ida_get_new_above(ida, start, &id);
+	ret = ida_get_new_above(ida, min, &id);
 	if (!ret) {
 		if (id > max) {
 			ida_remove(ida, id);
@@ -598,24 +590,24 @@ again:
 	}
 	xa_unlock_irqrestore(&ida->ida_rt, flags);
 
-	if (unlikely(ret == -EAGAIN))
+	if (unlikely(ret == -EAGAIN)) {
+		if (!ida_pre_get(ida, gfp))
+			return -ENOMEM;
 		goto again;
+	}
 
 	return ret;
 }
-EXPORT_SYMBOL(ida_simple_get);
+EXPORT_SYMBOL(ida_alloc_range);
 
 /**
- * ida_simple_remove - remove an allocated id.
- * @ida: the (initialized) ida.
- * @id: the id returned by ida_simple_get.
- *
- * Use to release an id allocated with ida_simple_get().
+ * ida_free() - Release an allocated ID.
+ * @ida: IDA handle.
+ * @id: Previously allocated ID.
  *
- * Compared to ida_remove() this function does its own locking, and should be
- * used unless there are special requirements.
+ * Context: Any context.
  */
-void ida_simple_remove(struct ida *ida, unsigned int id)
+void ida_free(struct ida *ida, unsigned int id)
 {
 	unsigned long flags;
 
@@ -624,4 +616,4 @@ void ida_simple_remove(struct ida *ida, unsigned int id)
 	ida_remove(ida, id);
 	xa_unlock_irqrestore(&ida->ida_rt, flags);
 }
-EXPORT_SYMBOL(ida_simple_remove);
+EXPORT_SYMBOL(ida_free);
-- 
cgit v1.2.3


From b03f8e43c9261878bf29d8cc1c3ba458cc98287e Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 18 Jun 2018 19:02:48 -0400
Subject: ida: Remove old API

Delete ida_pre_get(), ida_get_new(), ida_get_new_above() and ida_remove()
from the public API.  Some of these functions still exist as internal
helpers, but they should not be called by consumers.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/idr.h | 21 ++++-----------------
 lib/idr.c           | 48 +++++++-----------------------------------------
 lib/radix-tree.c    |  9 ---------
 3 files changed, 11 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index cd339da0b1aa..2e1db0e36486 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -225,13 +225,9 @@ struct ida {
 }
 #define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
 
-int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
-int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
-void ida_remove(struct ida *ida, int id);
-void ida_destroy(struct ida *ida);
-
 int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t);
 void ida_free(struct ida *, unsigned int id);
+void ida_destroy(struct ida *ida);
 
 /**
  * ida_alloc() - Allocate an unused ID.
@@ -292,20 +288,11 @@ static inline void ida_init(struct ida *ida)
 			ida_alloc_range(ida, start, (end) - 1, gfp)
 #define ida_simple_remove(ida, id)	ida_free(ida, id)
 
-/**
- * ida_get_new - allocate new ID
- * @ida:	idr handle
- * @p_id:	pointer to the allocated handle
- *
- * Simple wrapper around ida_get_new_above() w/ @starting_id of zero.
- */
-static inline int ida_get_new(struct ida *ida, int *p_id)
-{
-	return ida_get_new_above(ida, 0, p_id);
-}
-
 static inline bool ida_is_empty(const struct ida *ida)
 {
 	return radix_tree_empty(&ida->ida_rt);
 }
+
+/* in lib/radix-tree.c */
+int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 #endif /* __IDR_H__ */
diff --git a/lib/idr.c b/lib/idr.c
index 760fce92f1fb..1dc52191acb6 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -320,16 +320,9 @@ EXPORT_SYMBOL(idr_replace);
  * ida_alloc(), ida_alloc_min(), ida_alloc_max() or ida_alloc_range().
  * To free an ID, call ida_free().
  *
- * If you have more complex locking requirements, use a loop around
- * ida_pre_get() and ida_get_new() to allocate a new ID.  Then use
- * ida_remove() to free an ID.  You must make sure that ida_get_new() and
- * ida_remove() cannot be called at the same time as each other for the
- * same IDA.
- *
- * You can also use ida_get_new_above() if you need an ID to be allocated
- * above a particular number.  ida_destroy() can be used to dispose of an
- * IDA without needing to free the individual IDs in it.  You can use
- * ida_is_empty() to find out whether the IDA has any IDs currently allocated.
+ * ida_destroy() can be used to dispose of an IDA without needing to
+ * free the individual IDs in it.  You can use ida_is_empty() to find
+ * out whether the IDA has any IDs currently allocated.
  *
  * IDs are currently limited to the range [0-INT_MAX].  If this is an awkward
  * limitation, it should be quite straightforward to raise the maximum.
@@ -370,25 +363,7 @@ EXPORT_SYMBOL(idr_replace);
 
 #define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1)
 
-/**
- * ida_get_new_above - allocate new ID above or equal to a start id
- * @ida: ida handle
- * @start: id to start search at
- * @id: pointer to the allocated handle
- *
- * Allocate new ID above or equal to @start.  It should be called
- * with any required locks to ensure that concurrent calls to
- * ida_get_new_above() / ida_get_new() / ida_remove() are not allowed.
- * Consider using ida_alloc_range() if you do not have complex locking
- * requirements.
- *
- * If memory is required, it will return %-EAGAIN, you should unlock
- * and go back to the ida_pre_get() call.  If the ida is full, it will
- * return %-ENOSPC.  On success, it will return 0.
- *
- * @id returns a value in the range @start ... %0x7fffffff.
- */
-int ida_get_new_above(struct ida *ida, int start, int *id)
+static int ida_get_new_above(struct ida *ida, int start, int *id)
 {
 	struct radix_tree_root *root = &ida->ida_rt;
 	void __rcu **slot;
@@ -473,16 +448,8 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
 		return 0;
 	}
 }
-EXPORT_SYMBOL(ida_get_new_above);
 
-/**
- * ida_remove - Free the given ID
- * @ida: ida handle
- * @id: ID to free
- *
- * This function should not be called at the same time as ida_get_new_above().
- */
-void ida_remove(struct ida *ida, int id)
+static void ida_remove(struct ida *ida, int id)
 {
 	unsigned long index = id / IDA_BITMAP_BITS;
 	unsigned offset = id % IDA_BITMAP_BITS;
@@ -519,9 +486,8 @@ void ida_remove(struct ida *ida, int id)
 	}
 	return;
  err:
-	WARN(1, "ida_remove called for id=%d which is not allocated.\n", id);
+	WARN(1, "ida_free called for id=%d which is not allocated.\n", id);
 }
-EXPORT_SYMBOL(ida_remove);
 
 /**
  * ida_destroy() - Free all IDs.
@@ -568,7 +534,7 @@ EXPORT_SYMBOL(ida_destroy);
 int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
 			gfp_t gfp)
 {
-	int ret, id;
+	int ret, id = 0;
 	unsigned long flags;
 
 	if ((int)min < 0)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index cc6096b97afb..bc03ecc4dfd2 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2106,14 +2106,6 @@ void idr_preload(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(idr_preload);
 
-/**
- * ida_pre_get - reserve resources for ida allocation
- * @ida: ida handle
- * @gfp: memory allocation flags
- *
- * This function should be called before calling ida_get_new_above().  If it
- * is unable to allocate memory, it will return %0.  On success, it returns %1.
- */
 int ida_pre_get(struct ida *ida, gfp_t gfp)
 {
 	/*
@@ -2134,7 +2126,6 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
 
 	return 1;
 }
-EXPORT_SYMBOL(ida_pre_get);
 
 void __rcu **idr_get_free(struct radix_tree_root *root,
 			      struct radix_tree_iter *iter, gfp_t gfp,
-- 
cgit v1.2.3


From fd991a23c8f6ef30692f77409602ccf3614353b2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 21 Aug 2018 22:33:00 +0200
Subject: y2038: Provide aliases for compat helpers

As part of the system call rework for 64-bit time_t, we are restructuring
the way that compat syscalls deal with 32-bit time_t, reusing the
implementation for 32-bit architectures. Christoph Hellwig suggested a
rename of the associated types and interfaces to avoid the confusing usage
of the 'compat' prefix for 32-bit architectures.

To prepare for doing that in linux-4.20, add a set of macros that allows to
convert subsystems separately to the new names and avoids some of the
nastier merge conflicts.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: y2038@lists.linaro.org
Cc: John Stultz <john.stultz@linaro.org>
Cc: Deepa Dinamani <deepa.kernel@gmail.com>
Link: https://lkml.kernel.org/r/20180821203329.2089473-1-arnd@arndb.de
---
 include/linux/time32.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/time32.h b/include/linux/time32.h
index 0b14f936100a..d1ae43c13e25 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -207,4 +207,19 @@ static inline s64 timeval_to_ns(const struct timeval *tv)
 extern struct timeval ns_to_timeval(const s64 nsec);
 extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec);
 
+/*
+ * New aliases for compat time functions. These will be used to replace
+ * the compat code so it can be shared between 32-bit and 64-bit builds
+ * both of which provide compatibility with old 32-bit tasks.
+ */
+#define old_time32_t		compat_time_t
+#define old_timeval32		compat_timeval
+#define old_timespec32		compat_timespec
+#define old_itimerspec32	compat_itimerspec
+#define ns_to_old_timeval32	ns_to_compat_timeval
+#define get_old_itimerspec32	get_compat_itimerspec64
+#define put_old_itimerspec32	put_compat_itimerspec64
+#define get_old_timespec32	compat_get_timespec64
+#define put_old_timespec32	compat_put_timespec64
+
 #endif
-- 
cgit v1.2.3


From c4df32c80d04987023844c1fb13734a872c8f2e2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 19 Aug 2018 07:34:47 +0900
Subject: export.h: remove VMLINUX_SYMBOL() and VMLINUX_SYMBOL_STR()

With the special case handling for Blackfin and Metag was removed by
commit 94e58e0ac312 ("export.h: remove code for prefixing symbols with
underscore"), VMLINUX_SYMBOL() is no-op.

Replace the remaining usages, then remove the definition of
VMLINUX_SYMBOL() and VMLINUX_SYMBOL_STR().

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 certs/system_certificates.S | 16 ++++++++--------
 include/linux/export.h      |  7 -------
 usr/initramfs_data.S        |  4 ++--
 3 files changed, 10 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/certs/system_certificates.S b/certs/system_certificates.S
index 3918ff7235ed..8f29058adf93 100644
--- a/certs/system_certificates.S
+++ b/certs/system_certificates.S
@@ -5,8 +5,8 @@
 	__INITRODATA
 
 	.align 8
-	.globl VMLINUX_SYMBOL(system_certificate_list)
-VMLINUX_SYMBOL(system_certificate_list):
+	.globl system_certificate_list
+system_certificate_list:
 __cert_list_start:
 #ifdef CONFIG_MODULE_SIG
 	.incbin "certs/signing_key.x509"
@@ -15,21 +15,21 @@ __cert_list_start:
 __cert_list_end:
 
 #ifdef CONFIG_SYSTEM_EXTRA_CERTIFICATE
-	.globl VMLINUX_SYMBOL(system_extra_cert)
+	.globl system_extra_cert
 	.size system_extra_cert, CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE
-VMLINUX_SYMBOL(system_extra_cert):
+system_extra_cert:
 	.fill CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE, 1, 0
 
 	.align 4
-	.globl VMLINUX_SYMBOL(system_extra_cert_used)
-VMLINUX_SYMBOL(system_extra_cert_used):
+	.globl system_extra_cert_used
+system_extra_cert_used:
 	.int 0
 
 #endif /* CONFIG_SYSTEM_EXTRA_CERTIFICATE */
 
 	.align 8
-	.globl VMLINUX_SYMBOL(system_certificate_list_size)
-VMLINUX_SYMBOL(system_certificate_list_size):
+	.globl system_certificate_list_size
+system_certificate_list_size:
 #ifdef CONFIG_64BIT
 	.quad __cert_list_end - __cert_list_start
 #else
diff --git a/include/linux/export.h b/include/linux/export.h
index b768d6dd3c90..c363bde21bbe 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -10,13 +10,6 @@
  * hackers place grumpy comments in header files.
  */
 
-#define __VMLINUX_SYMBOL(x) x
-#define __VMLINUX_SYMBOL_STR(x) #x
-
-/* Indirect, so macros are expanded before pasting. */
-#define VMLINUX_SYMBOL(x) __VMLINUX_SYMBOL(x)
-#define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
-
 #ifndef __ASSEMBLY__
 struct kernel_symbol
 {
diff --git a/usr/initramfs_data.S b/usr/initramfs_data.S
index b28da799f6a6..d07648f05bbf 100644
--- a/usr/initramfs_data.S
+++ b/usr/initramfs_data.S
@@ -30,8 +30,8 @@ __irf_start:
 .incbin __stringify(INITRAMFS_IMAGE)
 __irf_end:
 .section .init.ramfs.info,"a"
-.globl VMLINUX_SYMBOL(__initramfs_size)
-VMLINUX_SYMBOL(__initramfs_size):
+.globl __initramfs_size
+__initramfs_size:
 #ifdef CONFIG_64BIT
 	.quad __irf_end - __irf_start
 #else
-- 
cgit v1.2.3


From 16af2d65842d343c2f95733c3993a0b5baab08f9 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Wed, 22 Aug 2018 21:13:01 +0900
Subject: ata: add an extra argument to ahci_platform_get_resources()

Add an extra argument to ahci_platform_get_resources(), that is
for the bitmap representing the resource to get in this function.

Currently there is no resources to be defined, so all the callers set
'0' to the argument.

Suggested-by: Hans de Goede <hdegoede@redhat.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_brcm.c        | 2 +-
 drivers/ata/ahci_ceva.c        | 2 +-
 drivers/ata/ahci_da850.c       | 2 +-
 drivers/ata/ahci_dm816.c       | 2 +-
 drivers/ata/ahci_imx.c         | 2 +-
 drivers/ata/ahci_mtk.c         | 2 +-
 drivers/ata/ahci_mvebu.c       | 2 +-
 drivers/ata/ahci_platform.c    | 2 +-
 drivers/ata/ahci_qoriq.c       | 2 +-
 drivers/ata/ahci_seattle.c     | 2 +-
 drivers/ata/ahci_st.c          | 2 +-
 drivers/ata/ahci_sunxi.c       | 2 +-
 drivers/ata/ahci_tegra.c       | 2 +-
 drivers/ata/ahci_xgene.c       | 2 +-
 drivers/ata/libahci_platform.c | 4 +++-
 include/linux/ahci_platform.h  | 2 +-
 16 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c
index ea430819c80b..f3d557777d82 100644
--- a/drivers/ata/ahci_brcm.c
+++ b/drivers/ata/ahci_brcm.c
@@ -425,7 +425,7 @@ static int brcm_ahci_probe(struct platform_device *pdev)
 
 	brcm_sata_phys_enable(priv);
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 	hpriv->plat_data = priv;
diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c
index 5ecc9d46cb54..dc78c98cb9f1 100644
--- a/drivers/ata/ahci_ceva.c
+++ b/drivers/ata/ahci_ceva.c
@@ -213,7 +213,7 @@ static int ceva_ahci_probe(struct platform_device *pdev)
 
 	cevapriv->ahci_pdev = pdev;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_da850.c b/drivers/ata/ahci_da850.c
index 9b34dff64536..ebaa657f28c4 100644
--- a/drivers/ata/ahci_da850.c
+++ b/drivers/ata/ahci_da850.c
@@ -171,7 +171,7 @@ static int ahci_da850_probe(struct platform_device *pdev)
 	u32 mpy;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_dm816.c b/drivers/ata/ahci_dm816.c
index fbd827c3a75c..89509c3efb01 100644
--- a/drivers/ata/ahci_dm816.c
+++ b/drivers/ata/ahci_dm816.c
@@ -148,7 +148,7 @@ static int ahci_dm816_probe(struct platform_device *pdev)
 	struct ahci_host_priv *hpriv;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
index 6822e2f33f7e..b00799d208f5 100644
--- a/drivers/ata/ahci_imx.c
+++ b/drivers/ata/ahci_imx.c
@@ -1127,7 +1127,7 @@ static int imx_ahci_probe(struct platform_device *pdev)
 			return ret;
 	}
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_mtk.c b/drivers/ata/ahci_mtk.c
index 0ae6971c2a4c..8bc1a26ffc31 100644
--- a/drivers/ata/ahci_mtk.c
+++ b/drivers/ata/ahci_mtk.c
@@ -142,7 +142,7 @@ static int mtk_ahci_probe(struct platform_device *pdev)
 	if (!plat)
 		return -ENOMEM;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index 0045dacd814b..adbe38fb9c35 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -158,7 +158,7 @@ static int ahci_mvebu_probe(struct platform_device *pdev)
 	const struct mbus_dram_target_info *dram;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 99f9a895a459..570927316962 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -43,7 +43,7 @@ static int ahci_probe(struct platform_device *pdev)
 	struct ahci_host_priv *hpriv;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c
index cfdef4d44ae9..ce59253ec158 100644
--- a/drivers/ata/ahci_qoriq.c
+++ b/drivers/ata/ahci_qoriq.c
@@ -250,7 +250,7 @@ static int ahci_qoriq_probe(struct platform_device *pdev)
 	struct resource *res;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_seattle.c b/drivers/ata/ahci_seattle.c
index 1d31c0c0fc20..e57b6f92c288 100644
--- a/drivers/ata/ahci_seattle.c
+++ b/drivers/ata/ahci_seattle.c
@@ -164,7 +164,7 @@ static int ahci_seattle_probe(struct platform_device *pdev)
 	int rc;
 	struct ahci_host_priv *hpriv;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c
index bc345f249555..21c5c44832ef 100644
--- a/drivers/ata/ahci_st.c
+++ b/drivers/ata/ahci_st.c
@@ -156,7 +156,7 @@ static int st_ahci_probe(struct platform_device *pdev)
 	if (!drv_data)
 		return -ENOMEM;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 	hpriv->plat_data = drv_data;
diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c
index b26437430163..631610b72aa5 100644
--- a/drivers/ata/ahci_sunxi.c
+++ b/drivers/ata/ahci_sunxi.c
@@ -181,7 +181,7 @@ static int ahci_sunxi_probe(struct platform_device *pdev)
 	struct ahci_host_priv *hpriv;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_tegra.c b/drivers/ata/ahci_tegra.c
index 64d848409fe2..004f2608818e 100644
--- a/drivers/ata/ahci_tegra.c
+++ b/drivers/ata/ahci_tegra.c
@@ -494,7 +494,7 @@ static int tegra_ahci_probe(struct platform_device *pdev)
 	int ret;
 	unsigned int i;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index ad58da7c9aff..7e157e1bf65e 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -759,7 +759,7 @@ static int xgene_ahci_probe(struct platform_device *pdev)
 					      &xgene_ahci_v2_port_info };
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 8fbb532b62dd..679f763410c0 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -332,6 +332,7 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
 /**
  * ahci_platform_get_resources - Get platform resources
  * @pdev: platform device to get resources for
+ * @flags: bitmap representing the resource to get
  *
  * This function allocates an ahci_host_priv struct, and gets the following
  * resources, storing a reference to them inside the returned struct:
@@ -345,7 +346,8 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
  * RETURNS:
  * The allocated ahci_host_priv on success, otherwise an ERR_PTR value
  */
-struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev)
+struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
+						   unsigned int flags)
 {
 	struct device *dev = &pdev->dev;
 	struct ahci_host_priv *hpriv;
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 1b0a17b22cd3..6490be1f8a16 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -30,7 +30,7 @@ void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv);
 int ahci_platform_enable_resources(struct ahci_host_priv *hpriv);
 void ahci_platform_disable_resources(struct ahci_host_priv *hpriv);
 struct ahci_host_priv *ahci_platform_get_resources(
-	struct platform_device *pdev);
+	struct platform_device *pdev, unsigned int flags);
 int ahci_platform_init_host(struct platform_device *pdev,
 			    struct ahci_host_priv *hpriv,
 			    const struct ata_port_info *pi_template,
-- 
cgit v1.2.3


From 9d2ab99573970838108add835442a03e23f8577b Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Wed, 22 Aug 2018 21:13:02 +0900
Subject: ata: libahci_platform: add reset control support

Add support to get and control a list of resets for the device
as optional and shared. These resets must be kept de-asserted until
the device is enabled.

This is specified as shared because some SoCs like UniPhier series
have common reset controls with all ahci controller instances.

However, according to Thierry's view,
https://www.spinics.net/lists/linux-ide/msg55357.html
some hardware-specific drivers already use their own resets,
and the common reset make a path to occur double controls of resets.

The ahci_platform_get_resources() can get and control the reset
only when the second argument includes AHCI_PLATFORM_GET_RESETS bit.

Suggested-by: Hans de Goede <hdegoede@redhat.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 .../devicetree/bindings/ata/ahci-platform.txt      |  1 +
 drivers/ata/ahci.h                                 |  1 +
 drivers/ata/libahci_platform.c                     | 31 ++++++++++++++++++----
 include/linux/ahci_platform.h                      |  2 ++
 4 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt
index c760ecb81381..f4006d3c9fdf 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt
@@ -30,6 +30,7 @@ compatible:
 Optional properties:
 - dma-coherent      : Present if dma operations are coherent
 - clocks            : a list of phandle + clock specifier pairs
+- resets            : a list of phandle + reset specifier pairs
 - target-supply     : regulator for SATA target power
 - phys              : reference to the SATA PHY node
 - phy-names         : must be "sata-phy"
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 1609ebab4e23..6a1515f0da40 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -350,6 +350,7 @@ struct ahci_host_priv {
 	u32			em_msg_type;	/* EM message type */
 	bool			got_runtime_pm; /* Did we do pm_runtime_get? */
 	struct clk		*clks[AHCI_MAX_CLKS]; /* Optional */
+	struct reset_control	*rsts;		/* Optional */
 	struct regulator	**target_pwrs;	/* Optional */
 	/*
 	 * If platform uses PHYs. There is a 1:1 relation between the port number and
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 679f763410c0..c92c10d55374 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -25,6 +25,7 @@
 #include <linux/phy/phy.h>
 #include <linux/pm_runtime.h>
 #include <linux/of_platform.h>
+#include <linux/reset.h>
 #include "ahci.h"
 
 static void ahci_host_stop(struct ata_host *host);
@@ -195,7 +196,8 @@ EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators);
  * following order:
  * 1) Regulator
  * 2) Clocks (through ahci_platform_enable_clks)
- * 3) Phys
+ * 3) Resets
+ * 4) Phys
  *
  * If resource enabling fails at any point the previous enabled resources
  * are disabled in reverse order.
@@ -215,12 +217,19 @@ int ahci_platform_enable_resources(struct ahci_host_priv *hpriv)
 	if (rc)
 		goto disable_regulator;
 
-	rc = ahci_platform_enable_phys(hpriv);
+	rc = reset_control_deassert(hpriv->rsts);
 	if (rc)
 		goto disable_clks;
 
+	rc = ahci_platform_enable_phys(hpriv);
+	if (rc)
+		goto disable_resets;
+
 	return 0;
 
+disable_resets:
+	reset_control_assert(hpriv->rsts);
+
 disable_clks:
 	ahci_platform_disable_clks(hpriv);
 
@@ -238,13 +247,16 @@ EXPORT_SYMBOL_GPL(ahci_platform_enable_resources);
  * This function disables all ahci_platform managed resources in the
  * following order:
  * 1) Phys
- * 2) Clocks (through ahci_platform_disable_clks)
- * 3) Regulator
+ * 2) Resets
+ * 3) Clocks (through ahci_platform_disable_clks)
+ * 4) Regulator
  */
 void ahci_platform_disable_resources(struct ahci_host_priv *hpriv)
 {
 	ahci_platform_disable_phys(hpriv);
 
+	reset_control_assert(hpriv->rsts);
+
 	ahci_platform_disable_clks(hpriv);
 
 	ahci_platform_disable_regulators(hpriv);
@@ -341,7 +353,8 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
  * 2) regulator for controlling the targets power (optional)
  * 3) 0 - AHCI_MAX_CLKS clocks, as specified in the devs devicetree node,
  *    or for non devicetree enabled platforms a single clock
- * 4) phys (optional)
+ * 4) resets, if flags has AHCI_PLATFORM_GET_RESETS (optional)
+ * 5) phys (optional)
  *
  * RETURNS:
  * The allocated ahci_host_priv on success, otherwise an ERR_PTR value
@@ -395,6 +408,14 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
 		hpriv->clks[i] = clk;
 	}
 
+	if (flags & AHCI_PLATFORM_GET_RESETS) {
+		hpriv->rsts = devm_reset_control_array_get_optional_shared(dev);
+		if (IS_ERR(hpriv->rsts)) {
+			rc = PTR_ERR(hpriv->rsts);
+			goto err_out;
+		}
+	}
+
 	hpriv->nports = child_nodes = of_get_child_count(dev->of_node);
 
 	/*
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 6490be1f8a16..eaedca5fe6fc 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -43,4 +43,6 @@ int ahci_platform_resume_host(struct device *dev);
 int ahci_platform_suspend(struct device *dev);
 int ahci_platform_resume(struct device *dev);
 
+#define AHCI_PLATFORM_GET_RESETS	0x01
+
 #endif /* _AHCI_PLATFORM_H */
-- 
cgit v1.2.3


From 92be775a3db343889ab159c44d55315c5ee0be4e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 21 Aug 2018 21:51:53 -0700
Subject: mm: struct shrink_control: keep int fields together

Patch series "Reorderings in struct shrinker and struct shrink_control".

These structures are intensively used during reclaim and, displace other
data in cache, so there is no a reason they have int fields not grouped
together.

This patch (of 2):

gfp_t is of unsigned type, so let's move nid to keep them together.

Link: http://lkml.kernel.org/r/153199747930.21131.861043607301997810.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shrinker.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index b154fd2b084c..d58aaaed34a4 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -12,6 +12,9 @@
 struct shrink_control {
 	gfp_t gfp_mask;
 
+	/* current node being shrunk (for NUMA aware shrinkers) */
+	int nid;
+
 	/*
 	 * How many objects scan_objects should scan and try to reclaim.
 	 * This is reset before every call, so it is safe for callees
@@ -26,9 +29,6 @@ struct shrink_control {
 	 */
 	unsigned long nr_scanned;
 
-	/* current node being shrunk (for NUMA aware shrinkers) */
-	int nid;
-
 	/* current memcg being shrunk (for memcg aware shrinkers) */
 	struct mem_cgroup *memcg;
 };
-- 
cgit v1.2.3


From e50ef89b0f58a2cb0e7d496a97b851e0dced62a6 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 21 Aug 2018 21:51:57 -0700
Subject: mm: struct shrinker: make flags of unsigned type

Currently, there are two flags only, so unsigned is more then enough.
Also, move int seeks to keep these fields together.

Link: http://lkml.kernel.org/r/153199748720.21131.6476256940113102483.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shrinker.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index d58aaaed34a4..9443cafd1969 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -63,9 +63,9 @@ struct shrinker {
 	unsigned long (*scan_objects)(struct shrinker *,
 				      struct shrink_control *sc);
 
-	int seeks;	/* seeks to recreate an obj */
 	long batch;	/* reclaim batch size, 0 = default */
-	unsigned long flags;
+	int seeks;	/* seeks to recreate an obj */
+	unsigned flags;
 
 	/* These are for internal use */
 	struct list_head list;
-- 
cgit v1.2.3


From 5d5e8f19544a35ae1609bd96ae6b28c9fcd1baf6 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Tue, 21 Aug 2018 21:52:20 -0700
Subject: mm, swap, get_swap_pages: use entry_size instead of cluster in
 parameter

As suggested by Matthew Wilcox, it is better to use "int entry_size"
instead of "bool cluster" as parameter to specify whether to operate for
huge or normal swap entries.  Because this improve the flexibility to
support other swap entry size.  And Dave Hansen thinks that this
improves code readability too.

So in this patch, the "bool cluster" parameter of get_swap_pages() is
replaced by "int entry_size".

And nr_swap_entries() trick is used to reduce the binary size when
!CONFIG_TRANSPARENT_HUGE_PAGE.

       text	   data	    bss	    dec	    hex	filename
base  24215	   2028	    340	  26583	   67d7	mm/swapfile.o
head  24123	   2004	    340	  26467	   6763	mm/swapfile.o

Link: http://lkml.kernel.org/r/20180720071845.17920-7-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  2 +-
 mm/swap_slots.c      |  8 ++++----
 mm/swapfile.c        | 16 ++++++++--------
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1a8bd05a335e..8e2c11e692ba 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -447,7 +447,7 @@ extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(struct page *page);
 extern void put_swap_page(struct page *page, swp_entry_t entry);
 extern swp_entry_t get_swap_page_of_type(int);
-extern int get_swap_pages(int n, bool cluster, swp_entry_t swp_entries[]);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 008ccb22fee6..63a7b4563a57 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -269,8 +269,8 @@ static int refill_swap_slots_cache(struct swap_slots_cache *cache)
 
 	cache->cur = 0;
 	if (swap_slot_cache_active)
-		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, false,
-					   cache->slots);
+		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
+					   cache->slots, 1);
 
 	return cache->nr;
 }
@@ -316,7 +316,7 @@ swp_entry_t get_swap_page(struct page *page)
 
 	if (PageTransHuge(page)) {
 		if (IS_ENABLED(CONFIG_THP_SWAP))
-			get_swap_pages(1, true, &entry);
+			get_swap_pages(1, &entry, HPAGE_PMD_NR);
 		goto out;
 	}
 
@@ -350,7 +350,7 @@ repeat:
 			goto out;
 	}
 
-	get_swap_pages(1, false, &entry);
+	get_swap_pages(1, &entry, 1);
 out:
 	if (mem_cgroup_try_charge_swap(page, entry)) {
 		put_swap_page(page, entry);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 043645e7f0b5..b30dd0642ccf 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -940,18 +940,18 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 }
 
-int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
+int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
 {
-	unsigned long nr_pages = cluster ? SWAPFILE_CLUSTER : 1;
+	unsigned long size = swap_entry_size(entry_size);
 	struct swap_info_struct *si, *next;
 	long avail_pgs;
 	int n_ret = 0;
 	int node;
 
 	/* Only single cluster request supported */
-	WARN_ON_ONCE(n_goal > 1 && cluster);
+	WARN_ON_ONCE(n_goal > 1 && size == SWAPFILE_CLUSTER);
 
-	avail_pgs = atomic_long_read(&nr_swap_pages) / nr_pages;
+	avail_pgs = atomic_long_read(&nr_swap_pages) / size;
 	if (avail_pgs <= 0)
 		goto noswap;
 
@@ -961,7 +961,7 @@ int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
 	if (n_goal > avail_pgs)
 		n_goal = avail_pgs;
 
-	atomic_long_sub(n_goal * nr_pages, &nr_swap_pages);
+	atomic_long_sub(n_goal * size, &nr_swap_pages);
 
 	spin_lock(&swap_avail_lock);
 
@@ -988,14 +988,14 @@ start_over:
 			spin_unlock(&si->lock);
 			goto nextsi;
 		}
-		if (cluster) {
+		if (size == SWAPFILE_CLUSTER) {
 			if (!(si->flags & SWP_FILE))
 				n_ret = swap_alloc_cluster(si, swp_entries);
 		} else
 			n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
 						    n_goal, swp_entries);
 		spin_unlock(&si->lock);
-		if (n_ret || cluster)
+		if (n_ret || size == SWAPFILE_CLUSTER)
 			goto check_out;
 		pr_debug("scan_swap_map of si %d failed to find offset\n",
 			si->type);
@@ -1021,7 +1021,7 @@ nextsi:
 
 check_out:
 	if (n_ret < n_goal)
-		atomic_long_add((long)(n_goal - n_ret) * nr_pages,
+		atomic_long_add((long)(n_goal - n_ret) * size,
 				&nr_swap_pages);
 noswap:
 	return n_ret;
-- 
cgit v1.2.3


From 93065ac753e4443840a057bfef4be71ec766fde9 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 21 Aug 2018 21:52:33 -0700
Subject: mm, oom: distinguish blockable mode for mmu notifiers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are several blockable mmu notifiers which might sleep in
mmu_notifier_invalidate_range_start and that is a problem for the
oom_reaper because it needs to guarantee a forward progress so it cannot
depend on any sleepable locks.

Currently we simply back off and mark an oom victim with blockable mmu
notifiers as done after a short sleep.  That can result in selecting a new
oom victim prematurely because the previous one still hasn't torn its
memory down yet.

We can do much better though.  Even if mmu notifiers use sleepable locks
there is no reason to automatically assume those locks are held.  Moreover
majority of notifiers only care about a portion of the address space and
there is absolutely zero reason to fail when we are unmapping an unrelated
range.  Many notifiers do really block and wait for HW which is harder to
handle and we have to bail out though.

This patch handles the low hanging fruit.
__mmu_notifier_invalidate_range_start gets a blockable flag and callbacks
are not allowed to sleep if the flag is set to false.  This is achieved by
using trylock instead of the sleepable lock for most callbacks and
continue as long as we do not block down the call chain.

I think we can improve that even further because there is a common pattern
to do a range lookup first and then do something about that.  The first
part can be done without a sleeping lock in most cases AFAICS.

The oom_reaper end then simply retries if there is at least one notifier
which couldn't make any progress in !blockable mode.  A retry loop is
already implemented to wait for the mmap_sem and this is basically the
same thing.

The simplest way for driver developers to test this code path is to wrap
userspace code which uses these notifiers into a memcg and set the hard
limit to hit the oom.  This can be done e.g.  after the test faults in all
the mmu notifier managed memory and set the hard limit to something really
small.  Then we are looking for a proper process tear down.

[akpm@linux-foundation.org: coding style fixes]
[akpm@linux-foundation.org: minor code simplification]
Link: http://lkml.kernel.org/r/20180716115058.5559-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Christian König <christian.koenig@amd.com> # AMD notifiers
Acked-by: Leon Romanovsky <leonro@mellanox.com> # mlx and umem_odp
Reported-by: David Rientjes <rientjes@google.com>
Cc: "David (ChunMing) Zhou" <David1.Zhou@amd.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Mike Marciniszyn <mike.marciniszyn@intel.com>
Cc: Dennis Dalessandro <dennis.dalessandro@intel.com>
Cc: Sudeep Dutt <sudeep.dutt@intel.com>
Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kvm/x86.c                      |  7 ++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c  | 43 ++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_userptr.c | 13 +++++++---
 drivers/gpu/drm/radeon/radeon_mn.c      | 22 ++++++++++++++---
 drivers/infiniband/core/umem_odp.c      | 33 +++++++++++++++++++------
 drivers/infiniband/hw/hfi1/mmu_rb.c     | 11 ++++++---
 drivers/infiniband/hw/mlx5/odp.c        |  2 +-
 drivers/misc/mic/scif/scif_dma.c        |  7 ++++--
 drivers/misc/sgi-gru/grutlbpurge.c      |  7 ++++--
 drivers/xen/gntdev.c                    | 44 ++++++++++++++++++++++++++-------
 include/linux/kvm_host.h                |  4 +--
 include/linux/mmu_notifier.h            | 33 +++++++++++++++++++------
 include/linux/oom.h                     |  2 +-
 include/rdma/ib_umem_odp.h              |  3 ++-
 mm/hmm.c                                |  7 ++++--
 mm/mmap.c                               |  2 +-
 mm/mmu_notifier.c                       | 19 +++++++++++---
 mm/oom_kill.c                           | 29 +++++++++++-----------
 virt/kvm/kvm_main.c                     | 15 +++++++----
 19 files changed, 223 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f7dff0457846..4a74a7cf0a8b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7305,8 +7305,9 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
 }
 
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-		unsigned long start, unsigned long end)
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end,
+		bool blockable)
 {
 	unsigned long apic_address;
 
@@ -7317,6 +7318,8 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 	apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
 	if (start <= apic_address && apic_address < end)
 		kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+
+	return 0;
 }
 
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index a365ea2383d1..e55508b39496 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -178,12 +178,18 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
  *
  * @amn: our notifier
  */
-static void amdgpu_mn_read_lock(struct amdgpu_mn *amn)
+static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
 {
-	mutex_lock(&amn->read_lock);
+	if (blockable)
+		mutex_lock(&amn->read_lock);
+	else if (!mutex_trylock(&amn->read_lock))
+		return -EAGAIN;
+
 	if (atomic_inc_return(&amn->recursion) == 1)
 		down_read_non_owner(&amn->lock);
 	mutex_unlock(&amn->read_lock);
+
+	return 0;
 }
 
 /**
@@ -239,10 +245,11 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 						 struct mm_struct *mm,
 						 unsigned long start,
-						 unsigned long end)
+						 unsigned long end,
+						 bool blockable)
 {
 	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
@@ -250,17 +257,28 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;
 
-	amdgpu_mn_read_lock(amn);
+	/* TODO we should be able to split locking for interval tree and
+	 * amdgpu_mn_invalidate_node
+	 */
+	if (amdgpu_mn_read_lock(amn, blockable))
+		return -EAGAIN;
 
 	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 
+		if (!blockable) {
+			amdgpu_mn_read_unlock(amn);
+			return -EAGAIN;
+		}
+
 		node = container_of(it, struct amdgpu_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
 		amdgpu_mn_invalidate_node(node, start, end);
 	}
+
+	return 0;
 }
 
 /**
@@ -275,10 +293,11 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 						 struct mm_struct *mm,
 						 unsigned long start,
-						 unsigned long end)
+						 unsigned long end,
+						 bool blockable)
 {
 	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
@@ -286,13 +305,19 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;
 
-	amdgpu_mn_read_lock(amn);
+	if (amdgpu_mn_read_lock(amn, blockable))
+		return -EAGAIN;
 
 	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 		struct amdgpu_bo *bo;
 
+		if (!blockable) {
+			amdgpu_mn_read_unlock(amn);
+			return -EAGAIN;
+		}
+
 		node = container_of(it, struct amdgpu_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
@@ -304,6 +329,8 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 				amdgpu_amdkfd_evict_userptr(mem, mm);
 		}
 	}
+
+	return 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index dcd6e230d16a..2c9b284036d1 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -112,10 +112,11 @@ static void del_object(struct i915_mmu_object *mo)
 	mo->attached = false;
 }
 
-static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 						       struct mm_struct *mm,
 						       unsigned long start,
-						       unsigned long end)
+						       unsigned long end,
+						       bool blockable)
 {
 	struct i915_mmu_notifier *mn =
 		container_of(_mn, struct i915_mmu_notifier, mn);
@@ -124,7 +125,7 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	LIST_HEAD(cancelled);
 
 	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
-		return;
+		return 0;
 
 	/* interval ranges are inclusive, but invalidate range is exclusive */
 	end--;
@@ -132,6 +133,10 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	spin_lock(&mn->lock);
 	it = interval_tree_iter_first(&mn->objects, start, end);
 	while (it) {
+		if (!blockable) {
+			spin_unlock(&mn->lock);
+			return -EAGAIN;
+		}
 		/* The mmu_object is released late when destroying the
 		 * GEM object so it is entirely possible to gain a
 		 * reference on an object in the process of being freed
@@ -154,6 +159,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 
 	if (!list_empty(&cancelled))
 		flush_workqueue(mn->wq);
+
+	return 0;
 }
 
 static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index abd24975c9b1..f8b35df44c60 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -118,19 +118,27 @@ static void radeon_mn_release(struct mmu_notifier *mn,
  * We block for all BOs between start and end to be idle and
  * unmap them by move them into system domain again.
  */
-static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
+static int radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 					     struct mm_struct *mm,
 					     unsigned long start,
-					     unsigned long end)
+					     unsigned long end,
+					     bool blockable)
 {
 	struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
 	struct ttm_operation_ctx ctx = { false, false };
 	struct interval_tree_node *it;
+	int ret = 0;
 
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;
 
-	mutex_lock(&rmn->lock);
+	/* TODO we should be able to split locking for interval tree and
+	 * the tear down.
+	 */
+	if (blockable)
+		mutex_lock(&rmn->lock);
+	else if (!mutex_trylock(&rmn->lock))
+		return -EAGAIN;
 
 	it = interval_tree_iter_first(&rmn->objects, start, end);
 	while (it) {
@@ -138,6 +146,11 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 		struct radeon_bo *bo;
 		long r;
 
+		if (!blockable) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+
 		node = container_of(it, struct radeon_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
@@ -166,7 +179,10 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 		}
 	}
 	
+out_unlock:
 	mutex_unlock(&rmn->lock);
+
+	return ret;
 }
 
 static const struct mmu_notifier_ops radeon_mn_ops = {
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 182436b92ba9..6ec748eccff7 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -186,6 +186,7 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn,
 	rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
 				      ULLONG_MAX,
 				      ib_umem_notifier_release_trampoline,
+				      true,
 				      NULL);
 	up_read(&context->umem_rwsem);
 }
@@ -207,22 +208,31 @@ static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
 	return 0;
 }
 
-static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    struct mm_struct *mm,
 						    unsigned long start,
-						    unsigned long end)
+						    unsigned long end,
+						    bool blockable)
 {
 	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+	int ret;
 
 	if (!context->invalidate_range)
-		return;
+		return 0;
+
+	if (blockable)
+		down_read(&context->umem_rwsem);
+	else if (!down_read_trylock(&context->umem_rwsem))
+		return -EAGAIN;
 
 	ib_ucontext_notifier_start_account(context);
-	down_read(&context->umem_rwsem);
-	rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+	ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
 				      end,
-				      invalidate_range_start_trampoline, NULL);
+				      invalidate_range_start_trampoline,
+				      blockable, NULL);
 	up_read(&context->umem_rwsem);
+
+	return ret;
 }
 
 static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
@@ -242,10 +252,15 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
 	if (!context->invalidate_range)
 		return;
 
+	/*
+	 * TODO: we currently bail out if there is any sleepable work to be done
+	 * in ib_umem_notifier_invalidate_range_start so we shouldn't really block
+	 * here. But this is ugly and fragile.
+	 */
 	down_read(&context->umem_rwsem);
 	rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
 				      end,
-				      invalidate_range_end_trampoline, NULL);
+				      invalidate_range_end_trampoline, true, NULL);
 	up_read(&context->umem_rwsem);
 	ib_ucontext_notifier_end_account(context);
 }
@@ -798,6 +813,7 @@ EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
 int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
 				  u64 start, u64 last,
 				  umem_call_back cb,
+				  bool blockable,
 				  void *cookie)
 {
 	int ret_val = 0;
@@ -809,6 +825,9 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
 
 	for (node = rbt_ib_umem_iter_first(root, start, last - 1);
 			node; node = next) {
+		/* TODO move the blockable decision up to the callback */
+		if (!blockable)
+			return -EAGAIN;
 		next = rbt_ib_umem_iter_next(node, start, last - 1);
 		umem = container_of(node, struct ib_umem_odp, interval_tree);
 		ret_val = cb(umem->umem, start, last, cookie) || ret_val;
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 70aceefe14d5..e1c7996c018e 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -67,9 +67,9 @@ struct mmu_rb_handler {
 
 static unsigned long mmu_node_start(struct mmu_rb_node *);
 static unsigned long mmu_node_last(struct mmu_rb_node *);
-static void mmu_notifier_range_start(struct mmu_notifier *,
+static int mmu_notifier_range_start(struct mmu_notifier *,
 				     struct mm_struct *,
-				     unsigned long, unsigned long);
+				     unsigned long, unsigned long, bool);
 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
 					   unsigned long, unsigned long);
 static void do_remove(struct mmu_rb_handler *handler,
@@ -284,10 +284,11 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
 	handler->ops->remove(handler->ops_arg, node);
 }
 
-static void mmu_notifier_range_start(struct mmu_notifier *mn,
+static int mmu_notifier_range_start(struct mmu_notifier *mn,
 				     struct mm_struct *mm,
 				     unsigned long start,
-				     unsigned long end)
+				     unsigned long end,
+				     bool blockable)
 {
 	struct mmu_rb_handler *handler =
 		container_of(mn, struct mmu_rb_handler, mn);
@@ -313,6 +314,8 @@ static void mmu_notifier_range_start(struct mmu_notifier *mn,
 
 	if (added)
 		queue_work(handler->wq, &handler->del_work);
+
+	return 0;
 }
 
 /*
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index f1a87a690a4c..d216e0d2921d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -488,7 +488,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 
 	down_read(&ctx->umem_rwsem);
 	rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
-				      mr_leaf_free, imr);
+				      mr_leaf_free, true, imr);
 	up_read(&ctx->umem_rwsem);
 
 	wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
index 63d6246d6dff..6369aeaa7056 100644
--- a/drivers/misc/mic/scif/scif_dma.c
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -200,15 +200,18 @@ static void scif_mmu_notifier_release(struct mmu_notifier *mn,
 	schedule_work(&scif_info.misc_work);
 }
 
-static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						     struct mm_struct *mm,
 						     unsigned long start,
-						     unsigned long end)
+						     unsigned long end,
+						     bool blockable)
 {
 	struct scif_mmu_notif	*mmn;
 
 	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
 	scif_rma_destroy_tcw(mmn, start, end - start);
+
+	return 0;
 }
 
 static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index a3454eb56fbf..be28f05bfafa 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -219,9 +219,10 @@ void gru_flush_all_tlb(struct gru_state *gru)
 /*
  * MMUOPS notifier callout functions
  */
-static void gru_invalidate_range_start(struct mmu_notifier *mn,
+static int gru_invalidate_range_start(struct mmu_notifier *mn,
 				       struct mm_struct *mm,
-				       unsigned long start, unsigned long end)
+				       unsigned long start, unsigned long end,
+				       bool blockable)
 {
 	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
 						 ms_notifier);
@@ -231,6 +232,8 @@ static void gru_invalidate_range_start(struct mmu_notifier *mn,
 	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
 		start, end, atomic_read(&gms->ms_range_active));
 	gru_flush_tlb_range(gms, start, end - start);
+
+	return 0;
 }
 
 static void gru_invalidate_range_end(struct mmu_notifier *mn,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index c866a62f766d..57390c7666e5 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -479,18 +479,25 @@ static const struct vm_operations_struct gntdev_vmops = {
 
 /* ------------------------------------------------------------------ */
 
+static bool in_range(struct gntdev_grant_map *map,
+			      unsigned long start, unsigned long end)
+{
+	if (!map->vma)
+		return false;
+	if (map->vma->vm_start >= end)
+		return false;
+	if (map->vma->vm_end <= start)
+		return false;
+
+	return true;
+}
+
 static void unmap_if_in_range(struct gntdev_grant_map *map,
 			      unsigned long start, unsigned long end)
 {
 	unsigned long mstart, mend;
 	int err;
 
-	if (!map->vma)
-		return;
-	if (map->vma->vm_start >= end)
-		return;
-	if (map->vma->vm_end <= start)
-		return;
 	mstart = max(start, map->vma->vm_start);
 	mend   = min(end,   map->vma->vm_end);
 	pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
@@ -503,21 +510,40 @@ static void unmap_if_in_range(struct gntdev_grant_map *map,
 	WARN_ON(err);
 }
 
-static void mn_invl_range_start(struct mmu_notifier *mn,
+static int mn_invl_range_start(struct mmu_notifier *mn,
 				struct mm_struct *mm,
-				unsigned long start, unsigned long end)
+				unsigned long start, unsigned long end,
+				bool blockable)
 {
 	struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
 	struct gntdev_grant_map *map;
+	int ret = 0;
+
+	/* TODO do we really need a mutex here? */
+	if (blockable)
+		mutex_lock(&priv->lock);
+	else if (!mutex_trylock(&priv->lock))
+		return -EAGAIN;
 
-	mutex_lock(&priv->lock);
 	list_for_each_entry(map, &priv->maps, next) {
+		if (in_range(map, start, end)) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
 		unmap_if_in_range(map, start, end);
 	}
 	list_for_each_entry(map, &priv->freeable_maps, next) {
+		if (in_range(map, start, end)) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
 		unmap_if_in_range(map, start, end);
 	}
+
+out_unlock:
 	mutex_unlock(&priv->lock);
+
+	return ret;
 }
 
 static void mn_release(struct mmu_notifier *mn,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7c7362dd2faa..0205aee44ded 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1289,8 +1289,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
 
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-		unsigned long start, unsigned long end);
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end, bool blockable);
 
 #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
 int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 392e6af82701..133ba78820ee 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -151,13 +151,15 @@ struct mmu_notifier_ops {
 	 * address space but may still be referenced by sptes until
 	 * the last refcount is dropped.
 	 *
-	 * If both of these callbacks cannot block, and invalidate_range
-	 * cannot block, mmu_notifier_ops.flags should have
-	 * MMU_INVALIDATE_DOES_NOT_BLOCK set.
+	 * If blockable argument is set to false then the callback cannot
+	 * sleep and has to return with -EAGAIN. 0 should be returned
+	 * otherwise.
+	 *
 	 */
-	void (*invalidate_range_start)(struct mmu_notifier *mn,
+	int (*invalidate_range_start)(struct mmu_notifier *mn,
 				       struct mm_struct *mm,
-				       unsigned long start, unsigned long end);
+				       unsigned long start, unsigned long end,
+				       bool blockable);
 	void (*invalidate_range_end)(struct mmu_notifier *mn,
 				     struct mm_struct *mm,
 				     unsigned long start, unsigned long end);
@@ -229,8 +231,9 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm,
 				     unsigned long address);
 extern void __mmu_notifier_change_pte(struct mm_struct *mm,
 				      unsigned long address, pte_t pte);
-extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
-				  unsigned long start, unsigned long end);
+extern int __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+				  unsigned long start, unsigned long end,
+				  bool blockable);
 extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 				  unsigned long start, unsigned long end,
 				  bool only_end);
@@ -281,7 +284,15 @@ static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
 	if (mm_has_notifiers(mm))
-		__mmu_notifier_invalidate_range_start(mm, start, end);
+		__mmu_notifier_invalidate_range_start(mm, start, end, true);
+}
+
+static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+	if (mm_has_notifiers(mm))
+		return __mmu_notifier_invalidate_range_start(mm, start, end, false);
+	return 0;
 }
 
 static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -461,6 +472,12 @@ static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 {
 }
 
+static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+	return 0;
+}
+
 static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 6adac113e96d..92f70e4c6252 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -95,7 +95,7 @@ static inline int check_stable_address_space(struct mm_struct *mm)
 	return 0;
 }
 
-void __oom_reap_task_mm(struct mm_struct *mm);
+bool __oom_reap_task_mm(struct mm_struct *mm);
 
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 6a17f856f841..381cdf5a9bd1 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -119,7 +119,8 @@ typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
  */
 int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
 				  u64 start, u64 end,
-				  umem_call_back cb, void *cookie);
+				  umem_call_back cb,
+				  bool blockable, void *cookie);
 
 /*
  * Find first region intersecting with address range.
diff --git a/mm/hmm.c b/mm/hmm.c
index 76e7a058b32f..0b0554591610 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -177,16 +177,19 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 	up_write(&hmm->mirrors_sem);
 }
 
-static void hmm_invalidate_range_start(struct mmu_notifier *mn,
+static int hmm_invalidate_range_start(struct mmu_notifier *mn,
 				       struct mm_struct *mm,
 				       unsigned long start,
-				       unsigned long end)
+				       unsigned long end,
+				       bool blockable)
 {
 	struct hmm *hmm = mm->hmm;
 
 	VM_BUG_ON(!hmm);
 
 	atomic_inc(&hmm->sequence);
+
+	return 0;
 }
 
 static void hmm_invalidate_range_end(struct mmu_notifier *mn,
diff --git a/mm/mmap.c b/mm/mmap.c
index 8d6449e74431..bb2a7e097c7d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3064,7 +3064,7 @@ void exit_mmap(struct mm_struct *mm)
 		 * reliably test it.
 		 */
 		mutex_lock(&oom_lock);
-		__oom_reap_task_mm(mm);
+		(void)__oom_reap_task_mm(mm);
 		mutex_unlock(&oom_lock);
 
 		set_bit(MMF_OOM_SKIP, &mm->flags);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index eff6b88a993f..82bb1a939c0e 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -174,18 +174,29 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
 	srcu_read_unlock(&srcu, id);
 }
 
-void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
-				  unsigned long start, unsigned long end)
+int __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+				  unsigned long start, unsigned long end,
+				  bool blockable)
 {
 	struct mmu_notifier *mn;
+	int ret = 0;
 	int id;
 
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
-		if (mn->ops->invalidate_range_start)
-			mn->ops->invalidate_range_start(mn, mm, start, end);
+		if (mn->ops->invalidate_range_start) {
+			int _ret = mn->ops->invalidate_range_start(mn, mm, start, end, blockable);
+			if (_ret) {
+				pr_info("%pS callback failed with %d in %sblockable context.\n",
+						mn->ops->invalidate_range_start, _ret,
+						!blockable ? "non-" : "");
+				ret = _ret;
+			}
+		}
 	}
 	srcu_read_unlock(&srcu, id);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 412f43453a68..be31a1e0fe78 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -487,9 +487,10 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
 static struct task_struct *oom_reaper_list;
 static DEFINE_SPINLOCK(oom_reaper_lock);
 
-void __oom_reap_task_mm(struct mm_struct *mm)
+bool __oom_reap_task_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	bool ret = true;
 
 	/*
 	 * Tell all users of get_user/copy_from_user etc... that the content
@@ -519,12 +520,17 @@ void __oom_reap_task_mm(struct mm_struct *mm)
 			struct mmu_gather tlb;
 
 			tlb_gather_mmu(&tlb, mm, start, end);
-			mmu_notifier_invalidate_range_start(mm, start, end);
+			if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) {
+				ret = false;
+				continue;
+			}
 			unmap_page_range(&tlb, vma, start, end, NULL);
 			mmu_notifier_invalidate_range_end(mm, start, end);
 			tlb_finish_mmu(&tlb, start, end);
 		}
 	}
+
+	return ret;
 }
 
 static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
@@ -553,18 +559,6 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 		goto unlock_oom;
 	}
 
-	/*
-	 * If the mm has invalidate_{start,end}() notifiers that could block,
-	 * sleep to give the oom victim some more time.
-	 * TODO: we really want to get rid of this ugly hack and make sure that
-	 * notifiers cannot block for unbounded amount of time
-	 */
-	if (mm_has_blockable_invalidate_notifiers(mm)) {
-		up_read(&mm->mmap_sem);
-		schedule_timeout_idle(HZ);
-		goto unlock_oom;
-	}
-
 	/*
 	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
 	 * work on the mm anymore. The check for MMF_OOM_SKIP must run
@@ -579,7 +573,12 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
 	trace_start_task_reaping(tsk->pid);
 
-	__oom_reap_task_mm(mm);
+	/* failed to reap part of the address space. Try again later */
+	if (!__oom_reap_task_mm(mm)) {
+		up_read(&mm->mmap_sem);
+		ret = false;
+		goto unlock_oom;
+	}
 
 	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
 			task_pid_nr(tsk), tsk->comm,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9263ead9fd32..0116b449b993 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -140,9 +140,10 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
 static unsigned long long kvm_createvm_count;
 static unsigned long long kvm_active_vms;
 
-__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-		unsigned long start, unsigned long end)
+__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end, bool blockable)
 {
+	return 0;
 }
 
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
@@ -360,13 +361,15 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
-static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    struct mm_struct *mm,
 						    unsigned long start,
-						    unsigned long end)
+						    unsigned long end,
+						    bool blockable)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
 	int need_tlb_flush = 0, idx;
+	int ret;
 
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
@@ -384,9 +387,11 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 
 	spin_unlock(&kvm->mmu_lock);
 
-	kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+	ret = kvm_arch_mmu_notifier_invalidate_range(kvm, start, end, blockable);
 
 	srcu_read_unlock(&kvm->srcu, idx);
+
+	return ret;
 }
 
 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
-- 
cgit v1.2.3


From a670468f5e0b5fad4db6e4d195f15915dc2a35c1 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 21 Aug 2018 21:53:06 -0700
Subject: mm: zero out the vma in vma_init()

Rather than in vm_area_alloc().  To ensure that the various oddball
stack-based vmas are in a good state.  Some of the callers were zeroing
them out, others were not.

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/process.c | 9 ++++-----
 fs/hugetlbfs/inode.c      | 2 --
 include/linux/mm.h        | 1 +
 kernel/fork.c             | 3 ++-
 mm/mempolicy.c            | 1 -
 mm/shmem.c                | 1 -
 6 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index d9c299133111..82ab015bf42b 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -330,16 +330,15 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
  * atomic helpers. Insert it into the gate_vma so that it is visible
  * through ptrace and /proc/<pid>/mem.
  */
-static struct vm_area_struct gate_vma = {
-	.vm_start	= 0xffff0000,
-	.vm_end		= 0xffff0000 + PAGE_SIZE,
-	.vm_flags	= VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
-};
+static struct vm_area_struct gate_vma;
 
 static int __init gate_vma_init(void)
 {
 	vma_init(&gate_vma, NULL);
 	gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
+	gate_vma.vm_start = 0xffff0000;
+	gate_vma.vm_end	= 0xffff0000 + PAGE_SIZE;
+	gate_vma.vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC;
 	return 0;
 }
 arch_initcall(gate_vma_init);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 346a146c7617..32920a10100e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -410,7 +410,6 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 	int i, freed = 0;
 	bool truncate_op = (lend == LLONG_MAX);
 
-	memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
 	vma_init(&pseudo_vma, current->mm);
 	pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
 	pagevec_init(&pvec);
@@ -595,7 +594,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 	 * allocation routines.  If NUMA is configured, use page index
 	 * as input to create an allocation policy.
 	 */
-	memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
 	vma_init(&pseudo_vma, mm);
 	pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
 	pseudo_vma.vm_file = file;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a3cae495f9ce..3a4b87d1a59a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -456,6 +456,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
 	static const struct vm_operations_struct dummy_vm_ops = {};
 
+	memset(vma, 0, sizeof(*vma));
 	vma->vm_mm = mm;
 	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
diff --git a/kernel/fork.c b/kernel/fork.c
index 5ee74c113381..8c760effa42e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -310,8 +310,9 @@ static struct kmem_cache *mm_cachep;
 
 struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
 {
-	struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	struct vm_area_struct *vma;
 
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (vma)
 		vma_init(vma, mm);
 	return vma;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 01f1a14facc4..4861ba738d6f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2504,7 +2504,6 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 			goto put_new;
 
 		/* Create pseudo-vma that contains just the policy */
-		memset(&pvma, 0, sizeof(struct vm_area_struct));
 		vma_init(&pvma, NULL);
 		pvma.vm_end = TASK_SIZE;	/* policy covers entire file */
 		mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
diff --git a/mm/shmem.c b/mm/shmem.c
index c48c79018a7c..fb04baacc9fa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1421,7 +1421,6 @@ static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
 		struct shmem_inode_info *info, pgoff_t index)
 {
 	/* Create a pseudo vma that just contains the policy */
-	memset(vma, 0, sizeof(*vma));
 	vma_init(vma, NULL);
 	/* Bias interleave by inode number to distribute better across nodes */
 	vma->vm_pgoff = index + info->vfs_inode.i_ino;
-- 
cgit v1.2.3


From 89696701ea847786f88ccc1c580fb4c3c20c4a3a Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Tue, 21 Aug 2018 21:53:24 -0700
Subject: mm: remove zone_id() and make use of zone_idx() in is_dev_zone()

is_dev_zone() is using zone_id() to check if the zone is ZONE_DEVICE.
zone_id() looks pretty much the same as zone_idx(), and while the use of
zone_idx() is quite spread in the kernel, zone_id() is only being used by
is_dev_zone().

This patch removes zone_id() and makes is_dev_zone() use zone_idx() to
check the zone, so we do not have two things with the same functionality
around.

Link: http://lkml.kernel.org/r/20180730133718.28683-1-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 32699b2dc52a..3d4577a3ae7e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -755,25 +755,6 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)
 	return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
 }
 
-static inline int zone_id(const struct zone *zone)
-{
-	struct pglist_data *pgdat = zone->zone_pgdat;
-
-	return zone - pgdat->node_zones;
-}
-
-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_dev_zone(const struct zone *zone)
-{
-	return zone_id(zone) == ZONE_DEVICE;
-}
-#else
-static inline bool is_dev_zone(const struct zone *zone)
-{
-	return false;
-}
-#endif
-
 #include <linux/memory_hotplug.h>
 
 void build_all_zonelists(pg_data_t *pgdat);
@@ -824,6 +805,18 @@ static inline int local_memory_node(int node_id) { return node_id; };
  */
 #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
 
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool is_dev_zone(const struct zone *zone)
+{
+	return zone_idx(zone) == ZONE_DEVICE;
+}
+#else
+static inline bool is_dev_zone(const struct zone *zone)
+{
+	return false;
+}
+#endif
+
 /*
  * Returns true if a zone has pages managed by the buddy allocator.
  * All the reclaim decisions have to use this function rather than
-- 
cgit v1.2.3


From c1093b746c0576ed81c4d568d1e39cab651d37e6 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Tue, 21 Aug 2018 21:53:32 -0700
Subject: mm: access zone->node via zone_to_nid() and zone_set_nid()

zone->node is configured only when CONFIG_NUMA=y, so it is a good idea to
have inline functions to access this field in order to avoid ifdef's in c
files.

Link: http://lkml.kernel.org/r/20180730101757.28058-3-osalvador@techadventures.net
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h     |  9 ---------
 include/linux/mmzone.h | 26 ++++++++++++++++++++------
 mm/mempolicy.c         |  4 ++--
 mm/mm_init.c           |  9 ++-------
 mm/page_alloc.c        | 10 ++++------
 5 files changed, 28 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3a4b87d1a59a..a55f5389c491 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -960,15 +960,6 @@ static inline int page_zone_id(struct page *page)
 	return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
 }
 
-static inline int zone_to_nid(struct zone *zone)
-{
-#ifdef CONFIG_NUMA
-	return zone->node;
-#else
-	return 0;
-#endif
-}
-
 #ifdef NODE_NOT_IN_PAGE_FLAGS
 extern int page_to_nid(const struct page *page);
 #else
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3d4577a3ae7e..1e22d96734e0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -834,6 +834,25 @@ static inline bool populated_zone(struct zone *zone)
 	return zone->present_pages;
 }
 
+#ifdef CONFIG_NUMA
+static inline int zone_to_nid(struct zone *zone)
+{
+	return zone->node;
+}
+
+static inline void zone_set_nid(struct zone *zone, int nid)
+{
+	zone->node = nid;
+}
+#else
+static inline int zone_to_nid(struct zone *zone)
+{
+	return 0;
+}
+
+static inline void zone_set_nid(struct zone *zone, int nid) {}
+#endif
+
 extern int movable_zone;
 
 #ifdef CONFIG_HIGHMEM
@@ -949,12 +968,7 @@ static inline int zonelist_zone_idx(struct zoneref *zoneref)
 
 static inline int zonelist_node_idx(struct zoneref *zoneref)
 {
-#ifdef CONFIG_NUMA
-	/* zone_to_nid not available in this context */
-	return zoneref->zone->node;
-#else
-	return 0;
-#endif /* CONFIG_NUMA */
+	return zone_to_nid(zoneref->zone);
 }
 
 struct zoneref *__next_zones_zonelist(struct zoneref *z,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4861ba738d6f..da858f794eb6 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1784,7 +1784,7 @@ unsigned int mempolicy_slab_node(void)
 		zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK];
 		z = first_zones_zonelist(zonelist, highest_zoneidx,
 							&policy->v.nodes);
-		return z->zone ? z->zone->node : node;
+		return z->zone ? zone_to_nid(z->zone) : node;
 	}
 
 	default:
@@ -2326,7 +2326,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 				node_zonelist(numa_node_id(), GFP_HIGHUSER),
 				gfp_zone(GFP_HIGHUSER),
 				&pol->v.nodes);
-		polnid = z->zone->node;
+		polnid = zone_to_nid(z->zone);
 		break;
 
 	default:
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5b72266b4b03..6838a530789b 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -53,13 +53,8 @@ void __init mminit_verify_zonelist(void)
 				zone->name);
 
 			/* Iterate the zonelist */
-			for_each_zone_zonelist(zone, z, zonelist, zoneid) {
-#ifdef CONFIG_NUMA
-				pr_cont("%d:%s ", zone->node, zone->name);
-#else
-				pr_cont("0:%s ", zone->name);
-#endif /* CONFIG_NUMA */
-			}
+			for_each_zone_zonelist(zone, z, zonelist, zoneid)
+				pr_cont("%d:%s ", zone_to_nid(zone), zone->name);
 			pr_cont("\n");
 		}
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 455cc3bfae99..6b2324bc61db 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2909,10 +2909,10 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 	if (!static_branch_likely(&vm_numa_stat_key))
 		return;
 
-	if (z->node != numa_node_id())
+	if (zone_to_nid(z) != numa_node_id())
 		local_stat = NUMA_OTHER;
 
-	if (z->node == preferred_zone->node)
+	if (zone_to_nid(z) == zone_to_nid(preferred_zone))
 		__inc_numa_state(z, NUMA_HIT);
 	else {
 		__inc_numa_state(z, NUMA_MISS);
@@ -5278,7 +5278,7 @@ int local_memory_node(int node)
 	z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
 				   gfp_zone(GFP_KERNEL),
 				   NULL);
-	return z->zone->node;
+	return zone_to_nid(z->zone);
 }
 #endif
 
@@ -6299,9 +6299,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		 * And all highmem pages will be managed by the buddy system.
 		 */
 		zone->managed_pages = freesize;
-#ifdef CONFIG_NUMA
-		zone->node = nid;
-#endif
+		zone_set_nid(zone, nid);
 		zone->name = zone_names[j];
 		zone->zone_pgdat = pgdat;
 		spin_lock_init(&zone->lock);
-- 
cgit v1.2.3


From 03e85f9d5f1f8c74f127c5f7a87575d74a78d248 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Tue, 21 Aug 2018 21:53:43 -0700
Subject: mm/page_alloc: Introduce free_area_init_core_hotplug

Currently, whenever a new node is created/re-used from the memhotplug
path, we call free_area_init_node()->free_area_init_core().  But there is
some code that we do not really need to run when we are coming from such
path.

free_area_init_core() performs the following actions:

1) Initializes pgdat internals, such as spinlock, waitqueues and more.
2) Account # nr_all_pages and # nr_kernel_pages. These values are used later on
   when creating hash tables.
3) Account number of managed_pages per zone, substracting dma_reserved and
   memmap pages.
4) Initializes some fields of the zone structure data
5) Calls init_currently_empty_zone to initialize all the freelists
6) Calls memmap_init to initialize all pages belonging to certain zone

When called from memhotplug path, free_area_init_core() only performs
actions #1 and #4.

Action #2 is pointless as the zones do not have any pages since either the
node was freed, or we are re-using it, eitherway all zones belonging to
this node should have 0 pages.  For the same reason, action #3 results
always in manages_pages being 0.

Action #5 and #6 are performed later on when onlining the pages:
 online_pages()->move_pfn_range_to_zone()->init_currently_empty_zone()
 online_pages()->move_pfn_range_to_zone()->memmap_init_zone()

This patch does two things:

First, moves the node/zone initializtion to their own function, so it
allows us to create a small version of free_area_init_core, where we only
perform:

1) Initialization of pgdat internals, such as spinlock, waitqueues and more
4) Initialization of some fields of the zone structure data

These two functions are: pgdat_init_internals() and zone_init_internals().

The second thing this patch does, is to introduce
free_area_init_core_hotplug(), the memhotplug version of
free_area_init_core():

Currently, we call free_area_init_node() from the memhotplug path.  In
there, we set some pgdat's fields, and call calculate_node_totalpages().
calculate_node_totalpages() calculates the # of pages the node has.

Since the node is either new, or we are re-using it, the zones belonging
to this node should not have any pages, so there is no point to calculate
this now.

Actually, we re-set these values to 0 later on with the calls to:

reset_node_managed_pages()
reset_node_present_pages()

The # of pages per node and the # of pages per zone will be calculated when
onlining the pages:

online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_zone_range()
online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_pgdat_range()

Also, since free_area_init_core/free_area_init_node will now only get called during early init, let us replace
__paginginit with __init, so their code gets freed up.

[osalvador@techadventures.net: fix section usage]
  Link: http://lkml.kernel.org/r/20180731101752.GA473@techadventures.net
[osalvador@suse.de: v6]
  Link: http://lkml.kernel.org/r/20180801122348.21588-6-osalvador@techadventures.net
Link: http://lkml.kernel.org/r/20180730101757.28058-5-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  1 +
 include/linux/mm.h             |  2 +-
 mm/memory_hotplug.c            | 16 +++------
 mm/page_alloc.c                | 78 +++++++++++++++++++++++++++++-------------
 4 files changed, 61 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4e9828cda7a2..34a28227068d 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -319,6 +319,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 static inline void remove_memory(int nid, u64 start, u64 size) {}
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
+extern void __ref free_area_init_core_hotplug(int nid);
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a55f5389c491..a9e733b5fb76 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2015,7 +2015,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
 
 extern void __init pagecache_init(void);
 extern void free_area_init(unsigned long * zones_size);
-extern void free_area_init_node(int nid, unsigned long * zones_size,
+extern void __init free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
 extern void free_initmem(void);
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 4eb6e824a80c..9eea6e809a4e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -982,8 +982,6 @@ static void reset_node_present_pages(pg_data_t *pgdat)
 static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 {
 	struct pglist_data *pgdat;
-	unsigned long zones_size[MAX_NR_ZONES] = {0};
-	unsigned long zholes_size[MAX_NR_ZONES] = {0};
 	unsigned long start_pfn = PFN_DOWN(start);
 
 	pgdat = NODE_DATA(nid);
@@ -1006,8 +1004,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 
 	/* we can use NODE_DATA(nid) from here */
 
+	pgdat->node_id = nid;
+	pgdat->node_start_pfn = start_pfn;
+
 	/* init node's zones as empty zones, we don't have any present pages.*/
-	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+	free_area_init_core_hotplug(nid);
 	pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
 
 	/*
@@ -1016,19 +1017,12 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 	 */
 	build_all_zonelists(pgdat);
 
-	/*
-	 * zone->managed_pages is set to an approximate value in
-	 * free_area_init_core(), which will cause
-	 * /sys/device/system/node/nodeX/meminfo has wrong data.
-	 * So reset it to 0 before any memory is onlined.
-	 */
-	reset_node_managed_pages(pgdat);
-
 	/*
 	 * When memory is hot-added, all the memory is in offline state. So
 	 * clear all zones' present_pages because they will be updated in
 	 * online_pages() and offline_pages().
 	 */
+	reset_node_managed_pages(pgdat);
 	reset_node_present_pages(pgdat);
 
 	return pgdat;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b939bd1bff9..c677c1506d73 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6140,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __meminit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 	unsigned int order;
 
@@ -6168,13 +6168,13 @@ void __meminit set_pageblock_order(void)
  * include/linux/pageblock-flags.h for the values of pageblock_order based on
  * the kernel config
  */
-void __meminit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 }
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
-static unsigned long __meminit calc_memmap_size(unsigned long spanned_pages,
+static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
 						unsigned long present_pages)
 {
 	unsigned long pages = spanned_pages;
@@ -6225,19 +6225,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat)
 static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 #endif
 
-/*
- * Set up the zone data structures:
- *   - mark all pages reserved
- *   - mark all memory queues empty
- *   - clear the memory bitmaps
- *
- * NOTE: pgdat should get zeroed by caller.
- */
-static void __meminit free_area_init_core(struct pglist_data *pgdat)
+static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 {
-	enum zone_type j;
-	int nid = pgdat->node_id;
-
 	pgdat_resize_init(pgdat);
 
 	pgdat_init_numabalancing(pgdat);
@@ -6250,7 +6239,54 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
 	pgdat_page_ext_init(pgdat);
 	spin_lock_init(&pgdat->lru_lock);
 	lruvec_init(node_lruvec(pgdat));
+}
+
+static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+							unsigned long remaining_pages)
+{
+	zone->managed_pages = remaining_pages;
+	zone_set_nid(zone, nid);
+	zone->name = zone_names[idx];
+	zone->zone_pgdat = NODE_DATA(nid);
+	spin_lock_init(&zone->lock);
+	zone_seqlock_init(zone);
+	zone_pcp_init(zone);
+}
+
+/*
+ * Set up the zone data structures
+ * - init pgdat internals
+ * - init all zones belonging to this node
+ *
+ * NOTE: this function is only called during memory hotplug
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __ref free_area_init_core_hotplug(int nid)
+{
+	enum zone_type z;
+	pg_data_t *pgdat = NODE_DATA(nid);
+
+	pgdat_init_internals(pgdat);
+	for (z = 0; z < MAX_NR_ZONES; z++)
+		zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+}
+#endif
+
+/*
+ * Set up the zone data structures:
+ *   - mark all pages reserved
+ *   - mark all memory queues empty
+ *   - clear the memory bitmaps
+ *
+ * NOTE: pgdat should get zeroed by caller.
+ * NOTE: this function is only called during early init.
+ */
+static void __init free_area_init_core(struct pglist_data *pgdat)
+{
+	enum zone_type j;
+	int nid = pgdat->node_id;
 
+	pgdat_init_internals(pgdat);
 	pgdat->per_cpu_nodestats = &boot_nodestats;
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6298,13 +6334,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
 		 * when the bootmem allocator frees pages into the buddy system.
 		 * And all highmem pages will be managed by the buddy system.
 		 */
-		zone->managed_pages = freesize;
-		zone_set_nid(zone, nid);
-		zone->name = zone_names[j];
-		zone->zone_pgdat = pgdat;
-		spin_lock_init(&zone->lock);
-		zone_seqlock_init(zone);
-		zone_pcp_init(zone);
+		zone_init_internals(zone, j, nid, freesize);
 
 		if (!size)
 			continue;
@@ -6379,7 +6409,7 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
 static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
 #endif
 
-void __meminit free_area_init_node(int nid, unsigned long *zones_size,
+void __init free_area_init_node(int nid, unsigned long *zones_size,
 				   unsigned long node_start_pfn,
 				   unsigned long *zholes_size)
 {
@@ -6418,7 +6448,7 @@ void __meminit free_area_init_node(int nid, unsigned long *zones_size,
  * may be accessed (for example page_to_pfn() on some configuration accesses
  * flags). We must explicitly zero those struct pages.
  */
-void __meminit zero_resv_unavail(void)
+void __init zero_resv_unavail(void)
 {
 	phys_addr_t start, end;
 	unsigned long pfn;
-- 
cgit v1.2.3


From 3d8b38eb81cac81395f6a823f6bf401b327268e6 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Tue, 21 Aug 2018 21:53:54 -0700
Subject: mm, oom: introduce memory.oom.group

For some workloads an intervention from the OOM killer can be painful.
Killing a random task can bring the workload into an inconsistent state.

Historically, there are two common solutions for this
problem:
1) enabling panic_on_oom,
2) using a userspace daemon to monitor OOMs and kill
   all outstanding processes.

Both approaches have their downsides: rebooting on each OOM is an obvious
waste of capacity, and handling all in userspace is tricky and requires a
userspace agent, which will monitor all cgroups for OOMs.

In most cases an in-kernel after-OOM cleaning-up mechanism can eliminate
the necessity of enabling panic_on_oom.  Also, it can simplify the cgroup
management for userspace applications.

This commit introduces a new knob for cgroup v2 memory controller:
memory.oom.group.  The knob determines whether the cgroup should be
treated as an indivisible workload by the OOM killer.  If set, all tasks
belonging to the cgroup or to its descendants (if the memory cgroup is not
a leaf cgroup) are killed together or not at all.

To determine which cgroup has to be killed, we do traverse the cgroup
hierarchy from the victim task's cgroup up to the OOMing cgroup (or root)
and looking for the highest-level cgroup with memory.oom.group set.

Tasks with the OOM protection (oom_score_adj set to -1000) are treated as
an exception and are never killed.

This patch doesn't change the OOM victim selection algorithm.

Link: http://lkml.kernel.org/r/20180802003201.817-4-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/cgroup-v2.rst | 18 +++++++
 include/linux/memcontrol.h              | 18 +++++++
 mm/memcontrol.c                         | 93 +++++++++++++++++++++++++++++++++
 mm/oom_kill.c                           | 30 +++++++++++
 4 files changed, 159 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 1746131bc9cb..184193bcb262 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1072,6 +1072,24 @@ PAGE_SIZE multiple when read back.
 	high limit is used and monitored properly, this limit's
 	utility is limited to providing the final safety net.
 
+  memory.oom.group
+	A read-write single value file which exists on non-root
+	cgroups.  The default value is "0".
+
+	Determines whether the cgroup should be treated as
+	an indivisible workload by the OOM killer. If set,
+	all tasks belonging to the cgroup or to its descendants
+	(if the memory cgroup is not a leaf cgroup) are killed
+	together or not at all. This can be used to avoid
+	partial kills to guarantee workload integrity.
+
+	Tasks with the OOM protection (oom_score_adj set to -1000)
+	are treated as an exception and are never killed.
+
+	If the OOM killer is invoked in a cgroup, it's not going
+	to kill any tasks outside of this cgroup, regardless
+	memory.oom.group values of ancestor cgroups.
+
   memory.events
 	A read-only flat-keyed file which exists on non-root cgroups.
 	The following entries are defined.  Unless specified
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0e6c515fb698..652f602167df 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -225,6 +225,11 @@ struct mem_cgroup {
 	 */
 	bool use_hierarchy;
 
+	/*
+	 * Should the OOM killer kill all belonging tasks, had it kill one?
+	 */
+	bool oom_group;
+
 	/* protected by memcg_oom_lock */
 	bool		oom_lock;
 	int		under_oom;
@@ -542,6 +547,9 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 }
 
 bool mem_cgroup_oom_synchronize(bool wait);
+struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
+					    struct mem_cgroup *oom_domain);
+void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
 
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
@@ -1001,6 +1009,16 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
 	return false;
 }
 
+static inline struct mem_cgroup *mem_cgroup_get_oom_group(
+	struct task_struct *victim, struct mem_cgroup *oom_domain)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
+{
+}
+
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 					     int idx)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 59c14c988143..4ead5a4817de 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1776,6 +1776,62 @@ cleanup:
 	return true;
 }
 
+/**
+ * mem_cgroup_get_oom_group - get a memory cgroup to clean up after OOM
+ * @victim: task to be killed by the OOM killer
+ * @oom_domain: memcg in case of memcg OOM, NULL in case of system-wide OOM
+ *
+ * Returns a pointer to a memory cgroup, which has to be cleaned up
+ * by killing all belonging OOM-killable tasks.
+ *
+ * Caller has to call mem_cgroup_put() on the returned non-NULL memcg.
+ */
+struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
+					    struct mem_cgroup *oom_domain)
+{
+	struct mem_cgroup *oom_group = NULL;
+	struct mem_cgroup *memcg;
+
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		return NULL;
+
+	if (!oom_domain)
+		oom_domain = root_mem_cgroup;
+
+	rcu_read_lock();
+
+	memcg = mem_cgroup_from_task(victim);
+	if (memcg == root_mem_cgroup)
+		goto out;
+
+	/*
+	 * Traverse the memory cgroup hierarchy from the victim task's
+	 * cgroup up to the OOMing cgroup (or root) to find the
+	 * highest-level memory cgroup with oom.group set.
+	 */
+	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+		if (memcg->oom_group)
+			oom_group = memcg;
+
+		if (memcg == oom_domain)
+			break;
+	}
+
+	if (oom_group)
+		css_get(&oom_group->css);
+out:
+	rcu_read_unlock();
+
+	return oom_group;
+}
+
+void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
+{
+	pr_info("Tasks in ");
+	pr_cont_cgroup_path(memcg->css.cgroup);
+	pr_cont(" are going to be killed due to memory.oom.group set\n");
+}
+
 /**
  * lock_page_memcg - lock a page->mem_cgroup binding
  * @page: the page
@@ -5561,6 +5617,37 @@ static int memory_stat_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+static int memory_oom_group_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	seq_printf(m, "%d\n", memcg->oom_group);
+
+	return 0;
+}
+
+static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
+				      char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, oom_group;
+
+	buf = strstrip(buf);
+	if (!buf)
+		return -EINVAL;
+
+	ret = kstrtoint(buf, 0, &oom_group);
+	if (ret)
+		return ret;
+
+	if (oom_group != 0 && oom_group != 1)
+		return -EINVAL;
+
+	memcg->oom_group = oom_group;
+
+	return nbytes;
+}
+
 static struct cftype memory_files[] = {
 	{
 		.name = "current",
@@ -5602,6 +5689,12 @@ static struct cftype memory_files[] = {
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = memory_stat_show,
 	},
+	{
+		.name = "oom.group",
+		.flags = CFTYPE_NOT_ON_ROOT | CFTYPE_NS_DELEGATABLE,
+		.seq_show = memory_oom_group_show,
+		.write = memory_oom_group_write,
+	},
 	{ }	/* terminate */
 };
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 330416c67ce5..0e10b864e074 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -908,6 +908,19 @@ static void __oom_kill_process(struct task_struct *victim)
 }
 #undef K
 
+/*
+ * Kill provided task unless it's secured by setting
+ * oom_score_adj to OOM_SCORE_ADJ_MIN.
+ */
+static int oom_kill_memcg_member(struct task_struct *task, void *unused)
+{
+	if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
+		get_task_struct(task);
+		__oom_kill_process(task);
+	}
+	return 0;
+}
+
 static void oom_kill_process(struct oom_control *oc, const char *message)
 {
 	struct task_struct *p = oc->chosen;
@@ -915,6 +928,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	struct task_struct *victim = p;
 	struct task_struct *child;
 	struct task_struct *t;
+	struct mem_cgroup *oom_group;
 	unsigned int victim_points = 0;
 	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
 					      DEFAULT_RATELIMIT_BURST);
@@ -968,7 +982,23 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	}
 	read_unlock(&tasklist_lock);
 
+	/*
+	 * Do we need to kill the entire memory cgroup?
+	 * Or even one of the ancestor memory cgroups?
+	 * Check this out before killing the victim task.
+	 */
+	oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);
+
 	__oom_kill_process(victim);
+
+	/*
+	 * If necessary, kill all tasks in the selected memory cgroup.
+	 */
+	if (oom_group) {
+		mem_cgroup_print_oom_group(oom_group);
+		mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, NULL);
+		mem_cgroup_put(oom_group);
+	}
 }
 
 /*
-- 
cgit v1.2.3


From 7e8a6304d5419cbf056a59de92939e5eef039c57 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 21 Aug 2018 21:53:58 -0700
Subject: /proc/meminfo: add percpu populated pages count

Currently, percpu memory only exposes allocation and utilization
information via debugfs.  This more or less is only really useful for
understanding the fragmentation and allocation information at a per-chunk
level with a few global counters.  This is also gated behind a config.
BPF and cgroup, for example, have seen an increase in use causing
increased use of percpu memory.  Let's make it easier for someone to
identify how much memory is being used.

This patch adds the "Percpu" stat to meminfo to more easily look up how
much percpu memory is in use.  This number includes the cost for all
allocated backing pages and not just insight at the per a unit, per chunk
level.  Metadata is excluded.  I think excluding metadata is fair because
the backing memory scales with the numbere of cpus and can quickly
outweigh the metadata.  It also makes this calculation light.

Link: http://lkml.kernel.org/r/20180807184723.74919-1-dennisszhou@gmail.com
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt |  3 +++
 fs/proc/meminfo.c                  |  2 ++
 include/linux/percpu.h             |  2 ++
 mm/percpu.c                        | 29 +++++++++++++++++++++++++++++
 4 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 1605acbb23b6..22b4b00dee31 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -870,6 +870,7 @@ Committed_AS:   100056 kB
 VmallocTotal:   112216 kB
 VmallocUsed:       428 kB
 VmallocChunk:   111088 kB
+Percpu:          62080 kB
 HardwareCorrupted:   0 kB
 AnonHugePages:   49152 kB
 ShmemHugePages:      0 kB
@@ -962,6 +963,8 @@ Committed_AS: The amount of memory presently allocated on the system.
 VmallocTotal: total size of vmalloc memory area
  VmallocUsed: amount of vmalloc area which is used
 VmallocChunk: largest contiguous block of vmalloc area which is free
+      Percpu: Memory allocated to the percpu allocator used to back percpu
+              allocations. This stat excludes the cost of metadata.
 
 ..............................................................................
 
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 2fb04846ed11..edda898714eb 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -7,6 +7,7 @@
 #include <linux/mman.h>
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
+#include <linux/percpu.h>
 #include <linux/quicklist.h>
 #include <linux/seq_file.h>
 #include <linux/swap.h>
@@ -121,6 +122,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		   (unsigned long)VMALLOC_TOTAL >> 10);
 	show_val_kb(m, "VmallocUsed:    ", 0ul);
 	show_val_kb(m, "VmallocChunk:   ", 0ul);
+	show_val_kb(m, "Percpu:         ", pcpu_nr_pages());
 
 #ifdef CONFIG_MEMORY_FAILURE
 	seq_printf(m, "HardwareCorrupted: %5lu kB\n",
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 296bbe49d5d1..70b7123f38c7 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -149,4 +149,6 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
 	(typeof(type) __percpu *)__alloc_percpu(sizeof(type),		\
 						__alignof__(type))
 
+extern unsigned long pcpu_nr_pages(void);
+
 #endif /* __LINUX_PERCPU_H */
diff --git a/mm/percpu.c b/mm/percpu.c
index 0b6480979ac7..a749d4d96e3e 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -169,6 +169,14 @@ static LIST_HEAD(pcpu_map_extend_chunks);
  */
 int pcpu_nr_empty_pop_pages;
 
+/*
+ * The number of populated pages in use by the allocator, protected by
+ * pcpu_lock.  This number is kept per a unit per chunk (i.e. when a page gets
+ * allocated/deallocated, it is allocated/deallocated in all units of a chunk
+ * and increments/decrements this count by 1).
+ */
+static unsigned long pcpu_nr_populated;
+
 /*
  * Balance work is used to populate or destroy chunks asynchronously.  We
  * try to keep the number of populated free pages between
@@ -1232,6 +1240,7 @@ static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
 
 	bitmap_set(chunk->populated, page_start, nr);
 	chunk->nr_populated += nr;
+	pcpu_nr_populated += nr;
 
 	if (!for_alloc) {
 		chunk->nr_empty_pop_pages += nr;
@@ -1260,6 +1269,7 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
 	chunk->nr_populated -= nr;
 	chunk->nr_empty_pop_pages -= nr;
 	pcpu_nr_empty_pop_pages -= nr;
+	pcpu_nr_populated -= nr;
 }
 
 /*
@@ -2176,6 +2186,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
 	pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
+	/* include all regions of the first chunk */
+	pcpu_nr_populated += PFN_DOWN(size_sum);
+
 	pcpu_stats_chunk_alloc();
 	trace_percpu_create_chunk(base_addr);
 
@@ -2745,6 +2758,22 @@ void __init setup_per_cpu_areas(void)
 
 #endif	/* CONFIG_SMP */
 
+/*
+ * pcpu_nr_pages - calculate total number of populated backing pages
+ *
+ * This reflects the number of pages populated to back chunks.  Metadata is
+ * excluded in the number exposed in meminfo as the number of backing pages
+ * scales with the number of cpus and can quickly outweigh the memory used for
+ * metadata.  It also keeps this calculation nice and simple.
+ *
+ * RETURNS:
+ * Total number of populated backing pages in use by the allocator.
+ */
+unsigned long pcpu_nr_pages(void)
+{
+	return pcpu_nr_populated * pcpu_nr_units;
+}
+
 /*
  * Percpu allocator is initialized early during boot when neither slab or
  * workqueue is available.  Plug async management until everything is up
-- 
cgit v1.2.3


From 5df66d306ec9b1952d3d183fe4e2ced1a7b2bddb Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Tue, 21 Aug 2018 21:54:06 -0700
Subject: mm: fix comment for NODEMASK_ALLOC

Currently, NODEMASK_ALLOC allocates a nodemask_t with kmalloc when
NODES_SHIFT is higher than 8, otherwise it declares it within the stack.

The comment says that the reasoning behind this, is that nodemask_t will
be 256 bytes when NODES_SHIFT is higher than 8, but this is not true.  For
example, NODES_SHIFT = 9 will give us a 64 bytes nodemask_t.  Let us fix
up the comment for that.

Another thing is that it might make sense to let values lower than
128bytes be allocated in the stack.  Although this all depends on the
depth of the stack (and this changes from function to function), I think
that 64 bytes is something we can easily afford.  So we could even bump
the limit by 1 (from > 8 to > 9).

Link: http://lkml.kernel.org/r/20180820085516.9687-1-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nodemask.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 1fbde8a880d9..5a30ad594ccc 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -518,7 +518,7 @@ static inline int node_random(const nodemask_t *mask)
  * NODEMASK_ALLOC(type, name) allocates an object with a specified type and
  * name.
  */
-#if NODES_SHIFT > 8 /* nodemask_t > 256 bytes */
+#if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */
 #define NODEMASK_ALLOC(type, name, gfp_flags)	\
 			type *name = kmalloc(sizeof(*name), gfp_flags)
 #define NODEMASK_FREE(m)			kfree(m)
-- 
cgit v1.2.3


From 891ae71dc4fbd2454a3fa569e115a7ca86630949 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 21 Aug 2018 21:54:37 -0700
Subject: proc: spread "const" a bit

Link: http://lkml.kernel.org/r/20180627200614.GB18434@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/internal.h      | 4 ++--
 include/linux/proc_fs.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c9d97818a421..5185d7f6a51e 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -113,12 +113,12 @@ static inline void *__PDE_DATA(const struct inode *inode)
 	return PDE(inode)->data;
 }
 
-static inline struct pid *proc_pid(struct inode *inode)
+static inline struct pid *proc_pid(const struct inode *inode)
 {
 	return PROC_I(inode)->pid;
 }
 
-static inline struct task_struct *get_proc_task(struct inode *inode)
+static inline struct task_struct *get_proc_task(const struct inode *inode)
 {
 	return get_pid_task(proc_pid(inode), PIDTYPE_PID);
 }
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 626fc65c4336..d0e1f1522a78 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -129,7 +129,7 @@ int open_related_ns(struct ns_common *ns,
 		   struct ns_common *(*get_ns)(struct ns_common *ns));
 
 /* get the associated pid namespace for a file in procfs */
-static inline struct pid_namespace *proc_pid_ns(struct inode *inode)
+static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
 {
 	return inode->i_sb->s_fs_info;
 }
-- 
cgit v1.2.3


From a8dd9c4df18edc873d244790d163564a5d17626b Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 21 Aug 2018 21:54:51 -0700
Subject: proc/kcore: don't grab lock for kclist_add()

Patch series "/proc/kcore improvements", v4.

This series makes a few improvements to /proc/kcore.  It fixes a couple of
small issues in v3 but is otherwise the same.  Patches 1, 2, and 3 are
prep patches.  Patch 4 is a fix/cleanup.  Patch 5 is another prep patch.
Patches 6 and 7 are optimizations to ->read().  Patch 8 makes it possible
to enable CRASH_CORE on any architecture, which is needed for patch 9.
Patch 9 adds vmcoreinfo to /proc/kcore.

This patch (of 9):

kclist_add() is only called at init time, so there's no point in grabbing
any locks.  We're also going to replace the rwlock with a rwsem, which we
don't want to try grabbing during early boot.

While we're here, mark kclist_add() with __init so that we'll get a
warning if it's called from non-init code.

Link: http://lkml.kernel.org/r/98208db1faf167aa8b08eebfa968d95c70527739.1531953780.git.osandov@fb.com
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Bhupesh Sharma <bhsharma@redhat.com>
Tested-by: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/kcore.c       | 7 +++----
 include/linux/kcore.h | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 66c373230e60..b0b9a76f28d6 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -62,16 +62,15 @@ static LIST_HEAD(kclist_head);
 static DEFINE_RWLOCK(kclist_lock);
 static int kcore_need_update = 1;
 
-void
-kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
+/* This doesn't grab kclist_lock, so it should only be used at init time. */
+void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
+		       int type)
 {
 	new->addr = (unsigned long)addr;
 	new->size = size;
 	new->type = type;
 
-	write_lock(&kclist_lock);
 	list_add_tail(&new->list, &kclist_head);
-	write_unlock(&kclist_lock);
 }
 
 static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8de55e4b5ee9..c20f296438fb 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -35,7 +35,7 @@ struct vmcoredd_node {
 };
 
 #ifdef CONFIG_PROC_KCORE
-extern void kclist_add(struct kcore_list *, void *, size_t, int type);
+void __init kclist_add(struct kcore_list *, void *, size_t, int type);
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
-- 
cgit v1.2.3


From 23c85094fe1895caefdd19ef624ee687ec5f4507 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 21 Aug 2018 21:55:20 -0700
Subject: proc/kcore: add vmcoreinfo note to /proc/kcore

The vmcoreinfo information is useful for runtime debugging tools, not just
for crash dumps.  A lot of this information can be determined by other
means, but this is much more convenient, and it only adds a page at most
to the file.

Link: http://lkml.kernel.org/r/fddbcd08eed76344863303878b12de1c1e2a04b6.1531953780.git.osandov@fb.com
Signed-off-by: Omar Sandoval <osandov@fb.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/Kconfig            |  1 +
 fs/proc/kcore.c            | 18 ++++++++++++++++--
 include/linux/crash_core.h |  2 ++
 kernel/crash_core.c        |  4 ++--
 4 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 0eaeb41453f5..817c02b13b1d 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -31,6 +31,7 @@ config PROC_FS
 config PROC_KCORE
 	bool "/proc/kcore support" if !ARM
 	depends on PROC_FS && MMU
+	select CRASH_CORE
 	help
 	  Provides a virtual ELF core file of the live kernel.  This can
 	  be read with gdb and other ELF tools.  No modifications can be
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 758c14e46a44..80464432dfe6 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -10,6 +10,7 @@
  *	Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
  */
 
+#include <linux/crash_core.h>
 #include <linux/mm.h>
 #include <linux/proc_fs.h>
 #include <linux/kcore.h>
@@ -81,10 +82,13 @@ static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
 	}
 
 	*phdrs_len = *nphdr * sizeof(struct elf_phdr);
-	*notes_len = (3 * (sizeof(struct elf_note) + ALIGN(sizeof(CORE_STR), 4)) +
+	*notes_len = (4 * sizeof(struct elf_note) +
+		      3 * ALIGN(sizeof(CORE_STR), 4) +
+		      VMCOREINFO_NOTE_NAME_BYTES +
 		      ALIGN(sizeof(struct elf_prstatus), 4) +
 		      ALIGN(sizeof(struct elf_prpsinfo), 4) +
-		      ALIGN(arch_task_struct_size, 4));
+		      ALIGN(arch_task_struct_size, 4) +
+		      ALIGN(vmcoreinfo_size, 4));
 	*data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
 				  *notes_len);
 	return *data_offset + size;
@@ -406,6 +410,16 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				  sizeof(prpsinfo));
 		append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
 				  arch_task_struct_size);
+		/*
+		 * vmcoreinfo_size is mostly constant after init time, but it
+		 * can be changed by crash_save_vmcoreinfo(). Racing here with a
+		 * panic on another CPU before the machine goes down is insanely
+		 * unlikely, but it's better to not leave potential buffer
+		 * overflows lying around, regardless.
+		 */
+		append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
+				  vmcoreinfo_data,
+				  min(vmcoreinfo_size, notes_len - i));
 
 		tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
 		if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index b511f6d24b42..525510a9f965 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -60,6 +60,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 #define VMCOREINFO_CONFIG(name) \
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
 
+extern unsigned char *vmcoreinfo_data;
+extern size_t vmcoreinfo_size;
 extern u32 *vmcoreinfo_note;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index e7b4025c7b24..a683bfb0530f 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -14,8 +14,8 @@
 #include <asm/sections.h>
 
 /* vmcoreinfo stuff */
-static unsigned char *vmcoreinfo_data;
-static size_t vmcoreinfo_size;
+unsigned char *vmcoreinfo_data;
+size_t vmcoreinfo_size;
 u32 *vmcoreinfo_note;
 
 /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
-- 
cgit v1.2.3


From cedc5b6aab493f6b1b1d381dccc0cc082da7d3d8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 21 Aug 2018 21:55:28 -0700
Subject: kernel.h: documentation for roundup() vs round_up()

Things like 3619dec5103d ("dh key: fix rounding up KDF output length")
expose the lack of explicit documentation for roundup() vs round_up().  At
least we can try to document it better if anyone goes looking.

Link: http://lkml.kernel.org/r/20180703041950.GA43464@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 941dc0a5a877..d6aac75b51ba 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -85,7 +85,23 @@
  * arguments just once each.
  */
 #define __round_mask(x, y) ((__typeof__(x))((y)-1))
+/**
+ * round_up - round up to next specified power of 2
+ * @x: the value to round
+ * @y: multiple to round up to (must be a power of 2)
+ *
+ * Rounds @x up to next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding up, use roundup() below.
+ */
 #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+/**
+ * round_down - round down to next specified power of 2
+ * @x: the value to round
+ * @y: multiple to round down to (must be a power of 2)
+ *
+ * Rounds @x down to next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding down, use rounddown() below.
+ */
 #define round_down(x, y) ((x) & ~__round_mask(x, y))
 
 /**
@@ -110,13 +126,30 @@
 # define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d)
 #endif
 
-/* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */
+/**
+ * roundup - round up to the next specified multiple
+ * @x: the value to up
+ * @y: multiple to round up to
+ *
+ * Rounds @x up to next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_up().
+ *
+ * The `const' here prevents gcc-3.3 from calling __divdi3
+ */
 #define roundup(x, y) (					\
 {							\
 	const typeof(y) __y = y;			\
 	(((x) + (__y - 1)) / __y) * __y;		\
 }							\
 )
+/**
+ * rounddown - round down to next specified multiple
+ * @x: the value to round
+ * @y: multiple to round down to
+ *
+ * Rounds @x down to next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_down().
+ */
 #define rounddown(x, y) (				\
 {							\
 	typeof(x) __x = (x);				\
-- 
cgit v1.2.3


From e58dd0de5eadf145895b13451a1fef8ef03946eb Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 21 Aug 2018 21:55:31 -0700
Subject: bdi: use refcount_t for reference counting instead atomic_t

refcount_t type and corresponding API should be used instead of atomic_t
when the variable is used as a reference counter.  This permits avoiding
accidental refcounter overflows that might lead to use-after-free
situations.

Link: http://lkml.kernel.org/r/20180703200141.28415-4-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev-defs.h |  3 ++-
 include/linux/backing-dev.h      |  4 ++--
 mm/backing-dev.c                 | 12 ++++++------
 3 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 24251762c20c..9a6bc0951cfa 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -12,6 +12,7 @@
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/kref.h>
+#include <linux/refcount.h>
 
 struct page;
 struct device;
@@ -75,7 +76,7 @@ enum wb_reason {
  */
 struct bdi_writeback_congested {
 	unsigned long state;		/* WB_[a]sync_congested flags */
-	atomic_t refcnt;		/* nr of attached wb's and blkg */
+	refcount_t refcnt;		/* nr of attached wb's and blkg */
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct backing_dev_info *__bdi;	/* the associated bdi, set to NULL
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 72ca0f3d39f3..c28a47cbe355 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -404,13 +404,13 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
 static inline struct bdi_writeback_congested *
 wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
 {
-	atomic_inc(&bdi->wb_congested->refcnt);
+	refcount_inc(&bdi->wb_congested->refcnt);
 	return bdi->wb_congested;
 }
 
 static inline void wb_congested_put(struct bdi_writeback_congested *congested)
 {
-	if (atomic_dec_and_test(&congested->refcnt))
+	if (refcount_dec_and_test(&congested->refcnt))
 		kfree(congested);
 }
 
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 2e5d3df0853d..55a233d75f39 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -438,10 +438,10 @@ retry:
 	if (new_congested) {
 		/* !found and storage for new one already allocated, insert */
 		congested = new_congested;
-		new_congested = NULL;
 		rb_link_node(&congested->rb_node, parent, node);
 		rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
-		goto found;
+		spin_unlock_irqrestore(&cgwb_lock, flags);
+		return congested;
 	}
 
 	spin_unlock_irqrestore(&cgwb_lock, flags);
@@ -451,13 +451,13 @@ retry:
 	if (!new_congested)
 		return NULL;
 
-	atomic_set(&new_congested->refcnt, 0);
+	refcount_set(&new_congested->refcnt, 1);
 	new_congested->__bdi = bdi;
 	new_congested->blkcg_id = blkcg_id;
 	goto retry;
 
 found:
-	atomic_inc(&congested->refcnt);
+	refcount_inc(&congested->refcnt);
 	spin_unlock_irqrestore(&cgwb_lock, flags);
 	kfree(new_congested);
 	return congested;
@@ -474,7 +474,7 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
+	if (!refcount_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
 		local_irq_restore(flags);
 		return;
 	}
@@ -804,7 +804,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 	if (!bdi->wb_congested)
 		return -ENOMEM;
 
-	atomic_set(&bdi->wb_congested->refcnt, 1);
+	refcount_set(&bdi->wb_congested->refcnt, 1);
 
 	err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
 	if (err) {
-- 
cgit v1.2.3


From fc37191272a972d449bab3d8247cf52cadf5d4e6 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 21 Aug 2018 21:55:38 -0700
Subject: userns: use refcount_t for reference counting instead atomic_t

refcount_t type and corresponding API should be used instead of atomic_t
wh en the variable is used as a reference counter.  This avoids accidental
refcounter overflows that might lead to use-after-free situations.

Link: http://lkml.kernel.org/r/20180703200141.28415-6-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/user.h | 5 +++--
 kernel/user.c              | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 96fe289c4c6e..39ad98c09c58 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
 
 #include <linux/uidgid.h>
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/ratelimit.h>
 
 struct key;
@@ -12,7 +13,7 @@ struct key;
  * Some day this will be a full-fledged user tracking system..
  */
 struct user_struct {
-	atomic_t __count;	/* reference count */
+	refcount_t __count;	/* reference count */
 	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
@@ -59,7 +60,7 @@ extern struct user_struct root_user;
 extern struct user_struct * alloc_uid(kuid_t);
 static inline struct user_struct *get_uid(struct user_struct *u)
 {
-	atomic_inc(&u->__count);
+	refcount_inc(&u->__count);
 	return u;
 }
 extern void free_uid(struct user_struct *);
diff --git a/kernel/user.c b/kernel/user.c
index 36288d840675..5f65ef195259 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -96,7 +96,7 @@ static DEFINE_SPINLOCK(uidhash_lock);
 
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
-	.__count	= ATOMIC_INIT(1),
+	.__count	= REFCOUNT_INIT(1),
 	.processes	= ATOMIC_INIT(1),
 	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
@@ -123,7 +123,7 @@ static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent)
 
 	hlist_for_each_entry(user, hashent, uidhash_node) {
 		if (uid_eq(user->uid, uid)) {
-			atomic_inc(&user->__count);
+			refcount_inc(&user->__count);
 			return user;
 		}
 	}
@@ -170,7 +170,7 @@ void free_uid(struct user_struct *up)
 		return;
 
 	local_irq_save(flags);
-	if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+	if (refcount_dec_and_lock(&up->__count, &uidhash_lock))
 		free_user(up, flags);
 	else
 		local_irq_restore(flags);
@@ -191,7 +191,7 @@ struct user_struct *alloc_uid(kuid_t uid)
 			goto out_unlock;
 
 		new->uid = uid;
-		atomic_set(&new->__count, 1);
+		refcount_set(&new->__count, 1);
 		ratelimit_state_init(&new->ratelimit, HZ, 100);
 		ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
 
-- 
cgit v1.2.3


From 203583990cb675aeddff6d7623f68268068c078c Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 21 Aug 2018 21:55:45 -0700
Subject: linux/compiler.h: don't use bool

Appararently, it's possible to have a non-trivial TU include a few
headers, including linux/build_bug.h, without ending up with
linux/types.h.  So the 0day bot sent me

config: um-x86_64_defconfig (attached as .config)

>> include/linux/compiler.h:316:3: error: unknown type name 'bool'; did you mean '_Bool'?
      bool __cond = !(condition);    \

for something I'm working on.

Rather than contributing to the #include madness and including
linux/types.h from compiler.h, just use int.

Link: http://lkml.kernel.org/r/20180817101036.20969-1-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Christopher Li <sparse@chrisli.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 42506e4d1f53..c8eab637a2a7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -313,7 +313,7 @@ unsigned long read_word_at_a_time(const void *addr)
 #ifdef __OPTIMIZE__
 # define __compiletime_assert(condition, msg, prefix, suffix)		\
 	do {								\
-		bool __cond = !(condition);				\
+		int __cond = !(condition);				\
 		extern void prefix ## suffix(void) __compiletime_error(msg); \
 		if (__cond)						\
 			prefix ## suffix();				\
-- 
cgit v1.2.3


From a2e514453861dd39b53b7a50b6771bd3f9852078 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Tue, 21 Aug 2018 21:55:52 -0700
Subject: kernel/hung_task.c: allow to set checking interval separately from
 timeout

Currently task hung checking interval is equal to timeout, as the result
hung is detected anywhere between timeout and 2*timeout.  This is fine for
most interactive environments, but this hurts automated testing setups
(syzbot).  In an automated setup we need to strictly order CPU lockup <
RCU stall < workqueue lockup < task hung < silent loss, so that RCU stall
is not detected as task hung and task hung is not detected as silent
machine loss.  The large variance in task hung detection timeout requires
setting silent machine loss timeout to a very large value (e.g.  if task
hung is 3 mins, then silent loss need to be set to ~7 mins).  The
additional 3 minutes significantly reduce testing efficiency because
usually we crash kernel within a minute, and this can add hours to bug
localization process as it needs to do dozens of tests.

Allow setting checking interval separately from timeout.  This allows to
set timeout to, say, 3 minutes, but checking interval to 10 secs.

The interval is controlled via a new hung_task_check_interval_secs sysctl,
similar to the existing hung_task_timeout_secs sysctl.  The default value
of 0 results in the current behavior: checking interval is equal to
timeout.

[akpm@linux-foundation.org: update hung_task_timeout_max's comment]
Link: http://lkml.kernel.org/r/20180611111004.203513-1-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/kernel.txt | 15 ++++++++++++++-
 include/linux/sched.h           |  1 +
 include/linux/sched/sysctl.h    |  1 +
 kernel/fork.c                   |  1 +
 kernel/hung_task.c              | 15 ++++++++++++++-
 kernel/sysctl.c                 | 13 ++++++++++++-
 6 files changed, 43 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 59585030cbaf..9d38e75b19a0 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -38,6 +38,7 @@ show up in /proc/sys/kernel:
 - hung_task_panic
 - hung_task_check_count
 - hung_task_timeout_secs
+- hung_task_check_interval_secs
 - hung_task_warnings
 - hyperv_record_panic_msg
 - kexec_load_disabled
@@ -355,7 +356,7 @@ This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
 
 hung_task_timeout_secs:
 
-Check interval. When a task in D state did not get scheduled
+When a task in D state did not get scheduled
 for more than this value report a warning.
 This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
 
@@ -364,6 +365,18 @@ Possible values to set are in range {0..LONG_MAX/HZ}.
 
 ==============================================================
 
+hung_task_check_interval_secs:
+
+Hung task check interval. If hung task checking is enabled
+(see hung_task_timeout_secs), the check is done every
+hung_task_check_interval_secs seconds.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0 (default): means use hung_task_timeout_secs as checking interval.
+Possible values to set are in range {0..LONG_MAX/HZ}.
+
+==============================================================
+
 hung_task_warnings:
 
 The maximum number of warnings to report. During a check interval
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 789923fbee3a..58eb3a2bc695 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -853,6 +853,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 	unsigned long			last_switch_count;
+	unsigned long			last_switch_time;
 #endif
 	/* Filesystem information: */
 	struct fs_struct		*fs;
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 913488d828cb..a9c32daeb9d8 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -10,6 +10,7 @@ struct ctl_table;
 extern int	     sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
 extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_check_interval_secs;
 extern int sysctl_hung_task_warnings;
 extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 					 void __user *buffer,
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c760effa42e..8bcef2413f1b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1302,6 +1302,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 	tsk->nvcsw = tsk->nivcsw = 0;
 #ifdef CONFIG_DETECT_HUNG_TASK
 	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+	tsk->last_switch_time = 0;
 #endif
 
 	tsk->mm = NULL;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 32b479468e4d..b9132d1269ef 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -40,6 +40,11 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
  */
 unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
 
+/*
+ * Zero (default value) means use sysctl_hung_task_timeout_secs:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
+
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
@@ -98,8 +103,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 
 	if (switch_count != t->last_switch_count) {
 		t->last_switch_count = switch_count;
+		t->last_switch_time = jiffies;
 		return;
 	}
+	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+		return;
 
 	trace_sched_process_hang(t);
 
@@ -245,8 +253,13 @@ static int watchdog(void *dummy)
 
 	for ( ; ; ) {
 		unsigned long timeout = sysctl_hung_task_timeout_secs;
-		long t = hung_timeout_jiffies(hung_last_checked, timeout);
+		unsigned long interval = sysctl_hung_task_check_interval_secs;
+		long t;
 
+		if (interval == 0)
+			interval = timeout;
+		interval = min_t(unsigned long, interval, timeout);
+		t = hung_timeout_jiffies(hung_last_checked, interval);
 		if (t <= 0) {
 			if (!atomic_xchg(&reset_hung_task, 0))
 				check_hung_uninterruptible_tasks(timeout);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f22f76b7a138..dbdcb6673b27 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -145,7 +145,10 @@ static int minolduid;
 static int ngroups_max = NGROUPS_MAX;
 static const int cap_last_cap = CAP_LAST_CAP;
 
-/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
 #ifdef CONFIG_DETECT_HUNG_TASK
 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 #endif
@@ -1090,6 +1093,14 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dohung_task_timeout_secs,
 		.extra2		= &hung_task_timeout_max,
 	},
+	{
+		.procname	= "hung_task_check_interval_secs",
+		.data		= &sysctl_hung_task_check_interval_secs,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_dohung_task_timeout_secs,
+		.extra2		= &hung_task_timeout_max,
+	},
 	{
 		.procname	= "hung_task_warnings",
 		.data		= &sysctl_hung_task_warnings,
-- 
cgit v1.2.3


From f922c4abdf7648523589abee9460c87f51630d2f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:04 -0700
Subject: module: allow symbol exports to be disabled

To allow existing C code to be incorporated into the decompressor or the
UEFI stub, introduce a CPP macro that turns all EXPORT_SYMBOL_xxx
declarations into nops, and #define it in places where such exports are
undesirable.  Note that this gets rid of a rather dodgy redefine of
linux/export.h's header guard.

Link: http://lkml.kernel.org/r/20180704083651.24360-3-ard.biesheuvel@linaro.org
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/boot/compressed/kaslr.c      |  5 +----
 drivers/firmware/efi/libstub/Makefile |  1 +
 include/linux/export.h                | 11 ++++++++++-
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 302517929932..d1e19f358b6e 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -23,11 +23,8 @@
  * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
  * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
  * which is meaningless and will cause compiling error in some cases.
- * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
- * as empty.
  */
-#define _LINUX_EXPORT_H
-#define EXPORT_SYMBOL(sym)
+#define __DISABLE_EXPORTS
 
 #include "misc.h"
 #include "error.h"
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 88c322d7c71e..14c40a7750d1 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -24,6 +24,7 @@ KBUILD_CFLAGS			:= $(cflags-y) -DDISABLE_BRANCH_PROFILING \
 				   -D__NO_FORTIFY \
 				   $(call cc-option,-ffreestanding) \
 				   $(call cc-option,-fno-stack-protector) \
+				   -D__DISABLE_EXPORTS
 
 GCOV_PROFILE			:= n
 KASAN_SANITIZE			:= n
diff --git a/include/linux/export.h b/include/linux/export.h
index b768d6dd3c90..ea7df303d68d 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -66,7 +66,16 @@ extern struct module __this_module;
 	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
 	= { (unsigned long)&sym, __kstrtab_##sym }
 
-#if defined(__KSYM_DEPS__)
+#if defined(__DISABLE_EXPORTS)
+
+/*
+ * Allow symbol exports to be disabled completely so that C code may
+ * be reused in other execution contexts such as the UEFI stub or the
+ * decompressor.
+ */
+#define __EXPORT_SYMBOL(sym, sec)
+
+#elif defined(__KSYM_DEPS__)
 
 /*
  * For fine grained build dependencies, we want to tell the build system
-- 
cgit v1.2.3


From 7290d58095712a89f845e1bca05334796dd49ed2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:09 -0700
Subject: module: use relative references for __ksymtab entries

An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab entries,
each consisting of two 64-bit fields containing absolute references, to
the symbol itself and to a char array containing its name, respectively.

When we build the same configuration with KASLR enabled, we end up with an
additional ~192 KB of relocations in the .init section, i.e., one 24 byte
entry for each absolute reference, which all need to be processed at boot
time.

Given how the struct kernel_symbol that describes each entry is completely
local to module.c (except for the references emitted by EXPORT_SYMBOL()
itself), we can easily modify it to contain two 32-bit relative references
instead.  This reduces the size of the __ksymtab section by 50% for all
64-bit architectures, and gets rid of the runtime relocations entirely for
architectures implementing KASLR, either via standard PIE linking (arm64)
or using custom host tools (x86).

Note that the binary search involving __ksymtab contents relies on each
section being sorted by symbol name.  This is implemented based on the
input section names, not the names in the ksymtab entries, so this patch
does not interfere with that.

Given that the use of place-relative relocations requires support both in
the toolchain and in the module loader, we cannot enable this feature for
all architectures.  So make it dependent on whether
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined.

Link: http://lkml.kernel.org/r/20180704083651.24360-4-ard.biesheuvel@linaro.org
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jessica Yu <jeyu@kernel.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/Kbuild   |  1 +
 arch/x86/include/asm/export.h |  5 -----
 include/asm-generic/export.h  | 12 +++++++++--
 include/linux/compiler.h      | 19 ++++++++++++++++++
 include/linux/export.h        | 46 ++++++++++++++++++++++++++++++++-----------
 kernel/module.c               | 32 ++++++++++++++++++++++++------
 6 files changed, 91 insertions(+), 24 deletions(-)
 delete mode 100644 arch/x86/include/asm/export.h

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index de690c2d2e33..a0ab9ab61c75 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -8,5 +8,6 @@ generated-y += xen-hypercalls.h
 
 generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
+generic-y += export.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
deleted file mode 100644
index 2a51d66689c5..000000000000
--- a/arch/x86/include/asm/export.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_64BIT
-#define KSYM_ALIGN 16
-#endif
-#include <asm-generic/export.h>
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 68efb950a918..4d73e6e3c66c 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -5,12 +5,10 @@
 #define KSYM_FUNC(x) x
 #endif
 #ifdef CONFIG_64BIT
-#define __put .quad
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 8
 #endif
 #else
-#define __put .long
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 4
 #endif
@@ -19,6 +17,16 @@
 #define KCRC_ALIGN 4
 #endif
 
+.macro __put, val, name
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	.long	\val - ., \name - .
+#elif defined(CONFIG_64BIT)
+	.quad	\val, \name
+#else
+	.long	\val, \name
+#endif
+.endm
+
 /*
  * note on .section use: @progbits vs %progbits nastiness doesn't matter,
  * since we immediately emit into those sections anyway.
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c8eab637a2a7..681d866efb1e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -280,6 +280,25 @@ unsigned long read_word_at_a_time(const void *addr)
 
 #endif /* __KERNEL__ */
 
+/*
+ * Force the compiler to emit 'sym' as a symbol, so that we can reference
+ * it from inline assembler. Necessary in case 'sym' could be inlined
+ * otherwise, or eliminated entirely due to lack of references that are
+ * visible to the compiler.
+ */
+#define __ADDRESSABLE(sym) \
+	static void * __attribute__((section(".discard.addressable"), used)) \
+		__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
+
+/**
+ * offset_to_ptr - convert a relative memory offset to an absolute pointer
+ * @off:	the address of the 32-bit offset value
+ */
+static inline void *offset_to_ptr(const int *off)
+{
+	return (void *)((unsigned long)off + *off);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #ifndef __optimize
diff --git a/include/linux/export.h b/include/linux/export.h
index ea7df303d68d..ae072bc5aacf 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -18,12 +18,6 @@
 #define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
 
 #ifndef __ASSEMBLY__
-struct kernel_symbol
-{
-	unsigned long value;
-	const char *name;
-};
-
 #ifdef MODULE
 extern struct module __this_module;
 #define THIS_MODULE (&__this_module)
@@ -54,17 +48,47 @@ extern struct module __this_module;
 #define __CRC_SYMBOL(sym, sec)
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#include <linux/compiler.h>
+/*
+ * Emit the ksymtab entry as a pair of relative references: this reduces
+ * the size by half on 64-bit architectures, and eliminates the need for
+ * absolute relocations that require runtime processing on relocatable
+ * kernels.
+ */
+#define __KSYMTAB_ENTRY(sym, sec)					\
+	__ADDRESSABLE(sym)						\
+	asm("	.section \"___ksymtab" sec "+" #sym "\", \"a\"	\n"	\
+	    "	.balign	8					\n"	\
+	    "__ksymtab_" #sym ":				\n"	\
+	    "	.long	" #sym "- .				\n"	\
+	    "	.long	__kstrtab_" #sym "- .			\n"	\
+	    "	.previous					\n")
+
+struct kernel_symbol {
+	int value_offset;
+	int name_offset;
+};
+#else
+#define __KSYMTAB_ENTRY(sym, sec)					\
+	static const struct kernel_symbol __ksymtab_##sym		\
+	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
+	= { (unsigned long)&sym, __kstrtab_##sym }
+
+struct kernel_symbol {
+	unsigned long value;
+	const char *name;
+};
+#endif
+
 /* For every exported symbol, place a struct in the __ksymtab section */
 #define ___EXPORT_SYMBOL(sym, sec)					\
 	extern typeof(sym) sym;						\
 	__CRC_SYMBOL(sym, sec)						\
 	static const char __kstrtab_##sym[]				\
-	__attribute__((section("__ksymtab_strings"), aligned(1)))	\
+	__attribute__((section("__ksymtab_strings"), used, aligned(1)))	\
 	= #sym;								\
-	static const struct kernel_symbol __ksymtab_##sym		\
-	__used								\
-	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
-	= { (unsigned long)&sym, __kstrtab_##sym }
+	__KSYMTAB_ENTRY(sym, sec)
 
 #if defined(__DISABLE_EXPORTS)
 
diff --git a/kernel/module.c b/kernel/module.c
index b046a32520d8..6746c85511fe 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -529,12 +529,30 @@ static bool check_symbol(const struct symsearch *syms,
 	return true;
 }
 
+static unsigned long kernel_symbol_value(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	return (unsigned long)offset_to_ptr(&sym->value_offset);
+#else
+	return sym->value;
+#endif
+}
+
+static const char *kernel_symbol_name(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	return offset_to_ptr(&sym->name_offset);
+#else
+	return sym->name;
+#endif
+}
+
 static int cmp_name(const void *va, const void *vb)
 {
 	const char *a;
 	const struct kernel_symbol *b;
 	a = va; b = vb;
-	return strcmp(a, b->name);
+	return strcmp(a, kernel_symbol_name(b));
 }
 
 static bool find_symbol_in_section(const struct symsearch *syms,
@@ -2170,7 +2188,7 @@ void *__symbol_get(const char *symbol)
 		sym = NULL;
 	preempt_enable();
 
-	return sym ? (void *)sym->value : NULL;
+	return sym ? (void *)kernel_symbol_value(sym) : NULL;
 }
 EXPORT_SYMBOL_GPL(__symbol_get);
 
@@ -2200,10 +2218,12 @@ static int verify_export_symbols(struct module *mod)
 
 	for (i = 0; i < ARRAY_SIZE(arr); i++) {
 		for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
-			if (find_symbol(s->name, &owner, NULL, true, false)) {
+			if (find_symbol(kernel_symbol_name(s), &owner, NULL,
+					true, false)) {
 				pr_err("%s: exports duplicate symbol %s"
 				       " (owned by %s)\n",
-				       mod->name, s->name, module_name(owner));
+				       mod->name, kernel_symbol_name(s),
+				       module_name(owner));
 				return -ENOEXEC;
 			}
 		}
@@ -2252,7 +2272,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 			ksym = resolve_symbol_wait(mod, info, name);
 			/* Ok if resolved.  */
 			if (ksym && !IS_ERR(ksym)) {
-				sym[i].st_value = ksym->value;
+				sym[i].st_value = kernel_symbol_value(ksym);
 				break;
 			}
 
@@ -2516,7 +2536,7 @@ static int is_exported(const char *name, unsigned long value,
 		ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab);
 	else
 		ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms);
-	return ks != NULL && ks->value == value;
+	return ks != NULL && kernel_symbol_value(ks) == value;
 }
 
 /* As per nm */
-- 
cgit v1.2.3


From 1b1eeca7e4c19fa76d409d4c7b338dba21f2df45 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:13 -0700
Subject: init: allow initcall tables to be emitted using relative references

Allow the initcall tables to be emitted using relative references that
are only half the size on 64-bit architectures and don't require fixups
at runtime on relocatable kernels.

Link: http://lkml.kernel.org/r/20180704083651.24360-5-ard.biesheuvel@linaro.org
Acked-by: James Morris <james.morris@microsoft.com>
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Petr Mladek <pmladek@suse.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init.h   | 44 +++++++++++++++++++++++++++++++++-----------
 init/main.c            | 32 ++++++++++++++++----------------
 kernel/printk/printk.c | 16 +++++++++-------
 security/security.c    | 17 ++++++++++-------
 4 files changed, 68 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index bc27cf03c41e..2538d176dd1f 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -116,8 +116,24 @@
 typedef int (*initcall_t)(void);
 typedef void (*exitcall_t)(void);
 
-extern initcall_t __con_initcall_start[], __con_initcall_end[];
-extern initcall_t __security_initcall_start[], __security_initcall_end[];
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef int initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+	return offset_to_ptr(entry);
+}
+#else
+typedef initcall_t initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+	return *entry;
+}
+#endif
+
+extern initcall_entry_t __con_initcall_start[], __con_initcall_end[];
+extern initcall_entry_t __security_initcall_start[], __security_initcall_end[];
 
 /* Used for contructor calls. */
 typedef void (*ctor_fn_t)(void);
@@ -167,9 +183,20 @@ extern bool initcall_debug;
  * as KEEP() in the linker script.
  */
 
-#define __define_initcall(fn, id) \
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define ___define_initcall(fn, id, __sec)			\
+	__ADDRESSABLE(fn)					\
+	asm(".section	\"" #__sec ".init\", \"a\"	\n"	\
+	"__initcall_" #fn #id ":			\n"	\
+	    ".long	" #fn " - .			\n"	\
+	    ".previous					\n");
+#else
+#define ___define_initcall(fn, id, __sec) \
 	static initcall_t __initcall_##fn##id __used \
-	__attribute__((__section__(".initcall" #id ".init"))) = fn;
+		__attribute__((__section__(#__sec ".init"))) = fn;
+#endif
+
+#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
 
 /*
  * Early initcalls run before initializing SMP.
@@ -208,13 +235,8 @@ extern bool initcall_debug;
 #define __exitcall(fn)						\
 	static exitcall_t __exitcall_##fn __exit_call = fn
 
-#define console_initcall(fn)					\
-	static initcall_t __initcall_##fn			\
-	__used __section(.con_initcall.init) = fn
-
-#define security_initcall(fn)					\
-	static initcall_t __initcall_##fn			\
-	__used __section(.security_initcall.init) = fn
+#define console_initcall(fn)	___define_initcall(fn,, .con_initcall)
+#define security_initcall(fn)	___define_initcall(fn,, .security_initcall)
 
 struct obs_kernel_param {
 	const char *str;
diff --git a/init/main.c b/init/main.c
index b729e1f22838..3a6ce89e128f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -902,18 +902,18 @@ int __init_or_module do_one_initcall(initcall_t fn)
 }
 
 
-extern initcall_t __initcall_start[];
-extern initcall_t __initcall0_start[];
-extern initcall_t __initcall1_start[];
-extern initcall_t __initcall2_start[];
-extern initcall_t __initcall3_start[];
-extern initcall_t __initcall4_start[];
-extern initcall_t __initcall5_start[];
-extern initcall_t __initcall6_start[];
-extern initcall_t __initcall7_start[];
-extern initcall_t __initcall_end[];
-
-static initcall_t *initcall_levels[] __initdata = {
+extern initcall_entry_t __initcall_start[];
+extern initcall_entry_t __initcall0_start[];
+extern initcall_entry_t __initcall1_start[];
+extern initcall_entry_t __initcall2_start[];
+extern initcall_entry_t __initcall3_start[];
+extern initcall_entry_t __initcall4_start[];
+extern initcall_entry_t __initcall5_start[];
+extern initcall_entry_t __initcall6_start[];
+extern initcall_entry_t __initcall7_start[];
+extern initcall_entry_t __initcall_end[];
+
+static initcall_entry_t *initcall_levels[] __initdata = {
 	__initcall0_start,
 	__initcall1_start,
 	__initcall2_start,
@@ -939,7 +939,7 @@ static char *initcall_level_names[] __initdata = {
 
 static void __init do_initcall_level(int level)
 {
-	initcall_t *fn;
+	initcall_entry_t *fn;
 
 	strcpy(initcall_command_line, saved_command_line);
 	parse_args(initcall_level_names[level],
@@ -950,7 +950,7 @@ static void __init do_initcall_level(int level)
 
 	trace_initcall_level(initcall_level_names[level]);
 	for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
-		do_one_initcall(*fn);
+		do_one_initcall(initcall_from_entry(fn));
 }
 
 static void __init do_initcalls(void)
@@ -981,11 +981,11 @@ static void __init do_basic_setup(void)
 
 static void __init do_pre_smp_initcalls(void)
 {
-	initcall_t *fn;
+	initcall_entry_t *fn;
 
 	trace_initcall_level("early");
 	for (fn = __initcall_start; fn < __initcall0_start; fn++)
-		do_one_initcall(*fn);
+		do_one_initcall(initcall_from_entry(fn));
 }
 
 /*
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 90b6ab01db59..918f386b2f6e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2788,7 +2788,8 @@ EXPORT_SYMBOL(unregister_console);
 void __init console_init(void)
 {
 	int ret;
-	initcall_t *call;
+	initcall_t call;
+	initcall_entry_t *ce;
 
 	/* Setup the default TTY line discipline. */
 	n_tty_init();
@@ -2797,13 +2798,14 @@ void __init console_init(void)
 	 * set up the console device so that later boot sequences can
 	 * inform about problems etc..
 	 */
-	call = __con_initcall_start;
+	ce = __con_initcall_start;
 	trace_initcall_level("console");
-	while (call < __con_initcall_end) {
-		trace_initcall_start((*call));
-		ret = (*call)();
-		trace_initcall_finish((*call), ret);
-		call++;
+	while (ce < __con_initcall_end) {
+		call = initcall_from_entry(ce);
+		trace_initcall_start(call);
+		ret = call();
+		trace_initcall_finish(call, ret);
+		ce++;
 	}
 }
 
diff --git a/security/security.c b/security/security.c
index 47cfff01d7ec..736e78da1ab9 100644
--- a/security/security.c
+++ b/security/security.c
@@ -48,14 +48,17 @@ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] =
 static void __init do_security_initcalls(void)
 {
 	int ret;
-	initcall_t *call;
-	call = __security_initcall_start;
+	initcall_t call;
+	initcall_entry_t *ce;
+
+	ce = __security_initcall_start;
 	trace_initcall_level("security");
-	while (call < __security_initcall_end) {
-		trace_initcall_start((*call));
-		ret = (*call) ();
-		trace_initcall_finish((*call), ret);
-		call++;
+	while (ce < __security_initcall_end) {
+		call = initcall_from_entry(ce);
+		trace_initcall_start(call);
+		ret = call();
+		trace_initcall_finish(call, ret);
+		ce++;
 	}
 }
 
-- 
cgit v1.2.3


From c9d8b55fa0191623fccb9ed67d2ff8f9159e9a89 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:18 -0700
Subject: PCI: Add support for relative addressing in quirk tables

Allow the PCI quirk tables to be emitted in a way that avoids absolute
references to the hook functions. This reduces the size of the entries,
and, more importantly, makes them invariant under runtime relocation
(e.g., for KASLR)

Link: http://lkml.kernel.org/r/20180704083651.24360-6-ard.biesheuvel@linaro.org
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/quirks.c | 12 +++++++++---
 include/linux/pci.h  | 20 ++++++++++++++++++++
 2 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 46f58a9771d7..ef7143a274e0 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -66,9 +66,15 @@ static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
 		     f->vendor == (u16) PCI_ANY_ID) &&
 		    (f->device == dev->device ||
 		     f->device == (u16) PCI_ANY_ID)) {
-			calltime = fixup_debug_start(dev, f->hook);
-			f->hook(dev);
-			fixup_debug_report(dev, calltime, f->hook);
+			void (*hook)(struct pci_dev *dev);
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+			hook = offset_to_ptr(&f->hook_offset);
+#else
+			hook = f->hook;
+#endif
+			calltime = fixup_debug_start(dev, hook);
+			hook(dev);
+			fixup_debug_report(dev, calltime, hook);
 		}
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9b87f1936906..e72ca8dd6241 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1809,7 +1809,11 @@ struct pci_fixup {
 	u16 device;			/* Or PCI_ANY_ID */
 	u32 class;			/* Or PCI_ANY_ID */
 	unsigned int class_shift;	/* should be 0, 8, 16 */
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	int hook_offset;
+#else
 	void (*hook)(struct pci_dev *dev);
+#endif
 };
 
 enum pci_fixup_pass {
@@ -1823,12 +1827,28 @@ enum pci_fixup_pass {
 	pci_fixup_suspend_late,	/* pci_device_suspend_late() */
 };
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,	\
+				    class_shift, hook)			\
+	__ADDRESSABLE(hook)						\
+	asm(".section "	#sec ", \"a\"				\n"	\
+	    ".balign	16					\n"	\
+	    ".short "	#vendor ", " #device "			\n"	\
+	    ".long "	#class ", " #class_shift "		\n"	\
+	    ".long "	#hook " - .				\n"	\
+	    ".previous						\n");
+#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,	\
+				  class_shift, hook)			\
+	__DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,	\
+				  class_shift, hook)
+#else
 /* Anonymous variables would be nice... */
 #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class,	\
 				  class_shift, hook)			\
 	static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) __used	\
 	__attribute__((__section__(#section), aligned((sizeof(void *)))))    \
 		= { vendor, device, class, class_shift, hook };
+#endif
 
 #define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class,		\
 					 class_shift, hook)		\
-- 
cgit v1.2.3


From 46e0c9be206fa7b11aca75da2d6b8535d0139752 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:22 -0700
Subject: kernel: tracepoints: add support for relative references

To avoid the need for relocating absolute references to tracepoint
structures at boot time when running relocatable kernels (which may
take a disproportionate amount of space), add the option to emit
these tables as relative references instead.

Link: http://lkml.kernel.org/r/20180704083651.24360-7-ard.biesheuvel@linaro.org
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tracepoint.h | 19 ++++++++++++++----
 kernel/tracepoint.c        | 49 ++++++++++++++++++++++++----------------------
 2 files changed, 41 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d9a084c72541..7f2e16e76ac4 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -249,6 +249,19 @@ extern void syscall_unregfunc(void);
 		return static_key_false(&__tracepoint_##name.key);	\
 	}
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __TRACEPOINT_ENTRY(name)					\
+	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
+	    "	.balign 4					\n"	\
+	    "	.long 	__tracepoint_" #name " - .		\n"	\
+	    "	.previous					\n")
+#else
+#define __TRACEPOINT_ENTRY(name)					 \
+	static struct tracepoint * const __tracepoint_ptr_##name __used	 \
+	__attribute__((section("__tracepoints_ptrs"))) =		 \
+		&__tracepoint_##name
+#endif
+
 /*
  * We have no guarantee that gcc and the linker won't up-align the tracepoint
  * structures, so we create an array of pointers that will be used for iteration
@@ -258,11 +271,9 @@ extern void syscall_unregfunc(void);
 	static const char __tpstrtab_##name[]				 \
 	__attribute__((section("__tracepoints_strings"))) = #name;	 \
 	struct tracepoint __tracepoint_##name				 \
-	__attribute__((section("__tracepoints"))) =			 \
+	__attribute__((section("__tracepoints"), used)) =		 \
 		{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
-	static struct tracepoint * const __tracepoint_ptr_##name __used	 \
-	__attribute__((section("__tracepoints_ptrs"))) =		 \
-		&__tracepoint_##name;
+	__TRACEPOINT_ENTRY(name);
 
 #define DEFINE_TRACE(name)						\
 	DEFINE_TRACE_FN(name, NULL, NULL);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 96db841bf0fc..bf2c06ef9afc 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -371,6 +371,27 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 
+static void for_each_tracepoint_range(struct tracepoint * const *begin,
+		struct tracepoint * const *end,
+		void (*fct)(struct tracepoint *tp, void *priv),
+		void *priv)
+{
+	if (!begin)
+		return;
+
+	if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
+		const int *iter;
+
+		for (iter = (const int *)begin; iter < (const int *)end; iter++)
+			fct(offset_to_ptr(iter), priv);
+	} else {
+		struct tracepoint * const *iter;
+
+		for (iter = begin; iter < end; iter++)
+			fct(*iter, priv);
+	}
+}
+
 #ifdef CONFIG_MODULES
 bool trace_module_has_bad_taint(struct module *mod)
 {
@@ -435,15 +456,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);
  * Ensure the tracer unregistered the module's probes before the module
  * teardown is performed. Prevents leaks of probe and data pointers.
  */
-static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
-		struct tracepoint * const *end)
+static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
 {
-	struct tracepoint * const *iter;
-
-	if (!begin)
-		return;
-	for (iter = begin; iter < end; iter++)
-		WARN_ON_ONCE((*iter)->funcs);
+	WARN_ON_ONCE(tp->funcs);
 }
 
 static int tracepoint_module_coming(struct module *mod)
@@ -494,8 +509,9 @@ static void tracepoint_module_going(struct module *mod)
 			 * Called the going notifier before checking for
 			 * quiescence.
 			 */
-			tp_module_going_check_quiescent(mod->tracepoints_ptrs,
-				mod->tracepoints_ptrs + mod->num_tracepoints);
+			for_each_tracepoint_range(mod->tracepoints_ptrs,
+				mod->tracepoints_ptrs + mod->num_tracepoints,
+				tp_module_going_check_quiescent, NULL);
 			break;
 		}
 	}
@@ -547,19 +563,6 @@ static __init int init_tracepoints(void)
 __initcall(init_tracepoints);
 #endif /* CONFIG_MODULES */
 
-static void for_each_tracepoint_range(struct tracepoint * const *begin,
-		struct tracepoint * const *end,
-		void (*fct)(struct tracepoint *tp, void *priv),
-		void *priv)
-{
-	struct tracepoint * const *iter;
-
-	if (!begin)
-		return;
-	for (iter = begin; iter < end; iter++)
-		fct(*iter, priv);
-}
-
 /**
  * for_each_kernel_tracepoint - iteration on all kernel tracepoints
  * @fct: callback
-- 
cgit v1.2.3


From 9144d75e22cad3c89e6b2ccab551db9ee28d250a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 21 Aug 2018 21:57:03 -0700
Subject: include/linux/bitops.h: introduce BITS_PER_TYPE

net_dim.h has a rather useful extension to BITS_PER_BYTE to compute the
number of bits in a type (BITS_PER_BYTE * sizeof(T)), so promote the macro
to bitops.h, alongside BITS_PER_BYTE, for wider usage.

Link: http://lkml.kernel.org/r/20180706094458.14116-1-chris@chris-wilson.co.uk
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Andy Gospodarek <gospo@broadcom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitops.h  | 3 ++-
 include/linux/net_dim.h | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index af419012d77d..7ddb1349394d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -4,7 +4,8 @@
 #include <asm/types.h>
 #include <linux/bits.h>
 
-#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
 
 extern unsigned int __sw_hweight8(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index db99240d00bd..c79e859408e6 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -363,7 +363,6 @@ static inline void net_dim_sample(u16 event_ctr,
 }
 
 #define NET_DIM_NEVENTS 64
-#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
 #define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
 
 static inline void net_dim_calc_stats(struct net_dim_sample *start,
-- 
cgit v1.2.3


From feba04fd2cf8f6a74865338df2e3e1e94d6cfd13 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 21 Aug 2018 21:57:11 -0700
Subject: lib: add crc64 calculation routines

Patch series "add crc64 calculation as kernel library", v5.

This patchset adds basic implementation of crc64 calculation as a Linux
kernel library.  Since bcache already does crc64 by itself, this patchset
also modifies bcache code to use the new crc64 library routine.

Currently bcache is the only user of crc64 calculation, another potential
user is bcachefs which is on the way to be in mainline kernel.  Therefore
it makes sense to make crc64 calculation to be a public library.

bcache uses crc64 as storage checksum, if a change of crc lib routines
results an inconsistent result, the unmatched checksum may make bcache
'think' the on-disk is corrupted, such a change should be avoided or
detected as early as possible.  Therefore a patch is being prepared which
adds a crc test framework, to check consistency of different calculations.

This patch (of 2):

Add the re-write crc64 calculation routines for Linux kernel.  The CRC64
polynomical arithmetic follows ECMA-182 specification, inspired by CRC
paper of Dr.  Ross N.  Williams (see
http://www.ross.net/crc/download/crc_v3.txt) and other public domain
implementations.

All the changes work in this way,
- When Linux kernel is built, host program lib/gen_crc64table.c will be
  compiled to lib/gen_crc64table and executed.
- The output of gen_crc64table execution is an array called as lookup
  table (a.k.a POLY 0x42f0e1eba9ea369) which contain 256 64-bit long
  numbers, this table is dumped into header file lib/crc64table.h.
- Then the header file is included by lib/crc64.c for normal 64bit crc
  calculation.
- Function declaration of the crc64 calculation routines is placed in
  include/linux/crc64.h

Currently bcache is the only user of crc64_be(), another potential user is
bcachefs which is on the way to be in mainline kernel.  Therefore it makes
sense to move crc64 calculation into lib/crc64.c as public code.

[colyli@suse.de: fix review comments from v4]
  Link: http://lkml.kernel.org/r/20180726053352.2781-2-colyli@suse.de
Link: http://lkml.kernel.org/r/20180718165545.1622-2-colyli@suse.de
Signed-off-by: Coly Li <colyli@suse.de>
Co-developed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Michael Lyle <mlyle@lyle.org>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Noah Massey <noah.massey@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crc64.h | 11 +++++++++
 lib/.gitignore        |  2 ++
 lib/Kconfig           |  8 ++++++
 lib/Makefile          | 11 +++++++++
 lib/crc64.c           | 56 ++++++++++++++++++++++++++++++++++++++++++
 lib/gen_crc64table.c  | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 156 insertions(+)
 create mode 100644 include/linux/crc64.h
 create mode 100644 lib/crc64.c
 create mode 100644 lib/gen_crc64table.c

(limited to 'include/linux')

diff --git a/include/linux/crc64.h b/include/linux/crc64.h
new file mode 100644
index 000000000000..c756e65a1b58
--- /dev/null
+++ b/include/linux/crc64.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * See lib/crc64.c for the related specification and polynomial arithmetic.
+ */
+#ifndef _LINUX_CRC64_H
+#define _LINUX_CRC64_H
+
+#include <linux/types.h>
+
+u64 __pure crc64_be(u64 crc, const void *p, size_t len);
+#endif /* _LINUX_CRC64_H */
diff --git a/lib/.gitignore b/lib/.gitignore
index 09aae85418ab..f2a39c9e5485 100644
--- a/lib/.gitignore
+++ b/lib/.gitignore
@@ -2,5 +2,7 @@
 # Generated files
 #
 gen_crc32table
+gen_crc64table
 crc32table.h
+crc64table.h
 oid_registry_data.c
diff --git a/lib/Kconfig b/lib/Kconfig
index 706836ec314d..9c10b9852563 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -170,6 +170,14 @@ config CRC32_BIT
 
 endchoice
 
+config CRC64
+	tristate "CRC64 functions"
+	help
+	  This option is provided for the case where no in-kernel-tree
+	  modules require CRC64 functions, but a module built outside
+	  the kernel tree does. Such modules that use library CRC64
+	  functions require M here.
+
 config CRC4
 	tristate "CRC4 functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index d95bb2525101..9baefb6cb1a1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_CRC16)	+= crc16.o
 obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
 obj-$(CONFIG_CRC_ITU_T)	+= crc-itu-t.o
 obj-$(CONFIG_CRC32)	+= crc32.o
+obj-$(CONFIG_CRC64)     += crc64.o
 obj-$(CONFIG_CRC32_SELFTEST)	+= crc32test.o
 obj-$(CONFIG_CRC4)	+= crc4.o
 obj-$(CONFIG_CRC7)	+= crc7.o
@@ -217,7 +218,9 @@ obj-$(CONFIG_FONT_SUPPORT) += fonts/
 obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
 
 hostprogs-y	:= gen_crc32table
+hostprogs-y	+= gen_crc64table
 clean-files	:= crc32table.h
+clean-files	+= crc64table.h
 
 $(obj)/crc32.o: $(obj)/crc32table.h
 
@@ -227,6 +230,14 @@ quiet_cmd_crc32 = GEN     $@
 $(obj)/crc32table.h: $(obj)/gen_crc32table
 	$(call cmd,crc32)
 
+$(obj)/crc64.o: $(obj)/crc64table.h
+
+quiet_cmd_crc64 = GEN     $@
+      cmd_crc64 = $< > $@
+
+$(obj)/crc64table.h: $(obj)/gen_crc64table
+	$(call cmd,crc64)
+
 #
 # Build a fast OID lookip registry from include/linux/oid_registry.h
 #
diff --git a/lib/crc64.c b/lib/crc64.c
new file mode 100644
index 000000000000..0ef8ae6ac047
--- /dev/null
+++ b/lib/crc64.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Normal 64-bit CRC calculation.
+ *
+ * This is a basic crc64 implementation following ECMA-182 specification,
+ * which can be found from,
+ * http://www.ecma-international.org/publications/standards/Ecma-182.htm
+ *
+ * Dr. Ross N. Williams has a great document to introduce the idea of CRC
+ * algorithm, here the CRC64 code is also inspired by the table-driven
+ * algorithm and detail example from this paper. This paper can be found
+ * from,
+ * http://www.ross.net/crc/download/crc_v3.txt
+ *
+ * crc64table[256] is the lookup table of a table-driven 64-bit CRC
+ * calculation, which is generated by gen_crc64table.c in kernel build
+ * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
+ * as well, which is defined as,
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * Copyright 2018 SUSE Linux.
+ *   Author: Coly Li <colyli@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include "crc64table.h"
+
+MODULE_DESCRIPTION("CRC64 calculations");
+MODULE_LICENSE("GPL v2");
+
+/**
+ * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
+ * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
+	or the previous crc64 value if computing incrementally.
+ * @p: pointer to buffer over which CRC64 is run
+ * @len: length of buffer @p
+ */
+u64 __pure crc64_be(u64 crc, const void *p, size_t len)
+{
+	size_t i, t;
+
+	const unsigned char *_p = p;
+
+	for (i = 0; i < len; i++) {
+		t = ((crc >> 56) ^ (*_p++)) & 0xFF;
+		crc = crc64table[t] ^ (crc << 8);
+	}
+
+	return crc;
+}
+EXPORT_SYMBOL_GPL(crc64_be);
diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c
new file mode 100644
index 000000000000..9011926e4162
--- /dev/null
+++ b/lib/gen_crc64table.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generate lookup table for the table-driven CRC64 calculation.
+ *
+ * gen_crc64table is executed in kernel build time and generates
+ * lib/crc64table.h. This header is included by lib/crc64.c for
+ * the table-driven CRC64 calculation.
+ *
+ * See lib/crc64.c for more information about which specification
+ * and polynomial arithmetic that gen_crc64table.c follows to
+ * generate the lookup table.
+ *
+ * Copyright 2018 SUSE Linux.
+ *   Author: Coly Li <colyli@suse.de>
+ */
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <linux/swab.h>
+
+#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL
+
+static uint64_t crc64_table[256] = {0};
+
+static void generate_crc64_table(void)
+{
+	uint64_t i, j, c, crc;
+
+	for (i = 0; i < 256; i++) {
+		crc = 0;
+		c = i << 56;
+
+		for (j = 0; j < 8; j++) {
+			if ((crc ^ c) & 0x8000000000000000ULL)
+				crc = (crc << 1) ^ CRC64_ECMA182_POLY;
+			else
+				crc <<= 1;
+			c <<= 1;
+		}
+
+		crc64_table[i] = crc;
+	}
+}
+
+static void print_crc64_table(void)
+{
+	int i;
+
+	printf("/* this file is generated - do not edit */\n\n");
+	printf("#include <linux/types.h>\n");
+	printf("#include <linux/cache.h>\n\n");
+	printf("static const u64 ____cacheline_aligned crc64table[256] = {\n");
+	for (i = 0; i < 256; i++) {
+		printf("\t0x%016" PRIx64 "ULL", crc64_table[i]);
+		if (i & 0x1)
+			printf(",\n");
+		else
+			printf(", ");
+	}
+	printf("};\n");
+}
+
+int main(int argc, char *argv[])
+{
+	generate_crc64_table();
+	print_crc64_table();
+	return 0;
+}
-- 
cgit v1.2.3


From 52cba1a274818c3ee6299c1a1b476e7bc5037a2f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Tue, 21 Aug 2018 21:59:51 -0700
Subject: signal: make force_sigsegv() void

Patch series "signal: refactor some functions", v3.

This series refactors a bunch of functions in signal.c to simplify parts
of the code.

The greatest single change is declaring the static do_sigpending() helper
as void which makes it possible to remove a bunch of unnecessary checks in
the syscalls later on.

This patch (of 17):

force_sigsegv() returned 0 unconditionally so it doesn't make sense to have
it return at all. In addition, there are no callers that check
force_sigsegv()'s return value.

Link: http://lkml.kernel.org/r/20180602103653.18181-2-christian@brauner.io
Signed-off-by: Christian Brauner <christian@brauner.io>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Morris <james.morris@microsoft.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/signal.h | 2 +-
 kernel/signal.c              | 7 ++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 113d1ad1ced7..e138ac16c650 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -314,7 +314,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey);
 int force_sig_ptrace_errno_trap(int errno, void __user *addr);
 
 extern int send_sig_info(int, struct siginfo *, struct task_struct *);
-extern int force_sigsegv(int, struct task_struct *);
+extern void force_sigsegv(int sig, struct task_struct *p);
 extern int force_sig_info(int, struct siginfo *, struct task_struct *);
 extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
 extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
diff --git a/kernel/signal.c b/kernel/signal.c
index 8d8a940422a8..8a828baa0f93 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1458,8 +1458,7 @@ send_sig(int sig, struct task_struct *p, int priv)
 	return send_sig_info(sig, __si_special(priv), p);
 }
 
-void
-force_sig(int sig, struct task_struct *p)
+void force_sig(int sig, struct task_struct *p)
 {
 	force_sig_info(sig, SEND_SIG_PRIV, p);
 }
@@ -1470,8 +1469,7 @@ force_sig(int sig, struct task_struct *p)
  * the problem was already a SIGSEGV, we'll want to
  * make sure we don't even try to deliver the signal..
  */
-int
-force_sigsegv(int sig, struct task_struct *p)
+void force_sigsegv(int sig, struct task_struct *p)
 {
 	if (sig == SIGSEGV) {
 		unsigned long flags;
@@ -1480,7 +1478,6 @@ force_sigsegv(int sig, struct task_struct *p)
 		spin_unlock_irqrestore(&p->sighand->siglock, flags);
 	}
 	force_sig(SIGSEGV, p);
-	return 0;
 }
 
 int force_sig_fault(int sig, int code, void __user *addr
-- 
cgit v1.2.3


From 67a48a24478841525ba73ec6d64caaf079cf3b7c Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Tue, 21 Aug 2018 22:00:34 -0700
Subject: signal: make unhandled_signal() return bool

unhandled_signal() already behaves like a boolean function.  Let's
actually declare it as such too.  All callers treat it as such too.

Link: http://lkml.kernel.org/r/20180602103653.18181-13-christian@brauner.io
Signed-off-by: Christian Brauner <christian@brauner.io>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Morris <james.morris@microsoft.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 2 +-
 kernel/signal.c        | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3c5200137b24..fa23cae66758 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -287,7 +287,7 @@ static inline void disallow_signal(int sig)
 
 extern struct kmem_cache *sighand_cachep;
 
-int unhandled_signal(struct task_struct *tsk, int sig);
+extern bool unhandled_signal(struct task_struct *tsk, int sig);
 
 /*
  * In POSIX a signal is sent either to a specific thread (Linux task)
diff --git a/kernel/signal.c b/kernel/signal.c
index 7eec2dba597e..c10c09fc4ec3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -505,13 +505,15 @@ flush_signal_handlers(struct task_struct *t, int force_default)
 	}
 }
 
-int unhandled_signal(struct task_struct *tsk, int sig)
+bool unhandled_signal(struct task_struct *tsk, int sig)
 {
 	void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
 	if (is_global_init(tsk))
-		return 1;
+		return true;
+
 	if (handler != SIG_IGN && handler != SIG_DFL)
-		return 0;
+		return false;
+
 	/* if ptraced, let the tracer determine */
 	return !tsk->ptrace;
 }
-- 
cgit v1.2.3


From 20ab7218d2507b2dbec9c053c2f1b96054e266b6 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Tue, 21 Aug 2018 22:00:54 -0700
Subject: signal: make get_signal() return bool

make get_signal() already behaves like a boolean function.  Let's actually
declare it as such too.

Link: http://lkml.kernel.org/r/20180602103653.18181-18-christian@brauner.io
Signed-off-by: Christian Brauner <christian@brauner.io>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Morris <james.morris@microsoft.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 2 +-
 kernel/signal.c        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index fa23cae66758..e3d9e0640e6e 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -265,7 +265,7 @@ extern void set_current_blocked(sigset_t *);
 extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 
-extern int get_signal(struct ksignal *ksig);
+extern bool get_signal(struct ksignal *ksig);
 extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
 extern void exit_signals(struct task_struct *tsk);
 extern void kernel_sigaction(int, __sighandler_t);
diff --git a/kernel/signal.c b/kernel/signal.c
index 9f9b4183178e..786bacc60649 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2287,7 +2287,7 @@ static int ptrace_signal(int signr, siginfo_t *info)
 	return signr;
 }
 
-int get_signal(struct ksignal *ksig)
+bool get_signal(struct ksignal *ksig)
 {
 	struct sighand_struct *sighand = current->sighand;
 	struct signal_struct *signal = current->signal;
@@ -2297,7 +2297,7 @@ int get_signal(struct ksignal *ksig)
 		task_work_run();
 
 	if (unlikely(uprobe_deny_signal()))
-		return 0;
+		return false;
 
 	/*
 	 * Do this once, we can't return to user-mode if freezing() == T.
-- 
cgit v1.2.3


From dc2c8c84def6ce450c63529e08c1db100020994e Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Tue, 21 Aug 2018 22:01:52 -0700
Subject: ipc: get rid of ids->tables_initialized hack

In sysvipc we have an ids->tables_initialized regarding the rhashtable,
introduced in 0cfb6aee70bd ("ipc: optimize semget/shmget/msgget for lots
of keys")

It's there, specifically, to prevent nil pointer dereferences, from using
an uninitialized api.  Considering how rhashtable_init() can fail
(probably due to ENOMEM, if anything), this made the overall ipc
initialization capable of failure as well.  That alone is ugly, but fine,
however I've spotted a few issues regarding the semantics of
tables_initialized (however unlikely they may be):

- There is inconsistency in what we return to userspace: ipc_addid()
  returns ENOSPC which is certainly _wrong_, while ipc_obtain_object_idr()
  returns EINVAL.

- After we started using rhashtables, ipc_findkey() can return nil upon
  !tables_initialized, but the caller expects nil for when the ipc
  structure isn't found, and can therefore call into ipcget() callbacks.

Now that rhashtable initialization cannot fail, we can properly get rid of
the hack altogether.

[manfred@colorfullife.com: commit id extended to 12 digits]
Link: http://lkml.kernel.org/r/20180712185241.4017-10-manfred@colorfullife.com
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  1 -
 ipc/util.c                    | 23 ++++++++---------------
 2 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6cea726612b7..e9ccdfdb1928 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -16,7 +16,6 @@ struct user_namespace;
 struct ipc_ids {
 	int in_use;
 	unsigned short seq;
-	bool tables_initialized;
 	struct rw_semaphore rwsem;
 	struct idr ipcs_idr;
 	int max_id;
diff --git a/ipc/util.c b/ipc/util.c
index bd1863b6ed39..7c1387c9510d 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -126,7 +126,6 @@ int ipc_init_ids(struct ipc_ids *ids)
 	if (err)
 		return err;
 	idr_init(&ids->ipcs_idr);
-	ids->tables_initialized = true;
 	ids->max_id = -1;
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	ids->next_id = -1;
@@ -179,19 +178,16 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
  */
 static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 {
-	struct kern_ipc_perm *ipcp = NULL;
+	struct kern_ipc_perm *ipcp;
 
-	if (likely(ids->tables_initialized))
-		ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
+	ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
 					      ipc_kht_params);
+	if (!ipcp)
+		return NULL;
 
-	if (ipcp) {
-		rcu_read_lock();
-		ipc_lock_object(ipcp);
-		return ipcp;
-	}
-
-	return NULL;
+	rcu_read_lock();
+	ipc_lock_object(ipcp);
+	return ipcp;
 }
 
 /*
@@ -269,7 +265,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
 	if (limit > IPCMNI)
 		limit = IPCMNI;
 
-	if (!ids->tables_initialized || ids->in_use >= limit)
+	if (ids->in_use >= limit)
 		return -ENOSPC;
 
 	idr_preload(GFP_KERNEL);
@@ -578,9 +574,6 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
 	struct kern_ipc_perm *out;
 	int lid = ipcid_to_idx(id);
 
-	if (unlikely(!ids->tables_initialized))
-		return ERR_PTR(-EINVAL);
-
 	out = idr_find(&ids->ipcs_idr, lid);
 	if (!out)
 		return ERR_PTR(-EINVAL);
-- 
cgit v1.2.3


From 27c331a174614208d0b539019583990967ad9479 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Tue, 21 Aug 2018 22:02:00 -0700
Subject: ipc/util.c: further variable name cleanups

The varable names got a mess, thus standardize them again:

id: user space id. Called semid, shmid, msgid if the type is known.
    Most functions use "id" already.
idx: "index" for the idr lookup
    Right now, some functions use lid, ipc_addid() already uses idx as
    the variable name.
seq: sequence number, to avoid quick collisions of the user space id
key: user space key, used for the rhash tree

Link: http://lkml.kernel.org/r/20180712185241.4017-12-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  2 +-
 ipc/msg.c                     |  6 +++---
 ipc/sem.c                     |  6 +++---
 ipc/shm.c                     |  4 ++--
 ipc/util.c                    | 26 +++++++++++++-------------
 ipc/util.h                    | 10 +++++-----
 6 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e9ccdfdb1928..6ab8c1bada3f 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -18,7 +18,7 @@ struct ipc_ids {
 	unsigned short seq;
 	struct rw_semaphore rwsem;
 	struct idr ipcs_idr;
-	int max_id;
+	int max_idx;
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	int next_id;
 #endif
diff --git a/ipc/msg.c b/ipc/msg.c
index ee78dad90460..883642cf2b27 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -456,7 +456,7 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 			 int cmd, struct msginfo *msginfo)
 {
 	int err;
-	int max_id;
+	int max_idx;
 
 	/*
 	 * We must not return kernel stack data.
@@ -483,9 +483,9 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 		msginfo->msgpool = MSGPOOL;
 		msginfo->msgtql = MSGTQL;
 	}
-	max_id = ipc_get_maxid(&msg_ids(ns));
+	max_idx = ipc_get_maxidx(&msg_ids(ns));
 	up_read(&msg_ids(ns).rwsem);
-	return (max_id < 0) ? 0 : max_id;
+	return (max_idx < 0) ? 0 : max_idx;
 }
 
 static int msgctl_stat(struct ipc_namespace *ns, int msqid,
diff --git a/ipc/sem.c b/ipc/sem.c
index e4df102f3404..26f8e37fcdcb 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1294,7 +1294,7 @@ static int semctl_info(struct ipc_namespace *ns, int semid,
 			 int cmd, void __user *p)
 {
 	struct seminfo seminfo;
-	int max_id;
+	int max_idx;
 	int err;
 
 	err = security_sem_semctl(NULL, cmd);
@@ -1318,11 +1318,11 @@ static int semctl_info(struct ipc_namespace *ns, int semid,
 		seminfo.semusz = SEMUSZ;
 		seminfo.semaem = SEMAEM;
 	}
-	max_id = ipc_get_maxid(&sem_ids(ns));
+	max_idx = ipc_get_maxidx(&sem_ids(ns));
 	up_read(&sem_ids(ns).rwsem);
 	if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
 		return -EFAULT;
-	return (max_id < 0) ? 0 : max_id;
+	return (max_idx < 0) ? 0 : max_idx;
 }
 
 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
diff --git a/ipc/shm.c b/ipc/shm.c
index c4fb359f1e53..b0eb3757ab89 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -948,7 +948,7 @@ static int shmctl_ipc_info(struct ipc_namespace *ns,
 		shminfo->shmall = ns->shm_ctlall;
 		shminfo->shmmin = SHMMIN;
 		down_read(&shm_ids(ns).rwsem);
-		err = ipc_get_maxid(&shm_ids(ns));
+		err = ipc_get_maxidx(&shm_ids(ns));
 		up_read(&shm_ids(ns).rwsem);
 		if (err < 0)
 			err = 0;
@@ -968,7 +968,7 @@ static int shmctl_shm_info(struct ipc_namespace *ns,
 		shm_info->shm_tot = ns->shm_tot;
 		shm_info->swap_attempts = 0;
 		shm_info->swap_successes = 0;
-		err = ipc_get_maxid(&shm_ids(ns));
+		err = ipc_get_maxidx(&shm_ids(ns));
 		up_read(&shm_ids(ns).rwsem);
 		if (err < 0)
 			err = 0;
diff --git a/ipc/util.c b/ipc/util.c
index a2aae8c1410d..6f30ba80ca15 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -119,7 +119,7 @@ void ipc_init_ids(struct ipc_ids *ids)
 	init_rwsem(&ids->rwsem);
 	rhashtable_init(&ids->key_ht, &ipc_kht_params);
 	idr_init(&ids->ipcs_idr);
-	ids->max_id = -1;
+	ids->max_idx = -1;
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	ids->next_id = -1;
 #endif
@@ -237,7 +237,7 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
  * @limit: limit for the number of used ids
  *
  * Add an entry 'new' to the ipc ids idr. The permissions object is
- * initialised and the first free entry is set up and the id assigned
+ * initialised and the first free entry is set up and the index assigned
  * is returned. The 'new' entry is returned in a locked state on success.
  *
  * On failure the entry is not locked and a negative err-code is returned.
@@ -291,8 +291,8 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
 	}
 
 	ids->in_use++;
-	if (idx > ids->max_id)
-		ids->max_id = idx;
+	if (idx > ids->max_idx)
+		ids->max_idx = idx;
 	return idx;
 }
 
@@ -431,20 +431,20 @@ static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
  */
 void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 {
-	int lid = ipcid_to_idx(ipcp->id);
+	int idx = ipcid_to_idx(ipcp->id);
 
-	idr_remove(&ids->ipcs_idr, lid);
+	idr_remove(&ids->ipcs_idr, idx);
 	ipc_kht_remove(ids, ipcp);
 	ids->in_use--;
 	ipcp->deleted = true;
 
-	if (unlikely(lid == ids->max_id)) {
+	if (unlikely(idx == ids->max_idx)) {
 		do {
-			lid--;
-			if (lid == -1)
+			idx--;
+			if (idx == -1)
 				break;
-		} while (!idr_find(&ids->ipcs_idr, lid));
-		ids->max_id = lid;
+		} while (!idr_find(&ids->ipcs_idr, idx));
+		ids->max_idx = idx;
 	}
 }
 
@@ -564,9 +564,9 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out)
 struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
 {
 	struct kern_ipc_perm *out;
-	int lid = ipcid_to_idx(id);
+	int idx = ipcid_to_idx(id);
 
-	out = idr_find(&ids->ipcs_idr, lid);
+	out = idr_find(&ids->ipcs_idr, idx);
 	if (!out)
 		return ERR_PTR(-EINVAL);
 
diff --git a/ipc/util.h b/ipc/util.h
index 6c5c77c61f85..e74564fe3375 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -113,12 +113,12 @@ void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *);
 int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
 
 /**
- * ipc_get_maxid - get the last assigned id
+ * ipc_get_maxidx - get the highest assigned index
  * @ids: ipc identifier set
  *
  * Called with ipc_ids.rwsem held for reading.
  */
-static inline int ipc_get_maxid(struct ipc_ids *ids)
+static inline int ipc_get_maxidx(struct ipc_ids *ids)
 {
 	if (ids->in_use == 0)
 		return -1;
@@ -126,7 +126,7 @@ static inline int ipc_get_maxid(struct ipc_ids *ids)
 	if (ids->in_use == IPCMNI)
 		return IPCMNI - 1;
 
-	return ids->max_id;
+	return ids->max_idx;
 }
 
 /*
@@ -172,9 +172,9 @@ extern struct msg_msg *load_msg(const void __user *src, size_t len);
 extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
 extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);
 
-static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
+static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int id)
 {
-	return uid / SEQ_MULTIPLIER != ipcp->seq;
+	return ipcid_to_seqx(id) != ipcp->seq;
 }
 
 static inline void ipc_lock_object(struct kern_ipc_perm *perm)
-- 
cgit v1.2.3


From 9abdda5ddab8a899ca8c4b859ef0a7710f40e0dd Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 16 Aug 2018 12:05:59 -0400
Subject: sunrpc: Extract target name into svc_cred

NFSv4.0 callback needs to know the GSS target name the client used
when it established its lease. That information is available from
the GSS context created by gssproxy. Make it available in each
svc_cred.

Note this will also give us access to the real target service
principal name (which is typically "nfs", but spec does not require
that).

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c                  |  7 ++--
 include/linux/sunrpc/svcauth.h       |  3 ++
 net/sunrpc/auth_gss/gss_rpc_upcall.c | 70 +++++++++++++++++++++++-------------
 3 files changed, 53 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c8e4a8f42bbe..3322fe35a8fe 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1979,8 +1979,10 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
 	target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL);
 	target->cr_raw_principal = kstrdup(source->cr_raw_principal,
 								GFP_KERNEL);
-	if ((source->cr_principal && ! target->cr_principal) ||
-	    (source->cr_raw_principal && ! target->cr_raw_principal))
+	target->cr_targ_princ = kstrdup(source->cr_targ_princ, GFP_KERNEL);
+	if ((source->cr_principal && !target->cr_principal) ||
+	    (source->cr_raw_principal && !target->cr_raw_principal) ||
+	    (source->cr_targ_princ && !target->cr_targ_princ))
 		return -ENOMEM;
 
 	target->cr_flavor = source->cr_flavor;
@@ -2057,6 +2059,7 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
 		|| (!gid_eq(cr1->cr_gid, cr2->cr_gid))
 		|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
 		return false;
+	/* XXX: check that cr_targ_princ fields match ? */
 	if (cr1->cr_principal == cr2->cr_principal)
 		return true;
 	if (!cr1->cr_principal || !cr2->cr_principal)
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 7c3656505847..04e404a07882 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -31,6 +31,7 @@ struct svc_cred {
 	/* name of form servicetype@hostname, passed down by
 	 * rpc.svcgssd, or computed from the above: */
 	char			*cr_principal;
+	char			*cr_targ_princ;
 	struct gss_api_mech	*cr_gss_mech;
 };
 
@@ -39,6 +40,7 @@ static inline void init_svc_cred(struct svc_cred *cred)
 	cred->cr_group_info = NULL;
 	cred->cr_raw_principal = NULL;
 	cred->cr_principal = NULL;
+	cred->cr_targ_princ = NULL;
 	cred->cr_gss_mech = NULL;
 }
 
@@ -48,6 +50,7 @@ static inline void free_svc_cred(struct svc_cred *cred)
 		put_group_info(cred->cr_group_info);
 	kfree(cred->cr_raw_principal);
 	kfree(cred->cr_principal);
+	kfree(cred->cr_targ_princ);
 	gss_mech_put(cred->cr_gss_mech);
 	init_svc_cred(cred);
 }
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 1c7c49dbf8ba..73dcda060335 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -234,6 +234,35 @@ static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
 	return 0;
 }
 
+static char *gssp_stringify(struct xdr_netobj *netobj)
+{
+	return kstrndup(netobj->data, netobj->len, GFP_KERNEL);
+}
+
+static void gssp_hostbased_service(char **principal)
+{
+	char *c;
+
+	if (!*principal)
+		return;
+
+	/* terminate and remove realm part */
+	c = strchr(*principal, '@');
+	if (c) {
+		*c = '\0';
+
+		/* change service-hostname delimiter */
+		c = strchr(*principal, '/');
+		if (c)
+			*c = '@';
+	}
+	if (!c) {
+		/* not a service principal */
+		kfree(*principal);
+		*principal = NULL;
+	}
+}
+
 /*
  * Public functions
  */
@@ -262,6 +291,7 @@ int gssp_accept_sec_context_upcall(struct net *net,
 		 */
 		.exported_context_token.len = GSSX_max_output_handle_sz,
 		.mech.len = GSS_OID_MAX_LEN,
+		.targ_name.display_name.len = GSSX_max_princ_sz,
 		.src_name.display_name.len = GSSX_max_princ_sz
 	};
 	struct gssx_res_accept_sec_context res = {
@@ -275,6 +305,7 @@ int gssp_accept_sec_context_upcall(struct net *net,
 		.rpc_cred = NULL, /* FIXME ? */
 	};
 	struct xdr_netobj client_name = { 0 , NULL };
+	struct xdr_netobj target_name = { 0, NULL };
 	int ret;
 
 	if (data->in_handle.len != 0)
@@ -285,8 +316,6 @@ int gssp_accept_sec_context_upcall(struct net *net,
 	if (ret)
 		return ret;
 
-	/* use nfs/ for targ_name ? */
-
 	ret = gssp_call(net, &msg);
 
 	gssp_free_receive_pages(&arg);
@@ -304,6 +333,7 @@ int gssp_accept_sec_context_upcall(struct net *net,
 			kfree(rctxh.mech.data);
 		}
 		client_name = rctxh.src_name.display_name;
+		target_name = rctxh.targ_name.display_name;
 	}
 
 	if (res.options.count == 1) {
@@ -325,32 +355,22 @@ int gssp_accept_sec_context_upcall(struct net *net,
 	}
 
 	/* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */
-	if (data->found_creds && client_name.data != NULL) {
-		char *c;
-
-		data->creds.cr_raw_principal = kstrndup(client_name.data,
-						client_name.len, GFP_KERNEL);
-
-		data->creds.cr_principal = kstrndup(client_name.data,
-						client_name.len, GFP_KERNEL);
-		if (data->creds.cr_principal) {
-			/* terminate and remove realm part */
-			c = strchr(data->creds.cr_principal, '@');
-			if (c) {
-				*c = '\0';
-
-				/* change service-hostname delimiter */
-				c = strchr(data->creds.cr_principal, '/');
-				if (c) *c = '@';
-			}
-			if (!c) {
-				/* not a service principal */
-				kfree(data->creds.cr_principal);
-				data->creds.cr_principal = NULL;
-			}
+	if (data->found_creds) {
+		if (client_name.data) {
+			data->creds.cr_raw_principal =
+					gssp_stringify(&client_name);
+			data->creds.cr_principal =
+					gssp_stringify(&client_name);
+			gssp_hostbased_service(&data->creds.cr_principal);
+		}
+		if (target_name.data) {
+			data->creds.cr_targ_princ =
+					gssp_stringify(&target_name);
+			gssp_hostbased_service(&data->creds.cr_targ_princ);
 		}
 	}
 	kfree(client_name.data);
+	kfree(target_name.data);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 815f0ddb346c196018d4d8f8f55c12b83da1de3f Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Wed, 22 Aug 2018 16:37:24 -0700
Subject: include/linux/compiler*.h: make compiler-*.h mutually exclusive

Commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6")
recently exposed a brittle part of the build for supporting non-gcc
compilers.

Both Clang and ICC define __GNUC__, __GNUC_MINOR__, and
__GNUC_PATCHLEVEL__ for quick compatibility with code bases that haven't
added compiler specific checks for __clang__ or __INTEL_COMPILER.

This is brittle, as they happened to get compatibility by posing as a
certain version of GCC.  This broke when upgrading the minimal version
of GCC required to build the kernel, to a version above what ICC and
Clang claim to be.

Rather than always including compiler-gcc.h then undefining or
redefining macros in compiler-intel.h or compiler-clang.h, let's
separate out the compiler specific macro definitions into mutually
exclusive headers, do more proper compiler detection, and keep shared
definitions in compiler_types.h.

Fixes: cafa0010cd51 ("Raise the minimum required gcc version to 4.6")
Reported-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Suggested-by: Eli Friedman <efriedma@codeaurora.org>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/asm-offsets.c     |  13 +--
 drivers/gpu/drm/i915/i915_utils.h |   2 +-
 drivers/watchdog/kempld_wdt.c     |   5 -
 include/linux/compiler-clang.h    |  20 ++--
 include/linux/compiler-gcc.h      |  88 --------------
 include/linux/compiler-intel.h    |  13 +--
 include/linux/compiler_types.h    | 238 ++++++++++++++++++--------------------
 mm/ksm.c                          |   4 +-
 mm/migrate.c                      |   3 +-
 9 files changed, 133 insertions(+), 253 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 974d8d7d1bcd..3968d6c22455 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -38,25 +38,14 @@
 #error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32
 #endif
 /*
- * GCC 3.0, 3.1: general bad code generation.
- * GCC 3.2.0: incorrect function argument offset calculation.
- * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c
- *            (http://gcc.gnu.org/PR8896) and incorrect structure
- *	      initialisation in fs/jffs2/erase.c
  * GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854
  *	      miscompiles find_get_entry(), and can result in EXT3 and EXT4
  *	      filesystem corruption (possibly other FS too).
  */
-#ifdef __GNUC__
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-#error Your compiler is too buggy; it is known to miscompile kernels.
-#error    Known good compilers: 3.3, 4.x
-#endif
-#if GCC_VERSION >= 40800 && GCC_VERSION < 40803
+#if defined(GCC_VERSION) && GCC_VERSION >= 40800 && GCC_VERSION < 40803
 #error Your compiler is too buggy; it is known to miscompile kernels
 #error and result in filesystem corruption and oopses.
 #endif
-#endif
 
 int main(void)
 {
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 00165ad55fb3..395dd2511568 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -43,7 +43,7 @@
 #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \
 			     __stringify(x), (long)(x))
 
-#if GCC_VERSION >= 70000
+#if defined(GCC_VERSION) && GCC_VERSION >= 70000
 #define add_overflows(A, B) \
 	__builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0)
 #else
diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c
index 2f3b049ea301..e268add43010 100644
--- a/drivers/watchdog/kempld_wdt.c
+++ b/drivers/watchdog/kempld_wdt.c
@@ -146,12 +146,7 @@ static int kempld_wdt_set_stage_timeout(struct kempld_wdt_data *wdt_data,
 	u32 remainder;
 	u8 stage_cfg;
 
-#if GCC_VERSION < 40400
-	/* work around a bug compiling do_div() */
-	prescaler = READ_ONCE(kempld_prescaler[PRESCALER_21]);
-#else
 	prescaler = kempld_prescaler[PRESCALER_21];
-#endif
 
 	if (!stage)
 		return -EINVAL;
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 7087446c24c8..b1ce500fe8b3 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -6,11 +6,7 @@
 /* Some compiler specific definitions are overwritten here
  * for Clang compiler
  */
-
-#ifdef uninitialized_var
-#undef uninitialized_var
 #define uninitialized_var(x) x = *(&(x))
-#endif
 
 /* same as gcc, this was present in clang-2.6 so we can assume it works
  * with any version that can compile the kernel
@@ -25,14 +21,8 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
-#undef __no_sanitize_address
 #define __no_sanitize_address __attribute__((no_sanitize("address")))
 
-/* Clang doesn't have a way to turn it off per-function, yet. */
-#ifdef __noretpoline
-#undef __noretpoline
-#endif
-
 /*
  * Not all versions of clang implement the the type-generic versions
  * of the builtin overflow checkers. Fortunately, clang implements
@@ -40,9 +30,17 @@
  * checks. Unfortunately, we don't know which version of gcc clang
  * pretends to be, so the macro may or may not be defined.
  */
-#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
 #if __has_builtin(__builtin_mul_overflow) && \
     __has_builtin(__builtin_add_overflow) && \
     __has_builtin(__builtin_sub_overflow)
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
+
+/* The following are for compatibility with GCC, from compiler-gcc.h,
+ * and may be redefined here because they should not be shared with other
+ * compilers, like ICC.
+ */
+#define barrier() __asm__ __volatile__("" : : : "memory")
+#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#define __assume_aligned(a, ...)	\
+	__attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 250b9b7cfd60..763bbad1e258 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -75,48 +75,6 @@
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #endif
 
-/*
- * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
- * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
- * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
- * defined so the gnu89 semantics are the default.
- */
-#ifdef __GNUC_STDC_INLINE__
-# define __gnu_inline	__attribute__((gnu_inline))
-#else
-# define __gnu_inline
-#endif
-
-/*
- * Force always-inline if the user requests it so via the .config,
- * or if gcc is too old.
- * GCC does not warn about unused static inline functions for
- * -Wunused-function.  This turns out to avoid the need for complex #ifdef
- * directives.  Suppress the warning in clang as well by using "unused"
- * function attribute, which is redundant but not harmful for gcc.
- * Prefer gnu_inline, so that extern inline functions do not emit an
- * externally visible function. This makes extern inline behave as per gnu89
- * semantics rather than c99. This prevents multiple symbol definition errors
- * of extern inline functions at link time.
- * A lot of inline functions can cause havoc with function tracing.
- */
-#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
-    !defined(CONFIG_OPTIMIZE_INLINING)
-#define inline \
-	inline __attribute__((always_inline, unused)) notrace __gnu_inline
-#else
-#define inline inline		__attribute__((unused)) notrace __gnu_inline
-#endif
-
-#define __inline__ inline
-#define __inline inline
-#define __always_inline	inline __attribute__((always_inline))
-#define  noinline	__attribute__((noinline))
-
-#define __packed	__attribute__((packed))
-#define __weak		__attribute__((weak))
-#define __alias(symbol)	__attribute__((alias(#symbol)))
-
 #ifdef RETPOLINE
 #define __noretpoline __attribute__((indirect_branch("keep")))
 #endif
@@ -135,55 +93,9 @@
  */
 #define __naked		__attribute__((naked)) noinline __noclone notrace
 
-#define __noreturn	__attribute__((noreturn))
-
-/*
- * From the GCC manual:
- *
- * Many functions have no effects except the return value and their
- * return value depends only on the parameters and/or global
- * variables.  Such a function can be subject to common subexpression
- * elimination and loop optimization just as an arithmetic operator
- * would be.
- * [...]
- */
-#define __pure			__attribute__((pure))
-#define __aligned(x)		__attribute__((aligned(x)))
-#define __aligned_largest	__attribute__((aligned))
-#define __printf(a, b)		__attribute__((format(printf, a, b)))
-#define __scanf(a, b)		__attribute__((format(scanf, a, b)))
-#define __attribute_const__	__attribute__((__const__))
-#define __maybe_unused		__attribute__((unused))
-#define __always_unused		__attribute__((unused))
-#define __mode(x)               __attribute__((mode(x)))
-
-#define __must_check		__attribute__((warn_unused_result))
-#define __malloc		__attribute__((__malloc__))
-
-#define __used			__attribute__((__used__))
-#define __compiler_offsetof(a, b)					\
-	__builtin_offsetof(a, b)
-
-/* Mark functions as cold. gcc will assume any path leading to a call
- * to them will be unlikely.  This means a lot of manual unlikely()s
- * are unnecessary now for any paths leading to the usual suspects
- * like BUG(), printk(), panic() etc. [but let's keep them for now for
- * older compilers]
- *
- * Early snapshots of gcc 4.3 don't support this and we can't detect this
- * in the preprocessor, but we can live with this because they're unreleased.
- * Maketime probing would be overkill here.
- *
- * gcc also has a __attribute__((__hot__)) to move hot functions into
- * a special section, but I don't see any sense in this right now in
- * the kernel context
- */
-#define __cold			__attribute__((__cold__))
-
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
 #define __optimize(level)	__attribute__((__optimize__(level)))
-#define __nostackprotector	__optimize("no-stack-protector")
 
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 547cdc920a3c..4c7f9befa9f6 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -14,10 +14,6 @@
 /* Intel ECC compiler doesn't support gcc specific asm stmts.
  * It uses intrinsics to do the equivalent things.
  */
-#undef barrier
-#undef barrier_data
-#undef RELOC_HIDE
-#undef OPTIMIZER_HIDE_VAR
 
 #define barrier() __memory_barrier()
 #define barrier_data(ptr) barrier()
@@ -38,13 +34,12 @@
 
 #endif
 
-#ifndef __HAVE_BUILTIN_BSWAP16__
 /* icc has this, but it's called _bswap16 */
 #define __HAVE_BUILTIN_BSWAP16__
 #define __builtin_bswap16 _bswap16
-#endif
 
-/*
- * icc defines __GNUC__, but does not implement the builtin overflow checkers.
+/* The following are for compatibility with GCC, from compiler-gcc.h,
+ * and may be redefined here because they should not be shared with other
+ * compilers, like clang.
  */
-#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+#define __visible	__attribute__((externally_visible))
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index fbf337933fd8..90479a0f3986 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -54,32 +54,20 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 
 #ifdef __KERNEL__
 
-#ifdef __GNUC__
-#include <linux/compiler-gcc.h>
-#endif
-
-#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
-#define notrace __attribute__((hotpatch(0,0)))
-#else
-#define notrace __attribute__((no_instrument_function))
-#endif
-
-/* Intel compiler defines __GNUC__. So we will overwrite implementations
- * coming from above header files here
- */
-#ifdef __INTEL_COMPILER
-# include <linux/compiler-intel.h>
-#endif
-
-/* Clang compiler defines __GNUC__. So we will overwrite implementations
- * coming from above header files here
- */
+/* Compiler specific macros. */
 #ifdef __clang__
 #include <linux/compiler-clang.h>
+#elif defined(__INTEL_COMPILER)
+#include <linux/compiler-intel.h>
+#elif defined(__GNUC__)
+/* The above compilers also define __GNUC__, so order is important here. */
+#include <linux/compiler-gcc.h>
+#else
+#error "Unknown compiler"
 #endif
 
 /*
- * Generic compiler-dependent macros required for kernel
+ * Generic compiler-independent macros required for kernel
  * build go below this comment. Actual compiler/compiler version
  * specific implementations come from the above header files
  */
@@ -106,93 +94,19 @@ struct ftrace_likely_data {
 	unsigned long			constant;
 };
 
-#endif /* __KERNEL__ */
-
-#endif /* __ASSEMBLY__ */
-
-#ifdef __KERNEL__
-
 /* Don't. Just don't. */
 #define __deprecated
 #define __deprecated_for_modules
 
-#ifndef __must_check
-#define __must_check
-#endif
-
-#ifndef CONFIG_ENABLE_MUST_CHECK
-#undef __must_check
-#define __must_check
-#endif
-
-#ifndef __malloc
-#define __malloc
-#endif
-
-/*
- * Allow us to avoid 'defined but not used' warnings on functions and data,
- * as well as force them to be emitted to the assembly file.
- *
- * As of gcc 3.4, static functions that are not marked with attribute((used))
- * may be elided from the assembly file.  As of gcc 3.4, static data not so
- * marked will not be elided, but this may change in a future gcc version.
- *
- * NOTE: Because distributions shipped with a backported unit-at-a-time
- * compiler in gcc 3.3, we must define __used to be __attribute__((used))
- * for gcc >=3.3 instead of 3.4.
- *
- * In prior versions of gcc, such functions and data would be emitted, but
- * would be warned about except with attribute((unused)).
- *
- * Mark functions that are referenced only in inline assembly as __used so
- * the code is emitted even though it appears to be unreferenced.
- */
-#ifndef __used
-# define __used			/* unimplemented */
-#endif
-
-#ifndef __maybe_unused
-# define __maybe_unused		/* unimplemented */
-#endif
-
-#ifndef __always_unused
-# define __always_unused	/* unimplemented */
-#endif
-
-#ifndef noinline
-#define noinline
-#endif
-
-/*
- * Rather then using noinline to prevent stack consumption, use
- * noinline_for_stack instead.  For documentation reasons.
- */
-#define noinline_for_stack noinline
-
-#ifndef __always_inline
-#define __always_inline inline
-#endif
-
 #endif /* __KERNEL__ */
 
+#endif /* __ASSEMBLY__ */
+
 /*
- * From the GCC manual:
- *
- * Many functions do not examine any values except their arguments,
- * and have no effects except the return value.  Basically this is
- * just slightly more strict class than the `pure' attribute above,
- * since function is not allowed to read global memory.
- *
- * Note that a function that has pointer arguments and examines the
- * data pointed to must _not_ be declared `const'.  Likewise, a
- * function that calls a non-`const' function usually must not be
- * `const'.  It does not make sense for a `const' function to return
- * `void'.
+ * The below symbols may be defined for one or more, but not ALL, of the above
+ * compilers. We don't consider that to be an error, so set them to nothing.
+ * For example, some of them are for compiler specific plugins.
  */
-#ifndef __attribute_const__
-# define __attribute_const__	/* unimplemented */
-#endif
-
 #ifndef __designated_init
 # define __designated_init
 #endif
@@ -214,28 +128,10 @@ struct ftrace_likely_data {
 # define randomized_struct_fields_end
 #endif
 
-/*
- * Tell gcc if a function is cold. The compiler will assume any path
- * directly leading to the call is unlikely.
- */
-
-#ifndef __cold
-#define __cold
-#endif
-
-/* Simple shorthand for a section definition */
-#ifndef __section
-# define __section(S) __attribute__ ((__section__(#S)))
-#endif
-
 #ifndef __visible
 #define __visible
 #endif
 
-#ifndef __nostackprotector
-# define __nostackprotector
-#endif
-
 /*
  * Assume alignment of return value.
  */
@@ -243,17 +139,23 @@ struct ftrace_likely_data {
 #define __assume_aligned(a, ...)
 #endif
 
-
 /* Are two types/vars the same type (ignoring qualifiers)? */
-#ifndef __same_type
-# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
-#endif
+#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 
 /* Is this type a native word size -- useful for atomic operations */
-#ifndef __native_word
-# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+#define __native_word(t) \
+	(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
+	 sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+
+#ifndef __attribute_const__
+#define __attribute_const__	__attribute__((__const__))
 #endif
 
+#ifndef __noclone
+#define __noclone
+#endif
+
+/* Helpers for emitting diagnostics in pragmas. */
 #ifndef __diag
 #define __diag(string)
 #endif
@@ -272,4 +174,92 @@ struct ftrace_likely_data {
 #define __diag_error(compiler, version, option, comment) \
 	__diag_ ## compiler(version, error, option)
 
+/*
+ * From the GCC manual:
+ *
+ * Many functions have no effects except the return value and their
+ * return value depends only on the parameters and/or global
+ * variables.  Such a function can be subject to common subexpression
+ * elimination and loop optimization just as an arithmetic operator
+ * would be.
+ * [...]
+ */
+#define __pure			__attribute__((pure))
+#define __aligned(x)		__attribute__((aligned(x)))
+#define __aligned_largest	__attribute__((aligned))
+#define __printf(a, b)		__attribute__((format(printf, a, b)))
+#define __scanf(a, b)		__attribute__((format(scanf, a, b)))
+#define __maybe_unused		__attribute__((unused))
+#define __always_unused		__attribute__((unused))
+#define __mode(x)		__attribute__((mode(x)))
+#define __malloc		__attribute__((__malloc__))
+#define __used			__attribute__((__used__))
+#define __noreturn		__attribute__((noreturn))
+#define __packed		__attribute__((packed))
+#define __weak			__attribute__((weak))
+#define __alias(symbol)		__attribute__((alias(#symbol)))
+#define __cold			__attribute__((cold))
+#define __section(S)		__attribute__((__section__(#S)))
+
+
+#ifdef CONFIG_ENABLE_MUST_CHECK
+#define __must_check		__attribute__((warn_unused_result))
+#else
+#define __must_check
+#endif
+
+#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
+#define notrace			__attribute__((hotpatch(0, 0)))
+#else
+#define notrace			__attribute__((no_instrument_function))
+#endif
+
+#define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
+
+/*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
+ * Force always-inline if the user requests it so via the .config.
+ * GCC does not warn about unused static inline functions for
+ * -Wunused-function.  This turns out to avoid the need for complex #ifdef
+ * directives.  Suppress the warning in clang as well by using "unused"
+ * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
+ */
+#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
+	!defined(CONFIG_OPTIMIZE_INLINING)
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
+#else
+#define inline inline	__attribute__((unused)) notrace __gnu_inline
+#endif
+
+#define __inline__ inline
+#define __inline inline
+#define noinline	__attribute__((noinline))
+
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
+/*
+ * Rather then using noinline to prevent stack consumption, use
+ * noinline_for_stack instead.  For documentation reasons.
+ */
+#define noinline_for_stack noinline
+
 #endif /* __LINUX_COMPILER_TYPES_H */
diff --git a/mm/ksm.c b/mm/ksm.c
index 1bd514c9e5d0..5b0894b45ee5 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -652,9 +652,9 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
 	 * list_head to stay clear from the rb_parent_color union
 	 * (aligned and different than any node) and also different
 	 * from &migrate_nodes. This will verify that future list.h changes
-	 * don't break STABLE_NODE_DUP_HEAD.
+	 * don't break STABLE_NODE_DUP_HEAD. Only recent gcc can handle it.
 	 */
-#if GCC_VERSION >= 40903 /* only recent gcc can handle it */
+#if defined(GCC_VERSION) && GCC_VERSION >= 40903
 	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes);
 	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1);
 #endif
diff --git a/mm/migrate.c b/mm/migrate.c
index 4a83268e23c2..c27e97b5b69d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1131,7 +1131,8 @@ out:
  * gcc 4.7 and 4.8 on arm get an ICEs when inlining unmap_and_move().  Work
  * around it.
  */
-#if (GCC_VERSION >= 40700 && GCC_VERSION < 40900) && defined(CONFIG_ARM)
+#if defined(CONFIG_ARM) && \
+	defined(GCC_VERSION) && GCC_VERSION < 40900 && GCC_VERSION >= 40700
 #define ICE_noinline noinline
 #else
 #define ICE_noinline
-- 
cgit v1.2.3


From 30aba6656f61ed44cba445a3c0d38b296fa9e8f5 Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Date: Thu, 23 Aug 2018 17:00:35 -0700
Subject: namei: allow restricted O_CREAT of FIFOs and regular files

Disallows open of FIFOs or regular files not owned by the user in world
writable sticky directories, unless the owner is the same as that of the
directory or the file is opened without the O_CREAT flag.  The purpose
is to make data spoofing attacks harder.  This protection can be turned
on and off separately for FIFOs and regular files via sysctl, just like
the symlinks/hardlinks protection.  This patch is based on Openwall's
"HARDEN_FIFO" feature by Solar Designer.

This is a brief list of old vulnerabilities that could have been prevented
by this feature, some of them even allow for privilege escalation:

CVE-2000-1134
CVE-2007-3852
CVE-2008-0525
CVE-2009-0416
CVE-2011-4834
CVE-2015-1838
CVE-2015-7442
CVE-2016-7489

This list is not meant to be complete.  It's difficult to track down all
vulnerabilities of this kind because they were often reported without any
mention of this particular attack vector.  In fact, before
hardlinks/symlinks restrictions, fifos/regular files weren't the favorite
vehicle to exploit them.

[s.mesoraca16@gmail.com: fix bug reported by Dan Carpenter]
  Link: https://lkml.kernel.org/r/20180426081456.GA7060@mwanda
  Link: http://lkml.kernel.org/r/1524829819-11275-1-git-send-email-s.mesoraca16@gmail.com
[keescook@chromium.org: drop pr_warn_ratelimited() in favor of audit changes in the future]
[keescook@chromium.org: adjust commit subjet]
Link: http://lkml.kernel.org/r/20180416175918.GA13494@beast
Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Suggested-by: Solar Designer <solar@openwall.com>
Suggested-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/fs.txt | 36 ++++++++++++++++++++++++++++++
 fs/namei.c                  | 53 ++++++++++++++++++++++++++++++++++++++++++---
 include/linux/fs.h          |  2 ++
 kernel/sysctl.c             | 18 +++++++++++++++
 4 files changed, 106 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 6c00c1e2743f..819caf8ca05f 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -34,7 +34,9 @@ Currently, these files are in /proc/sys/fs:
 - overflowgid
 - pipe-user-pages-hard
 - pipe-user-pages-soft
+- protected_fifos
 - protected_hardlinks
+- protected_regular
 - protected_symlinks
 - suid_dumpable
 - super-max
@@ -182,6 +184,24 @@ applied.
 
 ==============================================================
 
+protected_fifos:
+
+The intent of this protection is to avoid unintentional writes to
+an attacker-controlled FIFO, where a program expected to create a regular
+file.
+
+When set to "0", writing to FIFOs is unrestricted.
+
+When set to "1" don't allow O_CREAT open on FIFOs that we don't own
+in world writable sticky directories, unless they are owned by the
+owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+This protection is based on the restrictions in Openwall.
+
+==============================================================
+
 protected_hardlinks:
 
 A long-standing class of security issues is the hardlink-based
@@ -202,6 +222,22 @@ This protection is based on the restrictions in Openwall and grsecurity.
 
 ==============================================================
 
+protected_regular:
+
+This protection is similar to protected_fifos, but it
+avoids writes to an attacker-controlled regular file, where a program
+expected to create one.
+
+When set to "0", writing to regular files is unrestricted.
+
+When set to "1" don't allow O_CREAT open on regular files that we
+don't own in world writable sticky directories, unless they are
+owned by the owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+==============================================================
+
 protected_symlinks:
 
 A long-standing class of security issues is the symlink-based
diff --git a/fs/namei.c b/fs/namei.c
index ae6aa9ae757c..0cab6494978c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -887,6 +887,8 @@ static inline void put_link(struct nameidata *nd)
 
 int sysctl_protected_symlinks __read_mostly = 0;
 int sysctl_protected_hardlinks __read_mostly = 0;
+int sysctl_protected_fifos __read_mostly;
+int sysctl_protected_regular __read_mostly;
 
 /**
  * may_follow_link - Check symlink following for unsafe situations
@@ -1003,6 +1005,45 @@ static int may_linkat(struct path *link)
 	return -EPERM;
 }
 
+/**
+ * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
+ *			  should be allowed, or not, on files that already
+ *			  exist.
+ * @dir: the sticky parent directory
+ * @inode: the inode of the file to open
+ *
+ * Block an O_CREAT open of a FIFO (or a regular file) when:
+ *   - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
+ *   - the file already exists
+ *   - we are in a sticky directory
+ *   - we don't own the file
+ *   - the owner of the directory doesn't own the file
+ *   - the directory is world writable
+ * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
+ * the directory doesn't have to be world writable: being group writable will
+ * be enough.
+ *
+ * Returns 0 if the open is allowed, -ve on error.
+ */
+static int may_create_in_sticky(struct dentry * const dir,
+				struct inode * const inode)
+{
+	if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
+	    (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
+	    likely(!(dir->d_inode->i_mode & S_ISVTX)) ||
+	    uid_eq(inode->i_uid, dir->d_inode->i_uid) ||
+	    uid_eq(current_fsuid(), inode->i_uid))
+		return 0;
+
+	if (likely(dir->d_inode->i_mode & 0002) ||
+	    (dir->d_inode->i_mode & 0020 &&
+	     ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
+	      (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
+		return -EACCES;
+	}
+	return 0;
+}
+
 static __always_inline
 const char *get_link(struct nameidata *nd)
 {
@@ -3348,9 +3389,15 @@ finish_open:
 	if (error)
 		return error;
 	audit_inode(nd->name, nd->path.dentry, 0);
-	error = -EISDIR;
-	if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
-		goto out;
+	if (open_flag & O_CREAT) {
+		error = -EISDIR;
+		if (d_is_dir(nd->path.dentry))
+			goto out;
+		error = may_create_in_sticky(dir,
+					     d_backing_inode(nd->path.dentry));
+		if (unlikely(error))
+			goto out;
+	}
 	error = -ENOTDIR;
 	if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
 		goto out;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e5710541183b..33322702c910 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -74,6 +74,8 @@ extern struct inodes_stat_t inodes_stat;
 extern int leases_enable, lease_break_time;
 extern int sysctl_protected_symlinks;
 extern int sysctl_protected_hardlinks;
+extern int sysctl_protected_fifos;
+extern int sysctl_protected_regular;
 
 typedef __kernel_rwf_t rwf_t;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 71ceb6c13c1a..cc02050fd0c4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1807,6 +1807,24 @@ static struct ctl_table fs_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "protected_fifos",
+		.data		= &sysctl_protected_fifos,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &two,
+	},
+	{
+		.procname	= "protected_regular",
+		.data		= &sysctl_protected_regular,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &two,
+	},
 	{
 		.procname	= "suid_dumpable",
 		.data		= &suid_dumpable,
-- 
cgit v1.2.3


From d4ae9916ea2947341180d2b538f48875ff393a86 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Thu, 23 Aug 2018 17:00:42 -0700
Subject: mm: soft-offline: close the race against page allocation

A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to
allocate a page that was just freed on the way of soft-offline.  This is
undesirable because soft-offline (which is about corrected error) is
less aggressive than hard-offline (which is about uncorrected error),
and we can make soft-offline fail and keep using the page for good
reason like "system is busy."

Two main changes of this patch are:

- setting migrate type of the target page to MIGRATE_ISOLATE. As done
  in free_unref_page_commit(), this makes kernel bypass pcplist when
  freeing the page. So we can assume that the page is in freelist just
  after put_page() returns,

- setting PG_hwpoison on free page under zone->lock which protects
  freelists, so this allows us to avoid setting PG_hwpoison on a page
  that is decided to be allocated soon.

[akpm@linux-foundation.org: tweak set_hwpoison_free_buddy_page() comment]
Link: http://lkml.kernel.org/r/1531452366-11661-3-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reported-by: Xishi Qiu <xishi.qiuxishi@alibaba-inc.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <zy.zhengyi@alibaba-inc.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h |  5 +++++
 include/linux/swapops.h    | 10 ----------
 mm/memory-failure.c        | 26 +++++++++++++++++++++-----
 mm/migrate.c               |  2 +-
 mm/page_alloc.c            | 30 ++++++++++++++++++++++++++++++
 5 files changed, 57 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 901943e4754b..74bee8cecf4c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -369,8 +369,13 @@ PAGEFLAG_FALSE(Uncached)
 PAGEFLAG(HWPoison, hwpoison, PF_ANY)
 TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
+extern bool set_hwpoison_free_buddy_page(struct page *page);
 #else
 PAGEFLAG_FALSE(HWPoison)
+static inline bool set_hwpoison_free_buddy_page(struct page *page)
+{
+	return 0;
+}
 #define __PG_HWPOISON 0
 #endif
 
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 1d3877c39a00..568a3553d918 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -340,11 +340,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
 	return swp_type(entry) == SWP_HWPOISON;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-	return TestSetPageHWPoison(page);
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 	atomic_long_inc(&num_poisoned_pages);
@@ -367,11 +362,6 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
 	return 0;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-	return false;
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 49dc32c61137..192d0bbfc9ea 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -57,6 +57,7 @@
 #include <linux/mm_inline.h>
 #include <linux/kfifo.h>
 #include <linux/ratelimit.h>
+#include <linux/page-isolation.h>
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -1697,6 +1698,7 @@ static int __soft_offline_page(struct page *page, int flags)
 static int soft_offline_in_use_page(struct page *page, int flags)
 {
 	int ret;
+	int mt;
 	struct page *hpage = compound_head(page);
 
 	if (!PageHuge(page) && PageTransHuge(hpage)) {
@@ -1715,23 +1717,37 @@ static int soft_offline_in_use_page(struct page *page, int flags)
 		put_hwpoison_page(hpage);
 	}
 
+	/*
+	 * Setting MIGRATE_ISOLATE here ensures that the page will be linked
+	 * to free list immediately (not via pcplist) when released after
+	 * successful page migration. Otherwise we can't guarantee that the
+	 * page is really free after put_page() returns, so
+	 * set_hwpoison_free_buddy_page() highly likely fails.
+	 */
+	mt = get_pageblock_migratetype(page);
+	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 	if (PageHuge(page))
 		ret = soft_offline_huge_page(page, flags);
 	else
 		ret = __soft_offline_page(page, flags);
-
+	set_pageblock_migratetype(page, mt);
 	return ret;
 }
 
-static void soft_offline_free_page(struct page *page)
+static int soft_offline_free_page(struct page *page)
 {
 	int rc = 0;
 	struct page *head = compound_head(page);
 
 	if (PageHuge(head))
 		rc = dissolve_free_huge_page(page);
-	if (!rc && !TestSetPageHWPoison(page))
-		num_poisoned_pages_inc();
+	if (!rc) {
+		if (set_hwpoison_free_buddy_page(page))
+			num_poisoned_pages_inc();
+		else
+			rc = -EBUSY;
+	}
+	return rc;
 }
 
 /**
@@ -1775,7 +1791,7 @@ int soft_offline_page(struct page *page, int flags)
 	if (ret > 0)
 		ret = soft_offline_in_use_page(page, flags);
 	else if (ret == 0)
-		soft_offline_free_page(page);
+		ret = soft_offline_free_page(page);
 
 	return ret;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 91a99457127c..d6a2e89b086a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1212,7 +1212,7 @@ out:
 			 * intentionally. Although it's rather weird,
 			 * it's how HWPoison flag works at the moment.
 			 */
-			if (!test_set_page_hwpoison(page))
+			if (set_hwpoison_free_buddy_page(page))
 				num_poisoned_pages_inc();
 		}
 	} else {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c677c1506d73..e75865d58ba7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8096,3 +8096,33 @@ bool is_free_buddy_page(struct page *page)
 
 	return order < MAX_ORDER;
 }
+
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Set PG_hwpoison flag if a given page is confirmed to be a free page.  This
+ * test is performed under the zone lock to prevent a race against page
+ * allocation.
+ */
+bool set_hwpoison_free_buddy_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long flags;
+	unsigned int order;
+	bool hwpoisoned = false;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	for (order = 0; order < MAX_ORDER; order++) {
+		struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+		if (PageBuddy(page_head) && page_order(page_head) >= order) {
+			if (!TestSetPageHWPoison(page))
+				hwpoisoned = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return hwpoisoned;
+}
+#endif
-- 
cgit v1.2.3


From 263fade51f7bdd5ad7ebbfe82113a44c5ea4c36c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Thu, 23 Aug 2018 17:01:15 -0700
Subject: docs/mm: make GFP flags descriptions usable as kernel-doc

This patch adds DOC: headings for GFP flag descriptions and adjusts the
formatting to fit sphinx expectations of paragraphs.

Link: http://lkml.kernel.org/r/1532626360-16650-7-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 291 +++++++++++++++++++++++++++-------------------------
 1 file changed, 154 insertions(+), 137 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a6afcec53795..24bcc5eec6b4 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -59,29 +59,32 @@ struct vm_area_struct;
 #define __GFP_MOVABLE	((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE allowed */
 #define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
 
-/*
+/**
+ * DOC: Page mobility and placement hints
+ *
  * Page mobility and placement hints
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
  * These flags provide hints about how mobile the page is. Pages with similar
  * mobility are placed within the same pageblocks to minimise problems due
  * to external fragmentation.
  *
- * __GFP_MOVABLE (also a zone modifier) indicates that the page can be
- *   moved by page migration during memory compaction or can be reclaimed.
+ * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be
+ * moved by page migration during memory compaction or can be reclaimed.
  *
- * __GFP_RECLAIMABLE is used for slab allocations that specify
- *   SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
+ * %__GFP_RECLAIMABLE is used for slab allocations that specify
+ * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
  *
- * __GFP_WRITE indicates the caller intends to dirty the page. Where possible,
- *   these pages will be spread between local zones to avoid all the dirty
- *   pages being in one zone (fair zone allocation policy).
+ * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible,
+ * these pages will be spread between local zones to avoid all the dirty
+ * pages being in one zone (fair zone allocation policy).
  *
- * __GFP_HARDWALL enforces the cpuset memory allocation policy.
+ * %__GFP_HARDWALL enforces the cpuset memory allocation policy.
  *
- * __GFP_THISNODE forces the allocation to be satisified from the requested
- *   node with no fallbacks or placement policy enforcements.
+ * %__GFP_THISNODE forces the allocation to be satisified from the requested
+ * node with no fallbacks or placement policy enforcements.
  *
- * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
+ * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
  */
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)
@@ -89,54 +92,60 @@ struct vm_area_struct;
 #define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)
 #define __GFP_ACCOUNT	((__force gfp_t)___GFP_ACCOUNT)
 
-/*
+/**
+ * DOC: Watermark modifiers
+ *
  * Watermark modifiers -- controls access to emergency reserves
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
- * __GFP_HIGH indicates that the caller is high-priority and that granting
- *   the request is necessary before the system can make forward progress.
- *   For example, creating an IO context to clean pages.
+ * %__GFP_HIGH indicates that the caller is high-priority and that granting
+ * the request is necessary before the system can make forward progress.
+ * For example, creating an IO context to clean pages.
  *
- * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
- *   high priority. Users are typically interrupt handlers. This may be
- *   used in conjunction with __GFP_HIGH
+ * %__GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
+ * high priority. Users are typically interrupt handlers. This may be
+ * used in conjunction with %__GFP_HIGH
  *
- * __GFP_MEMALLOC allows access to all memory. This should only be used when
- *   the caller guarantees the allocation will allow more memory to be freed
- *   very shortly e.g. process exiting or swapping. Users either should
- *   be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
+ * %__GFP_MEMALLOC allows access to all memory. This should only be used when
+ * the caller guarantees the allocation will allow more memory to be freed
+ * very shortly e.g. process exiting or swapping. Users either should
+ * be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
  *
- * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
- *   This takes precedence over the __GFP_MEMALLOC flag if both are set.
+ * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
+ * This takes precedence over the %__GFP_MEMALLOC flag if both are set.
  */
 #define __GFP_ATOMIC	((__force gfp_t)___GFP_ATOMIC)
 #define __GFP_HIGH	((__force gfp_t)___GFP_HIGH)
 #define __GFP_MEMALLOC	((__force gfp_t)___GFP_MEMALLOC)
 #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
 
-/*
+/**
+ * DOC: Reclaim modifiers
+ *
  * Reclaim modifiers
+ * ~~~~~~~~~~~~~~~~~
  *
- * __GFP_IO can start physical IO.
+ * %__GFP_IO can start physical IO.
  *
- * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the
- *   allocator recursing into the filesystem which might already be holding
- *   locks.
+ * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the
+ * allocator recursing into the filesystem which might already be holding
+ * locks.
  *
- * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
- *   This flag can be cleared to avoid unnecessary delays when a fallback
- *   option is available.
+ * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
+ * This flag can be cleared to avoid unnecessary delays when a fallback
+ * option is available.
  *
- * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
- *   the low watermark is reached and have it reclaim pages until the high
- *   watermark is reached. A caller may wish to clear this flag when fallback
- *   options are available and the reclaim is likely to disrupt the system. The
- *   canonical example is THP allocation where a fallback is cheap but
- *   reclaim/compaction may cause indirect stalls.
+ * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
+ * the low watermark is reached and have it reclaim pages until the high
+ * watermark is reached. A caller may wish to clear this flag when fallback
+ * options are available and the reclaim is likely to disrupt the system. The
+ * canonical example is THP allocation where a fallback is cheap but
+ * reclaim/compaction may cause indirect stalls.
  *
- * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
+ * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
  *
  * The default allocator behavior depends on the request size. We have a concept
- * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER).
+ * of so called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER).
  * !costly allocations are too essential to fail so they are implicitly
  * non-failing by default (with some exceptions like OOM victims might fail so
  * the caller still has to check for failures) while costly requests try to be
@@ -144,40 +153,40 @@ struct vm_area_struct;
  * The following three modifiers might be used to override some of these
  * implicit rules
  *
- * __GFP_NORETRY: The VM implementation will try only very lightweight
- *   memory direct reclaim to get some memory under memory pressure (thus
- *   it can sleep). It will avoid disruptive actions like OOM killer. The
- *   caller must handle the failure which is quite likely to happen under
- *   heavy memory pressure. The flag is suitable when failure can easily be
- *   handled at small cost, such as reduced throughput
- *
- * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
- *   procedures that have previously failed if there is some indication
- *   that progress has been made else where.  It can wait for other
- *   tasks to attempt high level approaches to freeing memory such as
- *   compaction (which removes fragmentation) and page-out.
- *   There is still a definite limit to the number of retries, but it is
- *   a larger limit than with __GFP_NORETRY.
- *   Allocations with this flag may fail, but only when there is
- *   genuinely little unused memory. While these allocations do not
- *   directly trigger the OOM killer, their failure indicates that
- *   the system is likely to need to use the OOM killer soon.  The
- *   caller must handle failure, but can reasonably do so by failing
- *   a higher-level request, or completing it only in a much less
- *   efficient manner.
- *   If the allocation does fail, and the caller is in a position to
- *   free some non-essential memory, doing so could benefit the system
- *   as a whole.
- *
- * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- *   cannot handle allocation failures. The allocation could block
- *   indefinitely but will never return with failure. Testing for
- *   failure is pointless.
- *   New users should be evaluated carefully (and the flag should be
- *   used only when there is no reasonable failure policy) but it is
- *   definitely preferable to use the flag rather than opencode endless
- *   loop around allocator.
- *   Using this flag for costly allocations is _highly_ discouraged.
+ * %__GFP_NORETRY: The VM implementation will try only very lightweight
+ * memory direct reclaim to get some memory under memory pressure (thus
+ * it can sleep). It will avoid disruptive actions like OOM killer. The
+ * caller must handle the failure which is quite likely to happen under
+ * heavy memory pressure. The flag is suitable when failure can easily be
+ * handled at small cost, such as reduced throughput
+ *
+ * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ * procedures that have previously failed if there is some indication
+ * that progress has been made else where.  It can wait for other
+ * tasks to attempt high level approaches to freeing memory such as
+ * compaction (which removes fragmentation) and page-out.
+ * There is still a definite limit to the number of retries, but it is
+ * a larger limit than with %__GFP_NORETRY.
+ * Allocations with this flag may fail, but only when there is
+ * genuinely little unused memory. While these allocations do not
+ * directly trigger the OOM killer, their failure indicates that
+ * the system is likely to need to use the OOM killer soon.  The
+ * caller must handle failure, but can reasonably do so by failing
+ * a higher-level request, or completing it only in a much less
+ * efficient manner.
+ * If the allocation does fail, and the caller is in a position to
+ * free some non-essential memory, doing so could benefit the system
+ * as a whole.
+ *
+ * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures. The allocation could block
+ * indefinitely but will never return with failure. Testing for
+ * failure is pointless.
+ * New users should be evaluated carefully (and the flag should be
+ * used only when there is no reasonable failure policy) but it is
+ * definitely preferable to use the flag rather than opencode endless
+ * loop around allocator.
+ * Using this flag for costly allocations is _highly_ discouraged.
  */
 #define __GFP_IO	((__force gfp_t)___GFP_IO)
 #define __GFP_FS	((__force gfp_t)___GFP_FS)
@@ -188,14 +197,17 @@ struct vm_area_struct;
 #define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)
 #define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY)
 
-/*
+/**
+ * DOC: Action modifiers
+ *
  * Action modifiers
+ * ~~~~~~~~~~~~~~~~
  *
- * __GFP_NOWARN suppresses allocation failure reports.
+ * %__GFP_NOWARN suppresses allocation failure reports.
  *
- * __GFP_COMP address compound page metadata.
+ * %__GFP_COMP address compound page metadata.
  *
- * __GFP_ZERO returns a zeroed page on success.
+ * %__GFP_ZERO returns a zeroed page on success.
  */
 #define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
 #define __GFP_COMP	((__force gfp_t)___GFP_COMP)
@@ -208,66 +220,71 @@ struct vm_area_struct;
 #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
-/*
+/**
+ * DOC: Useful GFP flag combinations
+ *
+ * Useful GFP flag combinations
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
  * Useful GFP flag combinations that are commonly used. It is recommended
  * that subsystems start with one of these combinations and then set/clear
- * __GFP_FOO flags as necessary.
- *
- * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
- *   watermark is applied to allow access to "atomic reserves"
- *
- * GFP_KERNEL is typical for kernel-internal allocations. The caller requires
- *   ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
- *
- * GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is
- *   accounted to kmemcg.
- *
- * GFP_NOWAIT is for kernel allocations that should not stall for direct
- *   reclaim, start physical IO or use any filesystem callback.
- *
- * GFP_NOIO will use direct reclaim to discard clean pages or slab pages
- *   that do not require the starting of any physical IO.
- *   Please try to avoid using this flag directly and instead use
- *   memalloc_noio_{save,restore} to mark the whole scope which cannot
- *   perform any IO with a short explanation why. All allocation requests
- *   will inherit GFP_NOIO implicitly.
- *
- * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
- *   Please try to avoid using this flag directly and instead use
- *   memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't
- *   recurse into the FS layer with a short explanation why. All allocation
- *   requests will inherit GFP_NOFS implicitly.
- *
- * GFP_USER is for userspace allocations that also need to be directly
- *   accessibly by the kernel or hardware. It is typically used by hardware
- *   for buffers that are mapped to userspace (e.g. graphics) that hardware
- *   still must DMA to. cpuset limits are enforced for these allocations.
- *
- * GFP_DMA exists for historical reasons and should be avoided where possible.
- *   The flags indicates that the caller requires that the lowest zone be
- *   used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
- *   it would require careful auditing as some users really require it and
- *   others use the flag to avoid lowmem reserves in ZONE_DMA and treat the
- *   lowest zone as a type of emergency reserve.
- *
- * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit
- *   address.
- *
- * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
- *   do not need to be directly accessible by the kernel but that cannot
- *   move once in use. An example may be a hardware allocation that maps
- *   data directly into userspace but has no addressing limitations.
- *
- * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
- *   need direct access to but can use kmap() when access is required. They
- *   are expected to be movable via page reclaim or page migration. Typically,
- *   pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
- *
- * GFP_TRANSHUGE and GFP_TRANSHUGE_LIGHT are used for THP allocations. They are
- *   compound allocations that will generally fail quickly if memory is not
- *   available and will not wake kswapd/kcompactd on failure. The _LIGHT
- *   version does not attempt reclaim/compaction at all and is by default used
- *   in page fault path, while the non-light is used by khugepaged.
+ * %__GFP_FOO flags as necessary.
+ *
+ * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
+ * watermark is applied to allow access to "atomic reserves"
+ *
+ * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires
+ * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
+ *
+ * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is
+ * accounted to kmemcg.
+ *
+ * %GFP_NOWAIT is for kernel allocations that should not stall for direct
+ * reclaim, start physical IO or use any filesystem callback.
+ *
+ * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages
+ * that do not require the starting of any physical IO.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_noio_{save,restore} to mark the whole scope which cannot
+ * perform any IO with a short explanation why. All allocation requests
+ * will inherit GFP_NOIO implicitly.
+ *
+ * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't
+ * recurse into the FS layer with a short explanation why. All allocation
+ * requests will inherit GFP_NOFS implicitly.
+ *
+ * %GFP_USER is for userspace allocations that also need to be directly
+ * accessibly by the kernel or hardware. It is typically used by hardware
+ * for buffers that are mapped to userspace (e.g. graphics) that hardware
+ * still must DMA to. cpuset limits are enforced for these allocations.
+ *
+ * %GFP_DMA exists for historical reasons and should be avoided where possible.
+ * The flags indicates that the caller requires that the lowest zone be
+ * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
+ * it would require careful auditing as some users really require it and
+ * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the
+ * lowest zone as a type of emergency reserve.
+ *
+ * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit
+ * address.
+ *
+ * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
+ * do not need to be directly accessible by the kernel but that cannot
+ * move once in use. An example may be a hardware allocation that maps
+ * data directly into userspace but has no addressing limitations.
+ *
+ * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
+ * need direct access to but can use kmap() when access is required. They
+ * are expected to be movable via page reclaim or page migration. Typically,
+ * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE.
+ *
+ * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They
+ * are compound allocations that will generally fail quickly if memory is not
+ * available and will not wake kswapd/kcompactd on failure. The _LIGHT
+ * version does not attempt reclaim/compaction at all and is by default used
+ * in page fault path, while the non-light is used by khugepaged.
  */
 #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
-- 
cgit v1.2.3


From 2b7403035459c75e193c6b04a293e518a4212de0 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Thu, 23 Aug 2018 17:01:36 -0700
Subject: mm: Change return type int to vm_fault_t for fault handlers

Use new return type vm_fault_t for fault handler.  For now, this is just
documenting that the function returns a VM_FAULT value rather than an
errno.  Once all instances are converted, vm_fault_t will become a
distinct type.

Ref-> commit 1c8f422059ae ("mm: change return type to vm_fault_t")

The aim is to change the return type of finish_fault() and
handle_mm_fault() to vm_fault_t type.  As part of that clean up return
type of all other recursively called functions have been changed to
vm_fault_t type.

The places from where handle_mm_fault() is getting invoked will be
change to vm_fault_t type but in a separate patch.

vmf_error() is the newly introduce inline function in 4.17-rc6.

[akpm@linux-foundation.org: don't shadow outer local `ret' in __do_huge_pmd_anonymous_page()]
Link: http://lkml.kernel.org/r/20180604171727.GA20279@jordon-HP-15-Notebook-PC
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c              |  6 +--
 include/linux/huge_mm.h       |  9 +++--
 include/linux/hugetlb.h       |  2 +-
 include/linux/mm.h            | 14 +++----
 include/linux/oom.h           |  2 +-
 include/linux/swapops.h       |  5 ++-
 include/linux/userfaultfd_k.h |  5 ++-
 kernel/memremap.c             |  2 +-
 mm/gup.c                      |  4 +-
 mm/huge_memory.c              | 31 +++++++--------
 mm/hugetlb.c                  | 29 +++++++-------
 mm/internal.h                 |  2 +-
 mm/khugepaged.c               |  3 +-
 mm/memory.c                   | 90 ++++++++++++++++++++++---------------------
 mm/shmem.c                    |  5 ++-
 15 files changed, 106 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f649023b19b5..bfa0ec69f924 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -340,17 +340,15 @@ out:
  * fatal_signal_pending()s, and the mmap_sem must be released before
  * returning it.
  */
-int handle_userfault(struct vm_fault *vmf, unsigned long reason)
+vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 {
 	struct mm_struct *mm = vmf->vma->vm_mm;
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue uwq;
-	int ret;
+	vm_fault_t ret = VM_FAULT_SIGBUS;
 	bool must_wait, return_to_userland;
 	long blocking_state;
 
-	ret = VM_FAULT_SIGBUS;
-
 	/*
 	 * We don't do userfault handling for the final child pid update.
 	 *
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a8a126259bc4..27e3e32135a8 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,7 +6,7 @@
 
 #include <linux/fs.h> /* only for vma_is_dax() */
 
-extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
+extern vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 			 struct vm_area_struct *vma);
@@ -23,7 +23,7 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 }
 #endif
 
-extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
+extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
 extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 					  unsigned long addr,
 					  pmd_t *pmd,
@@ -216,7 +216,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 		pud_t *pud, int flags);
 
-extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
+extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
 
@@ -321,7 +321,8 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
 	return NULL;
 }
 
-static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
+static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf,
+		pmd_t orig_pmd)
 {
 	return 0;
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c39d9170a8a0..6b68e345f0ca 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -105,7 +105,7 @@ void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(int, char *);
 void hugetlb_show_meminfo(void);
 unsigned long hugetlb_total_pages(void);
-int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, unsigned int flags);
 int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
 				struct vm_area_struct *dst_vma,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a9e733b5fb76..8fcc36660de6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -728,10 +728,10 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
+vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page);
-int finish_fault(struct vm_fault *vmf);
-int finish_mkwrite_fault(struct vm_fault *vmf);
+vm_fault_t finish_fault(struct vm_fault *vmf);
+vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
 #endif
 
 /*
@@ -1403,8 +1403,8 @@ int generic_error_remove_page(struct address_space *mapping, struct page *page);
 int invalidate_inode_page(struct page *page);
 
 #ifdef CONFIG_MMU
-extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags);
+extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
+			unsigned long address, unsigned int flags);
 extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 			    unsigned long address, unsigned int fault_flags,
 			    bool *unlocked);
@@ -1413,7 +1413,7 @@ void unmap_mapping_pages(struct address_space *mapping,
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
 #else
-static inline int handle_mm_fault(struct vm_area_struct *vma,
+static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags)
 {
 	/* should never happen if there's no MMU */
@@ -2563,7 +2563,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_COW	0x4000	/* internal GUP flag */
 #define FOLL_ANON	0x8000	/* don't do file mappings */
 
-static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
+static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
 {
 	if (vm_fault & VM_FAULT_OOM)
 		return -ENOMEM;
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 92f70e4c6252..69864a547663 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -88,7 +88,7 @@ static inline bool mm_is_oom_victim(struct mm_struct *mm)
  *
  * Return 0 when the PF is safe VM_FAULT_SIGBUS otherwise.
  */
-static inline int check_stable_address_space(struct mm_struct *mm)
+static inline vm_fault_t check_stable_address_space(struct mm_struct *mm)
 {
 	if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags)))
 		return VM_FAULT_SIGBUS;
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 568a3553d918..22af9d8a84ae 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -4,6 +4,7 @@
 
 #include <linux/radix-tree.h>
 #include <linux/bug.h>
+#include <linux/mm_types.h>
 
 /*
  * swapcache pages are stored in the swapper_space radix tree.  We want to
@@ -134,7 +135,7 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 	return pfn_to_page(swp_offset(entry));
 }
 
-int device_private_entry_fault(struct vm_area_struct *vma,
+vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
 		       unsigned long addr,
 		       swp_entry_t entry,
 		       unsigned int flags,
@@ -169,7 +170,7 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 	return NULL;
 }
 
-static inline int device_private_entry_fault(struct vm_area_struct *vma,
+static inline vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
 				     unsigned long addr,
 				     swp_entry_t entry,
 				     unsigned int flags,
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index e091f0a11b11..37c9eba75c98 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -28,7 +28,7 @@
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
-extern int handle_userfault(struct vm_fault *vmf, unsigned long reason);
+extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len,
@@ -77,7 +77,8 @@ extern void userfaultfd_unmap_complete(struct mm_struct *mm,
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
-static inline int handle_userfault(struct vm_fault *vmf, unsigned long reason)
+static inline vm_fault_t handle_userfault(struct vm_fault *vmf,
+				unsigned long reason)
 {
 	return VM_FAULT_SIGBUS;
 }
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 1f87ea6b6545..d57d58f77409 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -43,7 +43,7 @@ static unsigned long order_at(struct resource *res, unsigned long pgoff)
 			pgoff += 1UL << order, order = order_at((res), pgoff))
 
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
-int device_private_entry_fault(struct vm_area_struct *vma,
+vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
 		       unsigned long addr,
 		       swp_entry_t entry,
 		       unsigned int flags,
diff --git a/mm/gup.c b/mm/gup.c
index fc5f98069f4e..1abc8b4afff6 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -497,7 +497,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 		unsigned long address, unsigned int *flags, int *nonblocking)
 {
 	unsigned int fault_flags = 0;
-	int ret;
+	vm_fault_t ret;
 
 	/* mlock all present pages, but do not fault in new pages */
 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
@@ -818,7 +818,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 		     bool *unlocked)
 {
 	struct vm_area_struct *vma;
-	int ret, major = 0;
+	vm_fault_t ret, major = 0;
 
 	if (unlocked)
 		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 78427af91de9..08b544383d74 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -541,14 +541,14 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
 
-static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
-		gfp_t gfp)
+static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
+			struct page *page, gfp_t gfp)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
@@ -584,15 +584,15 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 		/* Deliver the page fault to userland */
 		if (userfaultfd_missing(vma)) {
-			int ret;
+			vm_fault_t ret2;
 
 			spin_unlock(vmf->ptl);
 			mem_cgroup_cancel_charge(page, memcg, true);
 			put_page(page);
 			pte_free(vma->vm_mm, pgtable);
-			ret = handle_userfault(vmf, VM_UFFD_MISSING);
-			VM_BUG_ON(ret & VM_FAULT_FALLBACK);
-			return ret;
+			ret2 = handle_userfault(vmf, VM_UFFD_MISSING);
+			VM_BUG_ON(ret2 & VM_FAULT_FALLBACK);
+			return ret2;
 		}
 
 		entry = mk_huge_pmd(page, vma->vm_page_prot);
@@ -663,7 +663,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 	return true;
 }
 
-int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
+vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	gfp_t gfp;
@@ -682,7 +682,7 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		pgtable_t pgtable;
 		struct page *zero_page;
 		bool set;
-		int ret;
+		vm_fault_t ret;
 		pgtable = pte_alloc_one(vma->vm_mm, haddr);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
@@ -1118,15 +1118,16 @@ unlock:
 	spin_unlock(vmf->ptl);
 }
 
-static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
-		struct page *page)
+static vm_fault_t do_huge_pmd_wp_page_fallback(struct vm_fault *vmf,
+			pmd_t orig_pmd, struct page *page)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
 	pmd_t _pmd;
-	int ret = 0, i;
+	int i;
+	vm_fault_t ret = 0;
 	struct page **pages;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
@@ -1236,7 +1237,7 @@ out_free_pages:
 	goto out;
 }
 
-int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
+vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL, *new_page;
@@ -1245,7 +1246,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 	gfp_t huge_gfp;			/* for allocation and charge */
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
 	VM_BUG_ON_VMA(!vma->anon_vma, vma);
@@ -1457,7 +1458,7 @@ out:
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
+vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct anon_vma *anon_vma = NULL;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9f1c853f67b5..3c21775f196b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3501,14 +3501,15 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  * cannot race with other handlers or page migration.
  * Keep the pte_same checks anyway to make transition from the mutex easier.
  */
-static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
+static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 		       unsigned long address, pte_t *ptep,
 		       struct page *pagecache_page, spinlock_t *ptl)
 {
 	pte_t pte;
 	struct hstate *h = hstate_vma(vma);
 	struct page *old_page, *new_page;
-	int ret = 0, outside_reserve = 0;
+	int outside_reserve = 0;
+	vm_fault_t ret = 0;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 	unsigned long haddr = address & huge_page_mask(h);
@@ -3572,8 +3573,7 @@ retry_avoidcopy:
 			return 0;
 		}
 
-		ret = (PTR_ERR(new_page) == -ENOMEM) ?
-			VM_FAULT_OOM : VM_FAULT_SIGBUS;
+		ret = vmf_error(PTR_ERR(new_page));
 		goto out_release_old;
 	}
 
@@ -3676,12 +3676,13 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 	return 0;
 }
 
-static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			   struct address_space *mapping, pgoff_t idx,
-			   unsigned long address, pte_t *ptep, unsigned int flags)
+static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			struct address_space *mapping, pgoff_t idx,
+			unsigned long address, pte_t *ptep, unsigned int flags)
 {
 	struct hstate *h = hstate_vma(vma);
-	int ret = VM_FAULT_SIGBUS;
+	vm_fault_t ret = VM_FAULT_SIGBUS;
 	int anon_rmap = 0;
 	unsigned long size;
 	struct page *page;
@@ -3744,11 +3745,7 @@ retry:
 
 		page = alloc_huge_page(vma, haddr, 0);
 		if (IS_ERR(page)) {
-			ret = PTR_ERR(page);
-			if (ret == -ENOMEM)
-				ret = VM_FAULT_OOM;
-			else
-				ret = VM_FAULT_SIGBUS;
+			ret = vmf_error(PTR_ERR(page));
 			goto out;
 		}
 		clear_huge_page(page, address, pages_per_huge_page(h));
@@ -3872,12 +3869,12 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
 }
 #endif
 
-int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, unsigned int flags)
 {
 	pte_t *ptep, entry;
 	spinlock_t *ptl;
-	int ret;
+	vm_fault_t ret;
 	u32 hash;
 	pgoff_t idx;
 	struct page *page = NULL;
@@ -4207,7 +4204,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (absent || is_swap_pte(huge_ptep_get(pte)) ||
 		    ((flags & FOLL_WRITE) &&
 		      !huge_pte_write(huge_ptep_get(pte)))) {
-			int ret;
+			vm_fault_t ret;
 			unsigned int fault_flags = 0;
 
 			if (pte)
diff --git a/mm/internal.h b/mm/internal.h
index dab088cb6937..87256ae1bef8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -38,7 +38,7 @@
 
 void page_writeback_init(void);
 
-int do_swap_page(struct vm_fault *vmf);
+vm_fault_t do_swap_page(struct vm_fault *vmf);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 961cbe9062a5..a31d740e6cd1 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -880,7 +880,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 					unsigned long address, pmd_t *pmd,
 					int referenced)
 {
-	int swapped_in = 0, ret = 0;
+	int swapped_in = 0;
+	vm_fault_t ret = 0;
 	struct vm_fault vmf = {
 		.vma = vma,
 		.address = address,
diff --git a/mm/memory.c b/mm/memory.c
index 19f47d7b9b86..42ebdc33268e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2384,9 +2384,9 @@ static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
  *
  * We do this without the lock held, so that it can sleep if it needs to.
  */
-static int do_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
 {
-	int ret;
+	vm_fault_t ret;
 	struct page *page = vmf->page;
 	unsigned int old_flags = vmf->flags;
 
@@ -2490,7 +2490,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
  *   held to the old page, as well as updating the rmap.
  * - In any case, unlock the PTL and drop the reference we took to the old page.
  */
-static int wp_page_copy(struct vm_fault *vmf)
+static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mm_struct *mm = vma->vm_mm;
@@ -2638,7 +2638,7 @@ oom:
  * The function expects the page to be locked or other protection against
  * concurrent faults / writeback (such as DAX radix tree locks).
  */
-int finish_mkwrite_fault(struct vm_fault *vmf)
+vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 {
 	WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
 	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
@@ -2659,12 +2659,12 @@ int finish_mkwrite_fault(struct vm_fault *vmf)
  * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
  * mapping
  */
-static int wp_pfn_shared(struct vm_fault *vmf)
+static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
 	if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
-		int ret;
+		vm_fault_t ret;
 
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		vmf->flags |= FAULT_FLAG_MKWRITE;
@@ -2677,7 +2677,7 @@ static int wp_pfn_shared(struct vm_fault *vmf)
 	return VM_FAULT_WRITE;
 }
 
-static int wp_page_shared(struct vm_fault *vmf)
+static vm_fault_t wp_page_shared(struct vm_fault *vmf)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -2685,7 +2685,7 @@ static int wp_page_shared(struct vm_fault *vmf)
 	get_page(vmf->page);
 
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
-		int tmp;
+		vm_fault_t tmp;
 
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		tmp = do_page_mkwrite(vmf);
@@ -2728,7 +2728,7 @@ static int wp_page_shared(struct vm_fault *vmf)
  * but allow concurrent faults), with pte both mapped and locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_wp_page(struct vm_fault *vmf)
+static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -2904,7 +2904,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
  * We return with the mmap_sem locked or unlocked in the same cases
  * as does filemap_fault().
  */
-int do_swap_page(struct vm_fault *vmf)
+vm_fault_t do_swap_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL, *swapcache;
@@ -2913,7 +2913,7 @@ int do_swap_page(struct vm_fault *vmf)
 	pte_t pte;
 	int locked;
 	int exclusive = 0;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
 		goto out;
@@ -3124,12 +3124,12 @@ out_release:
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_anonymous_page(struct vm_fault *vmf)
+static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mem_cgroup *memcg;
 	struct page *page;
-	int ret = 0;
+	vm_fault_t ret = 0;
 	pte_t entry;
 
 	/* File mapping without ->vm_ops ? */
@@ -3239,10 +3239,10 @@ oom:
  * released depending on flags and vma->vm_ops->fault() return value.
  * See filemap_fault() and __lock_page_retry().
  */
-static int __do_fault(struct vm_fault *vmf)
+static vm_fault_t __do_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret;
+	vm_fault_t ret;
 
 	ret = vma->vm_ops->fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
@@ -3276,7 +3276,7 @@ static int pmd_devmap_trans_unstable(pmd_t *pmd)
 	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
 }
 
-static int pte_alloc_one_map(struct vm_fault *vmf)
+static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
@@ -3352,13 +3352,14 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
 	vmf->prealloc_pte = NULL;
 }
 
-static int do_set_pmd(struct vm_fault *vmf, struct page *page)
+static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	pmd_t entry;
-	int i, ret;
+	int i;
+	vm_fault_t ret;
 
 	if (!transhuge_vma_suitable(vma, haddr))
 		return VM_FAULT_FALLBACK;
@@ -3408,7 +3409,7 @@ out:
 	return ret;
 }
 #else
-static int do_set_pmd(struct vm_fault *vmf, struct page *page)
+static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 {
 	BUILD_BUG();
 	return 0;
@@ -3429,13 +3430,13 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
  * Target users are page handler itself and implementations of
  * vm_ops->map_pages.
  */
-int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
+vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	pte_t entry;
-	int ret;
+	vm_fault_t ret;
 
 	if (pmd_none(*vmf->pmd) && PageTransCompound(page) &&
 			IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
@@ -3494,10 +3495,10 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
  * The function expects the page to be locked and on success it consumes a
  * reference of a page being mapped (for the PTE which maps it).
  */
-int finish_fault(struct vm_fault *vmf)
+vm_fault_t finish_fault(struct vm_fault *vmf)
 {
 	struct page *page;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	/* Did we COW the page? */
 	if ((vmf->flags & FAULT_FLAG_WRITE) &&
@@ -3583,12 +3584,13 @@ late_initcall(fault_around_debugfs);
  * (and therefore to page order).  This way it's easier to guarantee
  * that we don't cross page table boundaries.
  */
-static int do_fault_around(struct vm_fault *vmf)
+static vm_fault_t do_fault_around(struct vm_fault *vmf)
 {
 	unsigned long address = vmf->address, nr_pages, mask;
 	pgoff_t start_pgoff = vmf->pgoff;
 	pgoff_t end_pgoff;
-	int off, ret = 0;
+	int off;
+	vm_fault_t ret = 0;
 
 	nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
 	mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
@@ -3638,10 +3640,10 @@ out:
 	return ret;
 }
 
-static int do_read_fault(struct vm_fault *vmf)
+static vm_fault_t do_read_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	/*
 	 * Let's call ->map_pages() first and use ->fault() as fallback
@@ -3665,10 +3667,10 @@ static int do_read_fault(struct vm_fault *vmf)
 	return ret;
 }
 
-static int do_cow_fault(struct vm_fault *vmf)
+static vm_fault_t do_cow_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret;
+	vm_fault_t ret;
 
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
@@ -3704,10 +3706,10 @@ uncharge_out:
 	return ret;
 }
 
-static int do_shared_fault(struct vm_fault *vmf)
+static vm_fault_t do_shared_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret, tmp;
+	vm_fault_t ret, tmp;
 
 	ret = __do_fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
@@ -3745,10 +3747,10 @@ static int do_shared_fault(struct vm_fault *vmf)
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
-static int do_fault(struct vm_fault *vmf)
+static vm_fault_t do_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret;
+	vm_fault_t ret;
 
 	/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
 	if (!vma->vm_ops->fault)
@@ -3783,7 +3785,7 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
 	return mpol_misplaced(page, vma, addr);
 }
 
-static int do_numa_page(struct vm_fault *vmf)
+static vm_fault_t do_numa_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL;
@@ -3873,7 +3875,7 @@ out:
 	return 0;
 }
 
-static inline int create_huge_pmd(struct vm_fault *vmf)
+static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
 {
 	if (vma_is_anonymous(vmf->vma))
 		return do_huge_pmd_anonymous_page(vmf);
@@ -3883,7 +3885,7 @@ static inline int create_huge_pmd(struct vm_fault *vmf)
 }
 
 /* `inline' is required to avoid gcc 4.1.2 build error */
-static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
+static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	if (vma_is_anonymous(vmf->vma))
 		return do_huge_pmd_wp_page(vmf, orig_pmd);
@@ -3902,7 +3904,7 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
 	return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
 }
 
-static int create_huge_pud(struct vm_fault *vmf)
+static vm_fault_t create_huge_pud(struct vm_fault *vmf)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* No support for anonymous transparent PUD pages yet */
@@ -3914,7 +3916,7 @@ static int create_huge_pud(struct vm_fault *vmf)
 	return VM_FAULT_FALLBACK;
 }
 
-static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
+static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* No support for anonymous transparent PUD pages yet */
@@ -3941,7 +3943,7 @@ static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
  * The mmap_sem may have been released depending on flags and our return value.
  * See filemap_fault() and __lock_page_or_retry().
  */
-static int handle_pte_fault(struct vm_fault *vmf)
+static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
 	pte_t entry;
 
@@ -4029,8 +4031,8 @@ unlock:
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
-static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags)
+static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
+		unsigned long address, unsigned int flags)
 {
 	struct vm_fault vmf = {
 		.vma = vma,
@@ -4043,7 +4045,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
 	p4d_t *p4d;
-	int ret;
+	vm_fault_t ret;
 
 	pgd = pgd_offset(mm, address);
 	p4d = p4d_alloc(mm, pgd, address);
@@ -4118,10 +4120,10 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
-int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
+vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		unsigned int flags)
 {
-	int ret;
+	vm_fault_t ret;
 
 	__set_current_state(TASK_RUNNING);
 
diff --git a/mm/shmem.c b/mm/shmem.c
index fb04baacc9fa..0376c124b043 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -124,7 +124,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		struct page **pagep, enum sgp_type sgp,
 		gfp_t gfp, struct vm_area_struct *vma,
-		struct vm_fault *vmf, int *fault_type);
+		struct vm_fault *vmf, vm_fault_t *fault_type);
 
 int shmem_getpage(struct inode *inode, pgoff_t index,
 		struct page **pagep, enum sgp_type sgp)
@@ -1620,7 +1620,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
  */
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct page **pagep, enum sgp_type sgp, gfp_t gfp,
-	struct vm_area_struct *vma, struct vm_fault *vmf, int *fault_type)
+	struct vm_area_struct *vma, struct vm_fault *vmf,
+			vm_fault_t *fault_type)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-- 
cgit v1.2.3


From 0c36dd37d5b80b43f4e3dc5a1dbfe6dbd86e8f2a Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Tue, 21 Aug 2018 17:02:40 +0200
Subject: i2c: remove deprecated attach_adapter callback

There aren't any users left. Remove this callback from the 2.4 times.
Phew, finally, that took years to reach...

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 11 +----------
 include/linux/i2c.h         |  6 ------
 2 files changed, 1 insertion(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 5a937109a289..f15737763608 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -62,7 +62,7 @@
 
 /*
  * core_lock protects i2c_adapter_idr, and guarantees that device detection,
- * deletion of detected devices, and attach_adapter calls are serialized
+ * deletion of detected devices are serialized
  */
 static DEFINE_MUTEX(core_lock);
 static DEFINE_IDR(i2c_adapter_idr);
@@ -1124,15 +1124,6 @@ static int i2c_do_add_adapter(struct i2c_driver *driver,
 	/* Detect supported devices on that bus, and instantiate them */
 	i2c_detect(adap, driver);
 
-	/* Let legacy drivers scan this bus for matching devices */
-	if (driver->attach_adapter) {
-		dev_warn(&adap->dev, "%s: attach_adapter method is deprecated\n",
-			 driver->driver.name);
-		dev_warn(&adap->dev,
-			 "Please use another way to instantiate your i2c_client\n");
-		/* We ignore the return code; if it fails, too bad */
-		driver->attach_adapter(adap);
-	}
 	return 0;
 }
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 36f357ecdf67..b79387fd57da 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -231,7 +231,6 @@ enum i2c_alert_protocol {
 /**
  * struct i2c_driver - represent an I2C device driver
  * @class: What kind of i2c device we instantiate (for detect)
- * @attach_adapter: Callback for bus addition (deprecated)
  * @probe: Callback for device binding - soon to be deprecated
  * @probe_new: New callback for device binding
  * @remove: Callback for device unbinding
@@ -268,11 +267,6 @@ enum i2c_alert_protocol {
 struct i2c_driver {
 	unsigned int class;
 
-	/* Notifies the driver that a new bus has appeared. You should avoid
-	 * using this, it will be removed in a near future.
-	 */
-	int (*attach_adapter)(struct i2c_adapter *) __deprecated;
-
 	/* Standard driver model interfaces */
 	int (*probe)(struct i2c_client *, const struct i2c_device_id *);
 	int (*remove)(struct i2c_client *);
-- 
cgit v1.2.3


From 5d3a01a228c703dad971cad11f14c0512c4c2713 Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <peter@korsgaard.com>
Date: Wed, 22 Aug 2018 18:16:07 +0200
Subject: i2c: ocores: update my email address

The old @sunsite.dk address is no longer active, so update the references.

Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/busses/i2c-ocores      | 2 +-
 MAINTAINERS                              | 2 +-
 drivers/i2c/busses/i2c-ocores.c          | 4 ++--
 include/linux/platform_data/i2c-ocores.h | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores
index 4e713f4cdb2f..9caaf7df1b2f 100644
--- a/Documentation/i2c/busses/i2c-ocores
+++ b/Documentation/i2c/busses/i2c-ocores
@@ -4,7 +4,7 @@ Supported adapters:
   * OpenCores.org I2C controller by Richard Herveille (see datasheet link)
     https://opencores.org/project/i2c/overview
 
-Author: Peter Korsgaard <jacmet@sunsite.dk>
+Author: Peter Korsgaard <peter@korsgaard.com>
 
 Description
 -----------
diff --git a/MAINTAINERS b/MAINTAINERS
index 8cd5c1bd6441..61abdf319d20 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10787,7 +10787,7 @@ F:	arch/*/boot/dts/
 F:	include/dt-bindings/
 
 OPENCORES I2C BUS DRIVER
-M:	Peter Korsgaard <jacmet@sunsite.dk>
+M:	Peter Korsgaard <peter@korsgaard.com>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	Documentation/i2c/busses/i2c-ocores
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 88444ef74943..87f9caacba85 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -2,7 +2,7 @@
  * i2c-ocores.c: I2C bus driver for OpenCores I2C controller
  * (https://opencores.org/project/i2c/overview)
  *
- * Peter Korsgaard <jacmet@sunsite.dk>
+ * Peter Korsgaard <peter@korsgaard.com>
  *
  * Support for the GRLIB port of the controller by
  * Andreas Larsson <andreas@gaisler.com>
@@ -576,7 +576,7 @@ static struct platform_driver ocores_i2c_driver = {
 
 module_platform_driver(ocores_i2c_driver);
 
-MODULE_AUTHOR("Peter Korsgaard <jacmet@sunsite.dk>");
+MODULE_AUTHOR("Peter Korsgaard <peter@korsgaard.com>");
 MODULE_DESCRIPTION("OpenCores I2C bus driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:ocores-i2c");
diff --git a/include/linux/platform_data/i2c-ocores.h b/include/linux/platform_data/i2c-ocores.h
index 01edd96fe1f7..113d6b12f650 100644
--- a/include/linux/platform_data/i2c-ocores.h
+++ b/include/linux/platform_data/i2c-ocores.h
@@ -1,7 +1,7 @@
 /*
  * i2c-ocores.h - definitions for the i2c-ocores interface
  *
- * Peter Korsgaard <jacmet@sunsite.dk>
+ * Peter Korsgaard <peter@korsgaard.com>
  *
  * This file is licensed under the terms of the GNU General Public License
  * version 2.  This program is licensed "as is" without any warranty of any
-- 
cgit v1.2.3